From 6e4826c4f46b35c7de2875b1e8b8154287494479 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 12 Jul 2018 00:44:28 -0500 Subject: [PATCH 0001/1459] initial commit --- libm/.gitignore | 5 + libm/Cargo.toml | 7 + libm/LICENSE-APACHE | 201 ++++++++++++++++++++ libm/LICENSE-MIT | 25 +++ libm/README.md | 111 +++++++++++ libm/ci/script.sh | 12 ++ libm/src/fabsf.rs | 3 + libm/src/lib.rs | 12 ++ libm/src/powf.rs | 326 ++++++++++++++++++++++++++++++++ libm/src/scalbnf.rs | 34 ++++ libm/src/sqrtf.rs | 83 ++++++++ libm/test-generator/Cargo.toml | 8 + libm/test-generator/README.md | 8 + libm/test-generator/src/main.rs | 234 +++++++++++++++++++++++ 14 files changed, 1069 insertions(+) create mode 100644 libm/.gitignore create mode 100644 libm/Cargo.toml create mode 100644 libm/LICENSE-APACHE create mode 100644 libm/LICENSE-MIT create mode 100644 libm/README.md create mode 100644 libm/ci/script.sh create mode 100644 libm/src/fabsf.rs create mode 100644 libm/src/lib.rs create mode 100644 libm/src/powf.rs create mode 100644 libm/src/scalbnf.rs create mode 100644 libm/src/sqrtf.rs create mode 100644 libm/test-generator/Cargo.toml create mode 100644 libm/test-generator/README.md create mode 100644 libm/test-generator/src/main.rs diff --git a/libm/.gitignore b/libm/.gitignore new file mode 100644 index 000000000..6db0ab6ef --- /dev/null +++ b/libm/.gitignore @@ -0,0 +1,5 @@ +**/*.rs.bk +.#* +/target +/tests +Cargo.lock diff --git a/libm/Cargo.toml b/libm/Cargo.toml new file mode 100644 index 000000000..96bc290fb --- /dev/null +++ b/libm/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "libm" +version = "0.1.0" +authors = ["Jorge Aparicio "] + +[workspace] +members = ["test-generator"] \ No newline at end of file diff --git a/libm/LICENSE-APACHE b/libm/LICENSE-APACHE new file mode 100644 index 000000000..16fe87b06 --- /dev/null +++ b/libm/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR 
USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/libm/LICENSE-MIT b/libm/LICENSE-MIT new file mode 100644 index 000000000..432fbea04 --- /dev/null +++ b/libm/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2018 Jorge Aparicio + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/libm/README.md b/libm/README.md new file mode 100644 index 000000000..fe384e118 --- /dev/null +++ b/libm/README.md @@ -0,0 +1,111 @@ +# `libm` + +A port of [MUSL]'s libm to Rust. + +[MUSL]: https://www.musl-libc.org/ + +## Testing + +The test suite of this crate can only be run on x86_64 Linux systems. + +``` +$ # The test suite depends on the `cross` tool so install it if you don't have it +$ cargo install cross + +$ # and the `cross` tool requires docker to be running +$ systemctl start docker + +$ # execute the test suite for the x86_64 target +$ TARGET=x86_64-unknown-linux-gnu bash ci/script.sh + +$ # execute the test suite for the ARMv7 target +$ TARGET=armv7-unknown-linux-gnueabihf bash ci/script.sh +``` + +## Contributing + +- Pick your favorite math function from the list below. +- Look for the C implementation of the function in the [MUSL source code][src]. +- Copy paste the C code into a Rust file in the `src` directory and adjust `src/lib.rs` accordingly. +- Run `cargo watch check` and fix the compiler errors. +- If you can, run the test suite locally. If you can't, no problem! Your PR will be tested + automatically. +- Send us a pull request! +- :tada: + +### Notes + +- To reinterpret a float as an integer use the `to_bits` method. The MUSL code uses the + `GET_FLOAT_WORD` macro, or a union, to do this operation. + +- To reinterpret an integer as a float use the `f32::from_bits` constructor. The MUSL code uses the + `SET_FLOAT_WORD` macro, or a union, to do this operation. 
+ +- Rust code panics on arithmetic overflows when not optimized. You may need to use the [`Wrapping`] + newtype to avoid this problem. + +[src]: https://git.musl-libc.org/cgit/musl/tree/src/math +[`Wrapping`]: https://doc.rust-lang.org/std/num/struct.Wrapping.html + +## Progress + +### Functions wanted by the wasm WG + +cf. [rustwasm/team#84](https://github.com/rustwasm/team/issues/84). + +- [ ] acos +- [ ] asin +- [ ] atan +- [ ] atan2 +- [ ] cbrt +- [ ] cos +- [ ] cosf +- [ ] cosh +- [ ] exp +- [ ] exp2 +- [ ] exp2f +- [ ] expf +- [ ] expm1 +- [ ] fma +- [ ] fmaf +- [ ] fmod +- [ ] fmodf +- [ ] hypot +- [ ] log +- [ ] log10 +- [ ] log10f +- [ ] log1p +- [ ] log2 +- [ ] log2f +- [ ] logf +- [ ] pow +- [x] powf +- [ ] round +- [ ] roundf +- [ ] sin +- [ ] sinf +- [ ] sinh +- [ ] tan +- [ ] tanh + +### Other functions + +- [x] fabsf +- [x] scalbnf +- [x] sqrtf + +## License + +Licensed under either of + +- Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) +- MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) + +at your option. + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the +work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any +additional terms or conditions. 
diff --git a/libm/ci/script.sh b/libm/ci/script.sh new file mode 100644 index 000000000..64550573c --- /dev/null +++ b/libm/ci/script.sh @@ -0,0 +1,12 @@ +set -euxo pipefail + +main() { + cargo run --package test-generator --target x86_64-unknown-linux-musl + if hash cargo-fmt; then + # nicer syntax error messages (if any) + cargo fmt + fi + cross test --target $TARGET --release +} + +main diff --git a/libm/src/fabsf.rs b/libm/src/fabsf.rs new file mode 100644 index 000000000..be60d06cc --- /dev/null +++ b/libm/src/fabsf.rs @@ -0,0 +1,3 @@ +pub fn fabsf(x: f32) -> f32 { + f32::from_bits(x.to_bits() & 0x7fffffff) +} diff --git a/libm/src/lib.rs b/libm/src/lib.rs new file mode 100644 index 000000000..3d4062f3d --- /dev/null +++ b/libm/src/lib.rs @@ -0,0 +1,12 @@ +#![deny(warnings)] +#![no_std] + +mod fabsf; +mod powf; +mod scalbnf; +mod sqrtf; + +pub use fabsf::fabsf; +pub use powf::powf; +pub use scalbnf::scalbnf; +pub use sqrtf::sqrtf; diff --git a/libm/src/powf.rs b/libm/src/powf.rs new file mode 100644 index 000000000..770987c2a --- /dev/null +++ b/libm/src/powf.rs @@ -0,0 +1,326 @@ +use {scalbnf, sqrtf}; + +const BP: [f32; 2] = [1.0, 1.5]; +const DP_H: [f32; 2] = [0.0, 5.84960938e-01]; /* 0x3f15c000 */ +const DP_L: [f32; 2] = [0.0, 1.56322085e-06]; /* 0x35d1cfdc */ +const TWO24: f32 = 16777216.0; /* 0x4b800000 */ +const HUGE: f32 = 1.0e30; +const TINY: f32 = 1.0e-30; +const L1: f32 = 6.0000002384e-01; /* 0x3f19999a */ +const L2: f32 = 4.2857143283e-01; /* 0x3edb6db7 */ +const L3: f32 = 3.3333334327e-01; /* 0x3eaaaaab */ +const L4: f32 = 2.7272811532e-01; /* 0x3e8ba305 */ +const L5: f32 = 2.3066075146e-01; /* 0x3e6c3255 */ +const L6: f32 = 2.0697501302e-01; /* 0x3e53f142 */ +const P1: f32 = 1.6666667163e-01; /* 0x3e2aaaab */ +const P2: f32 = -2.7777778450e-03; /* 0xbb360b61 */ +const P3: f32 = 6.6137559770e-05; /* 0x388ab355 */ +const P4: f32 = -1.6533901999e-06; /* 0xb5ddea0e */ +const P5: f32 = 4.1381369442e-08; /* 0x3331bb4c */ +const LG2: f32 = 
6.9314718246e-01; /* 0x3f317218 */ +const LG2_H: f32 = 6.93145752e-01; /* 0x3f317200 */ +const LG2_L: f32 = 1.42860654e-06; /* 0x35bfbe8c */ +const OVT: f32 = 4.2995665694e-08; /* -(128-log2(ovfl+.5ulp)) */ +const CP: f32 = 9.6179670095e-01; /* 0x3f76384f =2/(3ln2) */ +const CP_H: f32 = 9.6191406250e-01; /* 0x3f764000 =12b cp */ +const CP_L: f32 = -1.1736857402e-04; /* 0xb8f623c6 =tail of cp_h */ +const IVLN2: f32 = 1.4426950216e+00; +const IVLN2_H: f32 = 1.4426879883e+00; +const IVLN2_L: f32 = 7.0526075433e-06; + +pub fn powf(x: f32, y: f32) -> f32 { + let mut z: f32; + let mut ax: f32; + let z_h: f32; + let z_l: f32; + let mut p_h: f32; + let mut p_l: f32; + let y1: f32; + let mut t1: f32; + let t2: f32; + let mut r: f32; + let s: f32; + let mut sn: f32; + let mut t: f32; + let mut u: f32; + let mut v: f32; + let mut w: f32; + let i: i32; + let mut j: i32; + let mut k: i32; + let mut yisint: i32; + let mut n: i32; + let hx: i32; + let hy: i32; + let mut ix: i32; + let iy: i32; + let mut is: i32; + + hx = x.to_bits() as i32; + hy = y.to_bits() as i32; + + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + + /* x**0 = 1, even if x is NaN */ + if iy == 0 { + return 1.0; + } + + /* 1**y = 1, even if y is NaN */ + if hx == 0x3f800000 { + return 1.0; + } + + /* NaN if either arg is NaN */ + if ix > 0x7f800000 || iy > 0x7f800000 { + return x + y; + } + + /* determine if y is an odd int when x < 0 + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ... 
y is an even int + */ + yisint = 0; + if hx < 0 { + if iy >= 0x4b800000 { + yisint = 2; /* even integer y */ + } else if iy >= 0x3f800000 { + k = (iy >> 23) - 0x7f; /* exponent */ + j = iy >> (23 - k); + if (j << (23 - k)) == iy { + yisint = 2 - (j & 1); + } + } + } + + /* special value of y */ + if iy == 0x7f800000 { + /* y is +-inf */ + if ix == 0x3f800000 { + /* (-1)**+-inf is 1 */ + return 1.0; + } else if ix > 0x3f800000 { + /* (|x|>1)**+-inf = inf,0 */ + return if hy >= 0 { y } else { 0.0 }; + } else { + /* (|x|<1)**+-inf = 0,inf */ + return if hy >= 0 { 0.0 } else { -y }; + } + } + if iy == 0x3f800000 { + /* y is +-1 */ + return if hy >= 0 { x } else { 1.0 / x }; + } + + if hy == 0x40000000 { + /* y is 2 */ + return x * x; + } + + if hy == 0x3f000000 { + /* y is 0.5 */ + if hx >= 0 { + /* x >= +0 */ + return sqrtf(x); + } + } + + ax = ::fabsf(x); + /* special value of x */ + if ix == 0x7f800000 || ix == 0 || ix == 0x3f800000 { + /* x is +-0,+-inf,+-1 */ + z = ax; + if hy < 0 { + /* z = (1/|x|) */ + z = 1.0 / z; + } + + if hx < 0 { + if ((ix - 0x3f800000) | yisint) == 0 { + z = (z - z) / (z - z); /* (-1)**non-int is NaN */ + } else if yisint == 1 { + z = -z; /* (x<0)**odd = -(|x|**odd) */ + } + } + return z; + } + + sn = 1.0; /* sign of result */ + if hx < 0 { + if yisint == 0 { + /* (x<0)**(non-int) is NaN */ + return (x - x) / (x - x); + } + + if yisint == 1 { + /* (x<0)**(odd int) */ + sn = -1.0; + } + } + + /* |y| is HUGE */ + if iy > 0x4d000000 { + /* if |y| > 2**27 */ + /* over/underflow if x is not close to one */ + if ix < 0x3f7ffff8 { + return if hy < 0 { + sn * HUGE * HUGE + } else { + sn * TINY * TINY + }; + } + + if ix > 0x3f800007 { + return if hy > 0 { + sn * HUGE * HUGE + } else { + sn * TINY * TINY + }; + } + + /* now |1-x| is TINY <= 2**-20, suffice to compute + log(x) by x-x^2/2+x^3/3-x^4/4 */ + t = ax - 1.; /* t has 20 trailing zeros */ + w = (t * t) * (0.5 - t * (0.333333333333 - t * 0.25)); + u = IVLN2_H * t; /* IVLN2_H has 16 sig. 
bits */ + v = t * IVLN2_L - w * IVLN2; + t1 = u + v; + is = t1.to_bits() as i32; + t1 = f32::from_bits(is as u32 & 0xfffff000); + t2 = v - (t1 - u); + } else { + let mut s2: f32; + let mut s_h: f32; + let s_l: f32; + let mut t_h: f32; + let mut t_l: f32; + + n = 0; + /* take care subnormal number */ + if ix < 0x00800000 { + ax *= TWO24; + n -= 24; + ix = ax.to_bits() as i32; + } + n += ((ix) >> 23) - 0x7f; + j = ix & 0x007fffff; + /* determine interval */ + ix = j | 0x3f800000; /* normalize ix */ + if j <= 0x1cc471 { + /* |x|> 1) & 0xfffff000) | 0x20000000) as i32; + t_h = f32::from_bits(is as u32 + 0x00400000 + ((k as u32) << 21)); + t_l = ax - (t_h - BP[k as usize]); + s_l = v * ((u - s_h * t_h) - s_h * t_l); + /* compute log(ax) */ + s2 = s * s; + r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6))))); + r += s_l * (s_h + s); + s2 = s_h * s_h; + t_h = 3.0 + s2 + r; + is = t_h.to_bits() as i32; + t_h = f32::from_bits(is as u32 & 0xfffff000); + t_l = r - ((t_h - 3.0) - s2); + /* u+v = s*(1+...) */ + u = s_h * t_h; + v = s_l * t_h + t_l * s; + /* 2/(3log2)*(s+...) 
*/ + p_h = u + v; + is = p_h.to_bits() as i32; + p_h = f32::from_bits(is as u32 & 0xfffff000); + p_l = v - (p_h - u); + z_h = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */ + z_l = CP_L * p_h + p_l * CP + DP_L[k as usize]; + /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */ + t = n as f32; + t1 = ((z_h + z_l) + DP_H[k as usize]) + t; + is = t1.to_bits() as i32; + t1 = f32::from_bits(is as u32 & 0xfffff000); + t2 = z_l - (((t1 - t) - DP_H[k as usize]) - z_h); + }; + + /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ + is = y.to_bits() as i32; + y1 = f32::from_bits(is as u32 & 0xfffff000); + p_l = (y - y1) * t1 + y * t2; + p_h = y1 * t1; + z = p_l + p_h; + j = z.to_bits() as i32; + if j > 0x43000000 { + /* if z > 128 */ + return sn * HUGE * HUGE; /* overflow */ + } else if j == 0x43000000 { + /* if z == 128 */ + if p_l + OVT > z - p_h { + return sn * HUGE * HUGE; /* overflow */ + } + } else if (j & 0x7fffffff) > 0x43160000 { + /* z < -150 */ + // FIXME: check should be (uint32_t)j > 0xc3160000 + return sn * TINY * TINY; /* underflow */ + } else if j as u32 == 0xc3160000 { + /* z == -150 */ + if p_l <= z - p_h { + return sn * TINY * TINY; /* underflow */ + } + } + + /* + * compute 2**(p_h+p_l) + */ + i = j & 0x7fffffff; + k = (i >> 23) - 0x7f; + n = 0; + if i > 0x3f000000 { + /* if |z| > 0.5, set n = [z+0.5] */ + n = j + (0x00800000 >> (k + 1)); + k = ((n & 0x7fffffff) >> 23) - 0x7f; /* new k for n */ + t = f32::from_bits(n as u32 & !(0x007fffff >> k)); + n = ((n & 0x007fffff) | 0x00800000) >> (23 - k); + if j < 0 { + n = -n; + } + p_h -= t; + } + t = p_l + p_h; + is = t.to_bits() as i32; + t = f32::from_bits(is as u32 & 0xffff8000); + u = t * LG2_H; + v = (p_l - (t - p_h)) * LG2 + t * LG2_L; + z = u + v; + w = v - (z - u); + t = z * z; + t1 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); + r = (z * t1) / (t1 - 2.0) - (w + z * w); + z = 1.0 - (r - z); + j = z.to_bits() as i32; + j += n << 23; + if (j >> 23) <= 0 { + /* subnormal output */ + z = 
/// Scales `x` by an integral power of two: returns `x * 2^n`.
///
/// The exponent is applied in up to three multiplications so that the last
/// factor, `2^n`, is always a normal `f32`:
///
/// * for `n > 127`, `x` is pre-multiplied by `2^127` at most twice and the
///   remaining exponent is clamped to `127`;
/// * for `n < -126`, `x` is pre-multiplied by `2^-126 * 2^24` (that is,
///   `2^-102`) at most twice and the remaining exponent is clamped to `-126`.
///
/// Results too large for `f32` become infinite and results too small become
/// zero, as ordinary floating-point multiplication dictates.
pub fn scalbnf(x: f32, mut n: i32) -> f32 {
    let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127
    let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126
    let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24

    let mut y: f32 = x;

    if n > 127 {
        y *= x1p127;
        n -= 127;
        if n > 127 {
            y *= x1p127;
            n -= 127;
            if n > 127 {
                n = 127;
            }
        }
    } else if n < -126 {
        // Two separate multiplications, exactly as in the original port.
        y *= x1p_126;
        y *= x1p24;
        n += 126 - 24;
        if n < -126 {
            y *= x1p_126;
            y *= x1p24;
            n += 126 - 24;
            if n < -126 {
                n = -126;
            }
        }
    }

    // Every path above leaves `n` in [-126, 127], so the biased exponent
    // `0x7f + n` lies in [1, 254]. The addition must happen in `i32` *before*
    // the cast: `0x7f + n as u32` parses as `0x7f + (n as u32)`, which
    // overflows (and panics in unoptimized builds) for every negative `n`.
    let biased_exponent = (0x7f + n) as u32;
    y * f32::from_bits(biased_exponent << 23)
}
bit */ + ix += ix; + q = 0; + s = 0; + r = 0x01000000; /* r = moving bit from right to left */ + + while r != 0 { + t = s + r as i32; + if t <= ix { + s = t + r as i32; + ix -= t; + q += r as i32; + } + ix += ix; + r >>= 1; + } + + /* use floating add to find out rounding direction */ + if ix != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if z > 1.0 { + q += 2; + } else { + q += q & 1; + } + } + } + + ix = (q >> 1) + 0x3f000000; + ix += m << 23; + f32::from_bits(ix as u32) +} diff --git a/libm/test-generator/Cargo.toml b/libm/test-generator/Cargo.toml new file mode 100644 index 000000000..f45d173b4 --- /dev/null +++ b/libm/test-generator/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "test-generator" +version = "0.1.0" +authors = ["Jorge Aparicio "] +publish = false + +[dependencies] +rand = "0.5.3" diff --git a/libm/test-generator/README.md b/libm/test-generator/README.md new file mode 100644 index 000000000..cbacd88f1 --- /dev/null +++ b/libm/test-generator/README.md @@ -0,0 +1,8 @@ +# `test-generator` + +This is a tool to generate test cases for the `libm` crate. + +The generator randomly creates inputs for each math function, then proceeds to compute the +expected output for the given function by running the MUSL *C implementation* of the function and +finally it packs the test cases as a Cargo test file. For this reason, this generator **must** +always be compiled for the `x86_64-unknown-linux-musl` target. diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs new file mode 100644 index 000000000..7ee956f73 --- /dev/null +++ b/libm/test-generator/src/main.rs @@ -0,0 +1,234 @@ +// NOTE we intentionally avoid using the `quote` crate here because it doesn't work with the +// `x86_64-unknown-linux-musl` target. + +// NOTE usually the only thing you need to do to test a new math function is to add it to one of the +// macro invocations found in the bottom of this file. 
+ +extern crate rand; + +use std::error::Error; +use std::fmt::Write as _0; +use std::fs::{self, File}; +use std::io::Write as _1; +use std::{i16, u32, u8}; + +use rand::{Rng, SeedableRng, XorShiftRng}; + +// Number of test cases to generate +const NTESTS: usize = 10_000; + +// TODO tweak this function to generate edge cases (zero, infinity, NaN) more often +fn f32(rng: &mut XorShiftRng) -> f32 { + let sign = if rng.gen_bool(0.5) { 1 << 31 } else { 0 }; + let exponent = (rng.gen_range(0, u8::MAX) as u32) << 23; + let mantissa = rng.gen_range(0, u32::MAX) & ((1 << 23) - 1); + + f32::from_bits(sign + exponent + mantissa) +} + +// fn(f32) -> f32 +macro_rules! f32_f32 { + ($($intr:ident,)+) => { + fn f32_f32(rng: &mut XorShiftRng) -> Result<(), Box> { + // MUSL C implementation of the function to test + extern "C" { + $(fn $intr(_: f32) -> f32;)+ + } + + $( + let mut cases = String::new(); + for _ in 0..NTESTS { + let inp = f32(rng); + let out = unsafe { $intr(inp) }; + + let inp = inp.to_bits(); + let out = out.to_bits(); + + write!(cases, "({}, {})", inp, out).unwrap(); + cases.push(','); + } + + let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; + write!(f, " + extern crate libm; + + #[test] + fn {0}() {{ + const CASES: &[(u32, u32)] = &[ + {1} + ]; + + for case in CASES {{ + let (inp, expected) = *case; + + let outf = libm::{0}(f32::from_bits(inp)); + let outi = outf.to_bits(); + + if !((outf.is_nan() && f32::from_bits(expected).is_nan()) || + outi == expected) {{ + panic!( + \"input: {{}}, output: {{}}, expected: {{}}\", + inp, + outi, + expected, + ); + }} + }} + }} +", + stringify!($intr), + cases)?; + )+ + + Ok(()) + } + } +} + +macro_rules! 
f32f32_f32 { + ($($intr:ident,)+) => { + fn f32f32_f32(rng: &mut XorShiftRng) -> Result<(), Box> { + extern "C" { + $(fn $intr(_: f32, _: f32) -> f32;)+ + } + + $( + let mut cases = String::new(); + for _ in 0..NTESTS { + let i1 = f32(rng); + let i2 = f32(rng); + let out = unsafe { $intr(i1, i2) }; + + let i1 = i1.to_bits(); + let i2 = i2.to_bits(); + let out = out.to_bits(); + + write!(cases, "(({}, {}), {})", i1, i2, out).unwrap(); + cases.push(','); + } + + let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; + write!(f, " + extern crate libm; + + #[test] + fn {0}() {{ + const CASES: &[((u32, u32), u32)] = &[ + {1} + ]; + + for case in CASES {{ + let ((i1, i2), expected) = *case; + + let outf = libm::{0}(f32::from_bits(i1), f32::from_bits(i2)); + let outi = outf.to_bits(); + + if !((outf.is_nan() && f32::from_bits(expected).is_nan()) || + outi == expected) {{ + panic!( + \"input: {{:?}}, output: {{}}, expected: {{}}\", + (i1, i2), + outi, + expected, + ); + }} + }} + }} +", + stringify!($intr), + cases)?; + )+ + + Ok(()) + } + }; +} + +macro_rules! 
f32i32_f32 { + ($($intr:ident,)+) => { + fn f32i32_f32(rng: &mut XorShiftRng) -> Result<(), Box> { + extern "C" { + $(fn $intr(_: f32, _: i32) -> f32;)+ + } + + $( + let mut cases = String::new(); + for _ in 0..NTESTS { + let i1 = f32(rng); + let i2 = rng.gen_range(i16::MIN, i16::MAX); + let out = unsafe { $intr(i1, i2 as i32) }; + + let i1 = i1.to_bits(); + let out = out.to_bits(); + + write!(cases, "(({}, {}), {})", i1, i2, out).unwrap(); + cases.push(','); + } + + let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; + write!(f, " + extern crate libm; + + #[test] + fn {0}() {{ + const CASES: &[((u32, i16), u32)] = &[ + {1} + ]; + + for case in CASES {{ + let ((i1, i2), expected) = *case; + + let outf = libm::{0}(f32::from_bits(i1), i2 as i32); + let outi = outf.to_bits(); + + if !((outf.is_nan() && f32::from_bits(expected).is_nan()) || + outi == expected) {{ + panic!( + \"input: {{:?}}, output: {{}}, expected: {{}}\", + (i1, i2), + outi, + expected, + ); + }} + }} + }} +", + stringify!($intr), + cases)?; + )+ + + Ok(()) + } + }; +} + +fn main() -> Result<(), Box> { + fs::remove_dir_all("tests").ok(); + fs::create_dir("tests")?; + + let mut rng = XorShiftRng::from_rng(&mut rand::thread_rng())?; + + f32_f32(&mut rng)?; + f32f32_f32(&mut rng)?; + f32i32_f32(&mut rng)?; + + Ok(()) +} + +/* Functions to test */ + +// With signature `fn(f32) -> f32` +f32_f32! { + fabsf, + sqrtf, +} + +// With signature `fn(f32, f32) -> f32` +f32f32_f32! { + powf, +} + +// With signature `fn(f32, i32) -> f32` +f32i32_f32! 
{ + scalbnf, +} From 7a456394ea277a4a3607457f6b7ad70ebd472a2f Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 12 Jul 2018 00:53:44 -0500 Subject: [PATCH 0002/1459] add CI --- libm/.travis.yml | 32 ++++++++++++++++++++++++++++++++ libm/ci/install.sh | 15 +++++++++++++++ libm/ci/script.sh | 2 +- 3 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 libm/.travis.yml create mode 100644 libm/ci/install.sh diff --git a/libm/.travis.yml b/libm/.travis.yml new file mode 100644 index 000000000..4d8ccc784 --- /dev/null +++ b/libm/.travis.yml @@ -0,0 +1,32 @@ +language: rust +services: docker +sudo: required + +matrix: + include: + - env: TARGET=aarch64-unknown-linux-gnu + - env: TARGET=armv7-unknown-linux-gnueabihf + - env: TARGET=i686-unknown-linux-gnu + - env: TARGET=mips-unknown-linux-gnu + - env: TARGET=mips64-unknown-linux-gnuabi64 + - env: TARGET=mips64el-unknown-linux-gnuabi64 + - env: TARGET=mipsel-unknown-linux-gnu + - env: TARGET=powerpc-unknown-linux-gnu + - env: TARGET=powerpc64-unknown-linux-gnu + - env: TARGET=powerpc64le-unknown-linux-gnu + - env: TARGET=x86_64-unknown-linux-gnu + +before_install: set -e + +install: + - bash ci/install.sh + +script: + - bash ci/script.sh + +after_script: set +e + +cache: cargo + +before_cache: + - chmod -R a+r $HOME/.cargo; diff --git a/libm/ci/install.sh b/libm/ci/install.sh new file mode 100644 index 000000000..efdbb06c2 --- /dev/null +++ b/libm/ci/install.sh @@ -0,0 +1,15 @@ +set -euxo pipefail + +main() { + if ! 
hash cross >/dev/null 2>&1; then + cargo install cross + fi + + rustup target add x86_64-unknown-linux-musl + + if [ $TARGET != x86_64-unknown-linux-gnu ]; then + rustup target add $TARGET + fi +} + +main diff --git a/libm/ci/script.sh b/libm/ci/script.sh index 64550573c..caba616ed 100644 --- a/libm/ci/script.sh +++ b/libm/ci/script.sh @@ -2,7 +2,7 @@ set -euxo pipefail main() { cargo run --package test-generator --target x86_64-unknown-linux-musl - if hash cargo-fmt; then + if cargo fmt --version >/dev/null 2>&1; then # nicer syntax error messages (if any) cargo fmt fi From 724d4b26055af70d9ba70b802ffb85cbb14316df Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 12 Jul 2018 12:20:55 -0500 Subject: [PATCH 0003/1459] use approximate equality with 1 ULP of tolerance --- libm/src/lib.rs | 6 ++++++ libm/test-generator/src/main.rs | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 3d4062f3d..cb2110f1f 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -10,3 +10,9 @@ pub use fabsf::fabsf; pub use powf::powf; pub use scalbnf::scalbnf; pub use sqrtf::sqrtf; + +/// Approximate equality with 1 ULP of tolerance +#[doc(hidden)] +pub fn _eqf(a: u32, b: u32) -> bool { + (a as i32).wrapping_sub(b as i32).abs() <= 1 +} diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 7ee956f73..c406cf340 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -65,7 +65,7 @@ macro_rules! f32_f32 { let outi = outf.to_bits(); if !((outf.is_nan() && f32::from_bits(expected).is_nan()) || - outi == expected) {{ + libm::_eqf(outi, expected)) {{ panic!( \"input: {{}}, output: {{}}, expected: {{}}\", inp, @@ -124,7 +124,7 @@ macro_rules! 
f32f32_f32 { let outi = outf.to_bits(); if !((outf.is_nan() && f32::from_bits(expected).is_nan()) || - outi == expected) {{ + libm::_eqf(outi, expected)) {{ panic!( \"input: {{:?}}, output: {{}}, expected: {{}}\", (i1, i2), @@ -182,7 +182,7 @@ macro_rules! f32i32_f32 { let outi = outf.to_bits(); if !((outf.is_nan() && f32::from_bits(expected).is_nan()) || - outi == expected) {{ + libm::_eqf(outi, expected)) {{ panic!( \"input: {{:?}}, output: {{}}, expected: {{}}\", (i1, i2), From fc5fe1ff29cf84e0201561b1b89a370cea566d73 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 12 Jul 2018 03:07:28 -0500 Subject: [PATCH 0004/1459] implement fmodf --- libm/src/fmodf.rs | 89 +++++++++++++++++++++++++++++++++ libm/src/lib.rs | 6 +++ libm/test-generator/src/main.rs | 1 + 3 files changed, 96 insertions(+) create mode 100644 libm/src/fmodf.rs diff --git a/libm/src/fmodf.rs b/libm/src/fmodf.rs new file mode 100644 index 000000000..a184411a1 --- /dev/null +++ b/libm/src/fmodf.rs @@ -0,0 +1,89 @@ +use core::u32; + +use isnanf; + +pub fn fmodf(x: f32, y: f32) -> f32 { + let mut uxi = x.to_bits(); + let mut uyi = y.to_bits(); + let mut ex = (uxi >> 23 & 0xff) as i32; + let mut ey = (uyi >> 23 & 0xff) as i32; + let sx = uxi & 0x80000000; + let mut i; + + if uyi << 1 == 0 || isnanf(y) || ex == 0xff { + return (x * y) / (x * y); + } + + if uxi << 1 <= uyi << 1 { + if uxi << 1 == uyi << 1 { + return 0.0 * x; + } + + return x; + } + + /* normalize x and y */ + if ex == 0 { + i = uxi << 9; + while i >> 31 == 0 { + ex -= 1; + i <<= 1; + } + + uxi <<= -ex + 1; + } else { + uxi &= u32::MAX >> 9; + uxi |= 1 << 23; + } + + if ey == 0 { + i = uyi << 9; + while i >> 31 == 0 { + ey -= 1; + i <<= 1; + } + + uyi <<= -ey + 1; + } else { + uyi &= u32::MAX >> 9; + uyi |= 1 << 23; + } + + /* x mod y */ + while ex > ey { + i = uxi - uyi; + if i >> 31 == 0 { + if i == 0 { + return 0.0 * x; + } + uxi = i; + } + uxi <<= 1; + + ex -= 1; + } + + i = uxi - uyi; + if i >> 31 == 0 { + if i == 0 { + return 
0.0 * x; + } + uxi = i; + } + + while uxi >> 23 == 0 { + uxi <<= 1; + ex -= 1; + } + + /* scale result up */ + if ex > 0 { + uxi -= 1 << 23; + uxi |= (ex as u32) << 23; + } else { + uxi >>= -ex + 1; + } + uxi |= sx; + + f32::from_bits(uxi) +} diff --git a/libm/src/lib.rs b/libm/src/lib.rs index cb2110f1f..41c20b2da 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -2,11 +2,13 @@ #![no_std] mod fabsf; +mod fmodf; mod powf; mod scalbnf; mod sqrtf; pub use fabsf::fabsf; +pub use fmodf::fmodf; pub use powf::powf; pub use scalbnf::scalbnf; pub use sqrtf::sqrtf; @@ -16,3 +18,7 @@ pub use sqrtf::sqrtf; pub fn _eqf(a: u32, b: u32) -> bool { (a as i32).wrapping_sub(b as i32).abs() <= 1 } + +fn isnanf(x: f32) -> bool { + x.to_bits() & 0x7fffffff > 0x7f800000 +} diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index c406cf340..3e58f7e1a 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -225,6 +225,7 @@ f32_f32! { // With signature `fn(f32, f32) -> f32` f32f32_f32! { + fmodf, powf, } From c17fb2dad95cdd5673b1e3afc87646e67bbd9001 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 12 Jul 2018 12:43:25 -0500 Subject: [PATCH 0005/1459] update the README --- libm/README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libm/README.md b/libm/README.md index fe384e118..ba8212346 100644 --- a/libm/README.md +++ b/libm/README.md @@ -28,11 +28,18 @@ $ TARGET=armv7-unknown-linux-gnueabihf bash ci/script.sh - Look for the C implementation of the function in the [MUSL source code][src]. - Copy paste the C code into a Rust file in the `src` directory and adjust `src/lib.rs` accordingly. - Run `cargo watch check` and fix the compiler errors. +- Tweak the bottom of `test-generator/src/main.rs` to add your function to the test suite. - If you can, run the test suite locally. If you can't, no problem! Your PR will be tested automatically. - Send us a pull request! 
- :tada: +[src]: https://git.musl-libc.org/cgit/musl/tree/src/math + +Check [PR #2] for an example. + +[PR #2]: https://github.com/japaric/libm/pull/2 + ### Notes - To reinterpret a float as an integer use the `to_bits` method. The MUSL code uses the @@ -44,7 +51,6 @@ $ TARGET=armv7-unknown-linux-gnueabihf bash ci/script.sh - Rust code panics on arithmetic overflows when not optimized. You may need to use the [`Wrapping`] newtype to avoid this problem. -[src]: https://git.musl-libc.org/cgit/musl/tree/src/math [`Wrapping`]: https://doc.rust-lang.org/std/num/struct.Wrapping.html ## Progress From dfd8c9c399e9ba59224ef62200aeaae8b5b62170 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 12 Jul 2018 13:23:40 -0500 Subject: [PATCH 0006/1459] fmodf is done --- libm/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/README.md b/libm/README.md index ba8212346..17c4ee94d 100644 --- a/libm/README.md +++ b/libm/README.md @@ -75,7 +75,7 @@ cf. [rustwasm/team#84](https://github.com/rustwasm/team/issues/84). - [ ] fma - [ ] fmaf - [ ] fmod -- [ ] fmodf +- [x] fmodf - [ ] hypot - [ ] log - [ ] log10 From 14dca0417db884bd14522c3faec8ef0dbb255900 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 12 Jul 2018 13:34:19 -0500 Subject: [PATCH 0007/1459] add comment about issue rust-lang/libm#4 --- libm/ci/script.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libm/ci/script.sh b/libm/ci/script.sh index caba616ed..ee2e458cc 100644 --- a/libm/ci/script.sh +++ b/libm/ci/script.sh @@ -7,6 +7,9 @@ main() { cargo fmt fi cross test --target $TARGET --release + + # TODO need to fix overflow issues (cf. 
issue #4) + # cross test --target $TARGET } main From 4939d2a277e2a6abc2c2cba24b67525c89126411 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 12 Jul 2018 14:24:02 -0500 Subject: [PATCH 0008/1459] add test infrastructure for f64 functions --- libm/README.md | 1 + libm/src/fabs.rs | 5 + libm/src/lib.rs | 7 + libm/test-generator/src/main.rs | 228 ++++++++++++++++++++++++++++++-- 4 files changed, 230 insertions(+), 11 deletions(-) create mode 100644 libm/src/fabs.rs diff --git a/libm/README.md b/libm/README.md index 17c4ee94d..a0b131733 100644 --- a/libm/README.md +++ b/libm/README.md @@ -96,6 +96,7 @@ cf. [rustwasm/team#84](https://github.com/rustwasm/team/issues/84). ### Other functions +- [x] fabs - [x] fabsf - [x] scalbnf - [x] sqrtf diff --git a/libm/src/fabs.rs b/libm/src/fabs.rs new file mode 100644 index 000000000..993918efc --- /dev/null +++ b/libm/src/fabs.rs @@ -0,0 +1,5 @@ +use core::u64; + +pub fn fabs(x: f64) -> f64 { + f64::from_bits(x.to_bits() & (u64::MAX / 2)) +} diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 41c20b2da..3f71fe876 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -1,12 +1,14 @@ #![deny(warnings)] #![no_std] +mod fabs; mod fabsf; mod fmodf; mod powf; mod scalbnf; mod sqrtf; +pub use fabs::fabs; pub use fabsf::fabsf; pub use fmodf::fmodf; pub use powf::powf; @@ -19,6 +21,11 @@ pub fn _eqf(a: u32, b: u32) -> bool { (a as i32).wrapping_sub(b as i32).abs() <= 1 } +#[doc(hidden)] +pub fn _eq(a: u64, b: u64) -> bool { + (a as i64).wrapping_sub(b as i64).abs() <= 1 +} + fn isnanf(x: f32) -> bool { x.to_bits() & 0x7fffffff > 0x7f800000 } diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 3e58f7e1a..fb9154aec 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -10,14 +10,14 @@ use std::error::Error; use std::fmt::Write as _0; use std::fs::{self, File}; use std::io::Write as _1; -use std::{i16, u32, u8}; +use std::{i16, u16, u32, u64, u8}; use rand::{Rng, 
SeedableRng, XorShiftRng}; // Number of test cases to generate const NTESTS: usize = 10_000; -// TODO tweak this function to generate edge cases (zero, infinity, NaN) more often +// TODO tweak these functions to generate edge cases (zero, infinity, NaN) more often fn f32(rng: &mut XorShiftRng) -> f32 { let sign = if rng.gen_bool(0.5) { 1 << 31 } else { 0 }; let exponent = (rng.gen_range(0, u8::MAX) as u32) << 23; @@ -26,13 +26,21 @@ fn f32(rng: &mut XorShiftRng) -> f32 { f32::from_bits(sign + exponent + mantissa) } +fn f64(rng: &mut XorShiftRng) -> f64 { + let sign = if rng.gen_bool(0.5) { 1 << 63 } else { 0 }; + let exponent = (rng.gen_range(0, u16::MAX) as u64 & ((1 << 11) - 1)) << 52; + let mantissa = rng.gen_range(0, u64::MAX) & ((1 << 52) - 1); + + f64::from_bits(sign + exponent + mantissa) +} + // fn(f32) -> f32 macro_rules! f32_f32 { - ($($intr:ident,)+) => { + ($($intr:ident,)*) => { fn f32_f32(rng: &mut XorShiftRng) -> Result<(), Box> { // MUSL C implementation of the function to test extern "C" { - $(fn $intr(_: f32) -> f32;)+ + $(fn $intr(_: f32) -> f32;)* } $( @@ -78,18 +86,19 @@ macro_rules! f32_f32 { ", stringify!($intr), cases)?; - )+ + )* Ok(()) } } } +// fn(f32, f32) -> f32 macro_rules! f32f32_f32 { - ($($intr:ident,)+) => { + ($($intr:ident,)*) => { fn f32f32_f32(rng: &mut XorShiftRng) -> Result<(), Box> { extern "C" { - $(fn $intr(_: f32, _: f32) -> f32;)+ + $(fn $intr(_: f32, _: f32) -> f32;)* } $( @@ -137,18 +146,19 @@ macro_rules! f32f32_f32 { ", stringify!($intr), cases)?; - )+ + )* Ok(()) } }; } +// fn(f32, i32) -> f32 macro_rules! f32i32_f32 { - ($($intr:ident,)+) => { + ($($intr:ident,)*) => { fn f32i32_f32(rng: &mut XorShiftRng) -> Result<(), Box> { extern "C" { - $(fn $intr(_: f32, _: i32) -> f32;)+ + $(fn $intr(_: f32, _: i32) -> f32;)* } $( @@ -195,7 +205,185 @@ macro_rules! f32i32_f32 { ", stringify!($intr), cases)?; - )+ + )* + + Ok(()) + } + }; +} + +// fn(f64) -> f64 +macro_rules! 
f64_f64 { + ($($intr:ident,)*) => { + fn f64_f64(rng: &mut XorShiftRng) -> Result<(), Box> { + // MUSL C implementation of the function to test + extern "C" { + $(fn $intr(_: f64) -> f64;)* + } + + $( + let mut cases = String::new(); + for _ in 0..NTESTS { + let inp = f64(rng); + let out = unsafe { $intr(inp) }; + + let inp = inp.to_bits(); + let out = out.to_bits(); + + write!(cases, "({}, {})", inp, out).unwrap(); + cases.push(','); + } + + let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; + write!(f, " + extern crate libm; + + #[test] + fn {0}() {{ + const CASES: &[(u64, u64)] = &[ + {1} + ]; + + for case in CASES {{ + let (inp, expected) = *case; + + let outf = libm::{0}(f64::from_bits(inp)); + let outi = outf.to_bits(); + + if !((outf.is_nan() && f64::from_bits(expected).is_nan()) || + libm::_eq(outi, expected)) {{ + panic!( + \"input: {{}}, output: {{}}, expected: {{}}\", + inp, + outi, + expected, + ); + }} + }} + }} +", + stringify!($intr), + cases)?; + )* + + Ok(()) + } + } +} + +// fn(f64, f64) -> f64 +macro_rules! 
f64f64_f64 { + ($($intr:ident,)*) => { + fn f64f64_f64(rng: &mut XorShiftRng) -> Result<(), Box> { + extern "C" { + $(fn $intr(_: f64, _: f64) -> f64;)* + } + + $( + let mut cases = String::new(); + for _ in 0..NTESTS { + let i1 = f64(rng); + let i2 = f64(rng); + let out = unsafe { $intr(i1, i2) }; + + let i1 = i1.to_bits(); + let i2 = i2.to_bits(); + let out = out.to_bits(); + + write!(cases, "(({}, {}), {})", i1, i2, out).unwrap(); + cases.push(','); + } + + let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; + write!(f, " + extern crate libm; + + #[test] + fn {0}() {{ + const CASES: &[((u64, u64), u64)] = &[ + {1} + ]; + + for case in CASES {{ + let ((i1, i2), expected) = *case; + + let outf = libm::{0}(f64::from_bits(i1), f64::from_bits(i2)); + let outi = outf.to_bits(); + + if !((outf.is_nan() && f64::from_bits(expected).is_nan()) || + libm::_eq(outi, expected)) {{ + panic!( + \"input: {{:?}}, output: {{}}, expected: {{}}\", + (i1, i2), + outi, + expected, + ); + }} + }} + }} +", + stringify!($intr), + cases)?; + )* + + Ok(()) + } + }; +} + +// fn(f64, i32) -> f64 +macro_rules! 
f64i32_f64 { + ($($intr:ident,)*) => { + fn f64i32_f64(rng: &mut XorShiftRng) -> Result<(), Box> { + extern "C" { + $(fn $intr(_: f64, _: i32) -> f64;)* + } + + $( + let mut cases = String::new(); + for _ in 0..NTESTS { + let i1 = f64(rng); + let i2 = rng.gen_range(i16::MIN, i16::MAX); + let out = unsafe { $intr(i1, i2 as i32) }; + + let i1 = i1.to_bits(); + let out = out.to_bits(); + + write!(cases, "(({}, {}), {})", i1, i2, out).unwrap(); + cases.push(','); + } + + let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; + write!(f, " + extern crate libm; + + #[test] + fn {0}() {{ + const CASES: &[((u64, i16), u64)] = &[ + {1} + ]; + + for case in CASES {{ + let ((i1, i2), expected) = *case; + + let outf = libm::{0}(f64::from_bits(i1), i2 as i32); + let outi = outf.to_bits(); + + if !((outf.is_nan() && f64::from_bits(expected).is_nan()) || + libm::_eq(outi, expected)) {{ + panic!( + \"input: {{:?}}, output: {{}}, expected: {{}}\", + (i1, i2), + outi, + expected, + ); + }} + }} + }} +", + stringify!($intr), + cases)?; + )* Ok(()) } @@ -211,6 +399,9 @@ fn main() -> Result<(), Box> { f32_f32(&mut rng)?; f32f32_f32(&mut rng)?; f32i32_f32(&mut rng)?; + f64_f64(&mut rng)?; + f64f64_f64(&mut rng)?; + f64i32_f64(&mut rng)?; Ok(()) } @@ -233,3 +424,18 @@ f32f32_f32! { f32i32_f32! { scalbnf, } + +// With signature `fn(f64) -> f64` +f64_f64! { + fabs, +} + +// With signature `fn(f64, f64) -> f64` +f64f64_f64! { + // fmod, +} + +// With signature `fn(f64, i32) -> f64` +f64i32_f64! 
{ + // scalbn, +} From 39c52388e39727d91d92e7f4878389bbdbc37390 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 12 Jul 2018 15:30:32 -0500 Subject: [PATCH 0009/1459] add more testing infrastructure --- libm/test-generator/src/main.rs | 188 ++++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index fb9154aec..f933b34dc 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -58,6 +58,8 @@ macro_rules! f32_f32 { let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; write!(f, " + #![deny(warnings)] + extern crate libm; #[test] @@ -118,6 +120,8 @@ macro_rules! f32f32_f32 { let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; write!(f, " + #![deny(warnings)] + extern crate libm; #[test] @@ -153,6 +157,74 @@ macro_rules! f32f32_f32 { }; } +// fn(f32, f32, f32) -> f32 +macro_rules! f32f32f32_f32 { + ($($intr:ident,)*) => { + fn f32f32f32_f32(rng: &mut XorShiftRng) -> Result<(), Box> { + extern "C" { + $(fn $intr(_: f32, _: f32, _: f32) -> f32;)* + } + + $( + let mut cases = String::new(); + for _ in 0..NTESTS { + let i1 = f32(rng); + let i2 = f32(rng); + let i3 = f32(rng); + let out = unsafe { $intr(i1, i2, i3) }; + + let i1 = i1.to_bits(); + let i2 = i2.to_bits(); + let i3 = i3.to_bits(); + let out = out.to_bits(); + + write!(cases, "(({}, {}, {}), {})", i1, i2, i3, out).unwrap(); + cases.push(','); + } + + let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; + write!(f, " + #![deny(warnings)] + + extern crate libm; + + #[test] + fn {0}() {{ + const CASES: &[((u32, u32, u32), u32)] = &[ + {1} + ]; + + for case in CASES {{ + let ((i1, i2, i3), expected) = *case; + + let outf = libm::{0}( + f32::from_bits(i1), + f32::from_bits(i2), + f32::from_bits(i3), + ); + let outi = outf.to_bits(); + + if !((outf.is_nan() && f32::from_bits(expected).is_nan()) || + libm::_eqf(outi, expected)) {{ + 
panic!( + \"input: {{:?}}, output: {{}}, expected: {{}}\", + (i1, i2, i3), + outi, + expected, + ); + }} + }} + }} +", + stringify!($intr), + cases)?; + )* + + Ok(()) + } + }; +} + // fn(f32, i32) -> f32 macro_rules! f32i32_f32 { ($($intr:ident,)*) => { @@ -177,6 +249,8 @@ macro_rules! f32i32_f32 { let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; write!(f, " + #![deny(warnings)] + extern crate libm; #[test] @@ -236,6 +310,8 @@ macro_rules! f64_f64 { let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; write!(f, " + #![deny(warnings)] + extern crate libm; #[test] @@ -296,6 +372,8 @@ macro_rules! f64f64_f64 { let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; write!(f, " + #![deny(warnings)] + extern crate libm; #[test] @@ -331,6 +409,74 @@ macro_rules! f64f64_f64 { }; } +// fn(f64, f64, f64) -> f64 +macro_rules! f64f64f64_f64 { + ($($intr:ident,)*) => { + fn f64f64f64_f64(rng: &mut XorShiftRng) -> Result<(), Box> { + extern "C" { + $(fn $intr(_: f64, _: f64, _: f64) -> f64;)* + } + + $( + let mut cases = String::new(); + for _ in 0..NTESTS { + let i1 = f64(rng); + let i2 = f64(rng); + let i3 = f64(rng); + let out = unsafe { $intr(i1, i2, i3) }; + + let i1 = i1.to_bits(); + let i2 = i2.to_bits(); + let i3 = i3.to_bits(); + let out = out.to_bits(); + + write!(cases, "(({}, {}, {}), {})", i1, i2, i3, out).unwrap(); + cases.push(','); + } + + let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; + write!(f, " + #![deny(warnings)] + + extern crate libm; + + #[test] + fn {0}() {{ + const CASES: &[((u64, u64, u64), u64)] = &[ + {1} + ]; + + for case in CASES {{ + let ((i1, i2, i3), expected) = *case; + + let outf = libm::{0}( + f64::from_bits(i1), + f64::from_bits(i2), + f64::from_bits(i3), + ); + let outi = outf.to_bits(); + + if !((outf.is_nan() && f64::from_bits(expected).is_nan()) || + libm::_eq(outi, expected)) {{ + panic!( + \"input: {{:?}}, output: {{}}, expected: {{}}\", + (i1, i2, 
i3), + outi, + expected, + ); + }} + }} + }} +", + stringify!($intr), + cases)?; + )* + + Ok(()) + } + }; +} + // fn(f64, i32) -> f64 macro_rules! f64i32_f64 { ($($intr:ident,)*) => { @@ -355,6 +501,8 @@ macro_rules! f64i32_f64 { let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; write!(f, " + #![deny(warnings)] + extern crate libm; #[test] @@ -398,9 +546,11 @@ fn main() -> Result<(), Box> { f32_f32(&mut rng)?; f32f32_f32(&mut rng)?; + f32f32f32_f32(&mut rng)?; f32i32_f32(&mut rng)?; f64_f64(&mut rng)?; f64f64_f64(&mut rng)?; + f64f64f64_f64(&mut rng)?; f64i32_f64(&mut rng)?; Ok(()) @@ -410,6 +560,13 @@ fn main() -> Result<(), Box> { // With signature `fn(f32) -> f32` f32_f32! { + // cosf, + // exp2f, + // expf, + // log10f, + // log2f, + // roundf, + // sinf, fabsf, sqrtf, } @@ -420,6 +577,11 @@ f32f32_f32! { powf, } +// With signature `fn(f32, f32, f32) -> f32` +f32f32f32_f32! { + // fmaf, +} + // With signature `fn(f32, i32) -> f32` f32i32_f32! { scalbnf, @@ -427,12 +589,38 @@ f32i32_f32! { // With signature `fn(f64) -> f64` f64_f64! { + // acos, + // asin, + // atan, + // cbrt, + // cos, + // cosh, + // exp, + // exp2, + // expm1, + // log, + // log10, + // log1p, + // log2, + // round, + // sin, + // sinh, + // tan, + // tanh, fabs, } // With signature `fn(f64, f64) -> f64` f64f64_f64! { + // atan2, // fmod, + // hypot, + // pow, +} + +// With signature `fn(f64, f64, f64) -> f64` +f64f64f64_f64! 
{ + // fma, } // With signature `fn(f64, i32) -> f64` From 72fb519f37c9ae034c18dee4f27af39e532f67cc Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 12 Jul 2018 15:50:06 -0500 Subject: [PATCH 0010/1459] bors-ng support --- libm/.travis.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libm/.travis.yml b/libm/.travis.yml index 4d8ccc784..0c0832793 100644 --- a/libm/.travis.yml +++ b/libm/.travis.yml @@ -30,3 +30,9 @@ cache: cargo before_cache: - chmod -R a+r $HOME/.cargo; + +branches: + only: + - master + - staging + - trying From 0c449aca0a4f1c3328b2c7f320cea25bf75cb718 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 12 Jul 2018 18:26:39 -0500 Subject: [PATCH 0011/1459] add more commented out tests --- libm/test-generator/src/main.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index f933b34dc..a13aa4ef7 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -560,19 +560,33 @@ fn main() -> Result<(), Box> { // With signature `fn(f32) -> f32` f32_f32! { + // acosf, + // floorf, + // truncf + // asinf, + // atanf, + // cbrtf, + // ceilf, // cosf, + // coshf, // exp2f, // expf, + // fdimf, // log10f, // log2f, // roundf, // sinf, + // sinhf, + // tanf, + // tanhf, fabsf, sqrtf, } // With signature `fn(f32, f32) -> f32` f32f32_f32! { + // atan2f, + // hypotf, fmodf, powf, } @@ -593,11 +607,13 @@ f64_f64! { // asin, // atan, // cbrt, + // ceil, // cos, // cosh, // exp, // exp2, // expm1, + // floor, // log, // log10, // log1p, @@ -605,14 +621,17 @@ f64_f64! { // round, // sin, // sinh, + // sqrt, // tan, // tanh, + // trunc, fabs, } // With signature `fn(f64, f64) -> f64` f64f64_f64! 
{ // atan2, + // fdim, // fmod, // hypot, // pow, From 06936d983c0f263417b9bcaa0ef6884711dab4b8 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 12 Jul 2018 18:43:59 -0500 Subject: [PATCH 0012/1459] update the README --- libm/README.md | 56 +++++++------------------------------------------- 1 file changed, 7 insertions(+), 49 deletions(-) diff --git a/libm/README.md b/libm/README.md index a0b131733..841199a9f 100644 --- a/libm/README.md +++ b/libm/README.md @@ -4,6 +4,11 @@ A port of [MUSL]'s libm to Rust. [MUSL]: https://www.musl-libc.org/ +## Goals + +The short term goal of this library is to enable math support (e.g. `sin`, `atan2`) for the +`wasm32-unknown-unknown` target. The longer term goal is to enable math support in the `core` crate. + ## Testing The test suite of this crate can only be run on x86_64 Linux systems. @@ -24,7 +29,7 @@ $ TARGET=armv7-unknown-linux-gnueabihf bash ci/script.sh ## Contributing -- Pick your favorite math function from the list below. +- Pick your favorite math function from the [issue tracker]. - Look for the C implementation of the function in the [MUSL source code][src]. - Copy paste the C code into a Rust file in the `src` directory and adjust `src/lib.rs` accordingly. - Run `cargo watch check` and fix the compiler errors. @@ -34,6 +39,7 @@ $ TARGET=armv7-unknown-linux-gnueabihf bash ci/script.sh - Send us a pull request! - :tada: +[issue tracker]: https://github.com/japaric/libm/issues [src]: https://git.musl-libc.org/cgit/musl/tree/src/math Check [PR #2] for an example. @@ -53,54 +59,6 @@ Check [PR #2] for an example. [`Wrapping`]: https://doc.rust-lang.org/std/num/struct.Wrapping.html -## Progress - -### Functions wanted by the wasm WG - -cf. [rustwasm/team#84](https://github.com/rustwasm/team/issues/84). 
- -- [ ] acos -- [ ] asin -- [ ] atan -- [ ] atan2 -- [ ] cbrt -- [ ] cos -- [ ] cosf -- [ ] cosh -- [ ] exp -- [ ] exp2 -- [ ] exp2f -- [ ] expf -- [ ] expm1 -- [ ] fma -- [ ] fmaf -- [ ] fmod -- [x] fmodf -- [ ] hypot -- [ ] log -- [ ] log10 -- [ ] log10f -- [ ] log1p -- [ ] log2 -- [ ] log2f -- [ ] logf -- [ ] pow -- [x] powf -- [ ] round -- [ ] roundf -- [ ] sin -- [ ] sinf -- [ ] sinh -- [ ] tan -- [ ] tanh - -### Other functions - -- [x] fabs -- [x] fabsf -- [x] scalbnf -- [x] sqrtf - ## License Licensed under either of From 74d4dce13d5fbbb973fa6d1441207b4a2ab48adc Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 12 Jul 2018 19:16:50 -0500 Subject: [PATCH 0013/1459] re-structure for compiler-builtins integration --- libm/README.md | 3 ++- libm/src/lib.rs | 18 ++---------------- libm/src/{ => math}/fabs.rs | 1 + libm/src/{ => math}/fabsf.rs | 1 + libm/src/{ => math}/fmodf.rs | 3 ++- libm/src/math/mod.rs | 17 +++++++++++++++++ libm/src/{ => math}/powf.rs | 5 +++-- libm/src/{ => math}/scalbnf.rs | 1 + libm/src/{ => math}/sqrtf.rs | 1 + 9 files changed, 30 insertions(+), 20 deletions(-) rename libm/src/{ => math}/fabs.rs (90%) rename libm/src/{ => math}/fabsf.rs (88%) rename libm/src/{ => math}/fmodf.rs (98%) create mode 100644 libm/src/math/mod.rs rename libm/src/{ => math}/powf.rs (99%) rename libm/src/{ => math}/scalbnf.rs (98%) rename libm/src/{ => math}/sqrtf.rs (99%) diff --git a/libm/README.md b/libm/README.md index 841199a9f..55f465434 100644 --- a/libm/README.md +++ b/libm/README.md @@ -31,7 +31,8 @@ $ TARGET=armv7-unknown-linux-gnueabihf bash ci/script.sh - Pick your favorite math function from the [issue tracker]. - Look for the C implementation of the function in the [MUSL source code][src]. -- Copy paste the C code into a Rust file in the `src` directory and adjust `src/lib.rs` accordingly. +- Copy paste the C code into a Rust file in the `src/math` directory and adjust `src/math/mod.rs` + accordingly. 
- Run `cargo watch check` and fix the compiler errors. - Tweak the bottom of `test-generator/src/main.rs` to add your function to the test suite. - If you can, run the test suite locally. If you can't, no problem! Your PR will be tested diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 3f71fe876..4d7cec56f 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -1,19 +1,9 @@ #![deny(warnings)] #![no_std] -mod fabs; -mod fabsf; -mod fmodf; -mod powf; -mod scalbnf; -mod sqrtf; +mod math; -pub use fabs::fabs; -pub use fabsf::fabsf; -pub use fmodf::fmodf; -pub use powf::powf; -pub use scalbnf::scalbnf; -pub use sqrtf::sqrtf; +pub use math::*; /// Approximate equality with 1 ULP of tolerance #[doc(hidden)] @@ -25,7 +15,3 @@ pub fn _eqf(a: u32, b: u32) -> bool { pub fn _eq(a: u64, b: u64) -> bool { (a as i64).wrapping_sub(b as i64).abs() <= 1 } - -fn isnanf(x: f32) -> bool { - x.to_bits() & 0x7fffffff > 0x7f800000 -} diff --git a/libm/src/fabs.rs b/libm/src/math/fabs.rs similarity index 90% rename from libm/src/fabs.rs rename to libm/src/math/fabs.rs index 993918efc..9e081f3f9 100644 --- a/libm/src/fabs.rs +++ b/libm/src/math/fabs.rs @@ -1,5 +1,6 @@ use core::u64; +#[inline] pub fn fabs(x: f64) -> f64 { f64::from_bits(x.to_bits() & (u64::MAX / 2)) } diff --git a/libm/src/fabsf.rs b/libm/src/math/fabsf.rs similarity index 88% rename from libm/src/fabsf.rs rename to libm/src/math/fabsf.rs index be60d06cc..4cc941116 100644 --- a/libm/src/fabsf.rs +++ b/libm/src/math/fabsf.rs @@ -1,3 +1,4 @@ +#[inline] pub fn fabsf(x: f32) -> f32 { f32::from_bits(x.to_bits() & 0x7fffffff) } diff --git a/libm/src/fmodf.rs b/libm/src/math/fmodf.rs similarity index 98% rename from libm/src/fmodf.rs rename to libm/src/math/fmodf.rs index a184411a1..909775249 100644 --- a/libm/src/fmodf.rs +++ b/libm/src/math/fmodf.rs @@ -1,7 +1,8 @@ use core::u32; -use isnanf; +use super::isnanf; +#[inline] pub fn fmodf(x: f32, y: f32) -> f32 { let mut uxi = x.to_bits(); let mut uyi = y.to_bits(); diff --git 
a/libm/src/math/mod.rs b/libm/src/math/mod.rs new file mode 100644 index 000000000..09d81944c --- /dev/null +++ b/libm/src/math/mod.rs @@ -0,0 +1,17 @@ +mod fabs; +mod fabsf; +mod fmodf; +mod powf; +mod scalbnf; +mod sqrtf; + +pub use self::fabs::fabs; +pub use self::fabsf::fabsf; +pub use self::fmodf::fmodf; +pub use self::powf::powf; +pub use self::scalbnf::scalbnf; +pub use self::sqrtf::sqrtf; + +fn isnanf(x: f32) -> bool { + x.to_bits() & 0x7fffffff > 0x7f800000 +} diff --git a/libm/src/powf.rs b/libm/src/math/powf.rs similarity index 99% rename from libm/src/powf.rs rename to libm/src/math/powf.rs index 770987c2a..f1dc3a5b8 100644 --- a/libm/src/powf.rs +++ b/libm/src/math/powf.rs @@ -1,4 +1,4 @@ -use {scalbnf, sqrtf}; +use super::{fabsf, scalbnf, sqrtf}; const BP: [f32; 2] = [1.0, 1.5]; const DP_H: [f32; 2] = [0.0, 5.84960938e-01]; /* 0x3f15c000 */ @@ -28,6 +28,7 @@ const IVLN2: f32 = 1.4426950216e+00; const IVLN2_H: f32 = 1.4426879883e+00; const IVLN2_L: f32 = 7.0526075433e-06; +#[inline] pub fn powf(x: f32, y: f32) -> f32 { let mut z: f32; let mut ax: f32; @@ -127,7 +128,7 @@ pub fn powf(x: f32, y: f32) -> f32 { } } - ax = ::fabsf(x); + ax = fabsf(x); /* special value of x */ if ix == 0x7f800000 || ix == 0 || ix == 0x3f800000 { /* x is +-0,+-inf,+-1 */ diff --git a/libm/src/scalbnf.rs b/libm/src/math/scalbnf.rs similarity index 98% rename from libm/src/scalbnf.rs rename to libm/src/math/scalbnf.rs index 2c057ebc5..2ae8bf31b 100644 --- a/libm/src/scalbnf.rs +++ b/libm/src/math/scalbnf.rs @@ -1,3 +1,4 @@ +#[inline] pub fn scalbnf(mut x: f32, mut n: i32) -> f32 { let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 diff --git a/libm/src/sqrtf.rs b/libm/src/math/sqrtf.rs similarity index 99% rename from libm/src/sqrtf.rs rename to libm/src/math/sqrtf.rs index 6e92f67d4..a265bef48 100644 --- a/libm/src/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -1,5 +1,6 @@ const TINY: f32 = 1.0e-30; 
+#[inline] pub fn sqrtf(x: f32) -> f32 { let mut z: f32; let sign: i32 = 0x80000000u32 as i32; From f58b91992db873fa24ff0554d2e76e54b7fa2a09 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 12 Jul 2018 19:57:44 -0500 Subject: [PATCH 0014/1459] add extension traits --- libm/src/lib.rs | 690 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 690 insertions(+) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 4d7cec56f..fc9628d91 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -1,8 +1,22 @@ +//! Port of MUSL's libm to Rust +//! +//! # Usage +//! +//! You can use this crate in two ways: +//! +//! - By directly using its free functions, e.g. `libm::powf`. +//! +//! - By importing the `F32Ext` and / or `F64Ext` extension traits to add methods like `powf` to the +//! `f32` and `f64` types. Then you'll be able to invoke math functions as methods, e.g. `x.sqrt()`. + #![deny(warnings)] #![no_std] mod math; +#[cfg(todo)] +use core::{f32, f64}; + pub use math::*; /// Approximate equality with 1 ULP of tolerance @@ -15,3 +29,679 @@ pub fn _eqf(a: u32, b: u32) -> bool { pub fn _eq(a: u64, b: u64) -> bool { (a as i64).wrapping_sub(b as i64).abs() <= 1 } + +/// Math support for `f32` +/// +/// NOTE this meant to be a closed extension trait. The only stable way to use this trait is to +/// import it to access its methods. 
///
/// Methods marked `#[cfg(todo)]` are compiled out: the function backing them has not been ported
/// into the `math` module yet. Remove the attribute (here and on the matching method of the
/// `impl`) once that function exists.
pub trait F32Ext {
    #[cfg(todo)]
    fn floor(self) -> Self;

    #[cfg(todo)]
    fn ceil(self) -> Self;

    #[cfg(todo)]
    fn round(self) -> Self;

    #[cfg(todo)]
    fn trunc(self) -> Self;

    #[cfg(todo)]
    fn fract(self) -> Self;

    /// Absolute value; the `f32` impl forwards to `fabsf`.
    fn abs(self) -> Self;

    #[cfg(todo)]
    fn signum(self) -> Self;

    #[cfg(todo)]
    fn mul_add(self, a: Self, b: Self) -> Self;

    #[cfg(todo)]
    fn div_euc(self, rhs: Self) -> Self;

    #[cfg(todo)]
    fn mod_euc(self, rhs: Self) -> Self;

    // NOTE depends on unstable intrinsics::powif32
    // fn powi(self, n: i32) -> Self;

    /// `self` raised to the power `n`; the `f32` impl forwards to `powf`.
    fn powf(self, n: Self) -> Self;

    /// Square root; the `f32` impl forwards to `sqrtf`.
    fn sqrt(self) -> Self;

    #[cfg(todo)]
    fn exp(self) -> Self;

    #[cfg(todo)]
    fn exp2(self) -> Self;

    #[cfg(todo)]
    fn ln(self) -> Self;

    #[cfg(todo)]
    fn log(self, base: Self) -> Self;

    #[cfg(todo)]
    fn log2(self) -> Self;

    #[cfg(todo)]
    fn log10(self) -> Self;

    #[cfg(todo)]
    fn cbrt(self) -> Self;

    #[cfg(todo)]
    fn hypot(self, other: Self) -> Self;

    #[cfg(todo)]
    fn sin(self) -> Self;

    #[cfg(todo)]
    fn cos(self) -> Self;

    #[cfg(todo)]
    fn tan(self) -> Self;

    #[cfg(todo)]
    fn asin(self) -> Self;

    #[cfg(todo)]
    fn acos(self) -> Self;

    #[cfg(todo)]
    fn atan(self) -> Self;

    #[cfg(todo)]
    fn atan2(self, other: Self) -> Self;

    /// Returns `(self.sin(), self.cos())`.
    ///
    /// `Self: Copy` is required because `self` is consumed by both calls; it also implies
    /// `Sized`, which the tuple return type needs. Without the bound this default body does not
    /// compile once the `cfg(todo)` gate is removed.
    #[cfg(todo)]
    #[inline]
    fn sin_cos(self) -> (Self, Self)
    where
        Self: Copy,
    {
        (self.sin(), self.cos())
    }

    #[cfg(todo)]
    fn exp_m1(self) -> Self;

    #[cfg(todo)]
    fn ln_1p(self) -> Self;

    #[cfg(todo)]
    fn sinh(self) -> Self;

    #[cfg(todo)]
    fn cosh(self) -> Self;

    #[cfg(todo)]
    fn tanh(self) -> Self;

    #[cfg(todo)]
    fn asinh(self) -> Self;

    #[cfg(todo)]
    fn acosh(self) -> Self;

    #[cfg(todo)]
    fn atanh(self) -> Self;
}
#[inline] + fn trunc(self) -> Self { + truncf(self) + } + + #[cfg(todo)] + #[inline] + fn fract(self) -> Self { + self - self.trunc() + } + + #[inline] + fn abs(self) -> Self { + fabsf(self) + } + + #[cfg(todo)] + #[inline] + fn mul_add(self, a: Self, b: Self) -> Self { + fmaf(self, a, b) + } + + #[cfg(todo)] + #[inline] + fn div_euc(self, rhs: Self) -> Self { + let q = (self / rhs).trunc(); + if self % rhs < 0.0 { + return if rhs > 0.0 { q - 1.0 } else { q + 1.0 }; + } + q + } + + #[cfg(todo)] + #[inline] + fn mod_euc(self, rhs: f32) -> f32 { + let r = self % rhs; + if r < 0.0 { + r + rhs.abs() + } else { + r + } + } + + #[inline] + fn powf(self, n: Self) -> Self { + powf(self, n) + } + + #[inline] + fn sqrt(self) -> Self { + sqrtf(self) + } + + #[cfg(todo)] + #[inline] + fn exp(self) -> Self { + expf(self) + } + + #[cfg(todo)] + #[inline] + fn exp2(self) -> Self { + exp2f(self) + } + + #[cfg(todo)] + #[inline] + fn ln(self) -> Self { + logf(self) + } + + #[cfg(todo)] + #[inline] + fn log(self, base: Self) -> Self { + self.ln() / base.ln() + } + + #[cfg(todo)] + #[inline] + fn log2(self) -> Self { + log2f(self) + } + + #[cfg(todo)] + #[inline] + fn log10(self) -> Self { + log10f(self) + } + + #[cfg(todo)] + #[inline] + fn cbrt(self) -> Self { + cbrtf(self) + } + + #[cfg(todo)] + #[inline] + fn hypot(self, other: Self) -> Self { + hypotf(self, other) + } + + #[cfg(todo)] + #[inline] + fn sin(self) -> Self { + sinf(self) + } + + #[cfg(todo)] + #[inline] + fn cos(self) -> Self { + cosf(self) + } + + #[cfg(todo)] + #[inline] + fn tan(self) -> Self { + tanf(self) + } + + #[cfg(todo)] + #[inline] + fn asin(self) -> Self { + asinf(self) + } + + #[cfg(todo)] + #[inline] + fn acos(self) -> Self { + acosf(self) + } + + #[cfg(todo)] + #[inline] + fn atan(self) -> Self { + atanf(self) + } + + #[cfg(todo)] + #[inline] + fn atan2(self, other: Self) -> Self { + atan2f(self, other) + } + + #[cfg(todo)] + #[inline] + fn exp_m1(self) -> Self { + expm1f(self) + } + + #[cfg(todo)] + 
/// Math support for `f64`
///
/// NOTE this is meant to be a closed extension trait. The only stable way to use this trait is to
/// import it to access its methods.
///
/// Methods marked `#[cfg(todo)]` are compiled out: the function backing them has not been ported
/// into the `math` module yet. Remove the attribute (here and on the matching method of the
/// `impl`) once that function exists.
pub trait F64Ext {
    #[cfg(todo)]
    fn floor(self) -> Self;

    #[cfg(todo)]
    fn ceil(self) -> Self;

    #[cfg(todo)]
    fn round(self) -> Self;

    #[cfg(todo)]
    fn trunc(self) -> Self;

    #[cfg(todo)]
    fn fract(self) -> Self;

    /// Absolute value; the `f64` impl forwards to `fabs`.
    fn abs(self) -> Self;

    #[cfg(todo)]
    fn signum(self) -> Self;

    #[cfg(todo)]
    fn mul_add(self, a: Self, b: Self) -> Self;

    #[cfg(todo)]
    fn div_euc(self, rhs: Self) -> Self;

    #[cfg(todo)]
    fn mod_euc(self, rhs: Self) -> Self;

    // NOTE depends on unstable intrinsics::powif64
    // fn powi(self, n: i32) -> Self;

    #[cfg(todo)]
    fn powf(self, n: Self) -> Self;

    #[cfg(todo)]
    fn sqrt(self) -> Self;

    #[cfg(todo)]
    fn exp(self) -> Self;

    #[cfg(todo)]
    fn exp2(self) -> Self;

    #[cfg(todo)]
    fn ln(self) -> Self;

    #[cfg(todo)]
    fn log(self, base: Self) -> Self;

    #[cfg(todo)]
    fn log2(self) -> Self;

    #[cfg(todo)]
    fn log10(self) -> Self;

    #[cfg(todo)]
    fn cbrt(self) -> Self;

    #[cfg(todo)]
    fn hypot(self, other: Self) -> Self;

    #[cfg(todo)]
    fn sin(self) -> Self;

    #[cfg(todo)]
    fn cos(self) -> Self;

    #[cfg(todo)]
    fn tan(self) -> Self;

    #[cfg(todo)]
    fn asin(self) -> Self;

    #[cfg(todo)]
    fn acos(self) -> Self;

    #[cfg(todo)]
    fn atan(self) -> Self;

    #[cfg(todo)]
    fn atan2(self, other: Self) -> Self;

    /// Returns `(self.sin(), self.cos())`.
    ///
    /// `Self: Copy` is required because `self` is consumed by both calls; it also implies
    /// `Sized`, which the tuple return type needs. Without the bound this default body does not
    /// compile once the `cfg(todo)` gate is removed.
    #[cfg(todo)]
    #[inline]
    fn sin_cos(self) -> (Self, Self)
    where
        Self: Copy,
    {
        (self.sin(), self.cos())
    }

    #[cfg(todo)]
    fn exp_m1(self) -> Self;

    #[cfg(todo)]
    fn ln_1p(self) -> Self;

    #[cfg(todo)]
    fn sinh(self) -> Self;

    #[cfg(todo)]
    fn cosh(self) -> Self;

    #[cfg(todo)]
    fn tanh(self) -> Self;

    #[cfg(todo)]
    fn asinh(self) -> Self;

    #[cfg(todo)]
    fn acosh(self) -> Self;

    #[cfg(todo)]
    fn atanh(self) -> Self;
}
tan(self) -> Self; + + #[cfg(todo)] + fn asin(self) -> Self; + + #[cfg(todo)] + fn acos(self) -> Self; + + #[cfg(todo)] + fn atan(self) -> Self; + + #[cfg(todo)] + fn atan2(self, other: Self) -> Self; + + #[cfg(todo)] + #[inline] + fn sin_cos(self) -> (Self, Self) { + (self.sin(), self.cos()) + } + + #[cfg(todo)] + fn exp_m1(self) -> Self; + + #[cfg(todo)] + fn ln_1p(self) -> Self; + + #[cfg(todo)] + fn sinh(self) -> Self; + + #[cfg(todo)] + fn cosh(self) -> Self; + + #[cfg(todo)] + fn tanh(self) -> Self; + + #[cfg(todo)] + fn asinh(self) -> Self; + + #[cfg(todo)] + fn acosh(self) -> Self; + + #[cfg(todo)] + fn atanh(self) -> Self; +} + +impl F64Ext for f64 { + #[cfg(todo)] + #[inline] + fn floor(self) -> Self { + floor(self) + } + + #[cfg(todo)] + #[inline] + fn ceil(self) -> Self { + ceil(self) + } + + #[cfg(todo)] + #[inline] + fn round(self) -> Self { + round(self) + } + + #[cfg(todo)] + #[inline] + fn trunc(self) -> Self { + trunc(self) + } + + #[cfg(todo)] + #[inline] + fn fract(self) -> Self { + self - self.trunc() + } + + #[inline] + fn abs(self) -> Self { + fabs(self) + } + + #[cfg(todo)] + #[inline] + fn mul_add(self, a: Self, b: Self) -> Self { + fma(self, a, b) + } + + #[cfg(todo)] + #[inline] + fn div_euc(self, rhs: Self) -> Self { + let q = (self / rhs).trunc(); + if self % rhs < 0.0 { + return if rhs > 0.0 { q - 1.0 } else { q + 1.0 }; + } + q + } + + #[cfg(todo)] + #[inline] + fn mod_euc(self, rhs: f32) -> f32 { + let r = self % rhs; + if r < 0.0 { + r + rhs.abs() + } else { + r + } + } + + #[cfg(todo)] + #[inline] + fn powf(self, n: Self) -> Self { + pow(self, n) + } + + #[cfg(todo)] + #[inline] + fn sqrt(self) -> Self { + sqrt(self) + } + + #[cfg(todo)] + #[inline] + fn exp(self) -> Self { + exp(self) + } + + #[cfg(todo)] + #[inline] + fn exp2(self) -> Self { + exp2(self) + } + + #[cfg(todo)] + #[inline] + fn ln(self) -> Self { + log(self) + } + + #[cfg(todo)] + #[inline] + fn log(self, base: Self) -> Self { + self.ln() / base.ln() + } + + 
#[cfg(todo)] + #[inline] + fn log2(self) -> Self { + log2(self) + } + + #[cfg(todo)] + #[inline] + fn log10(self) -> Self { + log10(self) + } + + #[cfg(todo)] + #[inline] + fn cbrt(self) -> Self { + cbrt(self) + } + + #[cfg(todo)] + #[inline] + fn hypot(self, other: Self) -> Self { + hypot(self, other) + } + + #[cfg(todo)] + #[inline] + fn sin(self) -> Self { + sin(self) + } + + #[cfg(todo)] + #[inline] + fn cos(self) -> Self { + cos(self) + } + + #[cfg(todo)] + #[inline] + fn tan(self) -> Self { + tan(self) + } + + #[cfg(todo)] + #[inline] + fn asin(self) -> Self { + asin(self) + } + + #[cfg(todo)] + #[inline] + fn acos(self) -> Self { + acos(self) + } + + #[cfg(todo)] + #[inline] + fn atan(self) -> Self { + atan(self) + } + + #[cfg(todo)] + #[inline] + fn atan2(self, other: Self) -> Self { + atan2(self, other) + } + + #[cfg(todo)] + #[inline] + fn exp_m1(self) -> Self { + expm1(self) + } + + #[cfg(todo)] + #[inline] + fn ln_1p(self) -> Self { + log1p(self) + } + + #[cfg(todo)] + #[inline] + fn sinh(self) -> Self { + sinh(self) + } + + #[cfg(todo)] + #[inline] + fn cosh(self) -> Self { + cosh(self) + } + + #[cfg(todo)] + #[inline] + fn tanh(self) -> Self { + tanh(self) + } + + #[cfg(todo)] + #[inline] + fn asinh(self) -> Self { + if self == f64::NEG_INFINITY { + f64::NEG_INFINITY + } else { + (self + ((self * self) + 1.0).sqrt()).ln() + } + } + + #[cfg(todo)] + #[inline] + fn acosh(self) -> Self { + match self { + x if x < 1.0 => f64::NAN, + x => (x + ((x * x) - 1.0).sqrt()).ln(), + } + } + + #[cfg(todo)] + #[inline] + fn atanh(self) -> Self { + 0.5 * ((2.0 * self) / (1.0 - self)).ln_1p() + } +} From 239306b84ef6c8e0cc71ec7c1c6a5a0a6d2cfbf5 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 12 Jul 2018 20:19:42 -0500 Subject: [PATCH 0015/1459] test source importing this crate --- libm/Cargo.toml | 2 +- libm/README.md | 5 ++++- libm/cb/Cargo.toml | 6 ++++++ libm/cb/src/lib.rs | 9 +++++++++ libm/ci/script.sh | 10 ++++++++++ 5 files changed, 30 insertions(+), 2 
deletions(-) create mode 100644 libm/cb/Cargo.toml create mode 100644 libm/cb/src/lib.rs diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 96bc290fb..e3498eed0 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -4,4 +4,4 @@ version = "0.1.0" authors = ["Jorge Aparicio "] [workspace] -members = ["test-generator"] \ No newline at end of file +members = ["cb", "test-generator"] \ No newline at end of file diff --git a/libm/README.md b/libm/README.md index 55f465434..cb29baf63 100644 --- a/libm/README.md +++ b/libm/README.md @@ -32,7 +32,7 @@ $ TARGET=armv7-unknown-linux-gnueabihf bash ci/script.sh - Pick your favorite math function from the [issue tracker]. - Look for the C implementation of the function in the [MUSL source code][src]. - Copy paste the C code into a Rust file in the `src/math` directory and adjust `src/math/mod.rs` - accordingly. + accordingly. Also, uncomment the corresponding trait method in `src/lib.rs`. - Run `cargo watch check` and fix the compiler errors. - Tweak the bottom of `test-generator/src/main.rs` to add your function to the test suite. - If you can, run the test suite locally. If you can't, no problem! Your PR will be tested @@ -49,6 +49,9 @@ Check [PR #2] for an example. ### Notes +- Only use relative imports within the `math` directory / module, e.g. `use self::fabs::fabs` or +`use super::isnanf`. Absolute imports from core are OK, e.g. `use core::u64`. + - To reinterpret a float as an integer use the `to_bits` method. The MUSL code uses the `GET_FLOAT_WORD` macro, or a union, to do this operation. diff --git a/libm/cb/Cargo.toml b/libm/cb/Cargo.toml new file mode 100644 index 000000000..40e75dd22 --- /dev/null +++ b/libm/cb/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "cb" +version = "0.1.0" +authors = ["Jorge Aparicio "] + +[dependencies] diff --git a/libm/cb/src/lib.rs b/libm/cb/src/lib.rs new file mode 100644 index 000000000..439ba7dc4 --- /dev/null +++ b/libm/cb/src/lib.rs @@ -0,0 +1,9 @@ +//! 
Fake compiler-builtins crate +//! +//! This is used to test that we can source import `libm` into the compiler-builtins crate. + +#![allow(dead_code)] +#![no_std] + +#[path = "../../src/math/mod.rs"] +mod libm; diff --git a/libm/ci/script.sh b/libm/ci/script.sh index ee2e458cc..3db6bfeb1 100644 --- a/libm/ci/script.sh +++ b/libm/ci/script.sh @@ -1,11 +1,21 @@ set -euxo pipefail main() { + # quick check + cargo check + + # check that we can source import libm into compiler-builtins + cargo check --package cb + + # generate tests cargo run --package test-generator --target x86_64-unknown-linux-musl + if cargo fmt --version >/dev/null 2>&1; then # nicer syntax error messages (if any) cargo fmt fi + + # run tests cross test --target $TARGET --release # TODO need to fix overflow issues (cf. issue #4) From cf7d34d19c14152eab97d81b7c404e416c7f6ef0 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 13 Jul 2018 00:40:05 +0000 Subject: [PATCH 0016/1459] Implement `round` --- libm/src/lib.rs | 6 ++++++ libm/src/math/mod.rs | 2 ++ libm/src/math/round.rs | 35 +++++++++++++++++++++++++++++++++ libm/test-generator/src/main.rs | 2 +- 4 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 libm/src/math/round.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index fc9628d91..f54366b95 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -12,6 +12,12 @@ #![deny(warnings)] #![no_std] +macro_rules! 
force_eval { + ($e:expr) => { + unsafe { ::core::ptr::read_volatile(&$e); } + } +} + mod math; #[cfg(todo)] diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 09d81944c..c150a6295 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -2,6 +2,7 @@ mod fabs; mod fabsf; mod fmodf; mod powf; +mod round; mod scalbnf; mod sqrtf; @@ -9,6 +10,7 @@ pub use self::fabs::fabs; pub use self::fabsf::fabsf; pub use self::fmodf::fmodf; pub use self::powf::powf; +pub use self::round::round; pub use self::scalbnf::scalbnf; pub use self::sqrtf::sqrtf; diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs new file mode 100644 index 000000000..2a9f67c1d --- /dev/null +++ b/libm/src/math/round.rs @@ -0,0 +1,35 @@ +use core::f64; + +const TOINT: f64 = 1.0 / f64::EPSILON; + +pub fn round(mut x: f64) -> f64 { + let (f, i) = (x, x.to_bits()); + let e: u64 = i >> 52 & 0x7ff; + let mut y: f64; + + if e >= 0x3ff + 52 { + return x; + } + if i >> 63 != 0 { + x = -x; + } + if e < 0x3ff - 1 { + // raise inexact if x!=0 + force_eval!(x + TOINT); + return 0.0 * f; + } + y = x + TOINT - TOINT - x; + if y > 0.5 { + y = y + x - 1.0; + } else if y <= -0.5 { + y = y + x + 1.0; + } else { + y = y + x; + } + + if i >> 63 != 0 { + -y + } else { + y + } +} diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index a13aa4ef7..5ff78ffc8 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -618,7 +618,7 @@ f64_f64! 
{ // log10, // log1p, // log2, - // round, + round, // sin, // sinh, // sqrt, From c2d9853e55dcabc047aecde3ce454dd94e962364 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 13 Jul 2018 01:25:46 +0000 Subject: [PATCH 0017/1459] Enable f64::round --- libm/src/lib.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index f54366b95..06ef4592d 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -383,7 +383,6 @@ pub trait F64Ext { #[cfg(todo)] fn ceil(self) -> Self; - #[cfg(todo)] fn round(self) -> Self; #[cfg(todo)] @@ -504,7 +503,6 @@ impl F64Ext for f64 { ceil(self) } - #[cfg(todo)] #[inline] fn round(self) -> Self { round(self) From 5dc9fa82eae04b85d082dd3c70e2ab1ef5bf89d4 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 13 Jul 2018 01:38:04 +0000 Subject: [PATCH 0018/1459] Move the `force_eval!` macro into the math module This fixes the cross-inclusion into the compiler builtins --- libm/src/lib.rs | 6 ------ libm/src/math/mod.rs | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 06ef4592d..0d13590c7 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -12,12 +12,6 @@ #![deny(warnings)] #![no_std] -macro_rules! force_eval { - ($e:expr) => { - unsafe { ::core::ptr::read_volatile(&$e); } - } -} - mod math; #[cfg(todo)] diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index c150a6295..41359bf8c 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -1,3 +1,9 @@ +macro_rules! 
force_eval { + ($e:expr) => { + unsafe { ::core::ptr::read_volatile(&$e); } + } +} + mod fabs; mod fabsf; mod fmodf; From f0ce733451cc267dc55657bb15ce11264a94493f Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Fri, 13 Jul 2018 11:10:41 +0300 Subject: [PATCH 0019/1459] add exp and log/ln functions for f32 --- libm/src/lib.rs | 6 ---- libm/src/math/expf.rs | 76 +++++++++++++++++++++++++++++++++++++++++++ libm/src/math/logf.rs | 49 ++++++++++++++++++++++++++++ libm/src/math/mod.rs | 4 +++ 4 files changed, 129 insertions(+), 6 deletions(-) create mode 100644 libm/src/math/expf.rs create mode 100644 libm/src/math/logf.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 0d13590c7..bb00149df 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -71,16 +71,13 @@ pub trait F32Ext { fn sqrt(self) -> Self; - #[cfg(todo)] fn exp(self) -> Self; #[cfg(todo)] fn exp2(self) -> Self; - #[cfg(todo)] fn ln(self) -> Self; - #[cfg(todo)] fn log(self, base: Self) -> Self; #[cfg(todo)] @@ -220,7 +217,6 @@ impl F32Ext for f32 { sqrtf(self) } - #[cfg(todo)] #[inline] fn exp(self) -> Self { expf(self) @@ -232,13 +228,11 @@ impl F32Ext for f32 { exp2f(self) } - #[cfg(todo)] #[inline] fn ln(self) -> Self { logf(self) } - #[cfg(todo)] #[inline] fn log(self, base: Self) -> Self { self.ln() / base.ln() diff --git a/libm/src/math/expf.rs b/libm/src/math/expf.rs new file mode 100644 index 000000000..c26ff8e64 --- /dev/null +++ b/libm/src/math/expf.rs @@ -0,0 +1,76 @@ +use super::scalbnf; + +const HALF : [f32; 2] = [0.5,-0.5]; +const LN2_HI : f32 = 6.9314575195e-01; /* 0x3f317200 */ +const LN2_LO : f32 = 1.4286067653e-06; /* 0x35bfbe8e */ +const INV_LN2 : f32 = 1.4426950216e+00; /* 0x3fb8aa3b */ +/* + * Domain [-0.34568, 0.34568], range ~[-4.278e-9, 4.447e-9]: + * |x*(exp(x)+1)/(exp(x)-1) - p(x)| < 2**-27.74 + */ +const P1 : f32 = 1.6666625440e-1; /* 0xaaaa8f.0p-26 */ +const P2 : f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */ + +#[inline] +pub fn expf(mut x: f32) -> f32 { + let x1p127 = 
f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 + let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 /*original 0x1p-149f ??????????? */ + + let mut hx = x.to_bits() as i32; + let sign = (hx >> 31) as i32; /* sign bit of x */ + let signb : bool = sign != 0; + hx &= 0x7fffffff; /* high word of |x| */ + + /* special cases */ + if hx >= 0x42aeac50 { /* if |x| >= -87.33655f or NaN */ + if hx > 0x7f800000 {/* NaN */ + return x; + } + if (hx >= 0x42b17218) && (!signb) { /* x >= 88.722839f */ + /* overflow */ + x *= x1p127; + return x; + } + if signb { + /* underflow */ + force_eval!(-x1p_126/x); + if hx >= 0x42cff1b5 { /* x <= -103.972084f */ + return 0. + } + } + } + + /* argument reduction */ + let k : i32; + let hi : f32; + let lo : f32; + if hx > 0x3eb17218 { /* if |x| > 0.5 ln2 */ + if hx > 0x3f851592 { /* if |x| > 1.5 ln2 */ + k = (INV_LN2*x + HALF[sign as usize]) as i32; + } else { + k = 1 - sign - sign; + } + let kf = k as f32; + hi = x - kf*LN2_HI; /* k*ln2hi is exact here */ + lo = kf*LN2_LO; + x = hi - lo; + } else if hx > 0x39000000 { /* |x| > 2**-14 */ + k = 0; + hi = x; + lo = 0.; + } else { + /* raise inexact */ + force_eval!(x1p127 + x); + return 1. + x; + } + + /* x is now in primary range */ + let xx = x*x; + let c = x - xx*(P1+xx*P2); + let y = 1. + (x*c/(2.-c) - lo + hi); + if k == 0 { + y + } else { + scalbnf(y, k) + } +} diff --git a/libm/src/math/logf.rs b/libm/src/math/logf.rs new file mode 100644 index 000000000..f8bbe733a --- /dev/null +++ b/libm/src/math/logf.rs @@ -0,0 +1,49 @@ +const LN2_HI : f32 = 6.9313812256e-01; /* 0x3f317180 */ +const LN2_LO : f32 = 9.0580006145e-06; /* 0x3717f7d1 */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). 
const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */
const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */
/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */
const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24 */
const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */
const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */
const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */

/// Computes the natural logarithm of `x` for `f32`.
///
/// Special cases, as implemented below:
/// - `+-0` returns negative infinity
/// - negative inputs (including negative NaN) return NaN
/// - `+inf` and positive NaN are returned unchanged
/// - `1.0` returns exactly `0.0`
///
/// Subnormal inputs are first scaled by 2^25 so the exponent extraction works on a normal
/// number; the scaling is compensated through `k`.
#[inline]
pub fn logf(mut x: f32) -> f32 {
    let x1p25 = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25

    let mut ix = x.to_bits();
    let mut k = 0i32;

    if (ix < 0x00800000) || ((ix >> 31) != 0) {
        /* x < 2**-126 */
        if ix << 1 == 0 {
            return -1. / (x * x); /* log(+-0)=-inf */
        }
        if (ix >> 31) != 0 {
            return (x - x) / 0.; /* log(-#) = NaN */
        }
        /* subnormal number, scale up x */
        k -= 25;
        x *= x1p25;
        ix = x.to_bits();
    } else if ix >= 0x7f800000 {
        return x;
    } else if ix == 0x3f800000 {
        return 0.;
    }

    /* reduce x into [sqrt(2)/2, sqrt(2)] */
    ix += 0x3f800000 - 0x3f3504f3;
    k += ((ix >> 23) as i32) - 0x7f;
    ix = (ix & 0x007fffff) + 0x3f3504f3;
    x = f32::from_bits(ix);

    let f = x - 1.;
    let s = f / (2. + f);
    let z = s * s;
    let w = z * z;
    let t1 = w * (LG2 + w * LG4);
    let t2 = z * (LG1 + w * LG3);
    let r = t2 + t1;
    let hfsq = 0.5 * f * f;
    let dk = k as f32;
    s * (hfsq + r) + dk * LN2_LO - hfsq + f + dk * LN2_HI
}
a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 5ff78ffc8..d324c4f09 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -570,10 +570,11 @@ f32_f32! { // cosf, // coshf, // exp2f, - // expf, + expf, // fdimf, // log10f, // log2f, + logf, // roundf, // sinf, // sinhf, From 776655ab13a53a7ee84bc467c1dbc54050af7c58 Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Fri, 13 Jul 2018 21:02:11 +0300 Subject: [PATCH 0021/1459] add floor, scalbn; fixes in expf, scalbnf --- libm/src/lib.rs | 2 -- libm/src/math/expf.rs | 8 ++++---- libm/src/math/floor.rs | 29 +++++++++++++++++++++++++++++ libm/src/math/logf.rs | 2 +- libm/src/math/mod.rs | 26 +++++++++++++++++--------- libm/src/math/scalbn.rs | 33 +++++++++++++++++++++++++++++++++ libm/src/math/scalbnf.rs | 26 ++++++++++---------------- 7 files changed, 94 insertions(+), 32 deletions(-) create mode 100644 libm/src/math/floor.rs create mode 100644 libm/src/math/scalbn.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index bb00149df..ed163ff98 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -365,7 +365,6 @@ impl F32Ext for f32 { /// NOTE this meant to be a closed extension trait. The only stable way to use this trait is to /// import it to access its methods. pub trait F64Ext { - #[cfg(todo)] fn floor(self) -> Self; #[cfg(todo)] @@ -479,7 +478,6 @@ pub trait F64Ext { } impl F64Ext for f64 { - #[cfg(todo)] #[inline] fn floor(self) -> Self { floor(self) diff --git a/libm/src/math/expf.rs b/libm/src/math/expf.rs index c26ff8e64..1b645654e 100644 --- a/libm/src/math/expf.rs +++ b/libm/src/math/expf.rs @@ -13,16 +13,16 @@ const P2 : f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */ #[inline] pub fn expf(mut x: f32) -> f32 { - let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 - let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 /*original 0x1p-149f ??????????? 
*/ + let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 + let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 /*original 0x1p-149f ??????????? */ - let mut hx = x.to_bits() as i32; + let mut hx = x.to_bits(); let sign = (hx >> 31) as i32; /* sign bit of x */ let signb : bool = sign != 0; hx &= 0x7fffffff; /* high word of |x| */ /* special cases */ - if hx >= 0x42aeac50 { /* if |x| >= -87.33655f or NaN */ + if hx >= 0x42aeac50 { /* if |x| >= -87.33655f or NaN */ if hx > 0x7f800000 {/* NaN */ return x; } diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs new file mode 100644 index 000000000..a5fb17574 --- /dev/null +++ b/libm/src/math/floor.rs @@ -0,0 +1,29 @@ +use core::f64; + +const TOINT : f64 = 1. / f64::EPSILON; + +#[inline] +pub fn floor(x : f64) -> f64 { + let ui = x.to_bits(); + let e = ((ui >> 52) & 0x7ff) as i32; + + if (e >= 0x3ff+52) || (x == 0.) { + return x; + } + /* y = int(x) - x, where int(x) is an integer neighbor of x */ + let y = if (ui >> 63) != 0 { + x - TOINT + TOINT - x + } else { + x + TOINT - TOINT - x + }; + /* special case because of non-nearest rounding modes */ + if e <= 0x3ff-1 { + force_eval!(y); + return if (ui >> 63) != 0 { -1. } else { 0. }; + } + if y > 0. { + x + y - 1. 
/// Computes `x * 2^n` for `f64`.
///
/// The scaling is applied in up to three multiplications so that the final factor `2^n` always
/// has a representable (normal) exponent: `n` is walked into `[-1022, 1023]`, pre-multiplying by
/// `2^1023` on the way up or by `2^-969` on the way down, and is clamped after two such steps.
#[inline]
pub fn scalbn(x: f64, mut n: i32) -> f64 {
    let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023
    let x1p53 = f64::from_bits(0x4340000000000000); // 0x1p53 === 2 ^ 53
    let x1p_1022 = f64::from_bits(0x0010000000000000); // 0x1p-1022 === 2 ^ (-1022)

    let mut y = x;

    if n > 1023 {
        y *= x1p1023;
        n -= 1023;
        if n > 1023 {
            y *= x1p1023;
            n -= 1023;
            if n > 1023 {
                n = 1023;
            }
        }
    } else if n < -1022 {
        /* make sure final n < -53 to avoid double
        rounding in the subnormal range */
        y *= x1p_1022 * x1p53;
        n += 1022 - 53;
        if n < -1022 {
            y *= x1p_1022 * x1p53;
            n += 1022 - 53;
            if n < -1022 {
                n = -1022;
            }
        }
    }

    // n is now within [-1022, 1023], so 0x3ff + n is a valid biased exponent
    y * f64::from_bits(((0x3ff + n) as u64) << 52)
}
/// Computes `x * 2^n` for `f32`.
///
/// The scaling is applied in up to three multiplications so that the final factor `2^n` always
/// has a representable (normal) exponent: `n` is walked into `[-126, 127]`, pre-multiplying by
/// `2^127` on the way up or by `2^-102` on the way down, and is clamped after two such steps.
#[inline]
pub fn scalbnf(mut x: f32, mut n: i32) -> f32 {
    let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127
    let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126
    let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24

    if n > 127 {
        x *= x1p127;
        n -= 127;
        if n > 127 {
            x *= x1p127;
            n -= 127;
            if n > 127 {
                n = 127;
            }
        }
    } else if n < -126 {
        x *= x1p_126 * x1p24;
        n += 126 - 24;
        if n < -126 {
            x *= x1p_126 * x1p24;
            n += 126 - 24;
            if n < -126 {
                n = -126;
            }
        }
    }

    // n is now within [-126, 127]; the sum is formed in `i32` before the cast so a negative n
    // cannot wrap
    x * f32::from_bits(((0x7f + n) as u32) << 23)
}
{ - // scalbn, + scalbn, } From 2ab3a1e6bea8aa4c6f88a6898920b8322a493530 Mon Sep 17 00:00:00 2001 From: Erik Date: Fri, 13 Jul 2018 16:04:30 -0400 Subject: [PATCH 0023/1459] implement trunc and truncf --- libm/src/lib.rs | 4 ---- libm/src/math/mod.rs | 4 ++++ libm/src/math/trunc.rs | 24 ++++++++++++++++++++++++ libm/src/math/truncf.rs | 24 ++++++++++++++++++++++++ libm/test-generator/src/main.rs | 4 ++-- 5 files changed, 54 insertions(+), 6 deletions(-) create mode 100644 libm/src/math/trunc.rs create mode 100644 libm/src/math/truncf.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index ed163ff98..fa709ad75 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -44,7 +44,6 @@ pub trait F32Ext { #[cfg(todo)] fn round(self) -> Self; - #[cfg(todo)] fn trunc(self) -> Self; #[cfg(todo)] @@ -163,7 +162,6 @@ impl F32Ext for f32 { roundf(self) } - #[cfg(todo)] #[inline] fn trunc(self) -> Self { truncf(self) @@ -372,7 +370,6 @@ pub trait F64Ext { fn round(self) -> Self; - #[cfg(todo)] fn trunc(self) -> Self; #[cfg(todo)] @@ -494,7 +491,6 @@ impl F64Ext for f64 { round(self) } - #[cfg(todo)] #[inline] fn trunc(self) -> Self { trunc(self) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index e400badd1..71b58dda8 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -15,6 +15,8 @@ mod sqrtf; mod logf; mod expf; mod floor; +mod trunc; +mod truncf; //mod service; @@ -30,6 +32,8 @@ pub use self::{ logf::logf, expf::expf, floor::floor, + trunc::trunc, + truncf::truncf, }; fn isnanf(x: f32) -> bool { diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs new file mode 100644 index 000000000..b50ffd771 --- /dev/null +++ b/libm/src/math/trunc.rs @@ -0,0 +1,24 @@ +use core::f64; + +#[inline] +pub fn trunc(x: f64) -> f64 { + let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 + + let mut i: u64 = x.to_bits(); + let mut e: i64 = (i >> 52 & 0x7ff) as i64 - 0x3ff + 12; + let m: u64; + + if e >= 52 + 12 { + return x; + } + if e < 12 { + e = 1; 
+ } + m = -1i64 as u64 >> e; + if (i & m) == 0 { + return x; + } + force_eval!(x + x1p120); + i &= !m; + f64::from_bits(i) +} diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs new file mode 100644 index 000000000..f7d7249e3 --- /dev/null +++ b/libm/src/math/truncf.rs @@ -0,0 +1,24 @@ +use core::f32; + +#[inline] +pub fn truncf(x: f32) -> f32 { + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + let mut i: u32 = x.to_bits(); + let mut e: i32 = (i >> 23 & 0xff) as i32 - 0x7f + 9; + let m: u32; + + if e >= 23 + 9 { + return x; + } + if e < 9 { + e = 1; + } + m = -1i32 as u32 >> e; + if (i & m) == 0 { + return x; + } + force_eval!(x + x1p120); + i &= !m; + f32::from_bits(i) +} diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 42e157ad8..6353b257d 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -562,7 +562,7 @@ fn main() -> Result<(), Box> { f32_f32! { // acosf, // floorf, - // truncf + truncf, // asinf, // atanf, // cbrtf, @@ -625,7 +625,7 @@ f64_f64! 
{ // sqrt, // tan, // tanh, - // trunc, + trunc, fabs, } From 3b4951cf53846ad309301da9bacfd6a447a39699 Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Fri, 13 Jul 2018 23:23:14 +0300 Subject: [PATCH 0024/1459] add cosf with dependencies --- libm/src/lib.rs | 2 - libm/src/math/cosf.rs | 65 ++++ libm/src/math/mod.rs | 4 +- libm/src/math/service/cosdf.rs | 13 + libm/src/math/service/mod.rs | 11 + libm/src/math/service/rem_pio2_large.rs | 489 ++++++++++++++++++++++++ libm/src/math/service/rem_pio2f.rs | 44 +++ libm/src/math/service/sindf.rs | 14 + 8 files changed, 639 insertions(+), 3 deletions(-) create mode 100644 libm/src/math/cosf.rs create mode 100644 libm/src/math/service/cosdf.rs create mode 100644 libm/src/math/service/mod.rs create mode 100644 libm/src/math/service/rem_pio2_large.rs create mode 100644 libm/src/math/service/rem_pio2f.rs create mode 100644 libm/src/math/service/sindf.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index ed163ff98..e51a7c2dc 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -95,7 +95,6 @@ pub trait F32Ext { #[cfg(todo)] fn sin(self) -> Self; - #[cfg(todo)] fn cos(self) -> Self; #[cfg(todo)] @@ -268,7 +267,6 @@ impl F32Ext for f32 { sinf(self) } - #[cfg(todo)] #[inline] fn cos(self) -> Self { cosf(self) diff --git a/libm/src/math/cosf.rs b/libm/src/math/cosf.rs new file mode 100644 index 000000000..b1aefd5e3 --- /dev/null +++ b/libm/src/math/cosf.rs @@ -0,0 +1,65 @@ +use super::service::{cosdf, sindf, rem_pio2f}; + +use core::f64::consts::FRAC_PI_2; + +/* Small multiples of pi/2 rounded to double precision. 
*/ +const C1_PIO2 : f64 = 1.*FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ +const C2_PIO2 : f64 = 2.*FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ +const C3_PIO2 : f64 = 3.*FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ +const C4_PIO2 : f64 = 4.*FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ + +#[inline] +pub fn cosf(x: f32) -> f32 { + let x64 = x as f64; + + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + if ix <= 0x3f490fda { /* |x| ~<= pi/4 */ + if ix < 0x39800000 { /* |x| < 2**-12 */ + /* raise inexact if x != 0 */ + force_eval!(x + x1p120); + return 1.; + } + return cosdf(x64); + } + if ix <= 0x407b53d1 { /* |x| ~<= 5*pi/4 */ + if ix > 0x4016cbe3 { /* |x| ~> 3*pi/4 */ + return -cosdf(if sign { x64+C2_PIO2 } else { x64-C2_PIO2 }); + } else { + if sign { + return sindf(x64 + C1_PIO2); + } else { + return sindf(C1_PIO2 - x64); + } + } + } + if ix <= 0x40e231d5 { /* |x| ~<= 9*pi/4 */ + if ix > 0x40afeddf { /* |x| ~> 7*pi/4 */ + return cosdf(if sign { x64+C4_PIO2 } else { x64-C4_PIO2 }); + } else { + if sign { + return sindf(-x64 - C3_PIO2); + } else { + return sindf(x64 - C3_PIO2); + } + } + } + + /* cos(Inf or NaN) is NaN */ + if ix >= 0x7f800000 { + return x-x; + } + + /* general argument reduction needed */ + let (n, y) = rem_pio2f(x); + match n&3 { + 0 => { cosdf( y) }, + 1 => { sindf(-y) }, + 2 => { -cosdf( y) }, + _ => { sindf( y) }, + } +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index e400badd1..bc69aca0f 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -15,8 +15,9 @@ mod sqrtf; mod logf; mod expf; mod floor; +mod cosf; -//mod service; +mod service; pub use self::{ fabs::fabs, @@ -30,6 +31,7 @@ pub use self::{ logf::logf, expf::expf, floor::floor, + cosf::cosf, }; fn isnanf(x: f32) -> bool { diff --git a/libm/src/math/service/cosdf.rs b/libm/src/math/service/cosdf.rs new file mode 100644 index 000000000..6c5e9d349 --- /dev/null +++ 
b/libm/src/math/service/cosdf.rs @@ -0,0 +1,13 @@ +/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */ +const C0 : f64 = -0.499999997251031003120; /* -0x1ffffffd0c5e81.0p-54 */ +const C1 : f64 = 0.0416666233237390631894; /* 0x155553e1053a42.0p-57 */ +const C2 : f64 = -0.00138867637746099294692; /* -0x16c087e80f1e27.0p-62 */ +const C3 : f64 = 0.0000243904487962774090654; /* 0x199342e0ee5069.0p-68 */ + +#[inline] +pub(crate) fn cosdf(x : f64) -> f32 { + let z = x*x; + let w = z*z; + let r = C2+z*C3; + (((1.0+z*C0) + w*C1) + (w*z)*r) as f32 +} diff --git a/libm/src/math/service/mod.rs b/libm/src/math/service/mod.rs new file mode 100644 index 000000000..96bb09431 --- /dev/null +++ b/libm/src/math/service/mod.rs @@ -0,0 +1,11 @@ +mod sindf; +mod cosdf; +mod rem_pio2f; +mod rem_pio2_large; + +pub(crate) use self::{ + cosdf::cosdf, + sindf::sindf, + rem_pio2f::rem_pio2f, + rem_pio2_large::rem_pio2_large, +}; diff --git a/libm/src/math/service/rem_pio2_large.rs b/libm/src/math/service/rem_pio2_large.rs new file mode 100644 index 000000000..017fc88ba --- /dev/null +++ b/libm/src/math/service/rem_pio2_large.rs @@ -0,0 +1,489 @@ +use ::scalbn; +use ::F64Ext; + +/// double x[],y[]; int e0,nx,prec; +/// +/// __rem_pio2_large return the last three digits of N with +/// y = x - N*pi/2 +/// so that |y| < pi/2. +/// +/// The method is to compute the integer (mod 8) and fraction parts of +/// (2/pi)*x without doing the full multiplication. In general we +/// skip the part of the product that are known to be a huge integer ( +/// more accurately, = 0 mod 8 ). Thus the number of operations are +/// independent of the exponent of the input. +/// +/// (2/pi) is represented by an array of 24-bit integers in ipio2[]. +/// +/// Input parameters: +/// x[] The input value (must be positive) is broken into nx +/// pieces of 24-bit integers in double precision format. +/// x[i] will be the i-th 24 bit of x. 
The scaled exponent +/// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 +/// match x's up to 24 bits. +/// +/// Example of breaking a double positive z into x[0]+x[1]+x[2]: +/// e0 = ilogb(z)-23 +/// z = scalbn(z,-e0) +/// for i = 0,1,2 +/// x[i] = floor(z) +/// z = (z-x[i])*2**24 +/// +/// y[] ouput result in an array of double precision numbers. +/// The dimension of y[] is: +/// 24-bit precision 1 +/// 53-bit precision 2 +/// 64-bit precision 2 +/// 113-bit precision 3 +/// The actual value is the sum of them. Thus for 113-bit +/// precison, one may have to do something like: +/// +/// long double t,w,r_head, r_tail; +/// t = (long double)y[2] + (long double)y[1]; +/// w = (long double)y[0]; +/// r_head = t+w; +/// r_tail = w - (r_head - t); +/// +/// e0 The exponent of x[0]. Must be <= 16360 or you need to +/// expand the ipio2 table. +/// +/// prec an integer indicating the precision: +/// 0 24 bits (single) +/// 1 53 bits (double) +/// 2 64 bits (extended) +/// 3 113 bits (quad) +/// External function: +/// double scalbn(), floor(); +/// +/// +/// Here is the description of some local variables: +/// +/// jk jk+1 is the initial number of terms of ipio2[] needed +/// in the computation. The minimum and recommended value +/// for jk is 3,4,4,6 for single, double, extended, and quad. +/// jk+1 must be 2 larger than you might expect so that our +/// recomputation test works. (Up to 24 bits in the integer +/// part (the 24 bits of it that we compute) and 23 bits in +/// the fraction part may be lost to cancelation before we +/// recompute.) +/// +/// jz local integer variable indicating the number of +/// terms of ipio2[] used. +/// +/// jx nx - 1 +/// +/// jv index for pointing to the suitable ipio2[] for the +/// computation. In general, we want +/// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 +/// is an integer. Thus +/// e0-3-24*jv >= 0 or (e0-3)/24 >= jv +/// Hence jv = max(0,(e0-3)/24). +/// +/// jp jp+1 is the number of terms in PIo2[] needed, jp = jk. 
+/// +/// q[] double array with integral value, representing the +/// 24-bits chunk of the product of x and 2/pi. +/// +/// q0 the corresponding exponent of q[0]. Note that the +/// exponent for q[i] would be q0-24*i. +/// +/// PIo2[] double precision array, obtained by cutting pi/2 +/// into 24 bits chunks. +/// +/// f[] ipio2[] in floating point +/// +/// iq[] integer array by breaking up q[] in 24-bits chunk. +/// +/// fq[] final product of x*(2/pi) in fq[0],..,fq[jk] +/// +/// ih integer. If >0 it indicates q[] is >= 0.5, hence +/// it also indicates the *sign* of the result. + +const INIT_JK : [usize; 4] = [3,4,4,6]; /* initial value for jk */ + +/// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi +/// +/// integer array, contains the (24*i)-th to (24*i+23)-th +/// bit of 2/pi after binary point. The corresponding +/// floating value is +/// +/// ipio2[i] * 2^(-24(i+1)). +/// +/// NB: This table must have at least (e0-3)/24 + jk terms. +/// For quad precision (e0 <= 16360, jk = 6), this is 686. 
+#[cfg(not(ldbl_max_exp_more1024))] +const IPIO2 : [i32; 66] = [ + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, + 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, + 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, + 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, + 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, + 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, + 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, + 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, +]; + +#[cfg(ldbl_max_exp_more1024)] +const IPIO2 : [i32; 690] = [ + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, + 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, + 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, + 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, + 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, + 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, + 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, + 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, + 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6, + 0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2, + 0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35, + 0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30, + 0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C, + 0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4, + 0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770, + 0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7, + 
0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19, + 0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522, + 0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16, + 0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6, + 0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E, + 0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48, + 0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3, + 0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF, + 0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55, + 0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612, + 0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929, + 0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC, + 0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B, + 0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C, + 0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4, + 0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB, + 0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC, + 0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C, + 0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F, + 0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5, + 0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437, + 0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B, + 0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA, + 0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD, + 0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3, + 0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3, + 0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717, + 0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F, + 0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61, + 0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB, + 0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51, + 0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0, + 0x833F03, 
0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C, + 0x5BC3D8, 0xC492F5, 0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6, + 0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC, + 0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED, + 0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328, + 0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D, + 0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0, + 0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B, + 0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4, + 0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3, + 0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F, + 0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD, + 0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B, + 0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4, + 0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761, + 0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31, + 0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30, + 0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262, + 0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E, + 0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1, + 0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C, + 0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4, + 0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08, + 0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196, + 0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9, + 0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4, + 0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC, + 0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C, + 0xD0C0B2, 0x485551, 0x0EFB1E, 0xC37295, 0x3B06A3, 0x3540C0, + 0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C, + 0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0, + 0x3C3ABA, 0x461846, 0x5F7555, 0xF5BDD2, 0xC6926E, 0x5D2EAC, + 0xED440E, 0x423E1C, 
0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22, + 0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893, + 0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7, + 0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5, + 0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F, + 0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4, + 0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF, + 0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B, + 0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2, + 0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138, + 0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E, + 0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569, + 0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34, + 0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9, + 0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D, + 0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F, + 0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855, + 0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569, + 0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B, + 0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE, + 0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41, + 0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49, + 0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F, + 0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110, + 0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8, + 0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365, + 0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A, + 0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270, + 0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5, + 0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616, + 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B, + 0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0, +]; + +const PIO2 : [f64; 8] = [ + 
1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */ + 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */ + 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */ + 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */ + 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */ + 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */ + 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */ + 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */ +]; + +#[inline] +pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32 { + let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24 + let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24) + + #[cfg(not(ldbl_max_exp_more1024))] + assert!(e0 <= 16360); + + let nx = x.len(); + + let mut fw : f64; + let mut n : i32; + let mut ih : i32; + let mut z = 0f64; + let mut f : [f64;20] = [0.;20]; + let mut fq : [f64;20] = [0.;20]; + let mut q : [f64;20] = [0.;20]; + let mut iq : [i32;20] = [0;20]; + + /* initialize jk*/ + let jk = INIT_JK[prec]; + let jp = jk; + + /* determine jx,jv,q0, note that 3>q0 */ + let jx = nx-1; + let mut jv = (e0-3)/24; + if jv < 0 { + jv=0; + } + let mut q0 = e0-24*(jv+1); + let jv = jv as usize; + + /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ + let mut j = (jv-jx) as i32; + let m = jx+jk; + for i in 0..=m { + f[i] = if j<0 { + 0. 
+ } else { + IPIO2[j as usize] as f64 + }; + j += 1 + } + + /* compute q[0],q[1],...q[jk] */ + for i in 0..=jk { + fw = 0f64; + for j in 0..=jx { + fw += x[j]*f[jx+i-j]; + } + q[i] = fw; + } + + let mut jz = jk; + + 'recompute: loop { + /* distill q[] into iq[] reversingly */ + let mut i = 0i32; + let mut z = q[jz]; + for j in (1..=jz).rev() { + fw = (x1p_24*z) as i32 as f64; + iq[i as usize] = (z - x1p24*fw) as i32; + z = q[j-1]+fw; + i += 1; + } + + /* compute n */ + z = scalbn(z, q0); /* actual value of z */ + z -= 8.0*(z*0.125).floor(); /* trim off integer >= 8 */ + n = z as i32; + z -= n as f64; + ih = 0; + if q0 > 0 { /* need iq[jz-1] to determine n */ + i = iq[jz-1] >> (24-q0); + n += i; + iq[jz-1] -= i << (24-q0); + ih = iq[jz-1] >> (23-q0); + } else if q0 == 0 { + ih = iq[jz-1]>>23; + } else if z >= 0.5 { + ih = 2; + } + + if ih > 0 { /* q > 0.5 */ + n += 1; + let mut carry = 0i32; + for i in 0..jz { /* compute 1-q */ + let j = iq[i]; + if carry == 0 { + if j != 0 { + carry = 1; + iq[i] = 0x1000000 - j; + } + } else { + iq[i] = 0xffffff - j; + } + } + if q0 > 0 { /* rare case: chance is 1 in 12 */ + match q0 { + 1 => { iq[jz-1] &= 0x7fffff; }, + 2 => { iq[jz-1] &= 0x3fffff; }, + _ => {} + } + } + if ih == 2 { + z = 1. - z; + if carry != 0 { + z -= scalbn(1., q0); + } + } + } + + /* check if recomputation is needed */ + if z == 0. { + let mut j = 0; + for i in (jk..=jz-1).rev() { + j |= iq[i]; + } + if j == 0 { /* need recomputation */ + let mut k = 1; + while iq[jk-k]==0 { + k += 1; /* k = no. of terms needed */ + } + + for i in (jz+1)..=(jz+k) { /* add q[jz+1] to q[jz+k] */ + f[jx+i] = IPIO2[jv+i] as f64; + fw = 0f64; + for j in 0..=jx { + fw += x[j]*f[jx+i-j]; + } + q[i] = fw; + } + jz += k; + continue 'recompute; + } + } + + break; + } + + /* chop off zero terms */ + if z == 0. 
{ + jz -= 1; + q0 -= 24; + while iq[jz] == 0 { + jz -= 1; + q0 -= 24; + } + } else { /* break z into 24-bit if necessary */ + z = scalbn(z, -q0); + if z >= x1p24 { + fw = (x1p_24*z) as i32 as f64; + iq[jz] = (z - x1p24*fw) as i32; + jz += 1; + q0 += 24; + iq[jz] = fw as i32; + } else { + iq[jz] = z as i32; + } + } + + /* convert integer "bit" chunk to floating-point value */ + fw = scalbn(1., q0); + for i in (0..=jz).rev() { + q[i] = fw*(iq[i] as f64); + fw *= x1p_24; + } + + /* compute PIo2[0,...,jp]*q[jz,...,0] */ + for i in (0..=jz).rev() { + fw = 0f64; + let mut k = 0; + while (k <= jp) && (k <= jz-i) { + fw += PIO2[k]*q[i+k]; + k += 1; + } + fq[jz-i] = fw; + } + + /* compress fq[] into y[] */ + match prec { + 0 => { + fw = 0f64; + for i in (0..=jz).rev() { + fw += fq[i]; + } + y[0] = if ih == 0 { fw } else { -fw }; + }, + 1 | 2 => { + fw = 0f64; + for i in (0..=jz).rev() { + fw += fq[i]; + } + // TODO: drop excess precision here once double_t is used + fw = fw as f64; + y[0] = if ih == 0 { fw } else { -fw }; + fw = fq[0]-fw; + for i in 1..=jz { + fw += fq[i]; + } + y[1] = if ih == 0 { fw } else { -fw }; + }, + 3 => { /* painful */ + for i in (1..=jz).rev() { + fw = fq[i-1]+fq[i]; + fq[i] += fq[i-1]-fw; + fq[i-1] = fw; + } + for i in (2..=jz).rev() { + fw = fq[i-1]+fq[i]; + fq[i] += fq[i-1]-fw; + fq[i-1] = fw; + } + fw = 0f64; + for i in (2..=jz).rev() { + fw += fq[i]; + } + if ih==0 { + y[0] = fq[0]; + y[1] = fq[1]; + y[2] = fw; + } else { + y[0] = -fq[0]; + y[1] = -fq[1]; + y[2] = -fw; + } + }, + _ => { unreachable!() } + } + n & 7 +} diff --git a/libm/src/math/service/rem_pio2f.rs b/libm/src/math/service/rem_pio2f.rs new file mode 100644 index 000000000..a908ccd9f --- /dev/null +++ b/libm/src/math/service/rem_pio2f.rs @@ -0,0 +1,44 @@ +use super::rem_pio2_large; + +use core::f64; + +const TOINT : f64 = 1.5 / f64::EPSILON; + +/// 53 bits of 2/pi +const INV_PIO2 : f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ +/// first 25 bits of pi/2 +const 
PIO2_1 : f64 = 1.57079631090164184570e+00; /* 0x3FF921FB, 0x50000000 */ +/// pi/2 - pio2_1 +const PIO2_1T : f64 = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ + +/// Return the remainder of x rem pi/2 in *y +/// +/// use double precision for everything except passing x +/// use __rem_pio2_large() for large x +#[inline] +pub(crate) fn rem_pio2f(x : f32) -> (i32, f64) { + let x64 = x as f64; + + let mut tx : [f64; 1] = [0.,]; + let mut ty : [f64; 1] = [0.,]; + + let ix = x.to_bits() & 0x7fffffff; + /* 25+53 bit pi is good enough for medium size */ + if ix < 0x4dc90fdb { /* |x| ~< 2^28*(pi/2), medium size */ + /* Use a specialized rint() to get fn. Assume round-to-nearest. */ + let f_n = x64*INV_PIO2 + TOINT - TOINT; + return (f_n as i32, x64 - f_n*PIO2_1 - f_n*PIO2_1T); + } + if ix>=0x7f800000 { /* x is inf or NaN */ + return (0, x64-x64); + } + /* scale x into [2^23, 2^24-1] */ + let sign = (x.to_bits() >> 31) != 0; + let e0 = ((ix>>23) - (0x7f+23)) as i32; /* e0 = ilogb(|x|)-23, positive */ + tx[0] = f32::from_bits(ix - (e0<<23) as u32) as f64; + let n = rem_pio2_large(&tx, &mut ty, e0, 0); + if sign { + return (-n, -ty[0]); + } + (n, ty[0]) +} diff --git a/libm/src/math/service/sindf.rs b/libm/src/math/service/sindf.rs new file mode 100644 index 000000000..a633545ba --- /dev/null +++ b/libm/src/math/service/sindf.rs @@ -0,0 +1,14 @@ +/* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). 
*/ +const S1 : f64 = -0.166666666416265235595; /* -0x15555554cbac77.0p-55 */ +const S2 : f64 = 0.0083333293858894631756; /* 0x111110896efbb2.0p-59 */ +const S3 : f64 = -0.000198393348360966317347; /* -0x1a00f9e2cae774.0p-65 */ +const S4 : f64 = 0.0000027183114939898219064; /* 0x16cd878c3b46a7.0p-71 */ + +#[inline] +pub(crate) fn sindf(x : f64) -> f32 { + let z = x*x; + let w = z*z; + let r = S3 + z*S4; + let s = z*x; + ((x + s*(S1 + z*S2)) + s*w*r) as f32 +} From f94f8cc030b18d6dfbf110b97295c61aa2a26032 Mon Sep 17 00:00:00 2001 From: Erik Date: Fri, 13 Jul 2018 15:31:57 -0400 Subject: [PATCH 0025/1459] seal extension traits to prevent external implementation --- libm/src/lib.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index fa709ad75..4deb2e21e 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -32,9 +32,8 @@ pub fn _eq(a: u64, b: u64) -> bool { /// Math support for `f32` /// -/// NOTE this meant to be a closed extension trait. The only stable way to use this trait is to -/// import it to access its methods. -pub trait F32Ext { +/// This trait is sealed and cannot be implemented outside of `libm`. +pub trait F32Ext: private::Sealed { #[cfg(todo)] fn floor(self) -> Self; @@ -360,9 +359,8 @@ impl F32Ext for f32 { /// Math support for `f32` /// -/// NOTE this meant to be a closed extension trait. The only stable way to use this trait is to -/// import it to access its methods. -pub trait F64Ext { +/// This trait is sealed and cannot be implemented outside of `libm`. 
+pub trait F64Ext: private::Sealed { fn floor(self) -> Self; #[cfg(todo)] @@ -691,3 +689,10 @@ impl F64Ext for f64 { 0.5 * ((2.0 * self) / (1.0 - self)).ln_1p() } } + +mod private { + pub trait Sealed {} + + impl Sealed for f32 {} + impl Sealed for f64 {} +} From 828fb906d4d5bb1b697715f8a30de96cf357ac63 Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Fri, 13 Jul 2018 23:58:41 +0300 Subject: [PATCH 0026/1459] cosf review --- libm/src/math/{service => }/cosdf.rs | 0 libm/src/math/cosf.rs | 2 +- libm/src/math/mod.rs | 14 ++++++++++++-- libm/src/math/{service => }/rem_pio2_large.rs | 8 ++++---- libm/src/math/{service => }/rem_pio2f.rs | 0 libm/src/math/service/mod.rs | 11 ----------- libm/src/math/{service => }/sindf.rs | 0 7 files changed, 17 insertions(+), 18 deletions(-) rename libm/src/math/{service => }/cosdf.rs (100%) rename libm/src/math/{service => }/rem_pio2_large.rs (99%) rename libm/src/math/{service => }/rem_pio2f.rs (100%) delete mode 100644 libm/src/math/service/mod.rs rename libm/src/math/{service => }/sindf.rs (100%) diff --git a/libm/src/math/service/cosdf.rs b/libm/src/math/cosdf.rs similarity index 100% rename from libm/src/math/service/cosdf.rs rename to libm/src/math/cosdf.rs diff --git a/libm/src/math/cosf.rs b/libm/src/math/cosf.rs index b1aefd5e3..f63724e79 100644 --- a/libm/src/math/cosf.rs +++ b/libm/src/math/cosf.rs @@ -1,4 +1,4 @@ -use super::service::{cosdf, sindf, rem_pio2f}; +use super::{cosdf, sindf, rem_pio2f}; use core::f64::consts::FRAC_PI_2; diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index bc69aca0f..b8de3c733 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -17,8 +17,6 @@ mod expf; mod floor; mod cosf; -mod service; - pub use self::{ fabs::fabs, fabsf::fabsf, @@ -34,6 +32,18 @@ pub use self::{ cosf::cosf, }; +mod sindf; +mod cosdf; +mod rem_pio2f; +mod rem_pio2_large; + +use self::{ + sindf::sindf, + cosdf::cosdf, + rem_pio2f::rem_pio2f, + rem_pio2_large::rem_pio2_large, +}; + fn isnanf(x: f32) -> 
bool { x.to_bits() & 0x7fffffff > 0x7f800000 } diff --git a/libm/src/math/service/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs similarity index 99% rename from libm/src/math/service/rem_pio2_large.rs rename to libm/src/math/rem_pio2_large.rs index 017fc88ba..deb985f1d 100644 --- a/libm/src/math/service/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -1,5 +1,5 @@ -use ::scalbn; -use ::F64Ext; +use super::scalbn; +use super::floor; /// double x[],y[]; int e0,nx,prec; /// @@ -323,8 +323,8 @@ pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) } /* compute n */ - z = scalbn(z, q0); /* actual value of z */ - z -= 8.0*(z*0.125).floor(); /* trim off integer >= 8 */ + z = scalbn(z, q0); /* actual value of z */ + z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ n = z as i32; z -= n as f64; ih = 0; diff --git a/libm/src/math/service/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs similarity index 100% rename from libm/src/math/service/rem_pio2f.rs rename to libm/src/math/rem_pio2f.rs diff --git a/libm/src/math/service/mod.rs b/libm/src/math/service/mod.rs deleted file mode 100644 index 96bb09431..000000000 --- a/libm/src/math/service/mod.rs +++ /dev/null @@ -1,11 +0,0 @@ -mod sindf; -mod cosdf; -mod rem_pio2f; -mod rem_pio2_large; - -pub(crate) use self::{ - cosdf::cosdf, - sindf::sindf, - rem_pio2f::rem_pio2f, - rem_pio2_large::rem_pio2_large, -}; diff --git a/libm/src/math/service/sindf.rs b/libm/src/math/sindf.rs similarity index 100% rename from libm/src/math/service/sindf.rs rename to libm/src/math/sindf.rs From 6cf2daa7f4f0b55ee343f2c9f973efb929833ad7 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 13 Jul 2018 16:32:57 -0500 Subject: [PATCH 0027/1459] catch panics in tests so we can print the inputs that triggered the panic --- libm/test-generator/src/main.rs | 223 ++++++++++++++++++++++---------- 1 file changed, 157 insertions(+), 66 deletions(-) diff --git a/libm/test-generator/src/main.rs 
b/libm/test-generator/src/main.rs index 6353b257d..ca49179f1 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -62,6 +62,8 @@ macro_rules! f32_f32 { extern crate libm; + use std::panic; + #[test] fn {0}() {{ const CASES: &[(u32, u32)] = &[ @@ -71,16 +73,23 @@ macro_rules! f32_f32 { for case in CASES {{ let (inp, expected) = *case; - let outf = libm::{0}(f32::from_bits(inp)); - let outi = outf.to_bits(); - - if !((outf.is_nan() && f32::from_bits(expected).is_nan()) || - libm::_eqf(outi, expected)) {{ + if let Ok(outf) = + panic::catch_unwind(|| libm::{0}(f32::from_bits(inp))) + {{ + let outi = outf.to_bits(); + + if !((outf.is_nan() && f32::from_bits(expected).is_nan()) + || libm::_eqf(outi, expected)) + {{ + panic!( + \"input: {{}}, output: {{}}, expected: {{}}\", + inp, outi, expected, + ); + }} + }} else {{ panic!( - \"input: {{}}, output: {{}}, expected: {{}}\", - inp, - outi, - expected, + \"input: {{}}, output: PANIC, expected: {{}}\", + inp, expected, ); }} }} @@ -124,6 +133,8 @@ macro_rules! f32f32_f32 { extern crate libm; + use std::panic; + #[test] fn {0}() {{ const CASES: &[((u32, u32), u32)] = &[ @@ -133,15 +144,25 @@ macro_rules! f32f32_f32 { for case in CASES {{ let ((i1, i2), expected) = *case; - let outf = libm::{0}(f32::from_bits(i1), f32::from_bits(i2)); - let outi = outf.to_bits(); - - if !((outf.is_nan() && f32::from_bits(expected).is_nan()) || - libm::_eqf(outi, expected)) {{ + if let Ok(outf) = panic::catch_unwind(|| {{ + libm::{0}(f32::from_bits(i1), f32::from_bits(i2)) + }}) {{ + let outi = outf.to_bits(); + + if !((outf.is_nan() && f32::from_bits(expected).is_nan()) + || libm::_eqf(outi, expected)) + {{ + panic!( + \"input: {{:?}}, output: {{}}, expected: {{}}\", + (i1, i2), + outi, + expected, + ); + }} + }} else {{ panic!( - \"input: {{:?}}, output: {{}}, expected: {{}}\", + \"input: {{:?}}, output: PANIC, expected: {{}}\", (i1, i2), - outi, expected, ); }} @@ -188,6 +209,8 @@ macro_rules! 
f32f32f32_f32 { extern crate libm; + use std::panic; + #[test] fn {0}() {{ const CASES: &[((u32, u32, u32), u32)] = &[ @@ -197,19 +220,29 @@ macro_rules! f32f32f32_f32 { for case in CASES {{ let ((i1, i2, i3), expected) = *case; - let outf = libm::{0}( - f32::from_bits(i1), - f32::from_bits(i2), - f32::from_bits(i3), - ); - let outi = outf.to_bits(); - - if !((outf.is_nan() && f32::from_bits(expected).is_nan()) || - libm::_eqf(outi, expected)) {{ + if let Ok(outf) = panic::catch_unwind(|| {{ + libm::{0}( + f32::from_bits(i1), + f32::from_bits(i2), + f32::from_bits(i3), + ) + }}) {{ + let outi = outf.to_bits(); + + if !((outf.is_nan() && f32::from_bits(expected).is_nan()) + || libm::_eqf(outi, expected)) + {{ + panic!( + \"input: {{:?}}, output: {{}}, expected: {{}}\", + (i1, i2, i3), + outi, + expected, + ); + }} + }} else {{ panic!( - \"input: {{:?}}, output: {{}}, expected: {{}}\", + \"input: {{:?}}, output: PANIC, expected: {{}}\", (i1, i2, i3), - outi, expected, ); }} @@ -253,6 +286,8 @@ macro_rules! f32i32_f32 { extern crate libm; + use std::panic; + #[test] fn {0}() {{ const CASES: &[((u32, i16), u32)] = &[ @@ -262,15 +297,25 @@ macro_rules! f32i32_f32 { for case in CASES {{ let ((i1, i2), expected) = *case; - let outf = libm::{0}(f32::from_bits(i1), i2 as i32); - let outi = outf.to_bits(); - - if !((outf.is_nan() && f32::from_bits(expected).is_nan()) || - libm::_eqf(outi, expected)) {{ + if let Ok(outf) = panic::catch_unwind(|| {{ + libm::{0}(f32::from_bits(i1), i2 as i32) + }}) {{ + let outi = outf.to_bits(); + + if !((outf.is_nan() && f32::from_bits(expected).is_nan()) + || libm::_eqf(outi, expected)) + {{ + panic!( + \"input: {{:?}}, output: {{}}, expected: {{}}\", + (i1, i2), + outi, + expected, + ); + }} + }} else {{ panic!( - \"input: {{:?}}, output: {{}}, expected: {{}}\", + \"input: {{:?}}, output: PANIC, expected: {{}}\", (i1, i2), - outi, expected, ); }} @@ -314,6 +359,8 @@ macro_rules! 
f64_f64 { extern crate libm; + use std::panic; + #[test] fn {0}() {{ const CASES: &[(u64, u64)] = &[ @@ -323,15 +370,25 @@ macro_rules! f64_f64 { for case in CASES {{ let (inp, expected) = *case; - let outf = libm::{0}(f64::from_bits(inp)); - let outi = outf.to_bits(); - - if !((outf.is_nan() && f64::from_bits(expected).is_nan()) || - libm::_eq(outi, expected)) {{ + if let Ok(outf) = panic::catch_unwind(|| {{ + libm::{0}(f64::from_bits(inp)) + }}) {{ + let outi = outf.to_bits(); + + if !((outf.is_nan() && f64::from_bits(expected).is_nan()) + || libm::_eq(outi, expected)) + {{ + panic!( + \"input: {{}}, output: {{}}, expected: {{}}\", + inp, + outi, + expected, + ); + }} + }} else {{ panic!( - \"input: {{}}, output: {{}}, expected: {{}}\", + \"input: {{}}, output: PANIC, expected: {{}}\", inp, - outi, expected, ); }} @@ -376,6 +433,8 @@ macro_rules! f64f64_f64 { extern crate libm; + use std::panic; + #[test] fn {0}() {{ const CASES: &[((u64, u64), u64)] = &[ @@ -385,15 +444,24 @@ macro_rules! f64f64_f64 { for case in CASES {{ let ((i1, i2), expected) = *case; - let outf = libm::{0}(f64::from_bits(i1), f64::from_bits(i2)); - let outi = outf.to_bits(); - - if !((outf.is_nan() && f64::from_bits(expected).is_nan()) || - libm::_eq(outi, expected)) {{ + if let Ok(outf) = panic::catch_unwind(|| {{ + libm::{0}(f64::from_bits(i1), f64::from_bits(i2)) + }}) {{ + let outi = outf.to_bits(); + + if !((outf.is_nan() && f64::from_bits(expected).is_nan()) || + libm::_eq(outi, expected)) {{ + panic!( + \"input: {{:?}}, output: {{}}, expected: {{}}\", + (i1, i2), + outi, + expected, + ); + }} + }} else {{ panic!( - \"input: {{:?}}, output: {{}}, expected: {{}}\", + \"input: {{:?}}, output: PANIC, expected: {{}}\", (i1, i2), - outi, expected, ); }} @@ -440,6 +508,8 @@ macro_rules! f64f64f64_f64 { extern crate libm; + use std::panic; + #[test] fn {0}() {{ const CASES: &[((u64, u64, u64), u64)] = &[ @@ -449,19 +519,29 @@ macro_rules! 
f64f64f64_f64 { for case in CASES {{ let ((i1, i2, i3), expected) = *case; - let outf = libm::{0}( - f64::from_bits(i1), - f64::from_bits(i2), - f64::from_bits(i3), - ); - let outi = outf.to_bits(); - - if !((outf.is_nan() && f64::from_bits(expected).is_nan()) || - libm::_eq(outi, expected)) {{ + if let Ok(outf) = panic::catch_unwind(|| {{ + libm::{0}( + f64::from_bits(i1), + f64::from_bits(i2), + f64::from_bits(i3), + ) + }}) {{ + let outi = outf.to_bits(); + + if !((outf.is_nan() && f64::from_bits(expected).is_nan()) + || libm::_eq(outi, expected)) + {{ + panic!( + \"input: {{:?}}, output: {{}}, expected: {{}}\", + (i1, i2, i3), + outi, + expected, + ); + }} + }} else {{ panic!( - \"input: {{:?}}, output: {{}}, expected: {{}}\", + \"input: {{:?}}, output: PANIC, expected: {{}}\", (i1, i2, i3), - outi, expected, ); }} @@ -505,6 +585,8 @@ macro_rules! f64i32_f64 { extern crate libm; + use std::panic; + #[test] fn {0}() {{ const CASES: &[((u64, i16), u64)] = &[ @@ -514,15 +596,24 @@ macro_rules! 
f64i32_f64 { for case in CASES {{ let ((i1, i2), expected) = *case; - let outf = libm::{0}(f64::from_bits(i1), i2 as i32); - let outi = outf.to_bits(); - - if !((outf.is_nan() && f64::from_bits(expected).is_nan()) || - libm::_eq(outi, expected)) {{ + if let Ok(outf) = panic::catch_unwind(|| {{ + libm::{0}(f64::from_bits(i1), i2 as i32) + }}) {{ + let outi = outf.to_bits(); + + if !((outf.is_nan() && f64::from_bits(expected).is_nan()) || + libm::_eq(outi, expected)) {{ + panic!( + \"input: {{:?}}, output: {{}}, expected: {{}}\", + (i1, i2), + outi, + expected, + ); + }} + }} else {{ panic!( - \"input: {{:?}}, output: {{}}, expected: {{}}\", + \"input: {{:?}}, output: PANIC, expected: {{}}\", (i1, i2), - outi, expected, ); }} From 87f4dedaea7c7cf6f1913757d2e2cad70e27b83d Mon Sep 17 00:00:00 2001 From: Erik Date: Fri, 13 Jul 2018 17:41:54 -0400 Subject: [PATCH 0028/1459] implement hypotf --- libm/src/lib.rs | 2 -- libm/src/math/hypotf.rs | 43 +++++++++++++++++++++++++++++++++ libm/src/math/mod.rs | 2 ++ libm/test-generator/src/main.rs | 2 +- 4 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 libm/src/math/hypotf.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index fa709ad75..a6702c4a7 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -88,7 +88,6 @@ pub trait F32Ext { #[cfg(todo)] fn cbrt(self) -> Self; - #[cfg(todo)] fn hypot(self, other: Self) -> Self; #[cfg(todo)] @@ -254,7 +253,6 @@ impl F32Ext for f32 { cbrtf(self) } - #[cfg(todo)] #[inline] fn hypot(self, other: Self) -> Self { hypotf(self, other) diff --git a/libm/src/math/hypotf.rs b/libm/src/math/hypotf.rs new file mode 100644 index 000000000..146aab4e4 --- /dev/null +++ b/libm/src/math/hypotf.rs @@ -0,0 +1,43 @@ +use core::f32; + +use super::sqrtf; + +#[inline] +pub fn hypotf(mut x: f32, mut y: f32) -> f32 { + let x1p90 = f32::from_bits(0x6c800000); // 0x1p90f === 2 ^ 90 + let x1p_90 = f32::from_bits(0x12800000); // 0x1p-90f === 2 ^ -90 + + let mut uxi = x.to_bits(); + let mut uyi = 
y.to_bits(); + let uti; + let mut z: f32; + + uxi &= -1i32 as u32 >> 1; + uyi &= -1i32 as u32 >> 1; + if uxi < uyi { + uti = uxi; + uxi = uyi; + uyi = uti; + } + + x = f32::from_bits(uxi); + y = f32::from_bits(uyi); + if uyi == 0xff<<23 { + return y; + } + if uxi >= 0xff<<23 || uyi == 0 || uxi - uyi >= 25<<23 { + return x + y; + } + + z = 1.; + if uxi >= (0x7f+60)<<23 { + z = x1p90; + x *= x1p_90; + y *= x1p_90; + } else if uyi < (0x7f-60)<<23 { + z = x1p_90; + x *= x1p90; + y *= x1p90; + } + z*sqrtf((x as f64 * x as f64 + y as f64 * y as f64) as f32) +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 71b58dda8..a34ebd767 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -17,6 +17,7 @@ mod expf; mod floor; mod trunc; mod truncf; +mod hypotf; //mod service; @@ -34,6 +35,7 @@ pub use self::{ floor::floor, trunc::trunc, truncf::truncf, + hypotf::hypotf, }; fn isnanf(x: f32) -> bool { diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 6353b257d..290952bbd 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -587,7 +587,7 @@ f32_f32! { // With signature `fn(f32, f32) -> f32` f32f32_f32! { // atan2f, - // hypotf, + hypotf, fmodf, powf, } From 28af05d51a8014f9df7bf0b73f12342ea603be1b Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Sat, 14 Jul 2018 00:52:28 +0300 Subject: [PATCH 0029/1459] rem_pio2_large comments --- libm/src/math/rem_pio2_large.rs | 216 ++++++++++++++++---------------- 1 file changed, 105 insertions(+), 111 deletions(-) diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index deb985f1d..809724df4 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -1,117 +1,20 @@ use super::scalbn; use super::floor; -/// double x[],y[]; int e0,nx,prec; -/// -/// __rem_pio2_large return the last three digits of N with -/// y = x - N*pi/2 -/// so that |y| < pi/2. 
-/// -/// The method is to compute the integer (mod 8) and fraction parts of -/// (2/pi)*x without doing the full multiplication. In general we -/// skip the part of the product that are known to be a huge integer ( -/// more accurately, = 0 mod 8 ). Thus the number of operations are -/// independent of the exponent of the input. -/// -/// (2/pi) is represented by an array of 24-bit integers in ipio2[]. -/// -/// Input parameters: -/// x[] The input value (must be positive) is broken into nx -/// pieces of 24-bit integers in double precision format. -/// x[i] will be the i-th 24 bit of x. The scaled exponent -/// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 -/// match x's up to 24 bits. -/// -/// Example of breaking a double positive z into x[0]+x[1]+x[2]: -/// e0 = ilogb(z)-23 -/// z = scalbn(z,-e0) -/// for i = 0,1,2 -/// x[i] = floor(z) -/// z = (z-x[i])*2**24 -/// -/// y[] ouput result in an array of double precision numbers. -/// The dimension of y[] is: -/// 24-bit precision 1 -/// 53-bit precision 2 -/// 64-bit precision 2 -/// 113-bit precision 3 -/// The actual value is the sum of them. Thus for 113-bit -/// precison, one may have to do something like: -/// -/// long double t,w,r_head, r_tail; -/// t = (long double)y[2] + (long double)y[1]; -/// w = (long double)y[0]; -/// r_head = t+w; -/// r_tail = w - (r_head - t); -/// -/// e0 The exponent of x[0]. Must be <= 16360 or you need to -/// expand the ipio2 table. -/// -/// prec an integer indicating the precision: -/// 0 24 bits (single) -/// 1 53 bits (double) -/// 2 64 bits (extended) -/// 3 113 bits (quad) -/// External function: -/// double scalbn(), floor(); -/// -/// -/// Here is the description of some local variables: -/// -/// jk jk+1 is the initial number of terms of ipio2[] needed -/// in the computation. The minimum and recommended value -/// for jk is 3,4,4,6 for single, double, extended, and quad. 
-/// jk+1 must be 2 larger than you might expect so that our -/// recomputation test works. (Up to 24 bits in the integer -/// part (the 24 bits of it that we compute) and 23 bits in -/// the fraction part may be lost to cancelation before we -/// recompute.) -/// -/// jz local integer variable indicating the number of -/// terms of ipio2[] used. -/// -/// jx nx - 1 -/// -/// jv index for pointing to the suitable ipio2[] for the -/// computation. In general, we want -/// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 -/// is an integer. Thus -/// e0-3-24*jv >= 0 or (e0-3)/24 >= jv -/// Hence jv = max(0,(e0-3)/24). -/// -/// jp jp+1 is the number of terms in PIo2[] needed, jp = jk. -/// -/// q[] double array with integral value, representing the -/// 24-bits chunk of the product of x and 2/pi. -/// -/// q0 the corresponding exponent of q[0]. Note that the -/// exponent for q[i] would be q0-24*i. -/// -/// PIo2[] double precision array, obtained by cutting pi/2 -/// into 24 bits chunks. -/// -/// f[] ipio2[] in floating point -/// -/// iq[] integer array by breaking up q[] in 24-bits chunk. -/// -/// fq[] final product of x*(2/pi) in fq[0],..,fq[jk] -/// -/// ih integer. If >0 it indicates q[] is >= 0.5, hence -/// it also indicates the *sign* of the result. +// initial value for jk +const INIT_JK : [usize; 4] = [3,4,4,6]; -const INIT_JK : [usize; 4] = [3,4,4,6]; /* initial value for jk */ - -/// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi -/// -/// integer array, contains the (24*i)-th to (24*i+23)-th -/// bit of 2/pi after binary point. The corresponding -/// floating value is -/// -/// ipio2[i] * 2^(-24(i+1)). -/// -/// NB: This table must have at least (e0-3)/24 + jk terms. -/// For quad precision (e0 <= 16360, jk = 6), this is 686. -#[cfg(not(ldbl_max_exp_more1024))] +// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi +// +// integer array, contains the (24*i)-th to (24*i+23)-th +// bit of 2/pi after binary point. 
The corresponding +// floating value is +// +// ipio2[i] * 2^(-24(i+1)). +// +// NB: This table must have at least (e0-3)/24 + jk terms. +// For quad precision (e0 <= 16360, jk = 6), this is 686. +#[cfg(target_pointer_width = "32")] const IPIO2 : [i32; 66] = [ 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, @@ -126,7 +29,7 @@ const IPIO2 : [i32; 66] = [ 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, ]; -#[cfg(ldbl_max_exp_more1024)] +#[cfg(target_pointer_width = "64")] const IPIO2 : [i32; 690] = [ 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, @@ -256,6 +159,97 @@ const PIO2 : [f64; 8] = [ 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */ ]; +// fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32 +// +// Input parameters: +// x[] The input value (must be positive) is broken into nx +// pieces of 24-bit integers in double precision format. +// x[i] will be the i-th 24 bit of x. The scaled exponent +// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 +// match x's up to 24 bits). +// +// Example of breaking a double positive z into x[0]+x[1]+x[2]: +// e0 = ilogb(z)-23 +// z = scalbn(z,-e0) +// for i = 0,1,2 +// x[i] = floor(z) +// z = (z-x[i])*2**24 +// +// y[] output result in an array of double precision numbers. +// The dimension of y[] is: +// 24-bit precision 1 +// 53-bit precision 2 +// 64-bit precision 2 +// 113-bit precision 3 +// The actual value is the sum of them. Thus for 113-bit +// precision, one may have to do something like: +// +// long double t,w,r_head, r_tail; +// t = (long double)y[2] + (long double)y[1]; +// w = (long double)y[0]; +// r_head = t+w; +// r_tail = w - (r_head - t); +// +// e0 The exponent of x[0]. Must be <= 16360 or you need to +// expand the ipio2 table.
+// +// prec an integer indicating the precision: +// 0 24 bits (single) +// 1 53 bits (double) +// 2 64 bits (extended) +// 3 113 bits (quad) +// +// Here is the description of some local variables: +// +// jk jk+1 is the initial number of terms of ipio2[] needed +// in the computation. The minimum and recommended value +// for jk is 3,4,4,6 for single, double, extended, and quad. +// jk+1 must be 2 larger than you might expect so that our +// recomputation test works. (Up to 24 bits in the integer +// part (the 24 bits of it that we compute) and 23 bits in +// the fraction part may be lost to cancelation before we +// recompute.) +// +// jz local integer variable indicating the number of +// terms of ipio2[] used. +// +// jx nx - 1 +// +// jv index for pointing to the suitable ipio2[] for the +// computation. In general, we want +// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 +// is an integer. Thus +// e0-3-24*jv >= 0 or (e0-3)/24 >= jv +// Hence jv = max(0,(e0-3)/24). +// +// jp jp+1 is the number of terms in PIo2[] needed, jp = jk. +// +// q[] double array with integral value, representing the +// 24-bits chunk of the product of x and 2/pi. +// +// q0 the corresponding exponent of q[0]. Note that the +// exponent for q[i] would be q0-24*i. +// +// PIo2[] double precision array, obtained by cutting pi/2 +// into 24 bits chunks. +// +// f[] ipio2[] in floating point +// +// iq[] integer array by breaking up q[] in 24-bits chunk. +// +// fq[] final product of x*(2/pi) in fq[0],..,fq[jk] +// +// ih integer. If >0 it indicates q[] is >= 0.5, hence +// it also indicates the *sign* of the result. + +/// Return the last three digits of N with y = x - N*pi/2 +/// so that |y| < pi/2. +/// +/// The method is to compute the integer (mod 8) and fraction parts of +/// (2/pi)*x without doing the full multiplication. In general we +/// skip the part of the product that are known to be a huge integer ( +/// more accurately, = 0 mod 8 ). 
Thus the number of operations are +/// independent of the exponent of the input. #[inline] pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32 { let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24 From d5033644e1182ac4f5c80d8059101dd334a48f08 Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Sat, 14 Jul 2018 00:58:02 +0300 Subject: [PATCH 0030/1459] cfg fix --- libm/src/math/rem_pio2_large.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index 809724df4..18dc721c6 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -255,7 +255,7 @@ pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24 let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24) - #[cfg(not(ldbl_max_exp_more1024))] + #[cfg(target_pointer_width = "64")] assert!(e0 <= 16360); let nx = x.len(); From cf2fb1a1a8d173d16e46172ea983d1a6c661191c Mon Sep 17 00:00:00 2001 From: Erik Date: Fri, 13 Jul 2018 18:31:01 -0400 Subject: [PATCH 0031/1459] implement sqrt and hypot --- libm/src/lib.rs | 4 - libm/src/math/hypot.rs | 74 ++++++++++++++++++ libm/src/math/mod.rs | 4 + libm/src/math/sqrt.rs | 129 ++++++++++++++++++++++++++++++++ libm/test-generator/src/main.rs | 4 +- 5 files changed, 209 insertions(+), 6 deletions(-) create mode 100644 libm/src/math/hypot.rs create mode 100644 libm/src/math/sqrt.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 0fb555a7b..3e25c16db 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -391,7 +391,6 @@ pub trait F64Ext: private::Sealed { #[cfg(todo)] fn powf(self, n: Self) -> Self; - #[cfg(todo)] fn sqrt(self) -> Self; #[cfg(todo)] @@ -415,7 +414,6 @@ pub trait F64Ext: private::Sealed { #[cfg(todo)] fn cbrt(self) -> Self; - #[cfg(todo)] fn hypot(self, other: Self) -> Self; #[cfg(todo)] @@ 
-536,7 +534,6 @@ impl F64Ext for f64 { pow(self, n) } - #[cfg(todo)] #[inline] fn sqrt(self) -> Self { sqrt(self) @@ -584,7 +581,6 @@ impl F64Ext for f64 { cbrt(self) } - #[cfg(todo)] #[inline] fn hypot(self, other: Self) -> Self { hypot(self, other) diff --git a/libm/src/math/hypot.rs b/libm/src/math/hypot.rs new file mode 100644 index 000000000..dcc17d914 --- /dev/null +++ b/libm/src/math/hypot.rs @@ -0,0 +1,74 @@ +use core::f64; + +use super::sqrt; + +const SPLIT: f64 = 134217728. + 1.; // 0x1p27 + 1 === (2 ^ 27) + 1 + +fn sq(x: f64) -> (f64, f64) { + let xh: f64; + let xl: f64; + let xc: f64; + + xc = x * SPLIT; + xh = x - xc + xc; + xl = x - xh; + let hi = x*x; + let lo = xh*xh - hi + 2.*xh*xl + xl*xl; + (hi, lo) +} + +#[inline] +pub fn hypot(mut x: f64, mut y: f64) -> f64 { + let x1p700 = f64::from_bits(0x6bb0000000000000); // 0x1p700 === 2 ^ 700 + let x1p_700 = f64::from_bits(0x1430000000000000); // 0x1p-700 === 2 ^ -700 + + let mut uxi = x.to_bits(); + let mut uyi = y.to_bits(); + let uti; + let ex: i64; + let ey: i64; + let mut z: f64; + + /* arrange |x| >= |y| */ + uxi &= -1i64 as u64 >> 1; + uyi &= -1i64 as u64 >> 1; + if uxi < uyi { + uti = uxi; + uxi = uyi; + uyi = uti; + } + + /* special cases */ + ex = (uxi>>52) as i64; + ey = (uyi>>52) as i64; + x = f64::from_bits(uxi); + y = f64::from_bits(uyi); + /* note: hypot(inf,nan) == inf */ + if ey == 0x7ff { + return y; + } + if ex == 0x7ff || uyi == 0 { + return x; + } + /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */ + /* 64 difference is enough for ld80 double_t */ + if ex - ey > 64 { + return x + y; + } + + /* precise sqrt argument in nearest rounding mode without overflow */ + /* xh*xh must not overflow and xl*xl must not underflow in sq */ + z = 1.; + if ex > 0x3ff+510 { + z = x1p700; + x *= x1p_700; + y *= x1p_700; + } else if ey < 0x3ff-450 { + z = x1p_700; + x *= x1p700; + y *= x1p700; + } + let (hx, lx) = sq(x); + let (hy, ly) = sq(y); + return z*sqrt(ly+lx+hy+hx); +} diff --git 
a/libm/src/math/mod.rs b/libm/src/math/mod.rs index a34ebd767..444652e81 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -11,12 +11,14 @@ mod powf; mod round; mod scalbn; mod scalbnf; +mod sqrt; mod sqrtf; mod logf; mod expf; mod floor; mod trunc; mod truncf; +mod hypot; mod hypotf; //mod service; @@ -29,12 +31,14 @@ pub use self::{ round::round, scalbn::scalbn, scalbnf::scalbnf, + sqrt::sqrt, sqrtf::sqrtf, logf::logf, expf::expf, floor::floor, trunc::trunc, truncf::truncf, + hypot::hypot, hypotf::hypotf, }; diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs new file mode 100644 index 000000000..49fc58fff --- /dev/null +++ b/libm/src/math/sqrt.rs @@ -0,0 +1,129 @@ +use core::f64; + +const TINY: f64 = 1.0e-300; + +#[inline] +pub fn sqrt(x: f64) -> f64 { + let mut z: f64; + let sign: u32 = 0x80000000; + let mut ix0: i32; + let mut s0: i32; + let mut q: i32; + let mut m: i32; + let mut t: i32; + let mut i: i32; + let mut r: u32; + let mut t1: u32; + let mut s1: u32; + let mut ix1: u32; + let mut q1: u32; + + ix0 = (x.to_bits() >> 32) as i32; + ix1 = x.to_bits() as u32; + + /* take care of Inf and NaN */ + if (ix0&0x7ff00000) == 0x7ff00000 { + return x*x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + } + /* take care of zero */ + if ix0 <= 0 { + if ((ix0&!(sign as i32))|ix1 as i32) == 0 { + return x; /* sqrt(+-0) = +-0 */ + } + if ix0 < 0 { + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ + } + } + /* normalize x */ + m = ix0>>20; + if m == 0 { /* subnormal x */ + while ix0 == 0 { + m -= 21; + ix0 |= (ix1>>11) as i32; + ix1 <<= 21; + } + i=0; + while (ix0&0x00100000) == 0 { + i += 1; + ix0 <<= 1; + } + m -= i - 1; + ix0 |= (ix1>>(32-i)) as i32; + ix1 <<= i; + } + m -= 1023; /* unbias exponent */ + ix0 = (ix0&0x000fffff)|0x00100000; + if (m & 1) == 1 { /* odd m, double x to make it even */ + ix0 += ix0 + ((ix1&sign)>>31) as i32; + ix1 += ix1; + } + m >>= 1; /* m = [m/2] */ + + /* generate sqrt(x) bit by bit */ + ix0 += ix0 + 
((ix1&sign)>>31) as i32; + ix1 += ix1; + q = 0; /* [q,q1] = sqrt(x) */ + q1 = 0; + s0 = 0; + s1 = 0; + r = 0x00200000; /* r = moving bit from right to left */ + + while r != 0 { + t = s0 + r as i32; + if t <= ix0 { + s0 = t + r as i32; + ix0 -= t; + q += r as i32; + } + ix0 += ix0 + ((ix1&sign)>>31) as i32; + ix1 += ix1; + r >>= 1; + } + + r = sign; + while r != 0 { + t1 = s1 + r; + t = s0; + if t < ix0 || (t == ix0 && t1 <= ix1) { + s1 = t1 + r; + if (t1&sign) == sign && (s1&sign) == 0 { + s0 += 1; + } + ix0 -= t; + if ix1 < t1 { + ix0 -= 1; + } + ix1 -= t1; + q1 += r; + } + ix0 += ix0 + ((ix1&sign)>>31) as i32; + ix1 += ix1; + r >>= 1; + } + + /* use floating add to find out rounding direction */ + if (ix0 as u32|ix1) != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if q1 == 0xffffffff { + q1 = 0; + q+=1; + } else if z > 1.0 { + if q1 == 0xfffffffe { + q += 1; + } + q1 += 2; + } else { + q1 += q1 & 1; + } + } + } + ix0 = (q>>1) + 0x3fe00000; + ix1 = q1>>1; + if (q&1) == 1 { + ix1 |= sign; + } + ix0 += m << 20; + f64::from_bits((ix0 as u64) << 32 | ix1 as u64) +} diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 8da1a920e..ce20e80d1 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -713,7 +713,7 @@ f64_f64! { round, // sin, // sinh, - // sqrt, + sqrt, // tan, // tanh, trunc, @@ -725,7 +725,7 @@ f64f64_f64! 
{ // atan2, // fdim, // fmod, - // hypot, + hypot, // pow, } From 9b5023a2e85afc4dba9c780dca48e2026c95aeb5 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 13 Jul 2018 17:52:58 -0500 Subject: [PATCH 0032/1459] move a chunk of the README into CONTRIBUTING.md --- libm/CONTRIBUTING.md | 87 +++++++++++++++++++++++++++++++++++++++++ libm/README.md | 52 +----------------------- libm/src/lib.rs | 2 +- libm/src/math/trunc.rs | 8 ++++ libm/src/math/truncf.rs | 8 ++++ 5 files changed, 105 insertions(+), 52 deletions(-) create mode 100644 libm/CONTRIBUTING.md diff --git a/libm/CONTRIBUTING.md b/libm/CONTRIBUTING.md new file mode 100644 index 000000000..1f0a05a3f --- /dev/null +++ b/libm/CONTRIBUTING.md @@ -0,0 +1,87 @@ +# How to contribute + +- Pick your favorite math function from the [issue tracker]. +- Look for the C implementation of the function in the [MUSL source code][src]. +- Copy paste the C code into a Rust file in the `src/math` directory and adjust `src/math/mod.rs` + accordingly. Also, uncomment the corresponding trait method in `src/lib.rs`. +- Run `cargo watch check` and fix the compiler errors. +- Tweak the bottom of `test-generator/src/main.rs` to add your function to the test suite. +- If you can, run the full test suite locally (see the [testing](#testing) section below). If you + can't, no problem! Your PR will be fully tested automatically. Though you may still want to add + and run some unit tests. See the bottom of [`src/math/truncf.rs`] for an example of such tests; + you can run unit tests with the `cargo test --lib` command. +- Send us a pull request! +- :tada: + +[issue tracker]: https://github.com/japaric/libm/issues +[src]: https://git.musl-libc.org/cgit/musl/tree/src/math +[`src/math/truncf.rs`]: https://github.com/japaric/libm/blob/master/src/math/truncf.rs + +Check [PR #65] for an example. 
+ +[PR #65]: https://github.com/japaric/libm/pull/65 + +## Tips and tricks + +- *IMPORTANT* The code in this crate will end up being used in the `core` crate so it can **not** + have any external dependencies (other than `core` itself). + +- Only use relative imports within the `math` directory / module, e.g. `use self::fabs::fabs` or +`use super::isnanf`. Absolute imports from core are OK, e.g. `use core::u64`. + +- To reinterpret a float as an integer use the `to_bits` method. The MUSL code uses the + `GET_FLOAT_WORD` macro, or a union, to do this operation. + +- To reinterpret an integer as a float use the `f32::from_bits` constructor. The MUSL code uses the + `SET_FLOAT_WORD` macro, or a union, to do this operation. + +- You may encounter weird literals like `0x1p127f` in the MUSL code. These are hexadecimal floating + point literals. Rust (the language) doesn't support this kind of literal. The best way I have + found to deal with these literals is to turn them into their integer representation using the + [`hexf!`] macro and then turn them back into floats. See below: + +[`hexf!`]: https://crates.io/crates/hexf + +``` rust +// Step 1: write a program to convert the float into its integer representation +#[macro_use] +extern crate hexf; + +fn main() { + println!("{:#x}", hexf32!("0x1.0p127").to_bits()); +} +``` + +``` console +$ # Step 2: run the program +$ cargo run +0x7f000000 +``` + +``` rust +// Step 3: copy paste the output into libm +let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 +``` + +- Rust code panics on arithmetic overflows when not optimized. You may need to use the [`Wrapping`] + newtype to avoid this problem.
+ +[`Wrapping`]: https://doc.rust-lang.org/std/num/struct.Wrapping.html + +## Testing + +The test suite of this crate can only be run on x86_64 Linux systems using the following commands: + +``` console +$ # The test suite depends on the `cross` tool so install it if you don't have it +$ cargo install cross + +$ # and the `cross` tool requires docker to be running +$ systemctl start docker + +$ # execute the test suite for the x86_64 target +$ TARGET=x86_64-unknown-linux-gnu bash ci/script.sh + +$ # execute the test suite for the ARMv7 target +$ TARGET=armv7-unknown-linux-gnueabihf bash ci/script.sh +``` diff --git a/libm/README.md b/libm/README.md index cb29baf63..ce731a203 100644 --- a/libm/README.md +++ b/libm/README.md @@ -9,59 +9,9 @@ A port of [MUSL]'s libm to Rust. The short term goal of this library is to enable math support (e.g. `sin`, `atan2`) for the `wasm32-unknown-unknown` target. The longer term goal is to enable math support in the `core` crate. -## Testing - -The test suite of this crate can only be run on x86_64 Linux systems. - -``` -$ # The test suite depends on the `cross` tool so install it if you don't have it -$ cargo install cross - -$ # and the `cross` tool requires docker to be running -$ systemctl start docker - -$ # execute the test suite for the x86_64 target -$ TARGET=x86_64-unknown-linux-gnu bash ci/script.sh - -$ # execute the test suite for the ARMv7 target -$ TARGET=armv7-unknown-linux-gnueabihf bash ci/script.sh -``` - ## Contributing -- Pick your favorite math function from the [issue tracker]. -- Look for the C implementation of the function in the [MUSL source code][src]. -- Copy paste the C code into a Rust file in the `src/math` directory and adjust `src/math/mod.rs` - accordingly. Also, uncomment the corresponding trait method in `src/lib.rs`. -- Run `cargo watch check` and fix the compiler errors. -- Tweak the bottom of `test-generator/src/main.rs` to add your function to the test suite. 
-- If you can, run the test suite locally. If you can't, no problem! Your PR will be tested - automatically. -- Send us a pull request! -- :tada: - -[issue tracker]: https://github.com/japaric/libm/issues -[src]: https://git.musl-libc.org/cgit/musl/tree/src/math - -Check [PR #2] for an example. - -[PR #2]: https://github.com/japaric/libm/pull/2 - -### Notes - -- Only use relative imports within the `math` directory / module, e.g. `use self::fabs::fabs` or -`use super::isnanf`. Absolute imports from core are OK, e.g. `use core::u64`. - -- To reinterpret a float as an integer use the `to_bits` method. The MUSL code uses the - `GET_FLOAT_WORD` macro, or a union, to do this operation. - -- To reinterpret an integer as a float use the `f32::from_bits` constructor. The MUSL code uses the - `SET_FLOAT_WORD` macro, or a union, to do this operation. - -- Rust code panics on arithmetic overflows when not optimized. You may need to use the [`Wrapping`] - newtype to avoid this problem. - -[`Wrapping`]: https://doc.rust-lang.org/std/num/struct.Wrapping.html +Please check [CONTRIBUTING.md](CONTRIBUTING.md) ## License diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 0fb555a7b..32735f7ed 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -355,7 +355,7 @@ impl F32Ext for f32 { } } -/// Math support for `f32` +/// Math support for `f64` /// /// This trait is sealed and cannot be implemented outside of `libm`. 
pub trait F64Ext: private::Sealed { diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs index b50ffd771..6bea67cbc 100644 --- a/libm/src/math/trunc.rs +++ b/libm/src/math/trunc.rs @@ -22,3 +22,11 @@ pub fn trunc(x: f64) -> f64 { i &= !m; f64::from_bits(i) } + +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::trunc(1.1), 1.0); + } +} diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs index f7d7249e3..9d42620d9 100644 --- a/libm/src/math/truncf.rs +++ b/libm/src/math/truncf.rs @@ -22,3 +22,11 @@ pub fn truncf(x: f32) -> f32 { i &= !m; f32::from_bits(i) } + +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::truncf(1.1), 1.0); + } +} From 6c52d5851c0e65d75df17d8888327d64765796e2 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 13 Jul 2018 19:38:51 -0500 Subject: [PATCH 0033/1459] v0.1.0 --- libm/CHANGELOG.md | 12 ++++++++++++ libm/Cargo.toml | 8 +++++++- libm/README.md | 9 +++++++-- 3 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 libm/CHANGELOG.md diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md new file mode 100644 index 000000000..c3e74a814 --- /dev/null +++ b/libm/CHANGELOG.md @@ -0,0 +1,12 @@ +# Change Log + +All notable changes to this project will be documented in this file. +This project adheres to [Semantic Versioning](http://semver.org/). 
+ +## [Unreleased] + +## v0.1.0 - 2018-07-13 + +- Initial release + +[Unreleased]: https://github.com/japaric/libm/compare/v0.1.0...HEAD diff --git a/libm/Cargo.toml b/libm/Cargo.toml index e3498eed0..5a1ae4a6c 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -1,7 +1,13 @@ [package] +authors = ["Jorge Aparicio "] +categories = ["no-std"] +description = "libm in pure Rust" +documentation = "https://docs.rs/libm" +keywords = ["libm", "math"] +license = "MIT OR Apache-2.0" name = "libm" +repository = "https://github.com/japaric/libm" version = "0.1.0" -authors = ["Jorge Aparicio "] [workspace] members = ["cb", "test-generator"] \ No newline at end of file diff --git a/libm/README.md b/libm/README.md index ce731a203..6d8f9c49c 100644 --- a/libm/README.md +++ b/libm/README.md @@ -6,8 +6,13 @@ A port of [MUSL]'s libm to Rust. ## Goals -The short term goal of this library is to enable math support (e.g. `sin`, `atan2`) for the -`wasm32-unknown-unknown` target. The longer term goal is to enable math support in the `core` crate. +The short term goal of this library is to [enable math support (e.g. `sin`, `atan2`) for the +`wasm32-unknown-unknown` target][wasm] (cf. [rust-lang-nursery/compiler-builtins][pr]). The longer +term goal is to enable [math support in the `core` crate][core]. 
+ +[wasm]: https://github.com/japaric/libm/milestone/1 +[pr]: https://github.com/rust-lang-nursery/compiler-builtins/pull/248 +[core]: https://github.com/japaric/libm/milestone/2 ## Contributing From f34d91111c92721dbe6aab19fe37ea00749938ff Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 13 Jul 2018 20:25:27 -0500 Subject: [PATCH 0034/1459] README: note that this crate is on crates.io and already usable --- libm/README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/libm/README.md b/libm/README.md index 6d8f9c49c..02de9765a 100644 --- a/libm/README.md +++ b/libm/README.md @@ -14,6 +14,30 @@ term goal is to enable [math support in the `core` crate][core]. [pr]: https://github.com/rust-lang-nursery/compiler-builtins/pull/248 [core]: https://github.com/japaric/libm/milestone/2 +## Already usable + +This crate is [on crates.io] and can be used today in stable `#![no_std]` programs like this: + +[on crates.io]: https://crates.io/crates/libm + +``` rust +#![no_std] + +extern crate libm; + +use libm::F32Ext; // adds methods to `f32` + +fn foo(x: f32) { + let y = x.sqrt(); + let z = libm::truncf(x); +} +``` + +Not all the math functions are available at the moment. Check the [API docs] to learn what's +currently supported. 
+ +[API docs]: https://docs.rs/libm + ## Contributing Please check [CONTRIBUTING.md](CONTRIBUTING.md) From af4f5c47fed14a1999a0c78ad64a5a2ee2cf3d2d Mon Sep 17 00:00:00 2001 From: Joseph Ryan Date: Fri, 13 Jul 2018 20:46:09 -0500 Subject: [PATCH 0035/1459] implement roundf --- libm/src/lib.rs | 2 -- libm/src/math/mod.rs | 8 ++++++-- libm/src/math/roundf.rs | 33 +++++++++++++++++++++++++++++++++ libm/test-generator/src/main.rs | 2 +- 4 files changed, 40 insertions(+), 5 deletions(-) create mode 100644 libm/src/math/roundf.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 0d13590c7..b87e82b2a 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -41,7 +41,6 @@ pub trait F32Ext { #[cfg(todo)] fn ceil(self) -> Self; - #[cfg(todo)] fn round(self) -> Self; #[cfg(todo)] @@ -160,7 +159,6 @@ impl F32Ext for f32 { ceilf(self) } - #[cfg(todo)] #[inline] fn round(self) -> Self { roundf(self) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 41359bf8c..8032bdabf 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -1,7 +1,9 @@ macro_rules! 
force_eval { ($e:expr) => { - unsafe { ::core::ptr::read_volatile(&$e); } - } + unsafe { + ::core::ptr::read_volatile(&$e); + } + }; } mod fabs; @@ -9,6 +11,7 @@ mod fabsf; mod fmodf; mod powf; mod round; +mod roundf; mod scalbnf; mod sqrtf; @@ -17,6 +20,7 @@ pub use self::fabsf::fabsf; pub use self::fmodf::fmodf; pub use self::powf::powf; pub use self::round::round; +pub use self::roundf::roundf; pub use self::scalbnf::scalbnf; pub use self::sqrtf::sqrtf; diff --git a/libm/src/math/roundf.rs b/libm/src/math/roundf.rs new file mode 100644 index 000000000..bd2488fa9 --- /dev/null +++ b/libm/src/math/roundf.rs @@ -0,0 +1,33 @@ +use core::f32; + +const TOINT: f32 = 1.0 / f32::EPSILON; + +pub fn roundf(mut x: f32) -> f32 { + let i = x.to_bits(); + let e: u32 = i >> 23 & 0xff; + let mut y: f32; + + if e >= 0x7f + 23 { + return x; + } + if i >> 31 != 0 { + x = -x; + } + if e < 0x7f - 1 { + force_eval!(x + TOINT); + return 0.0 * x; + } + y = x + TOINT - TOINT - x; + if y > 0.5f32 { + y = y + x - 1.0; + } else if y <= -0.5f32 { + y = y + x + 1.0; + } else { + y = y + x; + } + if i >> 31 != 0 { + -y + } else { + y + } +} diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 5ff78ffc8..2f78a8ed3 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -574,7 +574,7 @@ f32_f32! { // fdimf, // log10f, // log2f, - // roundf, + roundf, // sinf, // sinhf, // tanf, From c73d2b51ccc8ea65ca8535077d36b8ce4e15c87e Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Sat, 14 Jul 2018 05:20:09 +0300 Subject: [PATCH 0036/1459] add cosf test --- libm/test-generator/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 6353b257d..40eac207c 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -567,7 +567,7 @@ f32_f32! 
{ // atanf, // cbrtf, // ceilf, - // cosf, + cosf, // coshf, // exp2f, expf, From 26089b73cddf351d68272635f9c1da28f7fed0a1 Mon Sep 17 00:00:00 2001 From: Jack Mott Date: Fri, 13 Jul 2018 21:32:59 -0500 Subject: [PATCH 0037/1459] adding ceilf and floorf for issues rust-lang/libm#56 and rust-lang/libm#54 --- libm/src/lib.rs | 4 ---- libm/src/math/ceilf.rs | 29 +++++++++++++++++++++++++++++ libm/src/math/floorf.rs | 30 ++++++++++++++++++++++++++++++ libm/src/math/mod.rs | 4 ++++ libm/test-generator/src/main.rs | 4 ++-- 5 files changed, 65 insertions(+), 6 deletions(-) create mode 100644 libm/src/math/ceilf.rs create mode 100644 libm/src/math/floorf.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 13e038bfc..783cc9e18 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -34,10 +34,8 @@ pub fn _eq(a: u64, b: u64) -> bool { /// /// This trait is sealed and cannot be implemented outside of `libm`. pub trait F32Ext: private::Sealed { - #[cfg(todo)] fn floor(self) -> Self; - #[cfg(todo)] fn ceil(self) -> Self; #[cfg(todo)] @@ -142,13 +140,11 @@ pub trait F32Ext: private::Sealed { } impl F32Ext for f32 { - #[cfg(todo)] #[inline] fn floor(self) -> Self { floorf(self) } - #[cfg(todo)] #[inline] fn ceil(self) -> Self { ceilf(self) diff --git a/libm/src/math/ceilf.rs b/libm/src/math/ceilf.rs new file mode 100644 index 000000000..b4f58bfb8 --- /dev/null +++ b/libm/src/math/ceilf.rs @@ -0,0 +1,29 @@ +use core::f32; + +pub fn ceilf(x: f32) -> f32 { + let mut ui = x.to_bits(); + let e = (((ui >> 23) & 0xff) - 0x7f) as i32; + + if e >= 23 { + return x; + } + if e >= 0 { + let m = 0x007fffff >> e; + if (ui & m) == 0 { + return x; + } + force_eval!(x + f32::from_bits(0x7b800000)); + if ui >> 31 == 0 { + ui += m; + } + ui &= !m; + } else { + force_eval!(x + f32::from_bits(0x7b800000)); + if ui >> 31 != 0 { + return -0.0; + } else if ui << 1 != 0 { + return 1.0; + } + } + return f32::from_bits(ui); +} diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs new file mode 
100644 index 000000000..9c263b518 --- /dev/null +++ b/libm/src/math/floorf.rs @@ -0,0 +1,30 @@ +use core::f32; + +#[inline] +pub fn floorf(x: f32) -> f32 { + let mut ui = x.to_bits(); + let e = (((ui >> 23) & 0xff) - 0x7f) as i32; + + if e >= 23 { + return x; + } + if e >= 0 { + let m: u32 = 0x007fffff >> e; + if (ui & m) == 0 { + return x; + } + force_eval!(x + f32::from_bits(0x7b800000)); + if ui >> 31 != 0 { + ui += m; + } + ui &= !m; + } else { + force_eval!(x + f32::from_bits(0x7b800000)); + if ui >> 31 == 0 { + ui = 0; + } else if ui << 1 != 0 { + return -1.0; + } + } + return f32::from_bits(ui); +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 444652e81..2ddaa2728 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -4,8 +4,10 @@ macro_rules! force_eval { } } +mod ceilf; mod fabs; mod fabsf; +mod floorf; mod fmodf; mod powf; mod round; @@ -24,8 +26,10 @@ mod hypotf; //mod service; pub use self::{ + ceilf::ceilf, fabs::fabs, fabsf::fabsf, + floorf::floorf, fmodf::fmodf, powf::powf, round::round, diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index ce20e80d1..1d6168e4b 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -652,12 +652,12 @@ fn main() -> Result<(), Box> { // With signature `fn(f32) -> f32` f32_f32! 
{ // acosf, - // floorf, + floorf, truncf, // asinf, // atanf, // cbrtf, - // ceilf, + ceilf, // cosf, // coshf, // exp2f, From a594fe6ff1e1c4b5053c5191622dcd9df687e192 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 13 Jul 2018 21:51:07 -0500 Subject: [PATCH 0038/1459] run cargo-fmt --- libm/src/math/expf.rs | 73 ++++++++++++++++++++++------------------ libm/src/math/floor.rs | 32 +++++++++--------- libm/src/math/hypot.rs | 14 ++++---- libm/src/math/hypotf.rs | 10 +++--- libm/src/math/logf.rs | 45 +++++++++++++------------ libm/src/math/mod.rs | 32 +++++------------- libm/src/math/round.rs | 2 +- libm/src/math/scalbn.rs | 12 +++---- libm/src/math/scalbnf.rs | 12 +++---- libm/src/math/sqrt.rs | 60 +++++++++++++++++---------------- 10 files changed, 143 insertions(+), 149 deletions(-) diff --git a/libm/src/math/expf.rs b/libm/src/math/expf.rs index 1b645654e..cffb55771 100644 --- a/libm/src/math/expf.rs +++ b/libm/src/math/expf.rs @@ -1,60 +1,67 @@ use super::scalbnf; -const HALF : [f32; 2] = [0.5,-0.5]; -const LN2_HI : f32 = 6.9314575195e-01; /* 0x3f317200 */ -const LN2_LO : f32 = 1.4286067653e-06; /* 0x35bfbe8e */ -const INV_LN2 : f32 = 1.4426950216e+00; /* 0x3fb8aa3b */ +const HALF: [f32; 2] = [0.5, -0.5]; +const LN2_HI: f32 = 6.9314575195e-01; /* 0x3f317200 */ +const LN2_LO: f32 = 1.4286067653e-06; /* 0x35bfbe8e */ +const INV_LN2: f32 = 1.4426950216e+00; /* 0x3fb8aa3b */ /* * Domain [-0.34568, 0.34568], range ~[-4.278e-9, 4.447e-9]: * |x*(exp(x)+1)/(exp(x)-1) - p(x)| < 2**-27.74 */ -const P1 : f32 = 1.6666625440e-1; /* 0xaaaa8f.0p-26 */ -const P2 : f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */ +const P1: f32 = 1.6666625440e-1; /* 0xaaaa8f.0p-26 */ +const P2: f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */ #[inline] pub fn expf(mut x: f32) -> f32 { - let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 - let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 /*original 0x1p-149f ??????????? 
*/ - + let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 + let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 /*original 0x1p-149f ??????????? */ + let mut hx = x.to_bits(); - let sign = (hx >> 31) as i32; /* sign bit of x */ - let signb : bool = sign != 0; - hx &= 0x7fffffff; /* high word of |x| */ - + let sign = (hx >> 31) as i32; /* sign bit of x */ + let signb: bool = sign != 0; + hx &= 0x7fffffff; /* high word of |x| */ + /* special cases */ - if hx >= 0x42aeac50 { /* if |x| >= -87.33655f or NaN */ - if hx > 0x7f800000 {/* NaN */ + if hx >= 0x42aeac50 { + /* if |x| >= -87.33655f or NaN */ + if hx > 0x7f800000 { + /* NaN */ return x; } - if (hx >= 0x42b17218) && (!signb) { /* x >= 88.722839f */ + if (hx >= 0x42b17218) && (!signb) { + /* x >= 88.722839f */ /* overflow */ x *= x1p127; return x; } if signb { /* underflow */ - force_eval!(-x1p_126/x); - if hx >= 0x42cff1b5 { /* x <= -103.972084f */ - return 0. + force_eval!(-x1p_126 / x); + if hx >= 0x42cff1b5 { + /* x <= -103.972084f */ + return 0.; } } } - + /* argument reduction */ - let k : i32; - let hi : f32; - let lo : f32; - if hx > 0x3eb17218 { /* if |x| > 0.5 ln2 */ - if hx > 0x3f851592 { /* if |x| > 1.5 ln2 */ - k = (INV_LN2*x + HALF[sign as usize]) as i32; + let k: i32; + let hi: f32; + let lo: f32; + if hx > 0x3eb17218 { + /* if |x| > 0.5 ln2 */ + if hx > 0x3f851592 { + /* if |x| > 1.5 ln2 */ + k = (INV_LN2 * x + HALF[sign as usize]) as i32; } else { k = 1 - sign - sign; } let kf = k as f32; - hi = x - kf*LN2_HI; /* k*ln2hi is exact here */ - lo = kf*LN2_LO; + hi = x - kf * LN2_HI; /* k*ln2hi is exact here */ + lo = kf * LN2_LO; x = hi - lo; - } else if hx > 0x39000000 { /* |x| > 2**-14 */ + } else if hx > 0x39000000 { + /* |x| > 2**-14 */ k = 0; hi = x; lo = 0.; @@ -63,11 +70,11 @@ pub fn expf(mut x: f32) -> f32 { force_eval!(x1p127 + x); return 1. + x; } - + /* x is now in primary range */ - let xx = x*x; - let c = x - xx*(P1+xx*P2); - let y = 1. 
+ (x*c/(2.-c) - lo + hi); + let xx = x * x; + let c = x - xx * (P1 + xx * P2); + let y = 1. + (x * c / (2. - c) - lo + hi); if k == 0 { y } else { diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index a5fb17574..997865d39 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -1,27 +1,27 @@ use core::f64; -const TOINT : f64 = 1. / f64::EPSILON; +const TOINT: f64 = 1. / f64::EPSILON; #[inline] -pub fn floor(x : f64) -> f64 { +pub fn floor(x: f64) -> f64 { let ui = x.to_bits(); - let e = ((ui >> 52) & 0x7ff) as i32; + let e = ((ui >> 52) & 0x7ff) as i32; - if (e >= 0x3ff+52) || (x == 0.) { - return x; + if (e >= 0x3ff + 52) || (x == 0.) { + return x; } - /* y = int(x) - x, where int(x) is an integer neighbor of x */ - let y = if (ui >> 63) != 0 { - x - TOINT + TOINT - x - } else { - x + TOINT - TOINT - x + /* y = int(x) - x, where int(x) is an integer neighbor of x */ + let y = if (ui >> 63) != 0 { + x - TOINT + TOINT - x + } else { + x + TOINT - TOINT - x }; - /* special case because of non-nearest rounding modes */ - if e <= 0x3ff-1 { - force_eval!(y); - return if (ui >> 63) != 0 { -1. } else { 0. }; - } - if y > 0. { + /* special case because of non-nearest rounding modes */ + if e <= 0x3ff - 1 { + force_eval!(y); + return if (ui >> 63) != 0 { -1. } else { 0. }; + } + if y > 0. { x + y - 1. } else { x + y diff --git a/libm/src/math/hypot.rs b/libm/src/math/hypot.rs index dcc17d914..7ad1baf79 100644 --- a/libm/src/math/hypot.rs +++ b/libm/src/math/hypot.rs @@ -12,8 +12,8 @@ fn sq(x: f64) -> (f64, f64) { xc = x * SPLIT; xh = x - xc + xc; xl = x - xh; - let hi = x*x; - let lo = xh*xh - hi + 2.*xh*xl + xl*xl; + let hi = x * x; + let lo = xh * xh - hi + 2. 
* xh * xl + xl * xl; (hi, lo) } @@ -39,8 +39,8 @@ pub fn hypot(mut x: f64, mut y: f64) -> f64 { } /* special cases */ - ex = (uxi>>52) as i64; - ey = (uyi>>52) as i64; + ex = (uxi >> 52) as i64; + ey = (uyi >> 52) as i64; x = f64::from_bits(uxi); y = f64::from_bits(uyi); /* note: hypot(inf,nan) == inf */ @@ -59,16 +59,16 @@ pub fn hypot(mut x: f64, mut y: f64) -> f64 { /* precise sqrt argument in nearest rounding mode without overflow */ /* xh*xh must not overflow and xl*xl must not underflow in sq */ z = 1.; - if ex > 0x3ff+510 { + if ex > 0x3ff + 510 { z = x1p700; x *= x1p_700; y *= x1p_700; - } else if ey < 0x3ff-450 { + } else if ey < 0x3ff - 450 { z = x1p_700; x *= x1p700; y *= x1p700; } let (hx, lx) = sq(x); let (hy, ly) = sq(y); - return z*sqrt(ly+lx+hy+hx); + return z * sqrt(ly + lx + hy + hx); } diff --git a/libm/src/math/hypotf.rs b/libm/src/math/hypotf.rs index 146aab4e4..d59710ada 100644 --- a/libm/src/math/hypotf.rs +++ b/libm/src/math/hypotf.rs @@ -22,22 +22,22 @@ pub fn hypotf(mut x: f32, mut y: f32) -> f32 { x = f32::from_bits(uxi); y = f32::from_bits(uyi); - if uyi == 0xff<<23 { + if uyi == 0xff << 23 { return y; } - if uxi >= 0xff<<23 || uyi == 0 || uxi - uyi >= 25<<23 { + if uxi >= 0xff << 23 || uyi == 0 || uxi - uyi >= 25 << 23 { return x + y; } z = 1.; - if uxi >= (0x7f+60)<<23 { + if uxi >= (0x7f + 60) << 23 { z = x1p90; x *= x1p_90; y *= x1p_90; - } else if uyi < (0x7f-60)<<23 { + } else if uyi < (0x7f - 60) << 23 { z = x1p_90; x *= x1p90; y *= x1p90; } - z*sqrtf((x as f64 * x as f64 + y as f64 * y as f64) as f32) + z * sqrtf((x as f64 * x as f64 + y as f64 * y as f64) as f32) } diff --git a/libm/src/math/logf.rs b/libm/src/math/logf.rs index 76b4ede19..78c5e94ad 100644 --- a/libm/src/math/logf.rs +++ b/libm/src/math/logf.rs @@ -1,24 +1,25 @@ -const LN2_HI : f32 = 6.9313812256e-01; /* 0x3f317180 */ -const LN2_LO : f32 = 9.0580006145e-06; /* 0x3717f7d1 */ +const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */ +const LN2_LO: f32 = 
9.0580006145e-06; /* 0x3717f7d1 */ /* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ -const LG1 : f32 = 0.66666662693; /* 0xaaaaaa.0p-24*/ -const LG2 : f32 = 0.40000972152; /* 0xccce13.0p-25 */ -const LG3 : f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ -const LG4 : f32 = 0.24279078841; /* 0xf89e26.0p-26 */ +const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24*/ +const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ +const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ +const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ #[inline] pub fn logf(mut x: f32) -> f32 { let x1p25 = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 - + let mut ix = x.to_bits(); let mut k = 0i32; - - if (ix < 0x00800000) || ((ix>>31) != 0) { /* x < 2**-126 */ - if ix<<1 == 0 { - return -1./(x*x); /* log(+-0)=-inf */ + + if (ix < 0x00800000) || ((ix >> 31) != 0) { + /* x < 2**-126 */ + if ix << 1 == 0 { + return -1. / (x * x); /* log(+-0)=-inf */ } - if (ix>>31) != 0 { - return (x-x)/0.; /* log(-#) = NaN */ + if (ix >> 31) != 0 { + return (x - x) / 0.; /* log(-#) = NaN */ } /* subnormal number, scale up x */ k -= 25; @@ -32,18 +33,18 @@ pub fn logf(mut x: f32) -> f32 { /* reduce x into [sqrt(2)/2, sqrt(2)] */ ix += 0x3f800000 - 0x3f3504f3; - k += ((ix>>23) as i32) - 0x7f; + k += ((ix >> 23) as i32) - 0x7f; ix = (ix & 0x007fffff) + 0x3f3504f3; - x = f32::from_bits(ix); + x = f32::from_bits(ix); let f = x - 1.; - let s = f/(2. + f); - let z = s*s; - let w = z*z; - let t1 = w*(LG2+w*LG4); - let t2 = z*(LG1+w*LG3); + let s = f / (2. 
+ f); + let z = s * s; + let w = z * z; + let t1 = w * (LG2 + w * LG4); + let t2 = z * (LG1 + w * LG3); let r = t2 + t1; - let hfsq = 0.5*f*f; + let hfsq = 0.5 * f * f; let dk = k as f32; - s*(hfsq+r) + dk*LN2_LO - hfsq + f + dk*LN2_HI + s * (hfsq + r) + dk * LN2_LO - hfsq + f + dk * LN2_HI } diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index d0121048d..c43199f82 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -7,10 +7,15 @@ macro_rules! force_eval { } mod ceilf; +mod expf; mod fabs; mod fabsf; +mod floor; mod floorf; mod fmodf; +mod hypot; +mod hypotf; +mod logf; mod powf; mod round; mod roundf; @@ -18,36 +23,15 @@ mod scalbn; mod scalbnf; mod sqrt; mod sqrtf; -mod logf; -mod expf; -mod floor; mod trunc; mod truncf; -mod hypot; -mod hypotf; //mod service; pub use self::{ - ceilf::ceilf, - fabs::fabs, - fabsf::fabsf, - floorf::floorf, - fmodf::fmodf, - powf::powf, - round::round, - roundf::roundf, - scalbn::scalbn, - scalbnf::scalbnf, - sqrt::sqrt, - sqrtf::sqrtf, - logf::logf, - expf::expf, - floor::floor, - trunc::trunc, - truncf::truncf, - hypot::hypot, - hypotf::hypotf, + ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, fmodf::fmodf, + hypot::hypot, hypotf::hypotf, logf::logf, powf::powf, round::round, roundf::roundf, + scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, truncf::truncf, }; fn isnanf(x: f32) -> bool { diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs index 2a9f67c1d..73d431c51 100644 --- a/libm/src/math/round.rs +++ b/libm/src/math/round.rs @@ -26,7 +26,7 @@ pub fn round(mut x: f64) -> f64 { } else { y = y + x; } - + if i >> 63 != 0 { -y } else { diff --git a/libm/src/math/scalbn.rs b/libm/src/math/scalbn.rs index 76e06f03e..ad81072dd 100644 --- a/libm/src/math/scalbn.rs +++ b/libm/src/math/scalbn.rs @@ -1,11 +1,11 @@ #[inline] -pub fn scalbn(x : f64, mut n: i32) -> f64 { - let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 - let 
x1p53 = f64::from_bits(0x4340000000000000); // 0x1p53 === 2 ^ 53 +pub fn scalbn(x: f64, mut n: i32) -> f64 { + let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 + let x1p53 = f64::from_bits(0x4340000000000000); // 0x1p53 === 2 ^ 53 let x1p_1022 = f64::from_bits(0x0010000000000000); // 0x1p-1022 === 2 ^ (-1022) - + let mut y = x; - + if n > 1023 { y *= x1p1023; n -= 1023; @@ -29,5 +29,5 @@ pub fn scalbn(x : f64, mut n: i32) -> f64 { } } } - y*f64::from_bits(((0x3ff+n) as u64)<<52) + y * f64::from_bits(((0x3ff + n) as u64) << 52) } diff --git a/libm/src/math/scalbnf.rs b/libm/src/math/scalbnf.rs index 31f93d323..901497e5e 100644 --- a/libm/src/math/scalbnf.rs +++ b/libm/src/math/scalbnf.rs @@ -1,9 +1,9 @@ #[inline] -pub fn scalbnf(mut x: f32, mut n : i32) -> f32 { - let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 - let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 - let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24 - +pub fn scalbnf(mut x: f32, mut n: i32) -> f32 { + let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 + let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 + let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24 + if n > 127 { x *= x1p127; n -= 127; @@ -25,5 +25,5 @@ pub fn scalbnf(mut x: f32, mut n : i32) -> f32 { } } } - x * f32::from_bits(((0x7f+n) as u32)<<23) + x * f32::from_bits(((0x7f + n) as u32) << 23) } diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 49fc58fff..17de5a2e0 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -22,60 +22,62 @@ pub fn sqrt(x: f64) -> f64 { ix1 = x.to_bits() as u32; /* take care of Inf and NaN */ - if (ix0&0x7ff00000) == 0x7ff00000 { - return x*x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + if (ix0 & 0x7ff00000) == 0x7ff00000 { + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ } /* take care of zero */ if ix0 <= 0 { - if ((ix0&!(sign as 
i32))|ix1 as i32) == 0 { - return x; /* sqrt(+-0) = +-0 */ + if ((ix0 & !(sign as i32)) | ix1 as i32) == 0 { + return x; /* sqrt(+-0) = +-0 */ } if ix0 < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ } } /* normalize x */ - m = ix0>>20; - if m == 0 { /* subnormal x */ + m = ix0 >> 20; + if m == 0 { + /* subnormal x */ while ix0 == 0 { m -= 21; - ix0 |= (ix1>>11) as i32; + ix0 |= (ix1 >> 11) as i32; ix1 <<= 21; } - i=0; - while (ix0&0x00100000) == 0 { + i = 0; + while (ix0 & 0x00100000) == 0 { i += 1; ix0 <<= 1; } m -= i - 1; - ix0 |= (ix1>>(32-i)) as i32; + ix0 |= (ix1 >> (32 - i)) as i32; ix1 <<= i; } - m -= 1023; /* unbias exponent */ - ix0 = (ix0&0x000fffff)|0x00100000; - if (m & 1) == 1 { /* odd m, double x to make it even */ - ix0 += ix0 + ((ix1&sign)>>31) as i32; + m -= 1023; /* unbias exponent */ + ix0 = (ix0 & 0x000fffff) | 0x00100000; + if (m & 1) == 1 { + /* odd m, double x to make it even */ + ix0 += ix0 + ((ix1 & sign) >> 31) as i32; ix1 += ix1; } - m >>= 1; /* m = [m/2] */ + m >>= 1; /* m = [m/2] */ /* generate sqrt(x) bit by bit */ - ix0 += ix0 + ((ix1&sign)>>31) as i32; + ix0 += ix0 + ((ix1 & sign) >> 31) as i32; ix1 += ix1; q = 0; /* [q,q1] = sqrt(x) */ q1 = 0; s0 = 0; s1 = 0; - r = 0x00200000; /* r = moving bit from right to left */ + r = 0x00200000; /* r = moving bit from right to left */ while r != 0 { t = s0 + r as i32; if t <= ix0 { - s0 = t + r as i32; + s0 = t + r as i32; ix0 -= t; - q += r as i32; + q += r as i32; } - ix0 += ix0 + ((ix1&sign)>>31) as i32; + ix0 += ix0 + ((ix1 & sign) >> 31) as i32; ix1 += ix1; r >>= 1; } @@ -83,10 +85,10 @@ pub fn sqrt(x: f64) -> f64 { r = sign; while r != 0 { t1 = s1 + r; - t = s0; + t = s0; if t < ix0 || (t == ix0 && t1 <= ix1) { s1 = t1 + r; - if (t1&sign) == sign && (s1&sign) == 0 { + if (t1 & sign) == sign && (s1 & sign) == 0 { s0 += 1; } ix0 -= t; @@ -96,19 +98,19 @@ pub fn sqrt(x: f64) -> f64 { ix1 -= t1; q1 += r; } - ix0 += ix0 + 
((ix1&sign)>>31) as i32; + ix0 += ix0 + ((ix1 & sign) >> 31) as i32; ix1 += ix1; r >>= 1; } /* use floating add to find out rounding direction */ - if (ix0 as u32|ix1) != 0 { + if (ix0 as u32 | ix1) != 0 { z = 1.0 - TINY; /* raise inexact flag */ if z >= 1.0 { z = 1.0 + TINY; if q1 == 0xffffffff { q1 = 0; - q+=1; + q += 1; } else if z > 1.0 { if q1 == 0xfffffffe { q += 1; @@ -119,9 +121,9 @@ pub fn sqrt(x: f64) -> f64 { } } } - ix0 = (q>>1) + 0x3fe00000; - ix1 = q1>>1; - if (q&1) == 1 { + ix0 = (q >> 1) + 0x3fe00000; + ix1 = q1 >> 1; + if (q & 1) == 1 { ix1 |= sign; } ix0 += m << 20; From a68185a2f3fddc6862b9035f175049feb808dbd7 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 13 Jul 2018 21:53:01 -0500 Subject: [PATCH 0039/1459] force PR code to be formatted --- libm/.travis.yml | 2 ++ libm/ci/install.sh | 2 ++ libm/ci/script.sh | 6 ++---- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/libm/.travis.yml b/libm/.travis.yml index 0c0832793..ae03b6601 100644 --- a/libm/.travis.yml +++ b/libm/.travis.yml @@ -22,6 +22,8 @@ install: - bash ci/install.sh script: + # code must be already formatted + - cargo fmt -- --check - bash ci/script.sh after_script: set +e diff --git a/libm/ci/install.sh b/libm/ci/install.sh index efdbb06c2..68dd0f337 100644 --- a/libm/ci/install.sh +++ b/libm/ci/install.sh @@ -5,6 +5,8 @@ main() { cargo install cross fi + rustup component add rustfmt-preview + rustup target add x86_64-unknown-linux-musl if [ $TARGET != x86_64-unknown-linux-gnu ]; then diff --git a/libm/ci/script.sh b/libm/ci/script.sh index 3db6bfeb1..d29003052 100644 --- a/libm/ci/script.sh +++ b/libm/ci/script.sh @@ -10,10 +10,8 @@ main() { # generate tests cargo run --package test-generator --target x86_64-unknown-linux-musl - if cargo fmt --version >/dev/null 2>&1; then - # nicer syntax error messages (if any) - cargo fmt - fi + # nicer syntax error messages (if any) + cargo fmt # run tests cross test --target $TARGET --release From 
fd3f43113a6638651caefee10b52ebd1e6065978 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 13 Jul 2018 22:10:08 -0500 Subject: [PATCH 0040/1459] run cargo fmt -- --check on beta --- libm/.travis.yml | 4 ++-- libm/ci/install.sh | 7 +++++-- libm/ci/script.sh | 5 +++++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/libm/.travis.yml b/libm/.travis.yml index ae03b6601..2bbb60fc6 100644 --- a/libm/.travis.yml +++ b/libm/.travis.yml @@ -15,6 +15,8 @@ matrix: - env: TARGET=powerpc64-unknown-linux-gnu - env: TARGET=powerpc64le-unknown-linux-gnu - env: TARGET=x86_64-unknown-linux-gnu + - env: TARGET=cargo-fmt + rust: beta before_install: set -e @@ -22,8 +24,6 @@ install: - bash ci/install.sh script: - # code must be already formatted - - cargo fmt -- --check - bash ci/script.sh after_script: set +e diff --git a/libm/ci/install.sh b/libm/ci/install.sh index 68dd0f337..4d9552d23 100644 --- a/libm/ci/install.sh +++ b/libm/ci/install.sh @@ -1,12 +1,15 @@ set -euxo pipefail main() { + if [ $TARGET = cargo-fmt ]; then + rustup component add rustfmt-preview + return + fi + if ! 
hash cross >/dev/null 2>&1; then cargo install cross fi - rustup component add rustfmt-preview - rustup target add x86_64-unknown-linux-musl if [ $TARGET != x86_64-unknown-linux-gnu ]; then diff --git a/libm/ci/script.sh b/libm/ci/script.sh index d29003052..a4f1b324d 100644 --- a/libm/ci/script.sh +++ b/libm/ci/script.sh @@ -1,6 +1,11 @@ set -euxo pipefail main() { + if [ $TARGET = cargo-fmt ]; then + cargo fmt -- --check + return + fi + # quick check cargo check From 47bb6c3a81d120c87905b0dcad234e1e927835fb Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 13 Jul 2018 22:15:21 -0500 Subject: [PATCH 0041/1459] make cargo fmt in ci/script.sh optional --- libm/ci/script.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libm/ci/script.sh b/libm/ci/script.sh index a4f1b324d..f2a294b48 100644 --- a/libm/ci/script.sh +++ b/libm/ci/script.sh @@ -15,8 +15,10 @@ main() { # generate tests cargo run --package test-generator --target x86_64-unknown-linux-musl - # nicer syntax error messages (if any) - cargo fmt + if cargo fmt --version >/dev/null 2>&1; then + # nicer syntax error messages (if any) + cargo fmt + fi # run tests cross test --target $TARGET --release From 2b8ce0d4a0e4a7499301311aa4a1eb86d5f5561b Mon Sep 17 00:00:00 2001 From: Erik Date: Fri, 13 Jul 2018 23:40:57 -0400 Subject: [PATCH 0042/1459] implement log2 and log2f --- libm/src/lib.rs | 4 -- libm/src/math/log2.rs | 86 +++++++++++++++++++++++++++++++++ libm/src/math/log2f.rs | 71 +++++++++++++++++++++++++++ libm/src/math/mod.rs | 7 ++- libm/test-generator/src/main.rs | 4 +- 5 files changed, 164 insertions(+), 8 deletions(-) create mode 100644 libm/src/math/log2.rs create mode 100644 libm/src/math/log2f.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 571cf365c..0194722af 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -75,7 +75,6 @@ pub trait F32Ext: private::Sealed { fn log(self, base: Self) -> Self; - #[cfg(todo)] fn log2(self) -> Self; #[cfg(todo)] @@ -228,7 
+227,6 @@ impl F32Ext for f32 { self.ln() / base.ln() } - #[cfg(todo)] #[inline] fn log2(self) -> Self { log2f(self) @@ -399,7 +397,6 @@ pub trait F64Ext: private::Sealed { #[cfg(todo)] fn log(self, base: Self) -> Self; - #[cfg(todo)] fn log2(self) -> Self; #[cfg(todo)] @@ -557,7 +554,6 @@ impl F64Ext for f64 { self.ln() / base.ln() } - #[cfg(todo)] #[inline] fn log2(self) -> Self { log2(self) diff --git a/libm/src/math/log2.rs b/libm/src/math/log2.rs new file mode 100644 index 000000000..c0d3263e3 --- /dev/null +++ b/libm/src/math/log2.rs @@ -0,0 +1,86 @@ +use core::f64; + +const IVLN2HI: f64 = 1.44269504072144627571e+00; /* 0x3ff71547, 0x65200000 */ +const IVLN2LO: f64 = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */ +const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ +const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ +const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ +const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ +const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ +const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ +const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ + +pub fn log2(mut x: f64) -> f64 { + let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 + + let mut ui: u64 = x.to_bits(); + let hfsq: f64; + let f: f64; + let s: f64; + let z: f64; + let r: f64; + let mut w: f64; + let t1: f64; + let t2: f64; + let y: f64; + let mut hi: f64; + let lo: f64; + let mut val_hi: f64; + let mut val_lo: f64; + let mut hx: u32; + let mut k: i32; + + hx = (ui >> 32) as u32; + k = 0; + if hx < 0x00100000 || (hx >> 31) > 0 { + if ui << 1 == 0 { + return -1. 
/ (x * x); /* log(+-0)=-inf */ + } + if (hx >> 31) > 0 { + return (x - x) / 0.0; /* log(-#) = NaN */ + } + /* subnormal number, scale x up */ + k -= 54; + x *= x1p54; + ui = x.to_bits(); + hx = (ui >> 32) as u32; + } else if hx >= 0x7ff00000 { + return x; + } else if hx == 0x3ff00000 && ui << 32 == 0 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + hx += 0x3ff00000 - 0x3fe6a09e; + k += (hx >> 20) as i32 - 0x3ff; + hx = (hx & 0x000fffff) + 0x3fe6a09e; + ui = (hx as u64) << 32 | (ui & 0xffffffff); + x = f64::from_bits(ui); + + f = x - 1.0; + hfsq = 0.5 * f * f; + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * (LG4 + w * LG6)); + t2 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); + r = t2 + t1; + + /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */ + hi = f - hfsq; + ui = hi.to_bits(); + ui &= (-1i64 as u64) << 32; + hi = f64::from_bits(ui); + lo = f - hi - hfsq + s * (hfsq + r); + + val_hi = hi * IVLN2HI; + val_lo = (lo + hi) * IVLN2LO + lo * IVLN2HI; + + /* spadd(val_hi, val_lo, y), except for not using double_t: */ + y = k.into(); + w = y + val_hi; + val_lo += (y - w) + val_hi; + val_hi = w; + + return val_lo + val_hi; +} diff --git a/libm/src/math/log2f.rs b/libm/src/math/log2f.rs new file mode 100644 index 000000000..47a917bdb --- /dev/null +++ b/libm/src/math/log2f.rs @@ -0,0 +1,71 @@ +use core::f32; + +const IVLN2HI: f32 = 1.4428710938e+00; /* 0x3fb8b000 */ +const IVLN2LO: f32 = -1.7605285393e-04; /* 0xb9389ad4 */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). 
*/ +const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24 */ +const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ +const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ +const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ + +pub fn log2f(mut x: f32) -> f32 { + let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 + + let mut ui: u32 = x.to_bits(); + let hfsq: f32; + let f: f32; + let s: f32; + let z: f32; + let r: f32; + let w: f32; + let t1: f32; + let t2: f32; + let mut hi: f32; + let lo: f32; + let mut ix: u32; + let mut k: i32; + + ix = ui; + k = 0; + if ix < 0x00800000 || (ix >> 31) > 0 { + /* x < 2**-126 */ + if ix << 1 == 0 { + return -1. / (x * x); /* log(+-0)=-inf */ + } + if (ix >> 31) > 0 { + return (x - x) / 0.0; /* log(-#) = NaN */ + } + /* subnormal number, scale up x */ + k -= 25; + x *= x1p25f; + ui = x.to_bits(); + ix = ui; + } else if ix >= 0x7f800000 { + return x; + } else if ix == 0x3f800000 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + ix += 0x3f800000 - 0x3f3504f3; + k += (ix >> 23) as i32 - 0x7f; + ix = (ix & 0x007fffff) + 0x3f3504f3; + ui = ix; + x = f32::from_bits(ui); + + f = x - 1.0; + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * LG4); + t2 = z * (LG1 + w * LG3); + r = t2 + t1; + hfsq = 0.5 * f * f; + + hi = f - hfsq; + ui = hi.to_bits(); + ui &= 0xfffff000; + hi = f32::from_bits(ui); + lo = f - hi - hfsq + s * (hfsq + r); + return (lo + hi) * IVLN2LO + lo * IVLN2HI + hi * IVLN2HI + k as f32; +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index c43199f82..a82c1ba7e 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -15,6 +15,8 @@ mod floorf; mod fmodf; mod hypot; mod hypotf; +mod log2; +mod log2f; mod logf; mod powf; mod round; @@ -30,8 +32,9 @@ mod truncf; pub use self::{ ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, fmodf::fmodf, - hypot::hypot, hypotf::hypotf, logf::logf, powf::powf, round::round, roundf::roundf, - 
scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, truncf::truncf, + hypot::hypot, hypotf::hypotf, log2::log2, log2f::log2f, logf::logf, powf::powf, round::round, + roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, + truncf::truncf, }; fn isnanf(x: f32) -> bool { diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 257232ca6..7ec2292c3 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -664,7 +664,7 @@ f32_f32! { expf, // fdimf, // log10f, - // log2f, + log2f, logf, roundf, // sinf, @@ -709,7 +709,7 @@ f64_f64! { // log, // log10, // log1p, - // log2, + log2, round, // sin, // sinh, From f815e9f1fc4248048fa23f183cfd95212ecf245c Mon Sep 17 00:00:00 2001 From: Joseph Ryan Date: Fri, 13 Jul 2018 23:15:36 -0500 Subject: [PATCH 0043/1459] Implement fmod and tweak fmodf --- libm/src/math/fmod.rs | 80 +++++++++++++++++++++++++++++++++ libm/src/math/fmodf.rs | 4 +- libm/src/math/mod.rs | 36 ++++----------- libm/test-generator/src/main.rs | 2 +- 4 files changed, 91 insertions(+), 31 deletions(-) create mode 100644 libm/src/math/fmod.rs diff --git a/libm/src/math/fmod.rs b/libm/src/math/fmod.rs new file mode 100644 index 000000000..23f0c4846 --- /dev/null +++ b/libm/src/math/fmod.rs @@ -0,0 +1,80 @@ +use core::u64; + +#[inline] +pub fn fmod(x: f64, y: f64) -> f64 { + let mut uxi = x.to_bits(); + let mut uyi = y.to_bits(); + let mut ex = (uxi >> 52 & 0x7ff) as i64; + let mut ey = (uyi >> 52 & 0x7ff) as i64; + let sx = uxi >> 63; + let mut i; + + if uyi << 1 == 0 || y.is_nan() || ex == 0x7ff { + return (x * y) / (x * y); + } + if uxi << 1 <= uyi << 1 { + if uxi << 1 == uyi << 1 { + return 0.0 * x; + } + return x; + } + + /* normalize x and y */ + if ex == 0 { + i = uxi << 12; + while i >> 63 == 0 { + ex -= 1; + i <<= 1; + } + uxi <<= -ex + 1; + } else { + uxi &= u64::MAX >> 12; + uxi |= 1 << 52; + } + if ey == 0 { + i = uyi << 12; + while i >> 63 == 
0 { + ey -= 1; + i <<= 1; + } + uyi <<= -ey + 1; + } else { + uyi &= u64::MAX >> 12; + uyi |= 1 << 52; + } + + /* x mod y */ + while ex > ey { + i = uxi - uyi; + if i >> 63 == 0 { + if i == 0 { + return 0.0 * x; + } + uxi = i; + } + uxi <<= 1; + ex -= 1; + } + i = uxi - uyi; + if i >> 63 == 0 { + if i == 0 { + return 0.0 * x; + } + uxi = i; + } + while uxi >> 52 == 0 { + uxi <<= 1; + ex -= 1; + } + + /* scale result */ + if ex > 0 { + uxi -= 1 << 52; + uxi |= (ex as u64) << 52; + } else { + uxi >>= -ex + 1; + } + uxi |= (sx as u64) << 63; + + f64::from_bits(uxi) +} diff --git a/libm/src/math/fmodf.rs b/libm/src/math/fmodf.rs index 909775249..8d0c2d5c8 100644 --- a/libm/src/math/fmodf.rs +++ b/libm/src/math/fmodf.rs @@ -1,7 +1,5 @@ use core::u32; -use super::isnanf; - #[inline] pub fn fmodf(x: f32, y: f32) -> f32 { let mut uxi = x.to_bits(); @@ -11,7 +9,7 @@ pub fn fmodf(x: f32, y: f32) -> f32 { let sx = uxi & 0x80000000; let mut i; - if uyi << 1 == 0 || isnanf(y) || ex == 0xff { + if uyi << 1 == 0 || y.is_nan() || ex == 0xff { return (x * y) / (x * y); } diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index d0121048d..a81fc174b 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -7,10 +7,16 @@ macro_rules! 
force_eval { } mod ceilf; +mod expf; mod fabs; mod fabsf; +mod floor; mod floorf; +mod fmod; mod fmodf; +mod hypot; +mod hypotf; +mod logf; mod powf; mod round; mod roundf; @@ -18,38 +24,14 @@ mod scalbn; mod scalbnf; mod sqrt; mod sqrtf; -mod logf; -mod expf; -mod floor; mod trunc; mod truncf; -mod hypot; -mod hypotf; //mod service; pub use self::{ - ceilf::ceilf, - fabs::fabs, - fabsf::fabsf, - floorf::floorf, - fmodf::fmodf, - powf::powf, - round::round, - roundf::roundf, - scalbn::scalbn, - scalbnf::scalbnf, - sqrt::sqrt, - sqrtf::sqrtf, - logf::logf, - expf::expf, - floor::floor, - trunc::trunc, + ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, fmod::fmod, + fmodf::fmodf, hypot::hypot, hypotf::hypotf, logf::logf, powf::powf, round::round, + roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, truncf::truncf, - hypot::hypot, - hypotf::hypotf, }; - -fn isnanf(x: f32) -> bool { - x.to_bits() & 0x7fffffff > 0x7f800000 -} diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 257232ca6..ee291027d 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -724,7 +724,7 @@ f64_f64! { f64f64_f64! 
{ // atan2, // fdim, - // fmod, + fmod, hypot, // pow, } From f0c53459d7a28a3310cf010162a67f35ca194139 Mon Sep 17 00:00:00 2001 From: Erik Date: Sat, 14 Jul 2018 00:24:22 -0400 Subject: [PATCH 0044/1459] implement log10 and log10f --- libm/src/lib.rs | 4 -- libm/src/math/log10.rs | 98 +++++++++++++++++++++++++++++++++ libm/src/math/log10f.rs | 76 +++++++++++++++++++++++++ libm/src/math/mod.rs | 8 ++- libm/test-generator/src/main.rs | 4 +- 5 files changed, 181 insertions(+), 9 deletions(-) create mode 100644 libm/src/math/log10.rs create mode 100644 libm/src/math/log10f.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 0194722af..3ed89f729 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -77,7 +77,6 @@ pub trait F32Ext: private::Sealed { fn log2(self) -> Self; - #[cfg(todo)] fn log10(self) -> Self; #[cfg(todo)] @@ -232,7 +231,6 @@ impl F32Ext for f32 { log2f(self) } - #[cfg(todo)] #[inline] fn log10(self) -> Self { log10f(self) @@ -399,7 +397,6 @@ pub trait F64Ext: private::Sealed { fn log2(self) -> Self; - #[cfg(todo)] fn log10(self) -> Self; #[cfg(todo)] @@ -559,7 +556,6 @@ impl F64Ext for f64 { log2(self) } - #[cfg(todo)] #[inline] fn log10(self) -> Self { log10(self) diff --git a/libm/src/math/log10.rs b/libm/src/math/log10.rs new file mode 100644 index 000000000..137d8966f --- /dev/null +++ b/libm/src/math/log10.rs @@ -0,0 +1,98 @@ +use core::f64; + +const IVLN10HI: f64 = 4.34294481878168880939e-01; /* 0x3fdbcb7b, 0x15200000 */ +const IVLN10LO: f64 = 2.50829467116452752298e-11; /* 0x3dbb9438, 0xca9aadd5 */ +const LOG10_2HI: f64 = 3.01029995663611771306e-01; /* 0x3FD34413, 0x509F6000 */ +const LOG10_2LO: f64 = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ +const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ +const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ +const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ +const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ +const LG5: f64 = 
1.818357216161805012e-01; /* 3FC74664 96CB03DE */ +const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ +const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ + +#[inline] +pub fn log10(mut x: f64) -> f64 { + let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 + + let mut ui: u64 = x.to_bits(); + let hfsq: f64; + let f: f64; + let s: f64; + let z: f64; + let r: f64; + let mut w: f64; + let t1: f64; + let t2: f64; + let dk: f64; + let y: f64; + let mut hi: f64; + let lo: f64; + let mut val_hi: f64; + let mut val_lo: f64; + let mut hx: u32; + let mut k: i32; + + hx = (ui >> 32) as u32; + k = 0; + if hx < 0x00100000 || (hx >> 31) > 0 { + if ui << 1 == 0 { + return -1. / (x * x); /* log(+-0)=-inf */ + } + if (hx >> 31) > 0 { + return (x - x) / 0.0; /* log(-#) = NaN */ + } + /* subnormal number, scale x up */ + k -= 54; + x *= x1p54; + ui = x.to_bits(); + hx = (ui >> 32) as u32; + } else if hx >= 0x7ff00000 { + return x; + } else if hx == 0x3ff00000 && ui << 32 == 0 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + hx += 0x3ff00000 - 0x3fe6a09e; + k += (hx >> 20) as i32 - 0x3ff; + hx = (hx & 0x000fffff) + 0x3fe6a09e; + ui = (hx as u64) << 32 | (ui & 0xffffffff); + x = f64::from_bits(ui); + + f = x - 1.0; + hfsq = 0.5 * f * f; + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * (LG4 + w * LG6)); + t2 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); + r = t2 + t1; + + /* See log2.c for details. 
*/ + /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */ + hi = f - hfsq; + ui = hi.to_bits(); + ui &= (-1i64 as u64) << 32; + hi = f64::from_bits(ui); + lo = f - hi - hfsq + s * (hfsq + r); + + /* val_hi+val_lo ~ log10(1+f) + k*log10(2) */ + val_hi = hi * IVLN10HI; + dk = k as f64; + y = dk * LOG10_2HI; + val_lo = dk * LOG10_2LO + (lo + hi) * IVLN10LO + lo * IVLN10HI; + + /* + * Extra precision in for adding y is not strictly needed + * since there is no very large cancellation near x = sqrt(2) or + * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs + * with some parallelism and it reduces the error for many args. + */ + w = y + val_hi; + val_lo += (y - w) + val_hi; + val_hi = w; + + return val_lo + val_hi; +} diff --git a/libm/src/math/log10f.rs b/libm/src/math/log10f.rs new file mode 100644 index 000000000..58db09344 --- /dev/null +++ b/libm/src/math/log10f.rs @@ -0,0 +1,76 @@ +use core::f32; + +const IVLN10HI: f32 = 4.3432617188e-01; /* 0x3ede6000 */ +const IVLN10LO: f32 = -3.1689971365e-05; /* 0xb804ead9 */ +const LOG10_2HI: f32 = 3.0102920532e-01; /* 0x3e9a2080 */ +const LOG10_2LO: f32 = 7.9034151668e-07; /* 0x355427db */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24 */ +const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ +const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ +const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ + +#[inline] +pub fn log10f(mut x: f32) -> f32 { + let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 + + let mut ui: u32 = x.to_bits(); + let hfsq: f32; + let f: f32; + let s: f32; + let z: f32; + let r: f32; + let w: f32; + let t1: f32; + let t2: f32; + let dk: f32; + let mut hi: f32; + let lo: f32; + let mut ix: u32; + let mut k: i32; + + ix = ui; + k = 0; + if ix < 0x00800000 || (ix >> 31) > 0 { + /* x < 2**-126 */ + if ix << 1 == 0 { + return -1. 
/ (x * x); /* log(+-0)=-inf */ + } + if (ix >> 31) > 0 { + return (x - x) / 0.0; /* log(-#) = NaN */ + } + /* subnormal number, scale up x */ + k -= 25; + x *= x1p25f; + ui = x.to_bits(); + ix = ui; + } else if ix >= 0x7f800000 { + return x; + } else if ix == 0x3f800000 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + ix += 0x3f800000 - 0x3f3504f3; + k += (ix >> 23) as i32 - 0x7f; + ix = (ix & 0x007fffff) + 0x3f3504f3; + ui = ix; + x = f32::from_bits(ui); + + f = x - 1.0; + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * LG4); + t2 = z * (LG1 + w * LG3); + r = t2 + t1; + hfsq = 0.5 * f * f; + + hi = f - hfsq; + ui = hi.to_bits(); + ui &= 0xfffff000; + hi = f32::from_bits(ui); + lo = f - hi - hfsq + s * (hfsq + r); + dk = k as f32; + return dk * LOG10_2LO + (lo + hi) * IVLN10LO + lo * IVLN10HI + hi * IVLN10HI + dk * LOG10_2HI; +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index a82c1ba7e..828eef859 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -15,6 +15,8 @@ mod floorf; mod fmodf; mod hypot; mod hypotf; +mod log10; +mod log10f; mod log2; mod log2f; mod logf; @@ -32,9 +34,9 @@ mod truncf; pub use self::{ ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, fmodf::fmodf, - hypot::hypot, hypotf::hypotf, log2::log2, log2f::log2f, logf::logf, powf::powf, round::round, - roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, - truncf::truncf, + hypot::hypot, hypotf::hypotf, log10::log10, log10f::log10f, log2::log2, log2f::log2f, + logf::logf, powf::powf, round::round, roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, + sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, truncf::truncf, }; fn isnanf(x: f32) -> bool { diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 7ec2292c3..799a354a9 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -663,7 +663,7 @@ f32_f32! 
{ // exp2f, expf, // fdimf, - // log10f, + log10f, log2f, logf, roundf, @@ -707,7 +707,7 @@ f64_f64! { // expm1, floor, // log, - // log10, + log10, // log1p, log2, round, From d2e80bddad0874ec54353ac02b3829b344f1e2a0 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 13 Jul 2018 23:34:37 -0500 Subject: [PATCH 0045/1459] update CONTRIBUTING --- libm/CONTRIBUTING.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libm/CONTRIBUTING.md b/libm/CONTRIBUTING.md index 1f0a05a3f..6f8e984f3 100644 --- a/libm/CONTRIBUTING.md +++ b/libm/CONTRIBUTING.md @@ -10,7 +10,8 @@ can't, no problem! Your PR will be fully tested automatically. Though you may still want to add and run some unit tests. See the bottom of [`src/math/truncf.rs`] for an example of such tests; you can run unit tests with the `cargo test --lib` command. -- Send us a pull request! +- Send us a pull request! Make sure to run `cargo fmt` on your code before sending the PR. Also + include "closes #42" in the PR description to close the corresponding issue. 
- :tada: [issue tracker]: https://github.com/japaric/libm/issues From 387f08f93055ad7af2ce3234c05aa547b2594b06 Mon Sep 17 00:00:00 2001 From: Joseph Ryan Date: Fri, 13 Jul 2018 23:56:00 -0500 Subject: [PATCH 0046/1459] Fix log2 --- libm/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 0194722af..735f8162e 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -229,7 +229,7 @@ impl F32Ext for f32 { #[inline] fn log2(self) -> Self { - log2f(self) + self.log2f(self) } #[cfg(todo)] @@ -556,7 +556,7 @@ impl F64Ext for f64 { #[inline] fn log2(self) -> Self { - log2(self) + self.log2(self) } #[cfg(todo)] From a0ac602aa24aea7d034e95bc2bec2df9b2396f59 Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Sat, 14 Jul 2018 08:13:35 +0300 Subject: [PATCH 0047/1459] fix bug in rem_pio2_large --- libm/src/math/cosf.rs | 80 +++++++++++++-------------- libm/src/math/{cosdf.rs => k_cosf.rs} | 2 +- libm/src/math/{sindf.rs => k_sinf.rs} | 2 +- libm/src/math/mod.rs | 8 +-- libm/src/math/rem_pio2_large.rs | 8 +-- 5 files changed, 50 insertions(+), 50 deletions(-) rename libm/src/math/{cosdf.rs => k_cosf.rs} (92%) rename libm/src/math/{sindf.rs => k_sinf.rs} (92%) diff --git a/libm/src/math/cosf.rs b/libm/src/math/cosf.rs index f63724e79..ef46d4a15 100644 --- a/libm/src/math/cosf.rs +++ b/libm/src/math/cosf.rs @@ -1,4 +1,4 @@ -use super::{cosdf, sindf, rem_pio2f}; +use super::{k_cosf, k_sinf, rem_pio2f}; use core::f64::consts::FRAC_PI_2; @@ -14,52 +14,52 @@ pub fn cosf(x: f32) -> f32 { let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 - let mut ix = x.to_bits(); - let sign = (ix >> 31) != 0; - ix &= 0x7fffffff; + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; - if ix <= 0x3f490fda { /* |x| ~<= pi/4 */ - if ix < 0x39800000 { /* |x| < 2**-12 */ - /* raise inexact if x != 0 */ - force_eval!(x + x1p120); - return 1.; - } - return cosdf(x64); - } - if ix <= 0x407b53d1 { /* |x| ~<= 
5*pi/4 */ - if ix > 0x4016cbe3 { /* |x| ~> 3*pi/4 */ - return -cosdf(if sign { x64+C2_PIO2 } else { x64-C2_PIO2 }); + if ix <= 0x3f490fda { /* |x| ~<= pi/4 */ + if ix < 0x39800000 { /* |x| < 2**-12 */ + /* raise inexact if x != 0 */ + force_eval!(x + x1p120); + return 1.; + } + return k_cosf(x64); + } + if ix <= 0x407b53d1 { /* |x| ~<= 5*pi/4 */ + if ix > 0x4016cbe3 { /* |x| ~> 3*pi/4 */ + return -k_cosf(if sign { x64+C2_PIO2 } else { x64-C2_PIO2 }); } else { - if sign { - return sindf(x64 + C1_PIO2); - } else { - return sindf(C1_PIO2 - x64); + if sign { + return k_sinf(x64 + C1_PIO2); + } else { + return k_sinf(C1_PIO2 - x64); } - } - } - if ix <= 0x40e231d5 { /* |x| ~<= 9*pi/4 */ - if ix > 0x40afeddf { /* |x| ~> 7*pi/4 */ - return cosdf(if sign { x64+C4_PIO2 } else { x64-C4_PIO2 }); - } else { - if sign { - return sindf(-x64 - C3_PIO2); - } else { - return sindf(x64 - C3_PIO2); + } + } + if ix <= 0x40e231d5 { /* |x| ~<= 9*pi/4 */ + if ix > 0x40afeddf { /* |x| ~> 7*pi/4 */ + return k_cosf(if sign { x64+C4_PIO2 } else { x64-C4_PIO2 }); + } else { + if sign { + return k_sinf(-x64 - C3_PIO2); + } else { + return k_sinf(x64 - C3_PIO2); } - } - } + } + } - /* cos(Inf or NaN) is NaN */ - if ix >= 0x7f800000 { - return x-x; + /* cos(Inf or NaN) is NaN */ + if ix >= 0x7f800000 { + return x-x; } - /* general argument reduction needed */ - let (n, y) = rem_pio2f(x); + /* general argument reduction needed */ + let (n, y) = rem_pio2f(x); match n&3 { - 0 => { cosdf( y) }, - 1 => { sindf(-y) }, - 2 => { -cosdf( y) }, - _ => { sindf( y) }, + 0 => { k_cosf( y) }, + 1 => { k_sinf(-y) }, + 2 => { -k_cosf( y) }, + _ => { k_sinf( y) }, } } diff --git a/libm/src/math/cosdf.rs b/libm/src/math/k_cosf.rs similarity index 92% rename from libm/src/math/cosdf.rs rename to libm/src/math/k_cosf.rs index 6c5e9d349..5d1ede7af 100644 --- a/libm/src/math/cosdf.rs +++ b/libm/src/math/k_cosf.rs @@ -5,7 +5,7 @@ const C2 : f64 = -0.00138867637746099294692; /* -0x16c087e80f1e27.0p-62 */ const C3 : f64 
= 0.0000243904487962774090654; /* 0x199342e0ee5069.0p-68 */ #[inline] -pub(crate) fn cosdf(x : f64) -> f32 { +pub(crate) fn k_cosf(x : f64) -> f32 { let z = x*x; let w = z*z; let r = C2+z*C3; diff --git a/libm/src/math/sindf.rs b/libm/src/math/k_sinf.rs similarity index 92% rename from libm/src/math/sindf.rs rename to libm/src/math/k_sinf.rs index a633545ba..5eedab3aa 100644 --- a/libm/src/math/sindf.rs +++ b/libm/src/math/k_sinf.rs @@ -5,7 +5,7 @@ const S3 : f64 = -0.000198393348360966317347; /* -0x1a00f9e2cae774.0p-65 */ const S4 : f64 = 0.0000027183114939898219064; /* 0x16cd878c3b46a7.0p-71 */ #[inline] -pub(crate) fn sindf(x : f64) -> f32 { +pub(crate) fn k_sinf(x : f64) -> f32 { let z = x*x; let w = z*z; let r = S3 + z*S4; diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 4900bf94c..d844ee153 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -36,14 +36,14 @@ pub use self::{ truncf::truncf, }; -mod sindf; -mod cosdf; +mod k_cosf; +mod k_sinf; mod rem_pio2f; mod rem_pio2_large; use self::{ - sindf::sindf, - cosdf::cosdf, + k_cosf::k_cosf, + k_sinf::k_sinf, rem_pio2f::rem_pio2f, rem_pio2_large::rem_pio2_large, }; diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index 18dc721c6..b5fa7e0af 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -1,5 +1,5 @@ -use super::scalbn; -use super::floor; +use math::scalbn; +use math::floor; // initial value for jk const INIT_JK : [usize; 4] = [3,4,4,6]; @@ -263,7 +263,7 @@ pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) let mut fw : f64; let mut n : i32; let mut ih : i32; - let mut z = 0f64; + let mut z : f64; let mut f : [f64;20] = [0.;20]; let mut fq : [f64;20] = [0.;20]; let mut q : [f64;20] = [0.;20]; @@ -308,7 +308,7 @@ pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) 'recompute: loop { /* distill q[] into iq[] reversingly */ let mut i = 0i32; - let mut z = q[jz]; + z = 
q[jz]; for j in (1..=jz).rev() { fw = (x1p_24*z) as i32 as f64; iq[i as usize] = (z - x1p24*fw) as i32; From 39a04bca314bf91cb982f92379a2e307f5fc6e31 Mon Sep 17 00:00:00 2001 From: Joseph Ryan Date: Sat, 14 Jul 2018 00:15:24 -0500 Subject: [PATCH 0048/1459] Revert log2 breakage --- libm/src/lib.rs | 4 ++-- libm/src/math/mod.rs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 735f8162e..0194722af 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -229,7 +229,7 @@ impl F32Ext for f32 { #[inline] fn log2(self) -> Self { - self.log2f(self) + log2f(self) } #[cfg(todo)] @@ -556,7 +556,7 @@ impl F64Ext for f64 { #[inline] fn log2(self) -> Self { - self.log2(self) + log2(self) } #[cfg(todo)] diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index e75429df5..56488c438 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -33,7 +33,7 @@ mod truncf; pub use self::{ ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, fmod::fmod, - fmodf::fmodf, hypot::hypot, hypotf::hypotf, logf::logf, powf::powf, round::round, - roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, - truncf::truncf, + fmodf::fmodf, hypot::hypot, hypotf::hypotf, log2::log2, log2f::log2f, logf::logf, powf::powf, + round::round, roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, + trunc::trunc, truncf::truncf, }; From 22ebe18556d436f11c05e53537d119040107f344 Mon Sep 17 00:00:00 2001 From: Opal Date: Sat, 14 Jul 2018 16:59:27 +1200 Subject: [PATCH 0049/1459] Add log implementation. 
Fixes rust-lang/libm#23 --- libm/src/lib.rs | 4 -- libm/src/math/log.rs | 117 ++++++++++++++++++++++++++++++++ libm/src/math/mod.rs | 3 +- libm/test-generator/src/main.rs | 2 +- 4 files changed, 120 insertions(+), 6 deletions(-) create mode 100644 libm/src/math/log.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 3ed89f729..39eb71e99 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -389,10 +389,8 @@ pub trait F64Ext: private::Sealed { #[cfg(todo)] fn exp2(self) -> Self; - #[cfg(todo)] fn ln(self) -> Self; - #[cfg(todo)] fn log(self, base: Self) -> Self; fn log2(self) -> Self; @@ -539,13 +537,11 @@ impl F64Ext for f64 { exp2(self) } - #[cfg(todo)] #[inline] fn ln(self) -> Self { log(self) } - #[cfg(todo)] #[inline] fn log(self, base: Self) -> Self { self.ln() / base.ln() diff --git a/libm/src/math/log.rs b/libm/src/math/log.rs new file mode 100644 index 000000000..48e9fa79a --- /dev/null +++ b/libm/src/math/log.rs @@ -0,0 +1,117 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* log(x) + * Return the logarithm of x + * + * Method : + * 1. Argument Reduction: find k and f such that + * x = 2^k * (1+f), + * where sqrt(2)/2 < 1+f < sqrt(2) . + * + * 2. Approximation of log(1+f). + * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) + * = 2s + 2/3 s**3 + 2/5 s**5 + ....., + * = 2s + s*R + * We use a special Remez algorithm on [0,0.1716] to generate + * a polynomial of degree 14 to approximate R The maximum error + * of this polynomial approximation is bounded by 2**-58.45. 
In + * other words, + * 2 4 6 8 10 12 14 + * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s + * (the values of Lg1 to Lg7 are listed in the program) + * and + * | 2 14 | -58.45 + * | Lg1*s +...+Lg7*s - R(z) | <= 2 + * | | + * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. + * In order to guarantee error in log below 1ulp, we compute log + * by + * log(1+f) = f - s*(f - R) (if f is not too large) + * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) + * + * 3. Finally, log(x) = k*ln2 + log(1+f). + * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) + * Here ln2 is split into two floating point number: + * ln2_hi + ln2_lo, + * where n*ln2_hi is always exact for |n| < 2000. + * + * Special cases: + * log(x) is NaN with signal if x < 0 (including -INF) ; + * log(+INF) is +INF; log(0) is -INF with signal; + * log(NaN) is that NaN with no signal. + * + * Accuracy: + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. 
+ */ + +const LN2_HI: f64 = 6.93147180369123816490e-01; /* 3fe62e42 fee00000 */ +const LN2_LO: f64 = 1.90821492927058770002e-10; /* 3dea39ef 35793c76 */ +const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ +const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ +const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ +const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ +const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ +const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ +const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ + +#[inline] +pub fn log(mut x: f64) -> f64 { + let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 + + let mut ui = x.to_bits(); + let mut hx: u32 = (ui >> 32) as u32; + let mut k: i32 = 0; + + if (hx < 0x00100000) || ((hx >> 31) != 0) { + /* x < 2**-126 */ + if ui << 1 == 0 { + return -1. / (x * x); /* log(+-0)=-inf */ + } + if hx >> 31 != 0 { + return (x - x) / 0.0; /* log(-#) = NaN */ + } + /* subnormal number, scale x up */ + k -= 54; + x *= x1p54; + ui = x.to_bits(); + hx = (ui >> 32) as u32; + } else if hx >= 0x7ff00000 { + return x; + } else if hx == 0x3ff00000 && ui << 32 == 0 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + hx += 0x3ff00000 - 0x3fe6a09e; + k += ((hx >> 20) as i32) - 0x3ff; + hx = (hx & 0x000fffff) + 0x3fe6a09e; + ui = ((hx as u64) << 32) | (ui & 0xffffffff); + x = f64::from_bits(ui); + + let f: f64 = x - 1.0; + let hfsq: f64 = 0.5 * f * f; + let s: f64 = f / (2.0 + f); + let z: f64 = s * s; + let w: f64 = z * z; + let t1: f64 = w * (LG2 + w * (LG4 + w * LG6)); + let t2: f64 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); + let r: f64 = t2 + t1; + let dk: f64 = k as f64; + return s * (hfsq + r) + dk * LN2_LO - hfsq + f + dk * LN2_HI; +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 828eef859..6e514f86f 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -15,6 
+15,7 @@ mod floorf; mod fmodf; mod hypot; mod hypotf; +mod log; mod log10; mod log10f; mod log2; @@ -34,7 +35,7 @@ mod truncf; pub use self::{ ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, fmodf::fmodf, - hypot::hypot, hypotf::hypotf, log10::log10, log10f::log10f, log2::log2, log2f::log2f, + hypot::hypot, hypotf::hypotf, log::log, log10::log10, log10f::log10f, log2::log2, log2f::log2f, logf::logf, powf::powf, round::round, roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, truncf::truncf, }; diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 799a354a9..6f8d3c68a 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -706,7 +706,7 @@ f64_f64! { // exp2, // expm1, floor, - // log, + log, log10, // log1p, log2, From 264551d3e5159bc11e1ced6583a84f1d1c009f6c Mon Sep 17 00:00:00 2001 From: Joseph Ryan Date: Sat, 14 Jul 2018 00:44:36 -0500 Subject: [PATCH 0050/1459] Run rustfmt --- libm/src/math/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 855929081..afa8cd8ef 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -35,7 +35,7 @@ mod truncf; pub use self::{ ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, fmod::fmod, - fmodf::fmodf, hypot::hypot, hypotf::hypotf, log10::log10, log10f::log10f, log2::log2, - log2f::log2f, logf::logf, powf::powf, round::round, roundf::roundf, scalbn::scalbn, + fmodf::fmodf, hypot::hypot, hypotf::hypotf, log10::log10, log10f::log10f, log2::log2, + log2f::log2f, logf::logf, powf::powf, round::round, roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, truncf::truncf, }; From 197ed85440a0d5dc227a711a7ded79a936f5ddaf Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Sat, 14 Jul 2018 08:50:53 +0300 Subject: [PATCH 0051/1459] clean duplicates --- 
libm/src/math/mod.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 30e7a2c59..00945b258 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -27,9 +27,6 @@ mod scalbn; mod scalbnf; mod sqrt; mod sqrtf; -mod logf; -mod expf; -mod floor; mod cosf; mod trunc; mod truncf; From 5ace05d0672103be1f0191ef83123c0b8669c208 Mon Sep 17 00:00:00 2001 From: Erik Date: Sat, 14 Jul 2018 01:28:00 -0400 Subject: [PATCH 0052/1459] implement log1p and log1pf --- libm/src/lib.rs | 4 - libm/src/math/log1p.rs | 142 ++++++++++++++++++++++++++++++++ libm/src/math/log1pf.rs | 97 ++++++++++++++++++++++ libm/src/math/mod.rs | 8 +- libm/test-generator/src/main.rs | 3 +- 5 files changed, 246 insertions(+), 8 deletions(-) create mode 100644 libm/src/math/log1p.rs create mode 100644 libm/src/math/log1pf.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 39eb71e99..30f879769 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -114,7 +114,6 @@ pub trait F32Ext: private::Sealed { #[cfg(todo)] fn exp_m1(self) -> Self; - #[cfg(todo)] fn ln_1p(self) -> Self; #[cfg(todo)] @@ -295,7 +294,6 @@ impl F32Ext for f32 { expm1f(self) } - #[cfg(todo)] #[inline] fn ln_1p(self) -> Self { log1pf(self) @@ -432,7 +430,6 @@ pub trait F64Ext: private::Sealed { #[cfg(todo)] fn exp_m1(self) -> Self; - #[cfg(todo)] fn ln_1p(self) -> Self; #[cfg(todo)] @@ -616,7 +613,6 @@ impl F64Ext for f64 { expm1(self) } - #[cfg(todo)] #[inline] fn ln_1p(self) -> Self { log1p(self) diff --git a/libm/src/math/log1p.rs b/libm/src/math/log1p.rs new file mode 100644 index 000000000..7f7a5125b --- /dev/null +++ b/libm/src/math/log1p.rs @@ -0,0 +1,142 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_log1p.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. 
+ * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* double log1p(double x) + * Return the natural logarithm of 1+x. + * + * Method : + * 1. Argument Reduction: find k and f such that + * 1+x = 2^k * (1+f), + * where sqrt(2)/2 < 1+f < sqrt(2) . + * + * Note. If k=0, then f=x is exact. However, if k!=0, then f + * may not be representable exactly. In that case, a correction + * term is needed. Let u=1+x rounded. Let c = (1+x)-u, then + * log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u), + * and add back the correction term c/u. + * (Note: when x > 2**53, one can simply return log(x)) + * + * 2. Approximation of log(1+f): See log.c + * + * 3. Finally, log1p(x) = k*ln2 + log(1+f) + c/u. See log.c + * + * Special cases: + * log1p(x) is NaN with signal if x < -1 (including -INF) ; + * log1p(+INF) is +INF; log1p(-1) is -INF with signal; + * log1p(NaN) is that NaN with no signal. + * + * Accuracy: + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + * + * Note: Assuming log() returns an accurate answer, the following + * algorithm can be used to compute log1p(x) to within a few ULP: + * + * u = 1+x; + * if(u==1.0) return x ; else + * return log(u)*(x/(u-1.0)); + * + * See HP-15C Advanced Functions Handbook, p.193.
+ */ + +use core::f64; + +const LN2_HI: f64 = 6.93147180369123816490e-01; /* 3fe62e42 fee00000 */ +const LN2_LO: f64 = 1.90821492927058770002e-10; /* 3dea39ef 35793c76 */ +const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ +const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ +const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ +const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ +const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ +const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ +const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ + +pub fn log1p(x: f64) -> f64 { + let mut ui: u64 = x.to_bits(); + let hfsq: f64; + let mut f: f64 = 0.; + let mut c: f64 = 0.; + let s: f64; + let z: f64; + let r: f64; + let w: f64; + let t1: f64; + let t2: f64; + let dk: f64; + let hx: u32; + let mut hu: u32; + let mut k: i32; + + hx = (ui >> 32) as u32; + k = 1; + if hx < 0x3fda827a || (hx >> 31) > 0 { + /* 1+x < sqrt(2)+ */ + if hx >= 0xbff00000 { + /* x <= -1.0 */ + if x == -1. { + return x / 0.0; /* log1p(-1) = -inf */ + } + return (x - x) / 0.0; /* log1p(x<-1) = NaN */ + } + if hx << 1 < 0x3ca00000 << 1 { + /* |x| < 2**-53 */ + /* underflow if subnormal */ + if (hx & 0x7ff00000) == 0 { + force_eval!(x as f32); + } + return x; + } + if hx <= 0xbfd2bec4 { + /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ + k = 0; + c = 0.; + f = x; + } + } else if hx >= 0x7ff00000 { + return x; + } + if k > 0 { + ui = (1. + x).to_bits(); + hu = (ui >> 32) as u32; + hu += 0x3ff00000 - 0x3fe6a09e; + k = (hu >> 20) as i32 - 0x3ff; + /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ + if k < 54 { + c = if k >= 2 { + 1. - (f64::from_bits(ui) - x) + } else { + x - (f64::from_bits(ui) - 1.) 
+ }; + c /= f64::from_bits(ui); + } else { + c = 0.; + } + /* reduce u into [sqrt(2)/2, sqrt(2)] */ + hu = (hu & 0x000fffff) + 0x3fe6a09e; + ui = (hu as u64) << 32 | (ui & 0xffffffff); + f = f64::from_bits(ui) - 1.; + } + hfsq = 0.5 * f * f; + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * (LG4 + w * LG6)); + t2 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); + r = t2 + t1; + dk = k as f64; + return s * (hfsq + r) + (dk * LN2_LO + c) - hfsq + f + dk * LN2_HI; +} diff --git a/libm/src/math/log1pf.rs b/libm/src/math/log1pf.rs new file mode 100644 index 000000000..c8e242259 --- /dev/null +++ b/libm/src/math/log1pf.rs @@ -0,0 +1,97 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_log1pf.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use core::f32; + +const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */ +const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24 */ +const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ +const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ +const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ + +pub fn log1pf(x: f32) -> f32 { + let mut ui: u32 = x.to_bits(); + let hfsq: f32; + let mut f: f32 = 0.; + let mut c: f32 = 0.; + let s: f32; + let z: f32; + let r: f32; + let w: f32; + let t1: f32; + let t2: f32; + let dk: f32; + let ix: u32; + let mut iu: u32; + let mut k: i32; + + ix = ui; + k = 1; + if ix < 0x3ed413d0 || (ix >> 31) > 0 { + /* 1+x < sqrt(2)+ */ + if ix >= 0xbf800000 { + /* x <= -1.0 */ + if x == -1. 
{ + return x / 0.0; /* log1p(-1)=-inf */ + } + return (x - x) / 0.0; /* log1p(x<-1)=NaN */ + } + if ix << 1 < 0x33800000 << 1 { + /* |x| < 2**-24 */ + /* underflow if subnormal */ + if (ix & 0x7f800000) == 0 { + force_eval!(x * x); + } + return x; + } + if ix <= 0xbe95f619 { + /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ + k = 0; + c = 0.; + f = x; + } + } else if ix >= 0x7f800000 { + return x; + } + if k > 0 { + ui = (1. + x).to_bits(); + iu = ui; + iu += 0x3f800000 - 0x3f3504f3; + k = (iu >> 23) as i32 - 0x7f; + /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ + if k < 25 { + c = if k >= 2 { + 1. - (f32::from_bits(ui) - x) + } else { + x - (f32::from_bits(ui) - 1.) + }; + c /= f32::from_bits(ui); + } else { + c = 0.; + } + /* reduce u into [sqrt(2)/2, sqrt(2)] */ + iu = (iu & 0x007fffff) + 0x3f3504f3; + ui = iu; + f = f32::from_bits(ui) - 1.; + } + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * LG4); + t2 = z * (LG1 + w * LG3); + r = t2 + t1; + hfsq = 0.5 * f * f; + dk = k as f32; + return s * (hfsq + r) + (dk * LN2_LO + c) - hfsq + f + dk * LN2_HI; +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 6e514f86f..fb5e3df90 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -18,6 +18,8 @@ mod hypotf; mod log; mod log10; mod log10f; +mod log1p; +mod log1pf; mod log2; mod log2f; mod logf; @@ -35,9 +37,9 @@ mod truncf; pub use self::{ ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, fmodf::fmodf, - hypot::hypot, hypotf::hypotf, log::log, log10::log10, log10f::log10f, log2::log2, log2f::log2f, - logf::logf, powf::powf, round::round, roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, - sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, truncf::truncf, + hypot::hypot, hypotf::hypotf, log::log, log10::log10, log10f::log10f, log1p::log1p, + log1pf::log1pf, log2::log2, log2f::log2f, logf::logf, powf::powf, round::round, roundf::roundf, + scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf,
trunc::trunc, truncf::truncf, }; fn isnanf(x: f32) -> bool { diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 6f8d3c68a..be5ed0784 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -664,6 +664,7 @@ f32_f32! { expf, // fdimf, log10f, + log1pf, log2f, logf, roundf, @@ -708,7 +709,7 @@ f64_f64! { floor, log, log10, - // log1p, + log1p, log2, round, // sin, From 3d79f69559ac098dc0bc49e37ac38fd0a0c8b77d Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Sat, 14 Jul 2018 09:17:41 +0300 Subject: [PATCH 0053/1459] revert changes --- libm/src/math/rem_pio2_large.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index b5fa7e0af..e1f5d1333 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -1,5 +1,5 @@ -use math::scalbn; -use math::floor; +use super::scalbn; +use super::floor; // initial value for jk const INIT_JK : [usize; 4] = [3,4,4,6]; From 42bfbaa56f4700fd50fb404d41f5e39fedcc9967 Mon Sep 17 00:00:00 2001 From: Erik Date: Sat, 14 Jul 2018 02:26:19 -0400 Subject: [PATCH 0054/1459] add license and other comments to existing files --- libm/src/math/expf.rs | 15 ++++++++ libm/src/math/log10.rs | 19 ++++++++++ libm/src/math/log10f.rs | 15 ++++++++ libm/src/math/log2.rs | 19 ++++++++++ libm/src/math/log2f.rs | 15 ++++++++ libm/src/math/logf.rs | 15 ++++++++ libm/src/math/powf.rs | 15 ++++++++ libm/src/math/sqrt.rs | 78 +++++++++++++++++++++++++++++++++++++++++ libm/src/math/sqrtf.rs | 15 ++++++++ 9 files changed, 206 insertions(+) diff --git a/libm/src/math/expf.rs b/libm/src/math/expf.rs index cffb55771..8ecc3b6ab 100644 --- a/libm/src/math/expf.rs +++ b/libm/src/math/expf.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_expf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. 
+ */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + use super::scalbnf; const HALF: [f32; 2] = [0.5, -0.5]; diff --git a/libm/src/math/log10.rs b/libm/src/math/log10.rs index 137d8966f..7c7afefa3 100644 --- a/libm/src/math/log10.rs +++ b/libm/src/math/log10.rs @@ -1,3 +1,22 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * Return the base 10 logarithm of x. See log.c for most comments. + * + * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2 + * as in log.c, then combine and scale in extra precision: + * log10(x) = (f - f*f/2 + r)/log(10) + k*log10(2) + */ + use core::f64; const IVLN10HI: f64 = 4.34294481878168880939e-01; /* 0x3fdbcb7b, 0x15200000 */ diff --git a/libm/src/math/log10f.rs b/libm/src/math/log10f.rs index 58db09344..82b87c044 100644 --- a/libm/src/math/log10f.rs +++ b/libm/src/math/log10f.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log10f.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ +/* + * See comments in log10.c. + */ + use core::f32; const IVLN10HI: f32 = 4.3432617188e-01; /* 0x3ede6000 */ diff --git a/libm/src/math/log2.rs b/libm/src/math/log2.rs index c0d3263e3..f6640d296 100644 --- a/libm/src/math/log2.rs +++ b/libm/src/math/log2.rs @@ -1,3 +1,22 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log2.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * Return the base 2 logarithm of x. See log.c for most comments. + * + * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2 + * as in log.c, then combine and scale in extra precision: + * log2(x) = (f - f*f/2 + r)/log(2) + k + */ + use core::f64; const IVLN2HI: f64 = 1.44269504072144627571e+00; /* 0x3ff71547, 0x65200000 */ diff --git a/libm/src/math/log2f.rs b/libm/src/math/log2f.rs index 47a917bdb..c007ff9b0 100644 --- a/libm/src/math/log2f.rs +++ b/libm/src/math/log2f.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log2f.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * See comments in log2.c. 
+ */ + use core::f32; const IVLN2HI: f32 = 1.4428710938e+00; /* 0x3fb8b000 */ diff --git a/libm/src/math/logf.rs b/libm/src/math/logf.rs index 78c5e94ad..095191041 100644 --- a/libm/src/math/logf.rs +++ b/libm/src/math/logf.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_logf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */ const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */ /* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ diff --git a/libm/src/math/powf.rs b/libm/src/math/powf.rs index f1dc3a5b8..8d0afe669 100644 --- a/libm/src/math/powf.rs +++ b/libm/src/math/powf.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_powf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + use super::{fabsf, scalbnf, sqrtf}; const BP: [f32; 2] = [1.0, 1.5]; diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 17de5a2e0..cbadb49bb 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -1,3 +1,81 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* sqrt(x) + * Return correctly rounded sqrt. + * ------------------------------------------ + * | Use the hardware sqrt if you have one | + * ------------------------------------------ + * Method: + * Bit by bit method using integer arithmetic. (Slow, but portable) + * 1. Normalization + * Scale x to y in [1,4) with even powers of 2: + * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then + * sqrt(x) = 2^k * sqrt(y) + * 2. Bit by bit computation + * Let q_{i} = sqrt(y) truncated to i bits after the binary point + * (q_{0} = 1), and define + * + * s_{i} = 2*q_{i}, and y_{i} = 2^{i+1} * (y - q_{i}^2). (1) + * + * To compute q_{i+1} from q_{i}, one checks whether + * + * (q_{i} + 2^{-(i+1)})^2 <= y. (2) + * + * If (2) is false, then q_{i+1} = q_{i}; otherwise + * q_{i+1} = q_{i} + 2^{-(i+1)}. + * + * With some algebraic manipulation, it is not difficult to see + * that (2) is equivalent to + * + * s_{i} + 2^{-(i+1)} <= y_{i} (3) + * + * The advantage of (3) is that s_{i} and y_{i} can be computed by + * the following recurrence formula: + * if (3) is false + * + * s_{i+1} = s_{i}, y_{i+1} = y_{i}; (4) + * + * otherwise, + * + * s_{i+1} = s_{i} + 2^{-i}, y_{i+1} = y_{i} - s_{i} - 2^{-(i+1)} (5) + * + * One may easily use induction to prove (4) and (5). + * Note.
Since the left hand side of (3) contains only i+2 bits, + * it is not necessary to do a full (53-bit) comparison + * in (3). + * 3. Final rounding + * After generating the 53-bit result, we compute one more bit. + * Together with the remainder, we can decide whether the + * result is exact, bigger than 1/2ulp, or less than 1/2ulp + * (it will never be equal to 1/2ulp). + * The rounding mode can be detected by checking whether + * huge + tiny is equal to huge, and whether huge - tiny is + * equal to huge for some floating point numbers "huge" and "tiny". + * + * Special cases: + * sqrt(+-0) = +-0 ... exact + * sqrt(inf) = inf + * sqrt(-ve) = NaN ... with invalid signal + * sqrt(NaN) = NaN ... with invalid signal for signaling NaN + */ + use core::f64; const TINY: f64 = 1.0e-300; diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index a265bef48..49984689e 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved.
+ * ==================================================== + */ + const TINY: f32 = 1.0e-30; #[inline] From c1630213bd7014f9ad9bb08bc7bd36e6f8c6461a Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Sat, 14 Jul 2018 09:33:46 +0300 Subject: [PATCH 0055/1459] cargo fmt --- libm/src/math/cosf.rs | 46 ++-- libm/src/math/k_cosf.rs | 18 +- libm/src/math/k_sinf.rs | 20 +- libm/src/math/mod.rs | 19 +- libm/src/math/rem_pio2_large.rs | 393 +++++++++++++++----------------- libm/src/math/rem_pio2f.rs | 32 +-- 6 files changed, 249 insertions(+), 279 deletions(-) diff --git a/libm/src/math/cosf.rs b/libm/src/math/cosf.rs index ef46d4a15..79df97e35 100644 --- a/libm/src/math/cosf.rs +++ b/libm/src/math/cosf.rs @@ -3,32 +3,36 @@ use super::{k_cosf, k_sinf, rem_pio2f}; use core::f64::consts::FRAC_PI_2; /* Small multiples of pi/2 rounded to double precision. */ -const C1_PIO2 : f64 = 1.*FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ -const C2_PIO2 : f64 = 2.*FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ -const C3_PIO2 : f64 = 3.*FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ -const C4_PIO2 : f64 = 4.*FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ +const C1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ +const C2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ +const C3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ +const C4_PIO2: f64 = 4. 
* FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ #[inline] pub fn cosf(x: f32) -> f32 { let x64 = x as f64; - + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 - + let mut ix = x.to_bits(); let sign = (ix >> 31) != 0; ix &= 0x7fffffff; - if ix <= 0x3f490fda { /* |x| ~<= pi/4 */ - if ix < 0x39800000 { /* |x| < 2**-12 */ + if ix <= 0x3f490fda { + /* |x| ~<= pi/4 */ + if ix < 0x39800000 { + /* |x| < 2**-12 */ /* raise inexact if x != 0 */ force_eval!(x + x1p120); return 1.; } return k_cosf(x64); } - if ix <= 0x407b53d1 { /* |x| ~<= 5*pi/4 */ - if ix > 0x4016cbe3 { /* |x| ~> 3*pi/4 */ - return -k_cosf(if sign { x64+C2_PIO2 } else { x64-C2_PIO2 }); + if ix <= 0x407b53d1 { + /* |x| ~<= 5*pi/4 */ + if ix > 0x4016cbe3 { + /* |x| ~> 3*pi/4 */ + return -k_cosf(if sign { x64 + C2_PIO2 } else { x64 - C2_PIO2 }); } else { if sign { return k_sinf(x64 + C1_PIO2); @@ -37,9 +41,11 @@ pub fn cosf(x: f32) -> f32 { } } } - if ix <= 0x40e231d5 { /* |x| ~<= 9*pi/4 */ - if ix > 0x40afeddf { /* |x| ~> 7*pi/4 */ - return k_cosf(if sign { x64+C4_PIO2 } else { x64-C4_PIO2 }); + if ix <= 0x40e231d5 { + /* |x| ~<= 9*pi/4 */ + if ix > 0x40afeddf { + /* |x| ~> 7*pi/4 */ + return k_cosf(if sign { x64 + C4_PIO2 } else { x64 - C4_PIO2 }); } else { if sign { return k_sinf(-x64 - C3_PIO2); @@ -51,15 +57,15 @@ pub fn cosf(x: f32) -> f32 { /* cos(Inf or NaN) is NaN */ if ix >= 0x7f800000 { - return x-x; + return x - x; } /* general argument reduction needed */ let (n, y) = rem_pio2f(x); - match n&3 { - 0 => { k_cosf( y) }, - 1 => { k_sinf(-y) }, - 2 => { -k_cosf( y) }, - _ => { k_sinf( y) }, + match n & 3 { + 0 => k_cosf(y), + 1 => k_sinf(-y), + 2 => -k_cosf(y), + _ => k_sinf(y), } } diff --git a/libm/src/math/k_cosf.rs b/libm/src/math/k_cosf.rs index 5d1ede7af..83d13b2e9 100644 --- a/libm/src/math/k_cosf.rs +++ b/libm/src/math/k_cosf.rs @@ -1,13 +1,13 @@ /* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). 
*/ -const C0 : f64 = -0.499999997251031003120; /* -0x1ffffffd0c5e81.0p-54 */ -const C1 : f64 = 0.0416666233237390631894; /* 0x155553e1053a42.0p-57 */ -const C2 : f64 = -0.00138867637746099294692; /* -0x16c087e80f1e27.0p-62 */ -const C3 : f64 = 0.0000243904487962774090654; /* 0x199342e0ee5069.0p-68 */ +const C0: f64 = -0.499999997251031003120; /* -0x1ffffffd0c5e81.0p-54 */ +const C1: f64 = 0.0416666233237390631894; /* 0x155553e1053a42.0p-57 */ +const C2: f64 = -0.00138867637746099294692; /* -0x16c087e80f1e27.0p-62 */ +const C3: f64 = 0.0000243904487962774090654; /* 0x199342e0ee5069.0p-68 */ #[inline] -pub(crate) fn k_cosf(x : f64) -> f32 { - let z = x*x; - let w = z*z; - let r = C2+z*C3; - (((1.0+z*C0) + w*C1) + (w*z)*r) as f32 +pub(crate) fn k_cosf(x: f64) -> f32 { + let z = x * x; + let w = z * z; + let r = C2 + z * C3; + (((1.0 + z * C0) + w * C1) + (w * z) * r) as f32 } diff --git a/libm/src/math/k_sinf.rs b/libm/src/math/k_sinf.rs index 5eedab3aa..bb2183afc 100644 --- a/libm/src/math/k_sinf.rs +++ b/libm/src/math/k_sinf.rs @@ -1,14 +1,14 @@ /* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). 
*/ -const S1 : f64 = -0.166666666416265235595; /* -0x15555554cbac77.0p-55 */ -const S2 : f64 = 0.0083333293858894631756; /* 0x111110896efbb2.0p-59 */ -const S3 : f64 = -0.000198393348360966317347; /* -0x1a00f9e2cae774.0p-65 */ -const S4 : f64 = 0.0000027183114939898219064; /* 0x16cd878c3b46a7.0p-71 */ +const S1: f64 = -0.166666666416265235595; /* -0x15555554cbac77.0p-55 */ +const S2: f64 = 0.0083333293858894631756; /* 0x111110896efbb2.0p-59 */ +const S3: f64 = -0.000198393348360966317347; /* -0x1a00f9e2cae774.0p-65 */ +const S4: f64 = 0.0000027183114939898219064; /* 0x16cd878c3b46a7.0p-71 */ #[inline] -pub(crate) fn k_sinf(x : f64) -> f32 { - let z = x*x; - let w = z*z; - let r = S3 + z*S4; - let s = z*x; - ((x + s*(S1 + z*S2)) + s*w*r) as f32 +pub(crate) fn k_sinf(x: f64) -> f32 { + let z = x * x; + let w = z * z; + let r = S3 + z * S4; + let s = z * x; + ((x + s * (S1 + z * S2)) + s * w * r) as f32 } diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 00945b258..013e70870 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -7,6 +7,7 @@ macro_rules! 
force_eval { } mod ceilf; +mod cosf; mod expf; mod fabs; mod fabsf; @@ -27,28 +28,22 @@ mod scalbn; mod scalbnf; mod sqrt; mod sqrtf; -mod cosf; mod trunc; mod truncf; pub use self::{ - ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, fmodf::fmodf, - hypot::hypot, hypotf::hypotf, log10::log10, log10f::log10f, log2::log2, log2f::log2f, - logf::logf, powf::powf, round::round, roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, - sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, truncf::truncf, cosf::cosf, + ceilf::ceilf, cosf::cosf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, + fmodf::fmodf, hypot::hypot, hypotf::hypotf, log10::log10, log10f::log10f, log2::log2, + log2f::log2f, logf::logf, powf::powf, round::round, roundf::roundf, scalbn::scalbn, + scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, truncf::truncf, }; mod k_cosf; mod k_sinf; -mod rem_pio2f; mod rem_pio2_large; +mod rem_pio2f; -use self::{ - k_cosf::k_cosf, - k_sinf::k_sinf, - rem_pio2f::rem_pio2f, - rem_pio2_large::rem_pio2_large, -}; +use self::{k_cosf::k_cosf, k_sinf::k_sinf, rem_pio2_large::rem_pio2_large, rem_pio2f::rem_pio2f}; fn isnanf(x: f32) -> bool { x.to_bits() & 0x7fffffff > 0x7f800000 diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index e1f5d1333..52b47279c 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -1,8 +1,8 @@ -use super::scalbn; use super::floor; +use super::scalbn; // initial value for jk -const INIT_JK : [usize; 4] = [3,4,4,6]; +const INIT_JK: [usize; 4] = [3, 4, 4, 6]; // Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi // @@ -15,140 +15,99 @@ const INIT_JK : [usize; 4] = [3,4,4,6]; // NB: This table must have at least (e0-3)/24 + jk terms. // For quad precision (e0 <= 16360, jk = 6), this is 686. 
#[cfg(target_pointer_width = "32")] -const IPIO2 : [i32; 66] = [ - 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, - 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, - 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, - 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, - 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, - 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, - 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, - 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, - 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, - 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, - 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, +const IPIO2: [i32; 66] = [ + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, + 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, + 0x845F8B, 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 0xF17B3D, 0x0739F7, 0x8A5292, + 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA, + 0x73A8C9, 0x60E27B, 0xC08C6B, ]; #[cfg(target_pointer_width = "64")] -const IPIO2 : [i32; 690] = [ - 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, - 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, - 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, - 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, - 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, - 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, - 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, - 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 
0x5FB11F, 0x8D5D08, - 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, - 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, - 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, - 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6, - 0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2, - 0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35, - 0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30, - 0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C, - 0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4, - 0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770, - 0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7, - 0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19, - 0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522, - 0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16, - 0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6, - 0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E, - 0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48, - 0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3, - 0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF, - 0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55, - 0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612, - 0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929, - 0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC, - 0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B, - 0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C, - 0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4, - 0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB, - 0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC, - 0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C, - 0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F, - 0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5, - 0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 
0xC06437, - 0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B, - 0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA, - 0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD, - 0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3, - 0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3, - 0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717, - 0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F, - 0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61, - 0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB, - 0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51, - 0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0, - 0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C, - 0x5BC3D8, 0xC492F5, 0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6, - 0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC, - 0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED, - 0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328, - 0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D, - 0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0, - 0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B, - 0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4, - 0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3, - 0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F, - 0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD, - 0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B, - 0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4, - 0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761, - 0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31, - 0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30, - 0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262, - 0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E, - 0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1, - 0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C, - 
0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4, - 0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08, - 0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196, - 0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9, - 0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4, - 0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC, - 0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C, - 0xD0C0B2, 0x485551, 0x0EFB1E, 0xC37295, 0x3B06A3, 0x3540C0, - 0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C, - 0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0, - 0x3C3ABA, 0x461846, 0x5F7555, 0xF5BDD2, 0xC6926E, 0x5D2EAC, - 0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22, - 0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893, - 0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7, - 0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5, - 0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F, - 0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4, - 0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF, - 0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B, - 0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2, - 0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138, - 0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E, - 0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569, - 0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34, - 0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9, - 0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D, - 0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F, - 0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855, - 0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569, - 0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B, - 0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE, - 0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41, - 0x086E59, 
0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49, - 0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F, - 0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110, - 0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8, - 0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365, - 0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A, - 0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270, - 0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5, - 0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616, - 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B, +const IPIO2: [i32; 690] = [ + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, + 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, + 0x845F8B, 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 0xF17B3D, 0x0739F7, 0x8A5292, + 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA, + 0x73A8C9, 0x60E27B, 0xC08C6B, 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6, + 0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2, 0xDE4F98, 0x327DBB, 0xC33D26, + 0xEF6B1E, 0x5EF89F, 0x3A1F35, 0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30, + 0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C, 0x467D86, 0x2D71E3, 0x9AC69B, + 0x006233, 0x7CD2B4, 0x97A7B4, 0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770, + 0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7, 0xCB2324, 0x778AD6, 0x23545A, + 0xB91F00, 0x1B0AF1, 0xDFCE19, 0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522, + 0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16, 0xDE3B58, 
0x929BDE, 0x2822D2, + 0xE88628, 0x4D58E2, 0x32CAC6, 0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E, + 0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48, 0xD36710, 0xD8DDAA, 0x425FAE, + 0xCE616A, 0xA4280A, 0xB499D3, 0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF, + 0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55, 0x36D9CA, 0xD2A828, 0x8D61C2, + 0x77C912, 0x142604, 0x9B4612, 0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929, + 0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC, 0xC3E7B3, 0x28F8C7, 0x940593, + 0x3E71C1, 0xB3092E, 0xF3450B, 0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C, + 0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4, 0x9794E8, 0x84E6E2, 0x973199, + 0x6BED88, 0x365F5F, 0x0EFDBB, 0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC, + 0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C, 0x90AA47, 0x02E774, 0x24D6BD, + 0xA67DF7, 0x72486E, 0xEF169F, 0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5, + 0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437, 0x10D86D, 0x324832, 0x754C5B, + 0xD4714E, 0x6E5445, 0xC1090B, 0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA, + 0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD, 0x6AE290, 0x89D988, 0x50722C, + 0xBEA404, 0x940777, 0x7030F3, 0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3, + 0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717, 0x3BDF08, 0x2B3715, 0xA0805C, + 0x93805A, 0x921110, 0xD8E80F, 0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61, + 0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB, 0xAA140A, 0x2F2689, 0x768364, + 0x333B09, 0x1A940E, 0xAA3A51, 0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0, + 0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C, 0x5BC3D8, 0xC492F5, 0x4BADC6, + 0xA5CA4E, 0xCD37A7, 0x36A9E6, 0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC, + 0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 
0x664D64, 0xB705ED, 0x306529, 0xBF5657, 0x3AFF47, + 0xB9F96A, 0xF3BE75, 0xDF9328, 0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D, + 0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0, 0xA8654F, 0xA5C1D2, 0x0F3F0B, + 0xCD785B, 0x76F923, 0x048B7B, 0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4, + 0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3, 0xDA4886, 0xA05DF7, 0xF480C6, + 0x2FF0AC, 0x9AECDD, 0xBC5C3F, 0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD, + 0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B, 0x2A1216, 0x2DB7DC, 0xFDE5FA, + 0xFEDB89, 0xFDBE89, 0x6C76E4, 0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761, + 0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31, 0x48D784, 0x16DF30, 0x432DC7, + 0x356125, 0xCE70C9, 0xB8CB30, 0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262, + 0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E, 0xC4F133, 0x5F6E13, 0xE4305D, + 0xA92E85, 0xC3B21D, 0x3632A1, 0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C, + 0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4, 0xCBDA11, 0xD0BE7D, 0xC1DB9B, + 0xBD17AB, 0x81A2CA, 0x5C6A08, 0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196, + 0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9, 0x4F6A68, 0xA82A4A, 0x5AC44F, + 0xBCF82D, 0x985AD7, 0x95C7F4, 0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC, + 0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C, 0xD0C0B2, 0x485551, 0x0EFB1E, + 0xC37295, 0x3B06A3, 0x3540C0, 0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C, + 0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0, 0x3C3ABA, 0x461846, 0x5F7555, + 0xF5BDD2, 0xC6926E, 0x5D2EAC, 0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22, + 0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893, 0x745D7C, 0xB2AD6B, 0x9D6ECD, + 0x7B723E, 0x6A11C6, 0xA9CFF7, 0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5, + 0x8AD874, 
0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F, 0xBEFDFD, 0xEF4556, 0x367ED9, + 0x13D9EC, 0xB9BA8B, 0xFC97C4, 0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF, + 0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B, 0x9C2A3E, 0xCC5F11, 0x4A0BFD, + 0xFBF4E1, 0x6D3B8E, 0x2C86E2, 0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138, + 0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E, 0xCC2254, 0xDC552A, 0xD6C6C0, + 0x96190B, 0xB8701A, 0x649569, 0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34, + 0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9, 0x9B5861, 0xBC57E1, 0xC68351, + 0x103ED8, 0x4871DD, 0xDD1C2D, 0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F, + 0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855, 0x382682, 0x9BE7CA, 0xA40D51, + 0xB13399, 0x0ED7A9, 0x480569, 0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B, + 0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE, 0x5FD45E, 0xA4677B, 0x7AACBA, + 0xA2F655, 0x23882B, 0x55BA41, 0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49, + 0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F, 0xAE5ADB, 0x86C547, 0x624385, + 0x3B8621, 0x94792C, 0x876110, 0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8, + 0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365, 0xB1933D, 0x0B7CBD, 0xDC51A4, + 0x63DD27, 0xDDE169, 0x19949A, 0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270, + 0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5, 0x4D7E6F, 0x5119A5, 0xABF9B5, + 0xD6DF82, 0x61DD96, 0x023616, 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B, 0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0, ]; -const PIO2 : [f64; 8] = [ +const PIO2: [f64; 8] = [ 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */ 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */ 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */ @@ -251,92 +210,91 @@ const PIO2 : [f64; 8] = [ /// more accurately, 
= 0 mod 8 ). Thus the number of operations are /// independent of the exponent of the input. #[inline] -pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32 { - let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24 +pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { + let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24 let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24) - + #[cfg(target_pointer_width = "64")] assert!(e0 <= 16360); - + let nx = x.len(); - - let mut fw : f64; - let mut n : i32; - let mut ih : i32; - let mut z : f64; - let mut f : [f64;20] = [0.;20]; - let mut fq : [f64;20] = [0.;20]; - let mut q : [f64;20] = [0.;20]; - let mut iq : [i32;20] = [0;20]; - + + let mut fw: f64; + let mut n: i32; + let mut ih: i32; + let mut z: f64; + let mut f: [f64; 20] = [0.; 20]; + let mut fq: [f64; 20] = [0.; 20]; + let mut q: [f64; 20] = [0.; 20]; + let mut iq: [i32; 20] = [0; 20]; + /* initialize jk*/ let jk = INIT_JK[prec]; let jp = jk; /* determine jx,jv,q0, note that 3>q0 */ - let jx = nx-1; - let mut jv = (e0-3)/24; + let jx = nx - 1; + let mut jv = (e0 - 3) / 24; if jv < 0 { - jv=0; + jv = 0; } - let mut q0 = e0-24*(jv+1); + let mut q0 = e0 - 24 * (jv + 1); let jv = jv as usize; /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ - let mut j = (jv-jx) as i32; - let m = jx+jk; + let mut j = (jv - jx) as i32; + let m = jx + jk; for i in 0..=m { - f[i] = if j<0 { - 0. - } else { - IPIO2[j as usize] as f64 - }; + f[i] = if j < 0 { 0. 
} else { IPIO2[j as usize] as f64 }; j += 1 } - + /* compute q[0],q[1],...q[jk] */ for i in 0..=jk { fw = 0f64; for j in 0..=jx { - fw += x[j]*f[jx+i-j]; + fw += x[j] * f[jx + i - j]; } q[i] = fw; } - + let mut jz = jk; - + 'recompute: loop { /* distill q[] into iq[] reversingly */ let mut i = 0i32; z = q[jz]; for j in (1..=jz).rev() { - fw = (x1p_24*z) as i32 as f64; - iq[i as usize] = (z - x1p24*fw) as i32; - z = q[j-1]+fw; + fw = (x1p_24 * z) as i32 as f64; + iq[i as usize] = (z - x1p24 * fw) as i32; + z = q[j - 1] + fw; i += 1; } - + /* compute n */ - z = scalbn(z, q0); /* actual value of z */ - z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ - n = z as i32; + z = scalbn(z, q0); /* actual value of z */ + z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */ + n = z as i32; z -= n as f64; ih = 0; - if q0 > 0 { /* need iq[jz-1] to determine n */ - i = iq[jz-1] >> (24-q0); + if q0 > 0 { + /* need iq[jz-1] to determine n */ + i = iq[jz - 1] >> (24 - q0); n += i; - iq[jz-1] -= i << (24-q0); - ih = iq[jz-1] >> (23-q0); + iq[jz - 1] -= i << (24 - q0); + ih = iq[jz - 1] >> (23 - q0); } else if q0 == 0 { - ih = iq[jz-1]>>23; + ih = iq[jz - 1] >> 23; } else if z >= 0.5 { ih = 2; } - - if ih > 0 { /* q > 0.5 */ + + if ih > 0 { + /* q > 0.5 */ n += 1; let mut carry = 0i32; - for i in 0..jz { /* compute 1-q */ + for i in 0..jz { + /* compute 1-q */ let j = iq[i]; if carry == 0 { if j != 0 { @@ -347,10 +305,15 @@ pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) iq[i] = 0xffffff - j; } } - if q0 > 0 { /* rare case: chance is 1 in 12 */ + if q0 > 0 { + /* rare case: chance is 1 in 12 */ match q0 { - 1 => { iq[jz-1] &= 0x7fffff; }, - 2 => { iq[jz-1] &= 0x3fffff; }, + 1 => { + iq[jz - 1] &= 0x7fffff; + } + 2 => { + iq[jz - 1] &= 0x3fffff; + } _ => {} } } @@ -361,24 +324,26 @@ pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) } } } - + /* check if recomputation is needed */ if z == 0. 
{ let mut j = 0; - for i in (jk..=jz-1).rev() { + for i in (jk..=jz - 1).rev() { j |= iq[i]; } - if j == 0 { /* need recomputation */ + if j == 0 { + /* need recomputation */ let mut k = 1; - while iq[jk-k]==0 { - k += 1; /* k = no. of terms needed */ + while iq[jk - k] == 0 { + k += 1; /* k = no. of terms needed */ } - - for i in (jz+1)..=(jz+k) { /* add q[jz+1] to q[jz+k] */ - f[jx+i] = IPIO2[jv+i] as f64; + + for i in (jz + 1)..=(jz + k) { + /* add q[jz+1] to q[jz+k] */ + f[jx + i] = IPIO2[jv + i] as f64; fw = 0f64; for j in 0..=jx { - fw += x[j]*f[jx+i-j]; + fw += x[j] * f[jx + i - j]; } q[i] = fw; } @@ -389,7 +354,7 @@ pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) break; } - + /* chop off zero terms */ if z == 0. { jz -= 1; @@ -398,11 +363,12 @@ pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) jz -= 1; q0 -= 24; } - } else { /* break z into 24-bit if necessary */ + } else { + /* break z into 24-bit if necessary */ z = scalbn(z, -q0); if z >= x1p24 { - fw = (x1p_24*z) as i32 as f64; - iq[jz] = (z - x1p24*fw) as i32; + fw = (x1p_24 * z) as i32 as f64; + iq[jz] = (z - x1p24 * fw) as i32; jz += 1; q0 += 24; iq[jz] = fw as i32; @@ -410,25 +376,25 @@ pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) iq[jz] = z as i32; } } - + /* convert integer "bit" chunk to floating-point value */ fw = scalbn(1., q0); for i in (0..=jz).rev() { - q[i] = fw*(iq[i] as f64); + q[i] = fw * (iq[i] as f64); fw *= x1p_24; } - + /* compute PIo2[0,...,jp]*q[jz,...,0] */ for i in (0..=jz).rev() { fw = 0f64; let mut k = 0; - while (k <= jp) && (k <= jz-i) { - fw += PIO2[k]*q[i+k]; + while (k <= jp) && (k <= jz - i) { + fw += PIO2[k] * q[i + k]; k += 1; } - fq[jz-i] = fw; + fq[jz - i] = fw; } - + /* compress fq[] into y[] */ match prec { 0 => { @@ -437,7 +403,7 @@ pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) fw += fq[i]; } y[0] = if ih == 0 { fw } else { -fw }; 
- }, + } 1 | 2 => { fw = 0f64; for i in (0..=jz).rev() { @@ -446,38 +412,39 @@ pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) // TODO: drop excess precision here once double_t is used fw = fw as f64; y[0] = if ih == 0 { fw } else { -fw }; - fw = fq[0]-fw; + fw = fq[0] - fw; for i in 1..=jz { fw += fq[i]; } y[1] = if ih == 0 { fw } else { -fw }; - }, - 3 => { /* painful */ + } + 3 => { + /* painful */ for i in (1..=jz).rev() { - fw = fq[i-1]+fq[i]; - fq[i] += fq[i-1]-fw; - fq[i-1] = fw; + fw = fq[i - 1] + fq[i]; + fq[i] += fq[i - 1] - fw; + fq[i - 1] = fw; } for i in (2..=jz).rev() { - fw = fq[i-1]+fq[i]; - fq[i] += fq[i-1]-fw; - fq[i-1] = fw; + fw = fq[i - 1] + fq[i]; + fq[i] += fq[i - 1] - fw; + fq[i - 1] = fw; } fw = 0f64; for i in (2..=jz).rev() { fw += fq[i]; } - if ih==0 { - y[0] = fq[0]; - y[1] = fq[1]; - y[2] = fw; + if ih == 0 { + y[0] = fq[0]; + y[1] = fq[1]; + y[2] = fw; } else { y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; } - }, - _ => { unreachable!() } + } + _ => unreachable!(), } n & 7 } diff --git a/libm/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs index a908ccd9f..73ec3775d 100644 --- a/libm/src/math/rem_pio2f.rs +++ b/libm/src/math/rem_pio2f.rs @@ -2,40 +2,42 @@ use super::rem_pio2_large; use core::f64; -const TOINT : f64 = 1.5 / f64::EPSILON; +const TOINT: f64 = 1.5 / f64::EPSILON; /// 53 bits of 2/pi -const INV_PIO2 : f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ +const INV_PIO2: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ /// first 25 bits of pi/2 -const PIO2_1 : f64 = 1.57079631090164184570e+00; /* 0x3FF921FB, 0x50000000 */ +const PIO2_1: f64 = 1.57079631090164184570e+00; /* 0x3FF921FB, 0x50000000 */ /// pi/2 - pio2_1 -const PIO2_1T : f64 = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ +const PIO2_1T: f64 = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ /// Return the remainder of x rem pi/2 in *y /// /// use double precision for everything except 
passing x /// use __rem_pio2_large() for large x #[inline] -pub(crate) fn rem_pio2f(x : f32) -> (i32, f64) { +pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) { let x64 = x as f64; - - let mut tx : [f64; 1] = [0.,]; - let mut ty : [f64; 1] = [0.,]; + + let mut tx: [f64; 1] = [0.]; + let mut ty: [f64; 1] = [0.]; let ix = x.to_bits() & 0x7fffffff; /* 25+53 bit pi is good enough for medium size */ - if ix < 0x4dc90fdb { /* |x| ~< 2^28*(pi/2), medium size */ + if ix < 0x4dc90fdb { + /* |x| ~< 2^28*(pi/2), medium size */ /* Use a specialized rint() to get fn. Assume round-to-nearest. */ - let f_n = x64*INV_PIO2 + TOINT - TOINT; - return (f_n as i32, x64 - f_n*PIO2_1 - f_n*PIO2_1T); + let f_n = x64 * INV_PIO2 + TOINT - TOINT; + return (f_n as i32, x64 - f_n * PIO2_1 - f_n * PIO2_1T); } - if ix>=0x7f800000 { /* x is inf or NaN */ - return (0, x64-x64); + if ix >= 0x7f800000 { + /* x is inf or NaN */ + return (0, x64 - x64); } /* scale x into [2^23, 2^24-1] */ let sign = (x.to_bits() >> 31) != 0; - let e0 = ((ix>>23) - (0x7f+23)) as i32; /* e0 = ilogb(|x|)-23, positive */ - tx[0] = f32::from_bits(ix - (e0<<23) as u32) as f64; + let e0 = ((ix >> 23) - (0x7f + 23)) as i32; /* e0 = ilogb(|x|)-23, positive */ + tx[0] = f32::from_bits(ix - (e0 << 23) as u32) as f64; let n = rem_pio2_large(&tx, &mut ty, e0, 0); if sign { return (-n, -ty[0]); From 526d233d560bf4ab2e92917d015f43118a7a4b64 Mon Sep 17 00:00:00 2001 From: Opal Date: Sat, 14 Jul 2018 18:28:11 +1200 Subject: [PATCH 0056/1459] Adding acos implementation --- libm/src/lib.rs | 2 - libm/src/math/acos.rs | 108 ++++++++++++++++++++++++++++++++ libm/src/math/mod.rs | 10 +-- libm/test-generator/src/main.rs | 2 +- 4 files changed, 115 insertions(+), 7 deletions(-) create mode 100644 libm/src/math/acos.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 30f879769..9b3a6457f 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -412,7 +412,6 @@ pub trait F64Ext: private::Sealed { #[cfg(todo)] fn asin(self) -> Self; - 
#[cfg(todo)] fn acos(self) -> Self; #[cfg(todo)] @@ -589,7 +588,6 @@ impl F64Ext for f64 { asin(self) } - #[cfg(todo)] #[inline] fn acos(self) -> Self { acos(self) diff --git a/libm/src/math/acos.rs b/libm/src/math/acos.rs new file mode 100644 index 000000000..276e361f3 --- /dev/null +++ b/libm/src/math/acos.rs @@ -0,0 +1,108 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_acos.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* acos(x) + * Method : + * acos(x) = pi/2 - asin(x) + * acos(-x) = pi/2 + asin(x) + * For |x|<=0.5 + * acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c) + * For x>0.5 + * acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2))) + * = 2asin(sqrt((1-x)/2)) + * = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z) + * = 2f + (2c + 2s*z*R(z)) + * where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term + * for f so that f+c ~ sqrt(z). + * For x<-0.5 + * acos(x) = pi - 2asin(sqrt((1-|x|)/2)) + * = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z) + * + * Special cases: + * if x is NaN, return x itself; + * if |x|>1, return NaN with invalid signal. 
+ * + * Function needed: sqrt + */ + +use super::sqrt; + +const PIO2_HI: f64 = 1.57079632679489655800e+00; /* 0x3FF921FB, 0x54442D18 */ +const PIO2_LO: f64 = 6.12323399573676603587e-17; /* 0x3C91A626, 0x33145C07 */ +const PS0: f64 = 1.66666666666666657415e-01; /* 0x3FC55555, 0x55555555 */ +const PS1: f64 = -3.25565818622400915405e-01; /* 0xBFD4D612, 0x03EB6F7D */ +const PS2: f64 = 2.01212532134862925881e-01; /* 0x3FC9C155, 0x0E884455 */ +const PS3: f64 = -4.00555345006794114027e-02; /* 0xBFA48228, 0xB5688F3B */ +const PS4: f64 = 7.91534994289814532176e-04; /* 0x3F49EFE0, 0x7501B288 */ +const PS5: f64 = 3.47933107596021167570e-05; /* 0x3F023DE1, 0x0DFDF709 */ +const QS1: f64 = -2.40339491173441421878e+00; /* 0xC0033A27, 0x1C8A2D4B */ +const QS2: f64 = 2.02094576023350569471e+00; /* 0x40002AE5, 0x9C598AC8 */ +const QS3: f64 = -6.88283971605453293030e-01; /* 0xBFE6066C, 0x1B8D0159 */ +const QS4: f64 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ + +#[inline] +fn r(z: f64) -> f64 { + let p: f64 = z * (PS0 + z * (PS1 + z * (PS2 + z * (PS3 + z * (PS4 + z * PS5))))); + let q: f64 = 1.0 + z * (QS1 + z * (QS2 + z * (QS3 + z * QS4))); + return p / q; +} + +#[inline] +pub fn acos(x: f64) -> f64 { + let x1p_120f = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ -120 + let z: f64; + let w: f64; + let s: f64; + let c: f64; + let df: f64; + let hx: u32; + let ix: u32; + + hx = (x.to_bits() >> 32) as u32; + ix = hx & 0x7fffffff; + /* |x| >= 1 or nan */ + if ix >= 0x3ff00000 { + let lx: u32 = x.to_bits() as u32; + + if (ix - 0x3ff00000 | lx) == 0 { + /* acos(1)=0, acos(-1)=pi */ + if (hx >> 31) != 0 { + return 2. * PIO2_HI + x1p_120f; + } + return 0.; + } + return 0. 
/ (x - x); + } + /* |x| < 0.5 */ + if ix < 0x3fe00000 { + if ix <= 0x3c600000 { + /* |x| < 2**-57 */ + return PIO2_HI + x1p_120f; + } + return PIO2_HI - (x - (PIO2_LO - x * r(x * x))); + } + /* x < -0.5 */ + if (hx >> 31) != 0 { + z = (1.0 + x) * 0.5; + s = sqrt(z); + w = r(z) * s - PIO2_LO; + return 2. * (PIO2_HI - (s + w)); + } + /* x > 0.5 */ + z = (1.0 - x) * 0.5; + s = sqrt(z); + // Set the low 4 bytes to zero + df = f64::from_bits(s.to_bits() & 0xff_ff_ff_ff_00_00_00_00); + + c = (z - df * df) / (s + df); + w = r(z) * s + c; + return 2. * (df + w); +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index fb5e3df90..f70555e9c 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -6,6 +6,7 @@ macro_rules! force_eval { }; } +mod acos; mod ceilf; mod expf; mod fabs; @@ -36,10 +37,11 @@ mod truncf; //mod service; pub use self::{ - ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, fmodf::fmodf, - hypot::hypot, hypotf::hypotf, log::log, log10::log10, log10f::log10f, log1p::log1p, - log1pf::log1pf, log2::log2, log2f::log2f, logf::logf, powf::powf, round::round, roundf::roundf, - scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, truncf::truncf, + acos::acos, ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, + fmodf::fmodf, hypot::hypot, hypotf::hypotf, log::log, log10::log10, log10f::log10f, + log1p::log1p, log1pf::log1pf, log2::log2, log2f::log2f, logf::logf, powf::powf, round::round, + roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, + truncf::truncf, }; fn isnanf(x: f32) -> bool { diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index be5ed0784..d48851a9e 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -696,7 +696,7 @@ f32i32_f32! { // With signature `fn(f64) -> f64` f64_f64! 
{ - // acos, + acos, // asin, // atan, // cbrt, From 82b2a42e6c7e5eb76b085bb4bddaf959daf6cf82 Mon Sep 17 00:00:00 2001 From: C Jones Date: Sat, 14 Jul 2018 05:48:39 -0400 Subject: [PATCH 0057/1459] Use separate imports instead of {}-grouped imports for better merges Previously every merge would cause conflicts because the {}-group would re-flow, which the merge algorithm can't handle. This will hopefully make rebases and merges go more smoothly while everyone is still adding new modules. --- libm/src/math/mod.rs | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index fb5e3df90..78f899141 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -35,12 +35,33 @@ mod truncf; //mod service; -pub use self::{ - ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, fmodf::fmodf, - hypot::hypot, hypotf::hypotf, log::log, log10::log10, log10f::log10f, log1p::log1p, - log1pf::log1pf, log2::log2, log2f::log2f, logf::logf, powf::powf, round::round, roundf::roundf, - scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, truncf::truncf, -}; +// Use separated imports instead of {}-grouped imports for easier merging. 
+pub use self::ceilf::ceilf; +pub use self::expf::expf; +pub use self::fabs::fabs; +pub use self::fabsf::fabsf; +pub use self::floor::floor; +pub use self::floorf::floorf; +pub use self::fmodf::fmodf; +pub use self::hypot::hypot; +pub use self::hypotf::hypotf; +pub use self::log::log; +pub use self::log10::log10; +pub use self::log10f::log10f; +pub use self::log1p::log1p; +pub use self::log1pf::log1pf; +pub use self::log2::log2; +pub use self::log2f::log2f; +pub use self::logf::logf; +pub use self::powf::powf; +pub use self::round::round; +pub use self::roundf::roundf; +pub use self::scalbn::scalbn; +pub use self::scalbnf::scalbnf; +pub use self::sqrt::sqrt; +pub use self::sqrtf::sqrtf; +pub use self::trunc::trunc; +pub use self::truncf::truncf; fn isnanf(x: f32) -> bool { x.to_bits() & 0x7fffffff > 0x7f800000 From aa0a26df7d0ba1cb386e21b3b8c9100dc405a6e0 Mon Sep 17 00:00:00 2001 From: Jack Mott Date: Sat, 14 Jul 2018 05:35:07 -0500 Subject: [PATCH 0058/1459] fdimf --- libm/src/lib.rs | 7 +++++++ libm/src/math/fdimf.rs | 15 +++++++++++++++ libm/src/math/mod.rs | 10 ++++++---- libm/test-generator/src/main.rs | 2 +- 4 files changed, 29 insertions(+), 5 deletions(-) create mode 100644 libm/src/math/fdimf.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 30f879769..7d8ae903a 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -42,6 +42,8 @@ pub trait F32Ext: private::Sealed { fn trunc(self) -> Self; + fn fdim(self, rhs: Self) -> Self; + #[cfg(todo)] fn fract(self) -> Self; @@ -156,6 +158,11 @@ impl F32Ext for f32 { truncf(self) } + #[inline] + fn fdim(self, rhs: Self) -> Self { + fdimf(self, rhs) + } + #[cfg(todo)] #[inline] fn fract(self) -> Self { diff --git a/libm/src/math/fdimf.rs b/libm/src/math/fdimf.rs new file mode 100644 index 000000000..3cf65d9be --- /dev/null +++ b/libm/src/math/fdimf.rs @@ -0,0 +1,15 @@ +use super::isnanf; + +pub fn fdimf(x: f32, y: f32) -> f32 { + if isnanf(x) { + x + } else if isnanf(y) { + y + } else { + if x > y { + x - y + } 
else { + 0.0 + } + } +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index fb5e3df90..a9ba87aa2 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -10,6 +10,7 @@ mod ceilf; mod expf; mod fabs; mod fabsf; +mod fdimf; mod floor; mod floorf; mod fmodf; @@ -36,10 +37,11 @@ mod truncf; //mod service; pub use self::{ - ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, fmodf::fmodf, - hypot::hypot, hypotf::hypotf, log::log, log10::log10, log10f::log10f, log1p::log1p, - log1pf::log1pf, log2::log2, log2f::log2f, logf::logf, powf::powf, round::round, roundf::roundf, - scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, truncf::truncf, + ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, fdimf::fdimf, floor::floor, floorf::floorf, + fmodf::fmodf, hypot::hypot, hypotf::hypotf, log::log, log10::log10, log10f::log10f, + log1p::log1p, log1pf::log1pf, log2::log2, log2f::log2f, logf::logf, powf::powf, round::round, + roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, + truncf::truncf, }; fn isnanf(x: f32) -> bool { diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index be5ed0784..295293562 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -662,7 +662,6 @@ f32_f32! { // coshf, // exp2f, expf, - // fdimf, log10f, log1pf, log2f, @@ -679,6 +678,7 @@ f32_f32! { // With signature `fn(f32, f32) -> f32` f32f32_f32! 
{ // atan2f, + fdimf, hypotf, fmodf, powf, From c7e212b88470b79d3aa76f6965296b3718ebf6cf Mon Sep 17 00:00:00 2001 From: Jack Mott Date: Sat, 14 Jul 2018 06:20:13 -0500 Subject: [PATCH 0059/1459] fdim --- libm/src/lib.rs | 6 ++++++ libm/src/math/fdim.rs | 15 +++++++++++++++ libm/src/math/fdimf.rs | 6 +++--- libm/src/math/fmodf.rs | 5 ++--- libm/src/math/mod.rs | 15 ++++++--------- libm/test-generator/src/main.rs | 2 +- 6 files changed, 33 insertions(+), 16 deletions(-) create mode 100644 libm/src/math/fdim.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 7d8ae903a..df96aadc7 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -363,6 +363,8 @@ pub trait F64Ext: private::Sealed { fn trunc(self) -> Self; + fn fdim(self, rhs: Self) -> Self; + #[cfg(todo)] fn fract(self) -> Self; @@ -480,6 +482,10 @@ impl F64Ext for f64 { trunc(self) } + #[inline] + fn fdim(self, rhs: Self) -> Self { + fdim(self, rhs) + } #[cfg(todo)] #[inline] fn fract(self) -> Self { diff --git a/libm/src/math/fdim.rs b/libm/src/math/fdim.rs new file mode 100644 index 000000000..2b277eab0 --- /dev/null +++ b/libm/src/math/fdim.rs @@ -0,0 +1,15 @@ +use core::f64; + +pub fn fdim(x: f64, y: f64) -> f64 { + if x.is_nan() { + x + } else if y.is_nan() { + y + } else { + if x > y { + x - y + } else { + 0.0 + } + } +} diff --git a/libm/src/math/fdimf.rs b/libm/src/math/fdimf.rs index 3cf65d9be..44bf2d680 100644 --- a/libm/src/math/fdimf.rs +++ b/libm/src/math/fdimf.rs @@ -1,9 +1,9 @@ -use super::isnanf; +use core::f32; pub fn fdimf(x: f32, y: f32) -> f32 { - if isnanf(x) { + if x.is_nan() { x - } else if isnanf(y) { + } else if y.is_nan() { y } else { if x > y { diff --git a/libm/src/math/fmodf.rs b/libm/src/math/fmodf.rs index 909775249..d84cfeb01 100644 --- a/libm/src/math/fmodf.rs +++ b/libm/src/math/fmodf.rs @@ -1,7 +1,6 @@ +use core::f32; use core::u32; -use super::isnanf; - #[inline] pub fn fmodf(x: f32, y: f32) -> f32 { let mut uxi = x.to_bits(); @@ -11,7 +10,7 @@ pub fn fmodf(x: f32, y: f32) 
-> f32 { let sx = uxi & 0x80000000; let mut i; - if uyi << 1 == 0 || isnanf(y) || ex == 0xff { + if uyi << 1 == 0 || y.is_nan() || ex == 0xff { return (x * y) / (x * y); } diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index a9ba87aa2..e526314a6 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -10,6 +10,7 @@ mod ceilf; mod expf; mod fabs; mod fabsf; +mod fdim; mod fdimf; mod floor; mod floorf; @@ -37,13 +38,9 @@ mod truncf; //mod service; pub use self::{ - ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, fdimf::fdimf, floor::floor, floorf::floorf, - fmodf::fmodf, hypot::hypot, hypotf::hypotf, log::log, log10::log10, log10f::log10f, - log1p::log1p, log1pf::log1pf, log2::log2, log2f::log2f, logf::logf, powf::powf, round::round, - roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, - truncf::truncf, + ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, fdim::fdim, fdimf::fdimf, floor::floor, + floorf::floorf, fmodf::fmodf, hypot::hypot, hypotf::hypotf, log::log, log10::log10, + log10f::log10f, log1p::log1p, log1pf::log1pf, log2::log2, log2f::log2f, logf::logf, powf::powf, + round::round, roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, + trunc::trunc, truncf::truncf, }; - -fn isnanf(x: f32) -> bool { - x.to_bits() & 0x7fffffff > 0x7f800000 -} diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 295293562..bf679ca64 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -724,7 +724,7 @@ f64_f64! { // With signature `fn(f64, f64) -> f64` f64f64_f64! 
{ // atan2, - // fdim, + fdim, // fmod, hypot, // pow, From 6f5e5e25b24527ec72450b0c54fff85e2aa10564 Mon Sep 17 00:00:00 2001 From: Lucas Marques Date: Sat, 14 Jul 2018 00:23:58 -0300 Subject: [PATCH 0060/1459] implement ceil --- libm/src/lib.rs | 2 -- libm/src/math/ceil.rs | 39 +++++++++++++++++++++++++++++++++++++++ libm/src/math/mod.rs | 10 ++++++---- 3 files changed, 45 insertions(+), 6 deletions(-) create mode 100644 libm/src/math/ceil.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 30f879769..5af219fcc 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -349,7 +349,6 @@ impl F32Ext for f32 { pub trait F64Ext: private::Sealed { fn floor(self) -> Self; - #[cfg(todo)] fn ceil(self) -> Self; fn round(self) -> Self; @@ -457,7 +456,6 @@ impl F64Ext for f64 { floor(self) } - #[cfg(todo)] #[inline] fn ceil(self) -> Self { ceil(self) diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs new file mode 100644 index 000000000..4db2ca840 --- /dev/null +++ b/libm/src/math/ceil.rs @@ -0,0 +1,39 @@ +use core::f64; + +const TOINT: f64 = 1. / f64::EPSILON; + +#[inline] +pub fn ceil(x: f64) -> f64 { + let u: u64 = x.to_bits(); + let e: i64 = (u >> 52 & 0x7ff) as i64; + let y: f64; + + if e >= 0x3ff + 52 || x == 0. { + return x; + } + // y = int(x) - x, where int(x) is an integer neighbor of x + y = if (u >> 63) != 0 { + x - TOINT + TOINT - x + } else { + x + TOINT - TOINT - x + }; + // special case because of non-nearest rounding modes + if e <= 0x3ff - 1 { + force_eval!(y); + return if (u >> 63) != 0 { -0. } else { 1. }; + } + if y < 0. { + x + y + 1. + } else { + x + y + } +} + +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::ceil(1.1), 2.0); + assert_eq!(super::ceil(2.9), 3.0); + } +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index fb5e3df90..7c676095d 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -6,6 +6,7 @@ macro_rules! 
force_eval { }; } +mod ceil; mod ceilf; mod expf; mod fabs; @@ -36,10 +37,11 @@ mod truncf; //mod service; pub use self::{ - ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, fmodf::fmodf, - hypot::hypot, hypotf::hypotf, log::log, log10::log10, log10f::log10f, log1p::log1p, - log1pf::log1pf, log2::log2, log2f::log2f, logf::logf, powf::powf, round::round, roundf::roundf, - scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, truncf::truncf, + ceil::ceil, ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, + fmodf::fmodf, hypot::hypot, hypotf::hypotf, log::log, log10::log10, log10f::log10f, + log1p::log1p, log1pf::log1pf, log2::log2, log2f::log2f, logf::logf, powf::powf, round::round, + roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, + truncf::truncf, }; fn isnanf(x: f32) -> bool { From df6042fafef45d8cb046271fb8cc336371e02121 Mon Sep 17 00:00:00 2001 From: Lucas Marques Date: Sat, 14 Jul 2018 00:41:03 -0300 Subject: [PATCH 0061/1459] enable test generation for ceil --- libm/test-generator/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index be5ed0784..d386da9f6 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -700,7 +700,7 @@ f64_f64! 
{ // asin, // atan, // cbrt, - // ceil, + ceil, // cos, // cosh, // exp, From 712bfdcfd87aeac4a74a206462e96e967d812e6d Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sat, 14 Jul 2018 18:31:28 +0200 Subject: [PATCH 0062/1459] Implement cosh, exp and expm1 --- libm/src/math/cosh.rs | 54 ++++++++++++++ libm/src/math/exp.rs | 84 ++++++++++++++++++++++ libm/src/math/expm1.rs | 124 ++++++++++++++++++++++++++++++++ libm/src/math/mod.rs | 12 ++-- libm/test-generator/src/main.rs | 6 +- 5 files changed, 273 insertions(+), 7 deletions(-) create mode 100644 libm/src/math/cosh.rs create mode 100644 libm/src/math/exp.rs create mode 100644 libm/src/math/expm1.rs diff --git a/libm/src/math/cosh.rs b/libm/src/math/cosh.rs new file mode 100644 index 000000000..ce541ce14 --- /dev/null +++ b/libm/src/math/cosh.rs @@ -0,0 +1,54 @@ +use core::f64; + +use super::exp; +use super::expm1; + +pub fn cosh(mut x: f64) -> f64 { + let t: f64; + /* |x| */ + let mut ui = x.to_bits(); + ui &= !0u64; + x = f64::from_bits(ui); + let w = (ui >> 32) as u32; + + /* |x| < log(2) */ + if w < 0x3fe62e42 { + if w < 0x3ff00000 - (26 << 20) { + /* raise inexact if x!=0 */ + force_eval!(x + f64::from_bits(0x4770000000000000)); + return 1.0; + } + let t = expm1(x); + return 1.0 + t * t / (2.0 * (1.0 + t)); + } + + /* |x| < log(DBL_MAX) */ + if w < 0x40862e42 { + t = exp(x); + /* note: if x>log(0x1p26) then the 1/t is not needed */ + return 0.5 * (t + 1.0 / t); + } + + /* |x| > log(DBL_MAX) or nan */ + /* note: the result is stored to handle overflow */ + t = __expo2(x); + return t; +} + +const K: u32 = 2043; + +pub fn __expo2(x: f64) -> f64 { + let kln2 = f64::from_bits(0x40962066151add8b); + /* note that k is odd and scale*scale overflows */ + let scale = f64::from_bits(((0x3ff + K / 2) << 20) as u64); + /* exp(x - k ln2) * 2**(k-1) */ + return exp(x - kln2) * scale * scale; +} + +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::cosh(1.1), 1.6685185538222564); + } +} diff --git 
a/libm/src/math/exp.rs b/libm/src/math/exp.rs new file mode 100644 index 000000000..870bf6085 --- /dev/null +++ b/libm/src/math/exp.rs @@ -0,0 +1,84 @@ +use super::scalbn; + +const HALF: [f64; 2] = [0.5, -0.5]; +const LN2_HI: f64 = 6.93147180369123816490e-01; /* 0x3fe62e42, 0xfee00000 */ +const LN2_LO: f64 = 1.90821492927058770002e-10; /* 0x3dea39ef, 0x35793c76 */ +const INV_LN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547, 0x652b82fe */ +const P1: f64 = 1.66666666666666019037e-01; /* 0x3FC55555, 0x5555553E */ +const P2: f64 = -2.77777777770155933842e-03; /* 0xBF66C16C, 0x16BEBD93 */ +const P3: f64 = 6.61375632143793436117e-05; /* 0x3F11566A, 0xAF25DE2C */ +const P4: f64 = -1.65339022054652515390e-06; /* 0xBEBBBD41, 0xC5D26BF1 */ +const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ + +#[inline] +pub fn exp(mut x: f64) -> f64 { + let mut hx: u32 = (x.to_bits() >> 32) as u32; + let sign = (hx >> 31) as i32; /* sign bit of x */ + hx &= 0x7fffffff; /* high word of |x| */ + + /* special cases */ + if hx >= 0x4086232b { + /* if |x| >= 708.39... 
*/ + if x.is_nan() { + return x; + } + if x > 709.782712893383973096 { + /* overflow if x!=inf */ + x *= f64::from_bits(0x7fe0000000000000); + return x; + } + if x < -708.39641853226410622 { + /* underflow if x!=-inf */ + force_eval!((f64::from_bits(0xb6a0000000000000) / x) as f32); + if x < -745.13321910194110842 { + return 0.0; + } + } + } + + /* argument reduction */ + let k: i32; + let hi: f64; + let lo: f64; + if hx > 0x3fd62e42 { + /* if |x| > 0.5 ln2 */ + /* if |x| > 0.5 ln2 */ + if hx > 0x3ff0a2b2 { + /* if |x| > 1.5 ln2 */ + k = (INV_LN2 * x + HALF[sign as usize]) as i32; + } else { + k = 1 - sign - sign; + } + let kf = k as f64; + hi = x - kf * LN2_HI; /* k*ln2hi is exact here */ + lo = kf * LN2_LO; + x = hi - lo; + } else if hx > 0x3e300000 { + /* |x| > 2**-14 */ + k = 0; + hi = x; + lo = 0.0; + } else { + /* raise inexact */ + force_eval!(f64::from_bits(0x7fe0000000000000) + x); + return 1.0 + x; + } + + /* x is now in primary range */ + let xx = x * x; + let c = x - xx * (P1 + xx * (P2 + xx * (P3 + xx * (P4 + xx * P5)))); + let y = 1.0 + (x * c / (2.0 - c) - lo + hi); + if k == 0 { + y + } else { + scalbn(y, k) + } +} + +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::exp(1.1), 3.0041660239464334); + } +} diff --git a/libm/src/math/expm1.rs b/libm/src/math/expm1.rs new file mode 100644 index 000000000..2fc230b05 --- /dev/null +++ b/libm/src/math/expm1.rs @@ -0,0 +1,124 @@ +use core::f64; + +const O_THRESHOLD: f64 = 7.09782712893383973096e+02; /* 0x40862E42, 0xFEFA39EF */ +const LN2_HI: f64 = 6.93147180369123816490e-01; /* 0x3fe62e42, 0xfee00000 */ +const LN2_LO: f64 = 1.90821492927058770002e-10; /* 0x3dea39ef, 0x35793c76 */ +const INVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547, 0x652b82fe */ +/* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs = x*x/2: */ +const Q1: f64 = -3.33333333333331316428e-02; /* BFA11111 111110F4 */ +const Q2: f64 = 1.58730158725481460165e-03; /* 3F5A01A0 19FE5585 */ +const 
Q3: f64 = -7.93650757867487942473e-05; /* BF14CE19 9EAADBB7 */ +const Q4: f64 = 4.00821782732936239552e-06; /* 3ED0CFCA 86E65239 */ +const Q5: f64 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ + +pub fn expm1(mut x: f64) -> f64 { + let hi: f64; + let lo: f64; + let k: i32; + let c: f64; + let mut t: f64; + let mut y: f64; + + let mut ui = x.to_bits() >> 32; + let hx = ui & 0x7fffffff; + let sign = (ui >> 63) as i32; + + /* filter out huge and non-finite argument */ + if hx >= 0x4043687A { + /* if |x|>=56*ln2 */ + if x.is_nan() { + return x; + } + if sign != 0 { + return -1.0; + } + if x > O_THRESHOLD { + x *= f64::from_bits(0x7fe0000000000000); + return x; + } + } + + /* argument reduction */ + if hx > 0x3fd62e42 { + /* if |x| > 0.5 ln2 */ + if hx < 0x3FF0A2B2 { + /* and |x| < 1.5 ln2 */ + if sign == 0 { + hi = x - LN2_HI; + lo = LN2_LO; + k = 1; + } else { + hi = x + LN2_HI; + lo = -LN2_LO; + k = -1; + } + } else { + k = (INVLN2 * x + if sign != 0 { -0.5 } else { 0.5 }) as i32; + t = k as f64; + hi = x - t * LN2_HI; /* t*ln2_hi is exact here */ + lo = t * LN2_LO; + } + x = hi - lo; + c = (hi - x) - lo; + } else if hx < 0x3c900000 { + /* |x| < 2**-54, return x */ + if hx < 0x00100000 { + force_eval!(x as f32); + } + return x; + } else { + c = 0.0; + k = 0; + } + + /* x is now in primary range */ + let hfx = 0.5 * x; + let hxs = x * hfx; + let r1 = 1.0 + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5)))); + t = 3.0 - r1 * hfx; + let mut e = hxs * ((r1 - t) / (6.0 - x * t)); + if k == 0 { + /* c is 0 */ + return x - (x * e - hxs); + } + e = x * (e - c) - c; + e -= hxs; + /* exp(x) ~ 2^k (x_reduced - e + 1) */ + if k == -1 { + return 0.5 * (x - e) - 0.5; + } + if k == 1 { + if x < -0.25 { + return -2.0 * (e - (x + 0.5)); + } + return 1.0 + 2.0 * (x - e); + } + ui = ((0x3ff + k) as u64) << 52; /* 2^k */ + let twopk = f64::from_bits(ui); + if k < 0 || k > 56 { + /* suffice to return exp(x)-1 */ + y = x - e + 1.0; + if k == 1024 { + y = y * 2.0 * 
f64::from_bits(0x7fe0000000000000); + } else { + y = y * twopk; + } + return y - 1.0; + } + ui = ((0x3ff - k) as u64) << 52; /* 2^-k */ + let uf = f64::from_bits(ui); + if k < 20 { + y = (x - e + (1.0 - uf)) * twopk; + } else { + y = (x - (e + uf) + 1.0) * twopk; + } + y +} + +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::expm1(1.1), 2.0041660239464334); + } +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index fb5e3df90..b4f12b67d 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -7,7 +7,10 @@ macro_rules! force_eval { } mod ceilf; +mod cosh; +mod exp; mod expf; +mod expm1; mod fabs; mod fabsf; mod floor; @@ -36,10 +39,11 @@ mod truncf; //mod service; pub use self::{ - ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, fmodf::fmodf, - hypot::hypot, hypotf::hypotf, log::log, log10::log10, log10f::log10f, log1p::log1p, - log1pf::log1pf, log2::log2, log2f::log2f, logf::logf, powf::powf, round::round, roundf::roundf, - scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, truncf::truncf, + ceilf::ceilf, cosh::cosh, exp::exp, expf::expf, expm1::expm1, fabs::fabs, fabsf::fabsf, + floor::floor, floorf::floorf, fmodf::fmodf, hypot::hypot, hypotf::hypotf, log::log, + log10::log10, log10f::log10f, log1p::log1p, log1pf::log1pf, log2::log2, log2f::log2f, + logf::logf, powf::powf, round::round, roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, + sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, truncf::truncf, }; fn isnanf(x: f32) -> bool { diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index be5ed0784..b4f11dc1c 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -702,10 +702,10 @@ f64_f64! 
{ // cbrt, // ceil, // cos, - // cosh, - // exp, + cosh, + exp, // exp2, - // expm1, + expm1, floor, log, log10, From c899a9ef00a1d0b53dfcf3ffe5464e832af8ea78 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sat, 14 Jul 2018 18:45:54 +0200 Subject: [PATCH 0063/1459] add lib.rs changes --- libm/src/lib.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 30f879769..2ef2092e7 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -381,7 +381,6 @@ pub trait F64Ext: private::Sealed { fn sqrt(self) -> Self; - #[cfg(todo)] fn exp(self) -> Self; #[cfg(todo)] @@ -427,7 +426,6 @@ pub trait F64Ext: private::Sealed { (self.sin(), self.cos()) } - #[cfg(todo)] fn exp_m1(self) -> Self; fn ln_1p(self) -> Self; @@ -435,7 +433,6 @@ pub trait F64Ext: private::Sealed { #[cfg(todo)] fn sinh(self) -> Self; - #[cfg(todo)] fn cosh(self) -> Self; #[cfg(todo)] @@ -522,7 +519,6 @@ impl F64Ext for f64 { sqrt(self) } - #[cfg(todo)] #[inline] fn exp(self) -> Self { exp(self) @@ -607,7 +603,6 @@ impl F64Ext for f64 { atan2(self, other) } - #[cfg(todo)] #[inline] fn exp_m1(self) -> Self { expm1(self) @@ -624,7 +619,6 @@ impl F64Ext for f64 { sinh(self) } - #[cfg(todo)] #[inline] fn cosh(self) -> Self { cosh(self) From 0dc451cedb7fcaa0d1ad4c96f634090dd7e63fa0 Mon Sep 17 00:00:00 2001 From: Jack Mott Date: Sat, 14 Jul 2018 11:51:46 -0500 Subject: [PATCH 0064/1459] fix rebase merge --- libm/src/math/mod.rs | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 21bd7a480..324220a35 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -42,6 +42,8 @@ pub use self::cosf::cosf; pub use self::expf::expf; pub use self::fabs::fabs; pub use self::fabsf::fabsf; +pub use self::fdim::fdim; +pub use self::fdimf::fdimf; pub use self::floor::floor; pub use self::floorf::floorf; pub use self::fmodf::fmodf; @@ -65,20 +67,9 @@ pub use self::sqrtf::sqrtf; pub use self::trunc::trunc; pub 
use self::truncf::truncf; -pub use self::{ - ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, fdim::fdim, fdimf::fdimf, floor::floor, - floorf::floorf, fmodf::fmodf, hypot::hypot, hypotf::hypotf, log::log, log10::log10, - log10f::log10f, log1p::log1p, log1pf::log1pf, log2::log2, log2f::log2f, logf::logf, powf::powf, - round::round, roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, - trunc::trunc, truncf::truncf, -}; mod k_cosf; mod k_sinf; mod rem_pio2_large; mod rem_pio2f; use self::{k_cosf::k_cosf, k_sinf::k_sinf, rem_pio2_large::rem_pio2_large, rem_pio2f::rem_pio2f}; - -fn isnanf(x: f32) -> bool { - x.to_bits() & 0x7fffffff > 0x7f800000 -} From 71b938bb6bd0f43a79aad89ab3b52bb37532c1f3 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sat, 14 Jul 2018 19:14:56 +0200 Subject: [PATCH 0065/1459] add missing bitshift for __expo2 and move it into its own module --- libm/src/math/cosh.rs | 13 ++----------- libm/src/math/k_expo2.rs | 12 ++++++++++++ libm/src/math/mod.rs | 8 ++++++-- 3 files changed, 20 insertions(+), 13 deletions(-) create mode 100644 libm/src/math/k_expo2.rs diff --git a/libm/src/math/cosh.rs b/libm/src/math/cosh.rs index ce541ce14..e76edca03 100644 --- a/libm/src/math/cosh.rs +++ b/libm/src/math/cosh.rs @@ -2,6 +2,7 @@ use core::f64; use super::exp; use super::expm1; +use super::k_expo2; pub fn cosh(mut x: f64) -> f64 { let t: f64; @@ -31,20 +32,10 @@ pub fn cosh(mut x: f64) -> f64 { /* |x| > log(DBL_MAX) or nan */ /* note: the result is stored to handle overflow */ - t = __expo2(x); + t = k_expo2(x); return t; } -const K: u32 = 2043; - -pub fn __expo2(x: f64) -> f64 { - let kln2 = f64::from_bits(0x40962066151add8b); - /* note that k is odd and scale*scale overflows */ - let scale = f64::from_bits(((0x3ff + K / 2) << 20) as u64); - /* exp(x - k ln2) * 2**(k-1) */ - return exp(x - kln2) * scale * scale; -} - #[cfg(test)] mod tests { #[test] diff --git a/libm/src/math/k_expo2.rs b/libm/src/math/k_expo2.rs new file mode 100644 
index 000000000..c67a86550 --- /dev/null +++ b/libm/src/math/k_expo2.rs @@ -0,0 +1,12 @@ +use super::exp; + +const K: u32 = 2043; +const KLN2: f64 = 1416.0996898839683; + +#[inline] +pub(crate) fn k_expo2(x: f64) -> f64 { + /* note that k is odd and scale*scale overflows */ + let scale = f64::from_bits((((0x3ff + K / 2) << 20) as u64) << 32); + /* exp(x - k ln2) * 2**(k-1) */ + return exp(x - KLN2) * scale * scale; +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 67f482a6f..d9e37ee3a 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -43,8 +43,8 @@ pub use self::ceil::ceil; pub use self::ceilf::ceilf; pub use self::cosf::cosf; pub use self::cosh::cosh; -pub use self::expf::expf; pub use self::exp::exp; +pub use self::expf::expf; pub use self::expm1::expm1; pub use self::fabs::fabs; pub use self::fabsf::fabsf; @@ -72,11 +72,15 @@ pub use self::trunc::trunc; pub use self::truncf::truncf; mod k_cosf; +mod k_expo2; mod k_sinf; mod rem_pio2_large; mod rem_pio2f; -use self::{k_cosf::k_cosf, k_sinf::k_sinf, rem_pio2_large::rem_pio2_large, rem_pio2f::rem_pio2f}; +use self::{ + k_cosf::k_cosf, k_expo2::k_expo2, k_sinf::k_sinf, rem_pio2_large::rem_pio2_large, + rem_pio2f::rem_pio2f, +}; fn isnanf(x: f32) -> bool { x.to_bits() & 0x7fffffff > 0x7f800000 From 84a49af902a0d0e1d5cd1848d425a4a93e55f74e Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Sat, 14 Jul 2018 12:20:10 -0500 Subject: [PATCH 0066/1459] ci: don't test the master master --- libm/.travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/libm/.travis.yml b/libm/.travis.yml index 2bbb60fc6..223b586ea 100644 --- a/libm/.travis.yml +++ b/libm/.travis.yml @@ -35,6 +35,5 @@ before_cache: branches: only: - - master - staging - trying From 7d2438ccb76d163728d1f7a8d7c9986c91a0160d Mon Sep 17 00:00:00 2001 From: Erik Date: Sat, 14 Jul 2018 02:50:23 -0400 Subject: [PATCH 0067/1459] implement exp --- libm/src/lib.rs | 2 - libm/src/math/exp.rs | 150 ++++++++++++++++++++++++++++++++ 
libm/src/math/mod.rs | 2 + libm/test-generator/src/main.rs | 2 +- 4 files changed, 153 insertions(+), 3 deletions(-) create mode 100644 libm/src/math/exp.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 9095fd892..f112aaaca 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -378,7 +378,6 @@ pub trait F64Ext: private::Sealed { fn sqrt(self) -> Self; - #[cfg(todo)] fn exp(self) -> Self; #[cfg(todo)] @@ -518,7 +517,6 @@ impl F64Ext for f64 { sqrt(self) } - #[cfg(todo)] #[inline] fn exp(self) -> Self { exp(self) diff --git a/libm/src/math/exp.rs b/libm/src/math/exp.rs new file mode 100644 index 000000000..cd63b8fb3 --- /dev/null +++ b/libm/src/math/exp.rs @@ -0,0 +1,150 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_exp.c */ +/* + * ==================================================== + * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. + * + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* exp(x) + * Returns the exponential of x. + * + * Method + * 1. Argument reduction: + * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. + * Given x, find r and integer k such that + * + * x = k*ln2 + r, |r| <= 0.5*ln2. + * + * Here r will be represented as r = hi-lo for better + * accuracy. + * + * 2. Approximation of exp(r) by a special rational function on + * the interval [0,0.34658]: + * Write + * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... + * We use a special Remez algorithm on [0,0.34658] to generate + * a polynomial of degree 5 to approximate R. The maximum error + * of this polynomial approximation is bounded by 2**-59. 
In + * other words, + * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 + * (where z=r*r, and the values of P1 to P5 are listed below) + * and + * | 5 | -59 + * | 2.0+P1*z+...+P5*z - R(z) | <= 2 + * | | + * The computation of exp(r) thus becomes + * 2*r + * exp(r) = 1 + ---------- + * R(r) - r + * r*c(r) + * = 1 + r + ----------- (for better accuracy) + * 2 - c(r) + * where + * 2 4 10 + * c(r) = r - (P1*r + P2*r + ... + P5*r ). + * + * 3. Scale back to obtain exp(x): + * From step 1, we have + * exp(x) = 2^k * exp(r) + * + * Special cases: + * exp(INF) is INF, exp(NaN) is NaN; + * exp(-INF) is 0, and + * for finite argument, only exp(0)=1 is exact. + * + * Accuracy: + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). + * + * Misc. info. + * For IEEE double + * if x > 709.782712893383973096 then exp(x) overflows + * if x < -745.133219101941108420 then exp(x) underflows + */ + +use super::scalbn; + +const HALF: [f64; 2] = [0.5, -0.5]; +const LN2HI: f64 = 6.93147180369123816490e-01; /* 0x3fe62e42, 0xfee00000 */ +const LN2LO: f64 = 1.90821492927058770002e-10; /* 0x3dea39ef, 0x35793c76 */ +const INVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547, 0x652b82fe */ +const P1: f64 = 1.66666666666666019037e-01; /* 0x3FC55555, 0x5555553E */ +const P2: f64 = -2.77777777770155933842e-03; /* 0xBF66C16C, 0x16BEBD93 */ +const P3: f64 = 6.61375632143793436117e-05; /* 0x3F11566A, 0xAF25DE2C */ +const P4: f64 = -1.65339022054652515390e-06; /* 0xBEBBBD41, 0xC5D26BF1 */ +const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ + +#[inline] +pub fn exp(mut x: f64) -> f64 { + let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 + let x1p_149 = f64::from_bits(0x36a0000000000000); // 0x1p-149 === 2 ^ -149 + + let hi: f64; + let lo: f64; + let c: f64; + let xx: f64; + let y: f64; + let k: i32; + let sign: i32; + let mut hx: u32; + + hx = (x.to_bits() >> 32) as u32; + sign = (hx >> 31) as 
i32; + hx &= 0x7fffffff; /* high word of |x| */ + + /* special cases */ + if hx >= 0x4086232b { + /* if |x| >= 708.39... */ + if x.is_nan() { + return x; + } + if x > 709.782712893383973096 { + /* overflow if x!=inf */ + x *= x1p1023; + return x; + } + if x < -708.39641853226410622 { + /* underflow if x!=-inf */ + force_eval!((-x1p_149 / x) as f32); + if x < -745.13321910194110842 { + return 0.; + } + } + } + + /* argument reduction */ + if hx > 0x3fd62e42 { + /* if |x| > 0.5 ln2 */ + if hx >= 0x3ff0a2b2 { + /* if |x| >= 1.5 ln2 */ + k = (INVLN2 * x + HALF[sign as usize]) as i32; + } else { + k = 1 - sign - sign; + } + hi = x - k as f64 * LN2HI; /* k*ln2hi is exact here */ + lo = k as f64 * LN2LO; + x = hi - lo; + } else if hx > 0x3e300000 { + /* if |x| > 2**-28 */ + k = 0; + hi = x; + lo = 0.; + } else { + /* inexact if x!=0 */ + force_eval!(x1p1023 + x); + return 1. + x; + } + + /* x is now in primary range */ + xx = x * x; + c = x - xx * (P1 + xx * (P2 + xx * (P3 + xx * (P4 + xx * P5)))); + y = 1. + (x * c / (2. - c) - lo + hi); + if k == 0 { + y + } else { + scalbn(y, k) + } +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index ee846ebb9..2a84b463d 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -9,6 +9,7 @@ macro_rules! force_eval { mod ceil; mod ceilf; mod cosf; +mod exp; mod expf; mod fabs; mod fabsf; @@ -39,6 +40,7 @@ mod truncf; pub use self::ceil::ceil; pub use self::ceilf::ceilf; pub use self::cosf::cosf; +pub use self::exp::exp; pub use self::expf::expf; pub use self::fabs::fabs; pub use self::fabsf::fabsf; diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 291c30f63..a54d8b271 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -703,7 +703,7 @@ f64_f64! 
{ ceil, // cos, // cosh, - // exp, + exp, // exp2, // expm1, floor, From d1429459c88ba38d18724c551f22214cf157580e Mon Sep 17 00:00:00 2001 From: Erik Date: Sat, 14 Jul 2018 14:07:14 -0400 Subject: [PATCH 0068/1459] implement cbrt and cbrtf --- libm/src/lib.rs | 4 -- libm/src/math/cbrt.rs | 110 ++++++++++++++++++++++++++++++++ libm/src/math/cbrtf.rs | 72 +++++++++++++++++++++ libm/src/math/mod.rs | 4 ++ libm/test-generator/src/main.rs | 4 +- 5 files changed, 188 insertions(+), 6 deletions(-) create mode 100644 libm/src/math/cbrt.rs create mode 100644 libm/src/math/cbrtf.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index f112aaaca..b175f6864 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -79,7 +79,6 @@ pub trait F32Ext: private::Sealed { fn log10(self) -> Self; - #[cfg(todo)] fn cbrt(self) -> Self; fn hypot(self, other: Self) -> Self; @@ -234,7 +233,6 @@ impl F32Ext for f32 { log10f(self) } - #[cfg(todo)] #[inline] fn cbrt(self) -> Self { cbrtf(self) @@ -391,7 +389,6 @@ pub trait F64Ext: private::Sealed { fn log10(self) -> Self; - #[cfg(todo)] fn cbrt(self) -> Self; fn hypot(self, other: Self) -> Self; @@ -548,7 +545,6 @@ impl F64Ext for f64 { log10(self) } - #[cfg(todo)] #[inline] fn cbrt(self) -> Self { cbrt(self) diff --git a/libm/src/math/cbrt.rs b/libm/src/math/cbrt.rs new file mode 100644 index 000000000..8c37f0b26 --- /dev/null +++ b/libm/src/math/cbrt.rs @@ -0,0 +1,110 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + * Optimized by Bruce D. Evans. 
+ */ +/* cbrt(x) + * Return cube root of x + */ + +use core::f64; + +const B1: u32 = 715094163; /* B1 = (1023-1023/3-0.03306235651)*2**20 */ +const B2: u32 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */ + +/* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */ +const P0: f64 = 1.87595182427177009643; /* 0x3ffe03e6, 0x0f61e692 */ +const P1: f64 = -1.88497979543377169875; /* 0xbffe28e0, 0x92f02420 */ +const P2: f64 = 1.621429720105354466140; /* 0x3ff9f160, 0x4a49d6c2 */ +const P3: f64 = -0.758397934778766047437; /* 0xbfe844cb, 0xbee751d9 */ +const P4: f64 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */ + +#[inline] +pub fn cbrt(x: f64) -> f64 { + let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 + + let mut ui: u64 = x.to_bits(); + let mut r: f64; + let s: f64; + let mut t: f64; + let w: f64; + let mut hx: u32 = (ui >> 32) as u32 & 0x7fffffff; + + if hx >= 0x7ff00000 { + /* cbrt(NaN,INF) is itself */ + return x + x; + } + + /* + * Rough cbrt to 5 bits: + * cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3) + * where e is integral and >= 0, m is real and in [0, 1), and "/" and + * "%" are integer division and modulus with rounding towards minus + * infinity. The RHS is always >= the LHS and has a maximum relative + * error of about 1 in 16. Adding a bias of -0.03306235651 to the + * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE + * floating point representation, for finite positive normal values, + * ordinary integer divison of the value in bits magically gives + * almost exactly the RHS of the above provided we first subtract the + * exponent bias (1023 for doubles) and later add it back. We do the + * subtraction virtually to keep e >= 0 so that ordinary integer + * division rounds towards minus infinity; this is also efficient. + */ + if hx < 0x00100000 { + /* zero or subnormal? 
*/ + ui = (x * x1p54).to_bits(); + hx = (ui >> 32) as u32 & 0x7fffffff; + if hx == 0 { + return x; /* cbrt(0) is itself */ + } + hx = hx / 3 + B2; + } else { + hx = hx / 3 + B1; + } + ui &= 1 << 63; + ui |= (hx as u64) << 32; + t = f64::from_bits(ui); + + /* + * New cbrt to 23 bits: + * cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x) + * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r) + * to within 2**-23.5 when |r - 1| < 1/10. The rough approximation + * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this + * gives us bounds for r = t**3/x. + * + * Try to optimize for parallel evaluation as in __tanf.c. + */ + r = (t * t) * (t / x); + t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4)); + + /* + * Round t away from zero to 23 bits (sloppily except for ensuring that + * the result is larger in magnitude than cbrt(x) but not much more than + * 2 23-bit ulps larger). With rounding towards zero, the error bound + * would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps + * in the rounded t, the infinite-precision error in the Newton + * approximation barely affects third digit in the final error + * 0.667; the error in the rounded t can be up to about 3 23-bit ulps + * before the final error is larger than 0.667 ulps. + */ + ui = t.to_bits(); + ui = (ui + 0x80000000) & 0xffffffffc0000000; + t = f64::from_bits(ui); + + /* one step Newton iteration to 53 bits with error < 0.667 ulps */ + s = t * t; /* t*t is exact */ + r = x / s; /* error <= 0.5 ulps; |r| < |t| */ + w = t + t; /* t+t is exact */ + r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */ + t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */ + t +} diff --git a/libm/src/math/cbrtf.rs b/libm/src/math/cbrtf.rs new file mode 100644 index 000000000..878372eef --- /dev/null +++ b/libm/src/math/cbrtf.rs @@ -0,0 +1,72 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. 
+ * Debugged and optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* cbrtf(x) + * Return cube root of x + */ + +use core::f32; + +const B1: u32 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */ +const B2: u32 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */ + +#[inline] +pub fn cbrtf(x: f32) -> f32 { + let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24 + + let mut r: f64; + let mut t: f64; + let mut ui: u32 = x.to_bits(); + let mut hx: u32 = ui & 0x7fffffff; + + if hx >= 0x7f800000 { + /* cbrt(NaN,INF) is itself */ + return x + x; + } + + /* rough cbrt to 5 bits */ + if hx < 0x00800000 { + /* zero or subnormal? */ + if hx == 0 { + return x; /* cbrt(+-0) is itself */ + } + ui = (x * x1p24).to_bits(); + hx = ui & 0x7fffffff; + hx = hx / 3 + B2; + } else { + hx = hx / 3 + B1; + } + ui &= 0x80000000; + ui |= hx; + + /* + * First step Newton iteration (solving t*t-x/t == 0) to 16 bits. In + * double precision so that its terms can be arranged for efficiency + * without causing overflow or underflow. + */ + t = f32::from_bits(ui) as f64; + r = t * t * t; + t = t * (x as f64 + x as f64 + r) / (x as f64 + r + r); + + /* + * Second step Newton iteration to 47 bits. In double precision for + * efficiency and accuracy. + */ + r = t * t * t; + t = t * (x as f64 + x as f64 + r) / (x as f64 + r + r); + + /* rounding to 24 bits is perfect in round-to-nearest mode */ + t as f32 +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 2a84b463d..0f112a0cb 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -6,6 +6,8 @@ macro_rules! 
force_eval { }; } +mod cbrt; +mod cbrtf; mod ceil; mod ceilf; mod cosf; @@ -37,6 +39,8 @@ mod trunc; mod truncf; // Use separated imports instead of {}-grouped imports for easier merging. +pub use self::cbrt::cbrt; +pub use self::cbrtf::cbrtf; pub use self::ceil::ceil; pub use self::ceilf::ceilf; pub use self::cosf::cosf; diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index a54d8b271..0521538de 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -656,7 +656,7 @@ f32_f32! { truncf, // asinf, // atanf, - // cbrtf, + cbrtf, cosf, ceilf, // coshf, @@ -699,7 +699,7 @@ f64_f64! { // acos, // asin, // atan, - // cbrt, + cbrt, ceil, // cos, // cosh, From b71bf2683c73e871c42154c8e8a04d31e029f634 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sat, 14 Jul 2018 20:36:28 +0200 Subject: [PATCH 0069/1459] remove exp --- libm/src/lib.rs | 2 + libm/src/math/exp.rs | 84 --------------------------------- libm/src/math/mod.rs | 2 - libm/test-generator/src/main.rs | 2 +- 4 files changed, 3 insertions(+), 87 deletions(-) delete mode 100644 libm/src/math/exp.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index e8869fca9..c53fefb2f 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -378,6 +378,7 @@ pub trait F64Ext: private::Sealed { fn sqrt(self) -> Self; + #[cfg(todo)] fn exp(self) -> Self; #[cfg(todo)] @@ -515,6 +516,7 @@ impl F64Ext for f64 { sqrt(self) } + #[cfg(todo)] #[inline] fn exp(self) -> Self { exp(self) diff --git a/libm/src/math/exp.rs b/libm/src/math/exp.rs deleted file mode 100644 index 870bf6085..000000000 --- a/libm/src/math/exp.rs +++ /dev/null @@ -1,84 +0,0 @@ -use super::scalbn; - -const HALF: [f64; 2] = [0.5, -0.5]; -const LN2_HI: f64 = 6.93147180369123816490e-01; /* 0x3fe62e42, 0xfee00000 */ -const LN2_LO: f64 = 1.90821492927058770002e-10; /* 0x3dea39ef, 0x35793c76 */ -const INV_LN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547, 0x652b82fe */ -const P1: f64 = 1.66666666666666019037e-01; /* 
0x3FC55555, 0x5555553E */ -const P2: f64 = -2.77777777770155933842e-03; /* 0xBF66C16C, 0x16BEBD93 */ -const P3: f64 = 6.61375632143793436117e-05; /* 0x3F11566A, 0xAF25DE2C */ -const P4: f64 = -1.65339022054652515390e-06; /* 0xBEBBBD41, 0xC5D26BF1 */ -const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ - -#[inline] -pub fn exp(mut x: f64) -> f64 { - let mut hx: u32 = (x.to_bits() >> 32) as u32; - let sign = (hx >> 31) as i32; /* sign bit of x */ - hx &= 0x7fffffff; /* high word of |x| */ - - /* special cases */ - if hx >= 0x4086232b { - /* if |x| >= 708.39... */ - if x.is_nan() { - return x; - } - if x > 709.782712893383973096 { - /* overflow if x!=inf */ - x *= f64::from_bits(0x7fe0000000000000); - return x; - } - if x < -708.39641853226410622 { - /* underflow if x!=-inf */ - force_eval!((f64::from_bits(0xb6a0000000000000) / x) as f32); - if x < -745.13321910194110842 { - return 0.0; - } - } - } - - /* argument reduction */ - let k: i32; - let hi: f64; - let lo: f64; - if hx > 0x3fd62e42 { - /* if |x| > 0.5 ln2 */ - /* if |x| > 0.5 ln2 */ - if hx > 0x3ff0a2b2 { - /* if |x| > 1.5 ln2 */ - k = (INV_LN2 * x + HALF[sign as usize]) as i32; - } else { - k = 1 - sign - sign; - } - let kf = k as f64; - hi = x - kf * LN2_HI; /* k*ln2hi is exact here */ - lo = kf * LN2_LO; - x = hi - lo; - } else if hx > 0x3e300000 { - /* |x| > 2**-14 */ - k = 0; - hi = x; - lo = 0.0; - } else { - /* raise inexact */ - force_eval!(f64::from_bits(0x7fe0000000000000) + x); - return 1.0 + x; - } - - /* x is now in primary range */ - let xx = x * x; - let c = x - xx * (P1 + xx * (P2 + xx * (P3 + xx * (P4 + xx * P5)))); - let y = 1.0 + (x * c / (2.0 - c) - lo + hi); - if k == 0 { - y - } else { - scalbn(y, k) - } -} - -#[cfg(test)] -mod tests { - #[test] - fn sanity_check() { - assert_eq!(super::exp(1.1), 3.0041660239464334); - } -} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index d9e37ee3a..be1729565 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs 
@@ -10,7 +10,6 @@ mod ceil; mod ceilf; mod cosf; mod cosh; -mod exp; mod expf; mod expm1; mod fabs; @@ -43,7 +42,6 @@ pub use self::ceil::ceil; pub use self::ceilf::ceilf; pub use self::cosf::cosf; pub use self::cosh::cosh; -pub use self::exp::exp; pub use self::expf::expf; pub use self::expm1::expm1; pub use self::fabs::fabs; diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 8689870c2..fc2842b0b 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -703,7 +703,7 @@ f64_f64! { ceil, // cos, cosh, - exp, + // exp, // exp2, expm1, floor, From d14fc12a25e17b6b4df694173130c15e893f4952 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sat, 14 Jul 2018 20:51:39 +0200 Subject: [PATCH 0070/1459] remove faulty cosh implementation --- libm/src/math/cosh.rs | 45 ---------------------------------------- libm/src/math/k_expo2.rs | 12 ----------- 2 files changed, 57 deletions(-) delete mode 100644 libm/src/math/cosh.rs delete mode 100644 libm/src/math/k_expo2.rs diff --git a/libm/src/math/cosh.rs b/libm/src/math/cosh.rs deleted file mode 100644 index e76edca03..000000000 --- a/libm/src/math/cosh.rs +++ /dev/null @@ -1,45 +0,0 @@ -use core::f64; - -use super::exp; -use super::expm1; -use super::k_expo2; - -pub fn cosh(mut x: f64) -> f64 { - let t: f64; - /* |x| */ - let mut ui = x.to_bits(); - ui &= !0u64; - x = f64::from_bits(ui); - let w = (ui >> 32) as u32; - - /* |x| < log(2) */ - if w < 0x3fe62e42 { - if w < 0x3ff00000 - (26 << 20) { - /* raise inexact if x!=0 */ - force_eval!(x + f64::from_bits(0x4770000000000000)); - return 1.0; - } - let t = expm1(x); - return 1.0 + t * t / (2.0 * (1.0 + t)); - } - - /* |x| < log(DBL_MAX) */ - if w < 0x40862e42 { - t = exp(x); - /* note: if x>log(0x1p26) then the 1/t is not needed */ - return 0.5 * (t + 1.0 / t); - } - - /* |x| > log(DBL_MAX) or nan */ - /* note: the result is stored to handle overflow */ - t = k_expo2(x); - return t; -} - -#[cfg(test)] -mod tests { - 
#[test] - fn sanity_check() { - assert_eq!(super::cosh(1.1), 1.6685185538222564); - } -} diff --git a/libm/src/math/k_expo2.rs b/libm/src/math/k_expo2.rs deleted file mode 100644 index c67a86550..000000000 --- a/libm/src/math/k_expo2.rs +++ /dev/null @@ -1,12 +0,0 @@ -use super::exp; - -const K: u32 = 2043; -const KLN2: f64 = 1416.0996898839683; - -#[inline] -pub(crate) fn k_expo2(x: f64) -> f64 { - /* note that k is odd and scale*scale overflows */ - let scale = f64::from_bits((((0x3ff + K / 2) << 20) as u64) << 32); - /* exp(x - k ln2) * 2**(k-1) */ - return exp(x - KLN2) * scale * scale; -} From 29110f0414adfcec36a345c2e4a5675096dba625 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sat, 14 Jul 2018 20:59:41 +0200 Subject: [PATCH 0071/1459] fix missing changes --- libm/src/lib.rs | 2 ++ libm/src/math/mod.rs | 6 +----- libm/test-generator/src/main.rs | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 8091079a6..3a7728811 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -439,6 +439,7 @@ pub trait F64Ext: private::Sealed { #[cfg(todo)] fn sinh(self) -> Self; + #[cfg(todo)] fn cosh(self) -> Self; #[cfg(todo)] @@ -628,6 +629,7 @@ impl F64Ext for f64 { sinh(self) } + #[cfg(todo)] #[inline] fn cosh(self) -> Self { cosh(self) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 86c181d13..65bfb96d0 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -9,7 +9,6 @@ macro_rules! 
force_eval { mod ceil; mod ceilf; mod cosf; -mod cosh; mod exp; mod expf; mod expm1; @@ -44,7 +43,6 @@ mod truncf; pub use self::ceil::ceil; pub use self::ceilf::ceilf; pub use self::cosf::cosf; -pub use self::cosh::cosh; pub use self::exp::exp; pub use self::expf::expf; pub use self::expm1::expm1; @@ -76,12 +74,10 @@ pub use self::trunc::trunc; pub use self::truncf::truncf; mod k_cosf; -mod k_expo2; mod k_sinf; mod rem_pio2_large; mod rem_pio2f; use self::{ - k_cosf::k_cosf, k_expo2::k_expo2, k_sinf::k_sinf, rem_pio2_large::rem_pio2_large, - rem_pio2f::rem_pio2f, + k_cosf::k_cosf, k_sinf::k_sinf, rem_pio2_large::rem_pio2_large, rem_pio2f::rem_pio2f, }; diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index dff949459..6010695cb 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -702,7 +702,7 @@ f64_f64! { // cbrt, ceil, // cos, - cosh, + // cosh, exp, // exp2, expm1, From 92ff6f7e858908fd88ae773722cf2d3663671cc8 Mon Sep 17 00:00:00 2001 From: C Jones Date: Sat, 14 Jul 2018 05:04:46 -0400 Subject: [PATCH 0072/1459] Add exp2f --- libm/src/lib.rs | 2 - libm/src/math/exp2f.rs | 130 ++++++++++++++++++++++++++++++++ libm/src/math/mod.rs | 2 + libm/test-generator/src/main.rs | 2 +- 4 files changed, 133 insertions(+), 3 deletions(-) create mode 100644 libm/src/math/exp2f.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index fddb8ca70..c6e01df41 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -70,7 +70,6 @@ pub trait F32Ext: private::Sealed { fn exp(self) -> Self; - #[cfg(todo)] fn exp2(self) -> Self; fn ln(self) -> Self; @@ -214,7 +213,6 @@ impl F32Ext for f32 { expf(self) } - #[cfg(todo)] #[inline] fn exp2(self) -> Self { exp2f(self) diff --git a/libm/src/math/exp2f.rs b/libm/src/math/exp2f.rs new file mode 100644 index 000000000..947679a83 --- /dev/null +++ b/libm/src/math/exp2f.rs @@ -0,0 +1,130 @@ +// origin: FreeBSD /usr/src/lib/msun/src/s_exp2f.c +//- +// Copyright (c) 2005 David Schultz +// All 
rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +const TBLSIZE: usize = 16; + +// exp2f(x): compute the base 2 exponential of x +// +// Accuracy: Peak error < 0.501 ulp; location of peak: -0.030110927. +// +// Method: (equally-spaced tables) +// +// Reduce x: +// x = k + y, for integer k and |y| <= 1/2. +// Thus we have exp2f(x) = 2**k * exp2(y). +// +// Reduce y: +// y = i/TBLSIZE + z for integer i near y * TBLSIZE. +// Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z), +// with |z| <= 2**-(TBLSIZE+1). +// +// We compute exp2(i/TBLSIZE) via table lookup and exp2(z) via a +// degree-4 minimax polynomial with maximum error under 1.4 * 2**-33. 
+// Using double precision for everything except the reduction makes +// roundoff error insignificant and simplifies the scaling step. +// +// This method is due to Tang, but I do not use his suggested parameters: +// +// Tang, P. Table-driven Implementation of the Exponential Function +// in IEEE Floating-Point Arithmetic. TOMS 15(2), 144-157 (1989). +pub fn exp2f(mut x: f32) -> f32 { + let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32; + let p1 = f32::from_bits(0x3f317218); + let p2 = f32::from_bits(0x3e75fdf0); + let p3 = f32::from_bits(0x3d6359a4); + let p4 = f32::from_bits(0x3c1d964e); + + let exp2ft: [f64; TBLSIZE] = [ + f64::from_bits(0x3fe6a09e667f3bcd), + f64::from_bits(0x3fe7a11473eb0187), + f64::from_bits(0x3fe8ace5422aa0db), + f64::from_bits(0x3fe9c49182a3f090), + f64::from_bits(0x3feae89f995ad3ad), + f64::from_bits(0x3fec199bdd85529c), + f64::from_bits(0x3fed5818dcfba487), + f64::from_bits(0x3feea4afa2a490da), + f64::from_bits(0x3ff0000000000000), + f64::from_bits(0x3ff0b5586cf9890f), + f64::from_bits(0x3ff172b83c7d517b), + f64::from_bits(0x3ff2387a6e756238), + f64::from_bits(0x3ff306fe0a31b715), + f64::from_bits(0x3ff3dea64c123422), + f64::from_bits(0x3ff4bfdad5362a27), + f64::from_bits(0x3ff5ab07dd485429), + ]; + + // double_t t, r, z; + // uint32_t ix, i0, k; + + let x1p127 = f32::from_bits(0x7f000000); + + /* Filter out exceptional cases. */ + let ui = f32::to_bits(x); + let ix = ui & 0x7fffffff; + if ix > 0x42fc0000 { + /* |x| > 126 */ + if ix > 0x7f800000 { + /* NaN */ + return x; + } + if ui >= 0x43000000 && ui < 0x80000000 { + /* x >= 128 */ + x *= x1p127; + return x; + } + if ui >= 0x80000000 { + /* x < -126 */ + if ui >= 0xc3160000 || (ui & 0x0000ffff != 0) { + force_eval!(f32::from_bits(0x80000001) / x); + } + if ui >= 0xc3160000 { + /* x <= -150 */ + return 0.0; + } + } + } else if ix <= 0x33000000 { + /* |x| <= 0x1p-25 */ + return 1.0 + x; + } + + /* Reduce x, computing z, i0, and k. 
*/ + let ui = f32::to_bits(x + redux); + let mut i0 = ui; + i0 += TBLSIZE as u32 / 2; + let k = i0 / TBLSIZE as u32; + let ukf = f64::from_bits(((0x3ff + k) as u64) << 52); + i0 &= TBLSIZE as u32 - 1; + let mut uf = f32::from_bits(ui); + uf -= redux; + let z: f64 = (x - uf) as f64; + /* Compute r = exp2(y) = exp2ft[i0] * p(z). */ + let r: f64 = exp2ft[i0 as usize]; + let t: f64 = r as f64 * z; + let r: f64 = r + t * (p1 as f64 + z * p2 as f64) + t * (z * z) * (p3 as f64 + z * p4 as f64); + + /* Scale by 2**k */ + (r * ukf) as f32 +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 59d669ab6..5540ba838 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -12,6 +12,7 @@ mod ceil; mod ceilf; mod cosf; mod exp; +mod exp2f; mod expf; mod fabs; mod fabsf; @@ -48,6 +49,7 @@ pub use self::ceil::ceil; pub use self::ceilf::ceilf; pub use self::cosf::cosf; pub use self::exp::exp; +pub use self::exp2f::exp2f; pub use self::expf::expf; pub use self::fabs::fabs; pub use self::fabsf::fabsf; diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index bce4284b3..dfe912a50 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -660,7 +660,7 @@ f32_f32! 
{ cosf, ceilf, // coshf, - // exp2f, + exp2f, expf, log10f, log1pf, From 862de2420b041da1df4c4f77873ac455666d71ab Mon Sep 17 00:00:00 2001 From: C Jones Date: Sat, 14 Jul 2018 05:29:43 -0400 Subject: [PATCH 0073/1459] Implement exp2 --- libm/src/lib.rs | 2 - libm/src/math/exp2.rs | 383 ++++++++++++++++++++++++++++++++ libm/src/math/mod.rs | 2 + libm/test-generator/src/main.rs | 2 +- 4 files changed, 386 insertions(+), 3 deletions(-) create mode 100644 libm/src/math/exp2.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index c6e01df41..3f9a65bcc 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -385,7 +385,6 @@ pub trait F64Ext: private::Sealed { fn exp(self) -> Self; - #[cfg(todo)] fn exp2(self) -> Self; fn ln(self) -> Self; @@ -530,7 +529,6 @@ impl F64Ext for f64 { exp(self) } - #[cfg(todo)] #[inline] fn exp2(self) -> Self { exp2(self) diff --git a/libm/src/math/exp2.rs b/libm/src/math/exp2.rs new file mode 100644 index 000000000..c5bd3608a --- /dev/null +++ b/libm/src/math/exp2.rs @@ -0,0 +1,383 @@ +// origin: FreeBSD /usr/src/lib/msun/src/s_exp2.c */ +//- +// Copyright (c) 2005 David Schultz +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +use super::scalbn::scalbn; + +const TBLSIZE: usize = 256; + +// exp2(x): compute the base 2 exponential of x +// +// Accuracy: Peak error < 0.503 ulp for normalized results. +// +// Method: (accurate tables) +// +// Reduce x: +// x = k + y, for integer k and |y| <= 1/2. +// Thus we have exp2(x) = 2**k * exp2(y). +// +// Reduce y: +// y = i/TBLSIZE + z - eps[i] for integer i near y * TBLSIZE. +// Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z - eps[i]), +// with |z - eps[i]| <= 2**-9 + 2**-39 for the table used. +// +// We compute exp2(i/TBLSIZE) via table lookup and exp2(z - eps[i]) via +// a degree-5 minimax polynomial with maximum error under 1.3 * 2**-61. +// The values in exp2t[] and eps[] are chosen such that +// exp2t[i] = exp2(i/TBLSIZE + eps[i]), and eps[i] is a small offset such +// that exp2t[i] is accurate to 2**-64. +// +// Note that the range of i is +-TBLSIZE/2, so we actually index the tables +// by i0 = i + TBLSIZE/2. For cache efficiency, exp2t[] and eps[] are +// virtual tables, interleaved in the real table tbl[]. +// +// This method is due to Gal, with many details due to Gal and Bachelis: +// +// Gal, S. and Bachelis, B. An Accurate Elementary Mathematical Library +// for the IEEE Floating Point Standard. TOMS 17(1), 26-46 (1991). 
+pub fn exp2(mut x: f64) -> f64 { + let redux = f64::from_bits(0x4338000000000000) / TBLSIZE as f64; + let p1 = f64::from_bits(0x3fe62e42fefa39ef); + let p2 = f64::from_bits(0x3fcebfbdff82c575); + let p3 = f64::from_bits(0x3fac6b08d704a0a6); + let p4 = f64::from_bits(0x3f83b2ab88f70400); + let p5 = f64::from_bits(0x3f55d88003875c74); + + #[cfg_attr(rustfmt, rustfmt_skip)] + let tbl = [ + // exp2(z + eps) eps + f64::from_bits(0x3fe6a09e667f3d5d), f64::from_bits(0x3d39880000000000), + f64::from_bits(0x3fe6b052fa751744), f64::from_bits(0x3cd8000000000000), + f64::from_bits(0x3fe6c012750bd9fe), f64::from_bits(0xbd28780000000000), + f64::from_bits(0x3fe6cfdcddd476bf), f64::from_bits(0x3d1ec00000000000), + f64::from_bits(0x3fe6dfb23c651a29), f64::from_bits(0xbcd8000000000000), + f64::from_bits(0x3fe6ef9298593ae3), f64::from_bits(0xbcbc000000000000), + f64::from_bits(0x3fe6ff7df9519386), f64::from_bits(0xbd2fd80000000000), + f64::from_bits(0x3fe70f7466f42da3), f64::from_bits(0xbd2c880000000000), + f64::from_bits(0x3fe71f75e8ec5fc3), f64::from_bits(0x3d13c00000000000), + f64::from_bits(0x3fe72f8286eacf05), f64::from_bits(0xbd38300000000000), + f64::from_bits(0x3fe73f9a48a58152), f64::from_bits(0xbd00c00000000000), + f64::from_bits(0x3fe74fbd35d7ccfc), f64::from_bits(0x3d2f880000000000), + f64::from_bits(0x3fe75feb564267f1), f64::from_bits(0x3d03e00000000000), + f64::from_bits(0x3fe77024b1ab6d48), f64::from_bits(0xbd27d00000000000), + f64::from_bits(0x3fe780694fde5d38), f64::from_bits(0xbcdd000000000000), + f64::from_bits(0x3fe790b938ac1d00), f64::from_bits(0x3ce3000000000000), + f64::from_bits(0x3fe7a11473eb0178), f64::from_bits(0xbced000000000000), + f64::from_bits(0x3fe7b17b0976d060), f64::from_bits(0x3d20400000000000), + f64::from_bits(0x3fe7c1ed0130c133), f64::from_bits(0x3ca0000000000000), + f64::from_bits(0x3fe7d26a62ff8636), f64::from_bits(0xbd26900000000000), + f64::from_bits(0x3fe7e2f336cf4e3b), f64::from_bits(0xbd02e00000000000), + 
f64::from_bits(0x3fe7f3878491c3e8), f64::from_bits(0xbd24580000000000), + f64::from_bits(0x3fe80427543e1b4e), f64::from_bits(0x3d33000000000000), + f64::from_bits(0x3fe814d2add1071a), f64::from_bits(0x3d0f000000000000), + f64::from_bits(0x3fe82589994ccd7e), f64::from_bits(0xbd21c00000000000), + f64::from_bits(0x3fe8364c1eb942d0), f64::from_bits(0x3d29d00000000000), + f64::from_bits(0x3fe8471a4623cab5), f64::from_bits(0x3d47100000000000), + f64::from_bits(0x3fe857f4179f5bbc), f64::from_bits(0x3d22600000000000), + f64::from_bits(0x3fe868d99b4491af), f64::from_bits(0xbd32c40000000000), + f64::from_bits(0x3fe879cad931a395), f64::from_bits(0xbd23000000000000), + f64::from_bits(0x3fe88ac7d98a65b8), f64::from_bits(0xbd2a800000000000), + f64::from_bits(0x3fe89bd0a4785800), f64::from_bits(0xbced000000000000), + f64::from_bits(0x3fe8ace5422aa223), f64::from_bits(0x3d33280000000000), + f64::from_bits(0x3fe8be05bad619fa), f64::from_bits(0x3d42b40000000000), + f64::from_bits(0x3fe8cf3216b54383), f64::from_bits(0xbd2ed00000000000), + f64::from_bits(0x3fe8e06a5e08664c), f64::from_bits(0xbd20500000000000), + f64::from_bits(0x3fe8f1ae99157807), f64::from_bits(0x3d28280000000000), + f64::from_bits(0x3fe902fed0282c0e), f64::from_bits(0xbd1cb00000000000), + f64::from_bits(0x3fe9145b0b91ff96), f64::from_bits(0xbd05e00000000000), + f64::from_bits(0x3fe925c353aa2ff9), f64::from_bits(0x3cf5400000000000), + f64::from_bits(0x3fe93737b0cdc64a), f64::from_bits(0x3d17200000000000), + f64::from_bits(0x3fe948b82b5f98ae), f64::from_bits(0xbd09000000000000), + f64::from_bits(0x3fe95a44cbc852cb), f64::from_bits(0x3d25680000000000), + f64::from_bits(0x3fe96bdd9a766f21), f64::from_bits(0xbd36d00000000000), + f64::from_bits(0x3fe97d829fde4e2a), f64::from_bits(0xbd01000000000000), + f64::from_bits(0x3fe98f33e47a23a3), f64::from_bits(0x3d2d000000000000), + f64::from_bits(0x3fe9a0f170ca0604), f64::from_bits(0xbd38a40000000000), + f64::from_bits(0x3fe9b2bb4d53ff89), f64::from_bits(0x3d355c0000000000), + 
f64::from_bits(0x3fe9c49182a3f15b), f64::from_bits(0x3d26b80000000000), + f64::from_bits(0x3fe9d674194bb8c5), f64::from_bits(0xbcec000000000000), + f64::from_bits(0x3fe9e86319e3238e), f64::from_bits(0x3d17d00000000000), + f64::from_bits(0x3fe9fa5e8d07f302), f64::from_bits(0x3d16400000000000), + f64::from_bits(0x3fea0c667b5de54d), f64::from_bits(0xbcf5000000000000), + f64::from_bits(0x3fea1e7aed8eb8f6), f64::from_bits(0x3d09e00000000000), + f64::from_bits(0x3fea309bec4a2e27), f64::from_bits(0x3d2ad80000000000), + f64::from_bits(0x3fea42c980460a5d), f64::from_bits(0xbd1af00000000000), + f64::from_bits(0x3fea5503b23e259b), f64::from_bits(0x3d0b600000000000), + f64::from_bits(0x3fea674a8af46213), f64::from_bits(0x3d38880000000000), + f64::from_bits(0x3fea799e1330b3a7), f64::from_bits(0x3d11200000000000), + f64::from_bits(0x3fea8bfe53c12e8d), f64::from_bits(0x3d06c00000000000), + f64::from_bits(0x3fea9e6b5579fcd2), f64::from_bits(0xbd29b80000000000), + f64::from_bits(0x3feab0e521356fb8), f64::from_bits(0x3d2b700000000000), + f64::from_bits(0x3feac36bbfd3f381), f64::from_bits(0x3cd9000000000000), + f64::from_bits(0x3fead5ff3a3c2780), f64::from_bits(0x3ce4000000000000), + f64::from_bits(0x3feae89f995ad2a3), f64::from_bits(0xbd2c900000000000), + f64::from_bits(0x3feafb4ce622f367), f64::from_bits(0x3d16500000000000), + f64::from_bits(0x3feb0e07298db790), f64::from_bits(0x3d2fd40000000000), + f64::from_bits(0x3feb20ce6c9a89a9), f64::from_bits(0x3d12700000000000), + f64::from_bits(0x3feb33a2b84f1a4b), f64::from_bits(0x3d4d470000000000), + f64::from_bits(0x3feb468415b747e7), f64::from_bits(0xbd38380000000000), + f64::from_bits(0x3feb59728de5593a), f64::from_bits(0x3c98000000000000), + f64::from_bits(0x3feb6c6e29f1c56a), f64::from_bits(0x3d0ad00000000000), + f64::from_bits(0x3feb7f76f2fb5e50), f64::from_bits(0x3cde800000000000), + f64::from_bits(0x3feb928cf22749b2), f64::from_bits(0xbd04c00000000000), + f64::from_bits(0x3feba5b030a10603), f64::from_bits(0xbd0d700000000000), + 
f64::from_bits(0x3febb8e0b79a6f66), f64::from_bits(0x3d0d900000000000), + f64::from_bits(0x3febcc1e904bc1ff), f64::from_bits(0x3d02a00000000000), + f64::from_bits(0x3febdf69c3f3a16f), f64::from_bits(0xbd1f780000000000), + f64::from_bits(0x3febf2c25bd71db8), f64::from_bits(0xbd10a00000000000), + f64::from_bits(0x3fec06286141b2e9), f64::from_bits(0xbd11400000000000), + f64::from_bits(0x3fec199bdd8552e0), f64::from_bits(0x3d0be00000000000), + f64::from_bits(0x3fec2d1cd9fa64ee), f64::from_bits(0xbd09400000000000), + f64::from_bits(0x3fec40ab5fffd02f), f64::from_bits(0xbd0ed00000000000), + f64::from_bits(0x3fec544778fafd15), f64::from_bits(0x3d39660000000000), + f64::from_bits(0x3fec67f12e57d0cb), f64::from_bits(0xbd1a100000000000), + f64::from_bits(0x3fec7ba88988c1b6), f64::from_bits(0xbd58458000000000), + f64::from_bits(0x3fec8f6d9406e733), f64::from_bits(0xbd1a480000000000), + f64::from_bits(0x3feca3405751c4df), f64::from_bits(0x3ccb000000000000), + f64::from_bits(0x3fecb720dcef9094), f64::from_bits(0x3d01400000000000), + f64::from_bits(0x3feccb0f2e6d1689), f64::from_bits(0x3cf0200000000000), + f64::from_bits(0x3fecdf0b555dc412), f64::from_bits(0x3cf3600000000000), + f64::from_bits(0x3fecf3155b5bab3b), f64::from_bits(0xbd06900000000000), + f64::from_bits(0x3fed072d4a0789bc), f64::from_bits(0x3d09a00000000000), + f64::from_bits(0x3fed1b532b08c8fa), f64::from_bits(0xbd15e00000000000), + f64::from_bits(0x3fed2f87080d8a85), f64::from_bits(0x3d1d280000000000), + f64::from_bits(0x3fed43c8eacaa203), f64::from_bits(0x3d01a00000000000), + f64::from_bits(0x3fed5818dcfba491), f64::from_bits(0x3cdf000000000000), + f64::from_bits(0x3fed6c76e862e6a1), f64::from_bits(0xbd03a00000000000), + f64::from_bits(0x3fed80e316c9834e), f64::from_bits(0xbd0cd80000000000), + f64::from_bits(0x3fed955d71ff6090), f64::from_bits(0x3cf4c00000000000), + f64::from_bits(0x3feda9e603db32ae), f64::from_bits(0x3cff900000000000), + f64::from_bits(0x3fedbe7cd63a8325), f64::from_bits(0x3ce9800000000000), + 
f64::from_bits(0x3fedd321f301b445), f64::from_bits(0xbcf5200000000000), + f64::from_bits(0x3fede7d5641c05bf), f64::from_bits(0xbd1d700000000000), + f64::from_bits(0x3fedfc97337b9aec), f64::from_bits(0xbd16140000000000), + f64::from_bits(0x3fee11676b197d5e), f64::from_bits(0x3d0b480000000000), + f64::from_bits(0x3fee264614f5a3e7), f64::from_bits(0x3d40ce0000000000), + f64::from_bits(0x3fee3b333b16ee5c), f64::from_bits(0x3d0c680000000000), + f64::from_bits(0x3fee502ee78b3fb4), f64::from_bits(0xbd09300000000000), + f64::from_bits(0x3fee653924676d68), f64::from_bits(0xbce5000000000000), + f64::from_bits(0x3fee7a51fbc74c44), f64::from_bits(0xbd07f80000000000), + f64::from_bits(0x3fee8f7977cdb726), f64::from_bits(0xbcf3700000000000), + f64::from_bits(0x3feea4afa2a490e8), f64::from_bits(0x3ce5d00000000000), + f64::from_bits(0x3feeb9f4867ccae4), f64::from_bits(0x3d161a0000000000), + f64::from_bits(0x3feecf482d8e680d), f64::from_bits(0x3cf5500000000000), + f64::from_bits(0x3feee4aaa2188514), f64::from_bits(0x3cc6400000000000), + f64::from_bits(0x3feefa1bee615a13), f64::from_bits(0xbcee800000000000), + f64::from_bits(0x3fef0f9c1cb64106), f64::from_bits(0xbcfa880000000000), + f64::from_bits(0x3fef252b376bb963), f64::from_bits(0xbd2c900000000000), + f64::from_bits(0x3fef3ac948dd7275), f64::from_bits(0x3caa000000000000), + f64::from_bits(0x3fef50765b6e4524), f64::from_bits(0xbcf4f00000000000), + f64::from_bits(0x3fef6632798844fd), f64::from_bits(0x3cca800000000000), + f64::from_bits(0x3fef7bfdad9cbe38), f64::from_bits(0x3cfabc0000000000), + f64::from_bits(0x3fef91d802243c82), f64::from_bits(0xbcd4600000000000), + f64::from_bits(0x3fefa7c1819e908e), f64::from_bits(0xbd0b0c0000000000), + f64::from_bits(0x3fefbdba3692d511), f64::from_bits(0xbcc0e00000000000), + f64::from_bits(0x3fefd3c22b8f7194), f64::from_bits(0xbd10de8000000000), + f64::from_bits(0x3fefe9d96b2a23ee), f64::from_bits(0x3cee430000000000), + f64::from_bits(0x3ff0000000000000), f64::from_bits(0x0), + 
f64::from_bits(0x3ff00b1afa5abcbe), f64::from_bits(0xbcb3400000000000), + f64::from_bits(0x3ff0163da9fb3303), f64::from_bits(0xbd12170000000000), + f64::from_bits(0x3ff02168143b0282), f64::from_bits(0x3cba400000000000), + f64::from_bits(0x3ff02c9a3e77806c), f64::from_bits(0x3cef980000000000), + f64::from_bits(0x3ff037d42e11bbca), f64::from_bits(0xbcc7400000000000), + f64::from_bits(0x3ff04315e86e7f89), f64::from_bits(0x3cd8300000000000), + f64::from_bits(0x3ff04e5f72f65467), f64::from_bits(0xbd1a3f0000000000), + f64::from_bits(0x3ff059b0d315855a), f64::from_bits(0xbd02840000000000), + f64::from_bits(0x3ff0650a0e3c1f95), f64::from_bits(0x3cf1600000000000), + f64::from_bits(0x3ff0706b29ddf71a), f64::from_bits(0x3d15240000000000), + f64::from_bits(0x3ff07bd42b72a82d), f64::from_bits(0xbce9a00000000000), + f64::from_bits(0x3ff0874518759bd0), f64::from_bits(0x3ce6400000000000), + f64::from_bits(0x3ff092bdf66607c8), f64::from_bits(0xbd00780000000000), + f64::from_bits(0x3ff09e3ecac6f383), f64::from_bits(0xbc98000000000000), + f64::from_bits(0x3ff0a9c79b1f3930), f64::from_bits(0x3cffa00000000000), + f64::from_bits(0x3ff0b5586cf988fc), f64::from_bits(0xbcfac80000000000), + f64::from_bits(0x3ff0c0f145e46c8a), f64::from_bits(0x3cd9c00000000000), + f64::from_bits(0x3ff0cc922b724816), f64::from_bits(0x3d05200000000000), + f64::from_bits(0x3ff0d83b23395dd8), f64::from_bits(0xbcfad00000000000), + f64::from_bits(0x3ff0e3ec32d3d1f3), f64::from_bits(0x3d1bac0000000000), + f64::from_bits(0x3ff0efa55fdfa9a6), f64::from_bits(0xbd04e80000000000), + f64::from_bits(0x3ff0fb66affed2f0), f64::from_bits(0xbd0d300000000000), + f64::from_bits(0x3ff1073028d7234b), f64::from_bits(0x3cf1500000000000), + f64::from_bits(0x3ff11301d0125b5b), f64::from_bits(0x3cec000000000000), + f64::from_bits(0x3ff11edbab5e2af9), f64::from_bits(0x3d16bc0000000000), + f64::from_bits(0x3ff12abdc06c31d5), f64::from_bits(0x3ce8400000000000), + f64::from_bits(0x3ff136a814f2047d), f64::from_bits(0xbd0ed00000000000), + 
f64::from_bits(0x3ff1429aaea92de9), f64::from_bits(0x3ce8e00000000000), + f64::from_bits(0x3ff14e95934f3138), f64::from_bits(0x3ceb400000000000), + f64::from_bits(0x3ff15a98c8a58e71), f64::from_bits(0x3d05300000000000), + f64::from_bits(0x3ff166a45471c3df), f64::from_bits(0x3d03380000000000), + f64::from_bits(0x3ff172b83c7d5211), f64::from_bits(0x3d28d40000000000), + f64::from_bits(0x3ff17ed48695bb9f), f64::from_bits(0xbd05d00000000000), + f64::from_bits(0x3ff18af9388c8d93), f64::from_bits(0xbd1c880000000000), + f64::from_bits(0x3ff1972658375d66), f64::from_bits(0x3d11f00000000000), + f64::from_bits(0x3ff1a35beb6fcba7), f64::from_bits(0x3d10480000000000), + f64::from_bits(0x3ff1af99f81387e3), f64::from_bits(0xbd47390000000000), + f64::from_bits(0x3ff1bbe084045d54), f64::from_bits(0x3d24e40000000000), + f64::from_bits(0x3ff1c82f95281c43), f64::from_bits(0xbd0a200000000000), + f64::from_bits(0x3ff1d4873168b9b2), f64::from_bits(0x3ce3800000000000), + f64::from_bits(0x3ff1e0e75eb44031), f64::from_bits(0x3ceac00000000000), + f64::from_bits(0x3ff1ed5022fcd938), f64::from_bits(0x3d01900000000000), + f64::from_bits(0x3ff1f9c18438cdf7), f64::from_bits(0xbd1b780000000000), + f64::from_bits(0x3ff2063b88628d8f), f64::from_bits(0x3d2d940000000000), + f64::from_bits(0x3ff212be3578a81e), f64::from_bits(0x3cd8000000000000), + f64::from_bits(0x3ff21f49917ddd41), f64::from_bits(0x3d2b340000000000), + f64::from_bits(0x3ff22bdda2791323), f64::from_bits(0x3d19f80000000000), + f64::from_bits(0x3ff2387a6e7561e7), f64::from_bits(0xbd19c80000000000), + f64::from_bits(0x3ff2451ffb821427), f64::from_bits(0x3d02300000000000), + f64::from_bits(0x3ff251ce4fb2a602), f64::from_bits(0xbd13480000000000), + f64::from_bits(0x3ff25e85711eceb0), f64::from_bits(0x3d12700000000000), + f64::from_bits(0x3ff26b4565e27d16), f64::from_bits(0x3d11d00000000000), + f64::from_bits(0x3ff2780e341de00f), f64::from_bits(0x3d31ee0000000000), + f64::from_bits(0x3ff284dfe1f5633e), f64::from_bits(0xbd14c00000000000), + 
f64::from_bits(0x3ff291ba7591bb30), f64::from_bits(0xbd13d80000000000), + f64::from_bits(0x3ff29e9df51fdf09), f64::from_bits(0x3d08b00000000000), + f64::from_bits(0x3ff2ab8a66d10e9b), f64::from_bits(0xbd227c0000000000), + f64::from_bits(0x3ff2b87fd0dada3a), f64::from_bits(0x3d2a340000000000), + f64::from_bits(0x3ff2c57e39771af9), f64::from_bits(0xbd10800000000000), + f64::from_bits(0x3ff2d285a6e402d9), f64::from_bits(0xbd0ed00000000000), + f64::from_bits(0x3ff2df961f641579), f64::from_bits(0xbcf4200000000000), + f64::from_bits(0x3ff2ecafa93e2ecf), f64::from_bits(0xbd24980000000000), + f64::from_bits(0x3ff2f9d24abd8822), f64::from_bits(0xbd16300000000000), + f64::from_bits(0x3ff306fe0a31b625), f64::from_bits(0xbd32360000000000), + f64::from_bits(0x3ff31432edeea50b), f64::from_bits(0xbd70df8000000000), + f64::from_bits(0x3ff32170fc4cd7b8), f64::from_bits(0xbd22480000000000), + f64::from_bits(0x3ff32eb83ba8e9a2), f64::from_bits(0xbd25980000000000), + f64::from_bits(0x3ff33c08b2641766), f64::from_bits(0x3d1ed00000000000), + f64::from_bits(0x3ff3496266e3fa27), f64::from_bits(0xbcdc000000000000), + f64::from_bits(0x3ff356c55f929f0f), f64::from_bits(0xbd30d80000000000), + f64::from_bits(0x3ff36431a2de88b9), f64::from_bits(0x3d22c80000000000), + f64::from_bits(0x3ff371a7373aaa39), f64::from_bits(0x3d20600000000000), + f64::from_bits(0x3ff37f26231e74fe), f64::from_bits(0xbd16600000000000), + f64::from_bits(0x3ff38cae6d05d838), f64::from_bits(0xbd0ae00000000000), + f64::from_bits(0x3ff39a401b713ec3), f64::from_bits(0xbd44720000000000), + f64::from_bits(0x3ff3a7db34e5a020), f64::from_bits(0x3d08200000000000), + f64::from_bits(0x3ff3b57fbfec6e95), f64::from_bits(0x3d3e800000000000), + f64::from_bits(0x3ff3c32dc313a8f2), f64::from_bits(0x3cef800000000000), + f64::from_bits(0x3ff3d0e544ede122), f64::from_bits(0xbd17a00000000000), + f64::from_bits(0x3ff3dea64c1234bb), f64::from_bits(0x3d26300000000000), + f64::from_bits(0x3ff3ec70df1c4ecc), f64::from_bits(0xbd48a60000000000), + 
f64::from_bits(0x3ff3fa4504ac7e8c), f64::from_bits(0xbd3cdc0000000000), + f64::from_bits(0x3ff40822c367a0bb), f64::from_bits(0x3d25b80000000000), + f64::from_bits(0x3ff4160a21f72e95), f64::from_bits(0x3d1ec00000000000), + f64::from_bits(0x3ff423fb27094646), f64::from_bits(0xbd13600000000000), + f64::from_bits(0x3ff431f5d950a920), f64::from_bits(0x3d23980000000000), + f64::from_bits(0x3ff43ffa3f84b9eb), f64::from_bits(0x3cfa000000000000), + f64::from_bits(0x3ff44e0860618919), f64::from_bits(0xbcf6c00000000000), + f64::from_bits(0x3ff45c2042a7d201), f64::from_bits(0xbd0bc00000000000), + f64::from_bits(0x3ff46a41ed1d0016), f64::from_bits(0xbd12800000000000), + f64::from_bits(0x3ff4786d668b3326), f64::from_bits(0x3d30e00000000000), + f64::from_bits(0x3ff486a2b5c13c00), f64::from_bits(0xbd2d400000000000), + f64::from_bits(0x3ff494e1e192af04), f64::from_bits(0x3d0c200000000000), + f64::from_bits(0x3ff4a32af0d7d372), f64::from_bits(0xbd1e500000000000), + f64::from_bits(0x3ff4b17dea6db801), f64::from_bits(0x3d07800000000000), + f64::from_bits(0x3ff4bfdad53629e1), f64::from_bits(0xbd13800000000000), + f64::from_bits(0x3ff4ce41b817c132), f64::from_bits(0x3d00800000000000), + f64::from_bits(0x3ff4dcb299fddddb), f64::from_bits(0x3d2c700000000000), + f64::from_bits(0x3ff4eb2d81d8ab96), f64::from_bits(0xbd1ce00000000000), + f64::from_bits(0x3ff4f9b2769d2d02), f64::from_bits(0x3d19200000000000), + f64::from_bits(0x3ff508417f4531c1), f64::from_bits(0xbd08c00000000000), + f64::from_bits(0x3ff516daa2cf662a), f64::from_bits(0xbcfa000000000000), + f64::from_bits(0x3ff5257de83f51ea), f64::from_bits(0x3d4a080000000000), + f64::from_bits(0x3ff5342b569d4eda), f64::from_bits(0xbd26d80000000000), + f64::from_bits(0x3ff542e2f4f6ac1a), f64::from_bits(0xbd32440000000000), + f64::from_bits(0x3ff551a4ca5d94db), f64::from_bits(0x3d483c0000000000), + f64::from_bits(0x3ff56070dde9116b), f64::from_bits(0x3d24b00000000000), + f64::from_bits(0x3ff56f4736b529de), f64::from_bits(0x3d415a0000000000), + 
f64::from_bits(0x3ff57e27dbe2c40e), f64::from_bits(0xbd29e00000000000), + f64::from_bits(0x3ff58d12d497c76f), f64::from_bits(0xbd23080000000000), + f64::from_bits(0x3ff59c0827ff0b4c), f64::from_bits(0x3d4dec0000000000), + f64::from_bits(0x3ff5ab07dd485427), f64::from_bits(0xbcc4000000000000), + f64::from_bits(0x3ff5ba11fba87af4), f64::from_bits(0x3d30080000000000), + f64::from_bits(0x3ff5c9268a59460b), f64::from_bits(0xbd26c80000000000), + f64::from_bits(0x3ff5d84590998e3f), f64::from_bits(0x3d469a0000000000), + f64::from_bits(0x3ff5e76f15ad20e1), f64::from_bits(0xbd1b400000000000), + f64::from_bits(0x3ff5f6a320dcebca), f64::from_bits(0x3d17700000000000), + f64::from_bits(0x3ff605e1b976dcb8), f64::from_bits(0x3d26f80000000000), + f64::from_bits(0x3ff6152ae6cdf715), f64::from_bits(0x3d01000000000000), + f64::from_bits(0x3ff6247eb03a5531), f64::from_bits(0xbd15d00000000000), + f64::from_bits(0x3ff633dd1d1929b5), f64::from_bits(0xbd12d00000000000), + f64::from_bits(0x3ff6434634ccc313), f64::from_bits(0xbcea800000000000), + f64::from_bits(0x3ff652b9febc8efa), f64::from_bits(0xbd28600000000000), + f64::from_bits(0x3ff6623882553397), f64::from_bits(0x3d71fe0000000000), + f64::from_bits(0x3ff671c1c708328e), f64::from_bits(0xbd37200000000000), + f64::from_bits(0x3ff68155d44ca97e), f64::from_bits(0x3ce6800000000000), + f64::from_bits(0x3ff690f4b19e9471), f64::from_bits(0xbd29780000000000), + ]; + + // double_t r, t, z; + // uint32_t ix, i0; + // union {double f; uint64_t i;} u = {x}; + // union {uint32_t u; int32_t i;} k; + let x1p1023 = f64::from_bits(0x7fe0000000000000); + let x1p52 = f64::from_bits(0x4330000000000000); + let _0x1p_149 = f64::from_bits(0xb6a0000000000000); + + /* Filter out exceptional cases. 
*/ + let ui = f64::to_bits(x); + let ix = ui >> 32 & 0x7fffffff; + if ix >= 0x408ff000 { + /* |x| >= 1022 or nan */ + if ix >= 0x40900000 && ui >> 63 == 0 { + /* x >= 1024 or nan */ + /* overflow */ + x *= x1p1023; + return x; + } + if ix >= 0x7ff00000 { + /* -inf or -nan */ + return -1.0 / x; + } + if ui >> 63 != 0 { + /* x <= -1022 */ + /* underflow */ + if x <= -1075.0 || x - x1p52 + x1p52 != x { + force_eval!((_0x1p_149 / x) as f32); + } + if x <= -1075.0 { + return 0.0; + } + } + } else if ix < 0x3c900000 { + /* |x| < 0x1p-54 */ + return 1.0 + x; + } + + /* Reduce x, computing z, i0, and k. */ + let ui = f64::to_bits(x + redux); + let mut i0 = ui as u32; + i0 += TBLSIZE as u32 / 2; + let ku = i0 / TBLSIZE as u32 * TBLSIZE as u32; + let ki = ku as i32 / TBLSIZE as i32; + i0 %= TBLSIZE as u32; + let uf = f64::from_bits(ui) - redux; + let mut z = x - uf; + + /* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */ + let t = tbl[2 * i0 as usize]; /* exp2t[i0] */ + z -= tbl[2 * i0 as usize + 1]; /* eps[i0] */ + let r = t + t * z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * p5)))); + + scalbn(r, ki) +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 5540ba838..a15c4e60b 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -12,6 +12,7 @@ mod ceil; mod ceilf; mod cosf; mod exp; +mod exp2; mod exp2f; mod expf; mod fabs; @@ -49,6 +50,7 @@ pub use self::ceil::ceil; pub use self::ceilf::ceilf; pub use self::cosf::cosf; pub use self::exp::exp; +pub use self::exp2::exp2; pub use self::exp2f::exp2f; pub use self::expf::expf; pub use self::fabs::fabs; diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index dfe912a50..e33f120a7 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -704,7 +704,7 @@ f64_f64! 
{ // cos, // cosh, exp, - // exp2, + exp2, // expm1, floor, log, From a0046c8740b896ca04d86f897b84fb6daf39126b Mon Sep 17 00:00:00 2001 From: C Jones Date: Sat, 14 Jul 2018 15:04:16 -0400 Subject: [PATCH 0074/1459] DRY up the exp2 and exp2f data tables --- libm/src/math/exp2.rs | 526 ++++++++++++++++++++--------------------- libm/src/math/exp2f.rs | 40 ++-- 2 files changed, 283 insertions(+), 283 deletions(-) diff --git a/libm/src/math/exp2.rs b/libm/src/math/exp2.rs index c5bd3608a..61bfd1015 100644 --- a/libm/src/math/exp2.rs +++ b/libm/src/math/exp2.rs @@ -28,6 +28,267 @@ use super::scalbn::scalbn; const TBLSIZE: usize = 256; +#[cfg_attr(rustfmt, rustfmt_skip)] +static TBL: [u64; TBLSIZE * 2] = [ + // exp2(z + eps) eps + 0x3fe6a09e667f3d5d, 0x3d39880000000000, + 0x3fe6b052fa751744, 0x3cd8000000000000, + 0x3fe6c012750bd9fe, 0xbd28780000000000, + 0x3fe6cfdcddd476bf, 0x3d1ec00000000000, + 0x3fe6dfb23c651a29, 0xbcd8000000000000, + 0x3fe6ef9298593ae3, 0xbcbc000000000000, + 0x3fe6ff7df9519386, 0xbd2fd80000000000, + 0x3fe70f7466f42da3, 0xbd2c880000000000, + 0x3fe71f75e8ec5fc3, 0x3d13c00000000000, + 0x3fe72f8286eacf05, 0xbd38300000000000, + 0x3fe73f9a48a58152, 0xbd00c00000000000, + 0x3fe74fbd35d7ccfc, 0x3d2f880000000000, + 0x3fe75feb564267f1, 0x3d03e00000000000, + 0x3fe77024b1ab6d48, 0xbd27d00000000000, + 0x3fe780694fde5d38, 0xbcdd000000000000, + 0x3fe790b938ac1d00, 0x3ce3000000000000, + 0x3fe7a11473eb0178, 0xbced000000000000, + 0x3fe7b17b0976d060, 0x3d20400000000000, + 0x3fe7c1ed0130c133, 0x3ca0000000000000, + 0x3fe7d26a62ff8636, 0xbd26900000000000, + 0x3fe7e2f336cf4e3b, 0xbd02e00000000000, + 0x3fe7f3878491c3e8, 0xbd24580000000000, + 0x3fe80427543e1b4e, 0x3d33000000000000, + 0x3fe814d2add1071a, 0x3d0f000000000000, + 0x3fe82589994ccd7e, 0xbd21c00000000000, + 0x3fe8364c1eb942d0, 0x3d29d00000000000, + 0x3fe8471a4623cab5, 0x3d47100000000000, + 0x3fe857f4179f5bbc, 0x3d22600000000000, + 0x3fe868d99b4491af, 0xbd32c40000000000, + 0x3fe879cad931a395, 0xbd23000000000000, + 
0x3fe88ac7d98a65b8, 0xbd2a800000000000, + 0x3fe89bd0a4785800, 0xbced000000000000, + 0x3fe8ace5422aa223, 0x3d33280000000000, + 0x3fe8be05bad619fa, 0x3d42b40000000000, + 0x3fe8cf3216b54383, 0xbd2ed00000000000, + 0x3fe8e06a5e08664c, 0xbd20500000000000, + 0x3fe8f1ae99157807, 0x3d28280000000000, + 0x3fe902fed0282c0e, 0xbd1cb00000000000, + 0x3fe9145b0b91ff96, 0xbd05e00000000000, + 0x3fe925c353aa2ff9, 0x3cf5400000000000, + 0x3fe93737b0cdc64a, 0x3d17200000000000, + 0x3fe948b82b5f98ae, 0xbd09000000000000, + 0x3fe95a44cbc852cb, 0x3d25680000000000, + 0x3fe96bdd9a766f21, 0xbd36d00000000000, + 0x3fe97d829fde4e2a, 0xbd01000000000000, + 0x3fe98f33e47a23a3, 0x3d2d000000000000, + 0x3fe9a0f170ca0604, 0xbd38a40000000000, + 0x3fe9b2bb4d53ff89, 0x3d355c0000000000, + 0x3fe9c49182a3f15b, 0x3d26b80000000000, + 0x3fe9d674194bb8c5, 0xbcec000000000000, + 0x3fe9e86319e3238e, 0x3d17d00000000000, + 0x3fe9fa5e8d07f302, 0x3d16400000000000, + 0x3fea0c667b5de54d, 0xbcf5000000000000, + 0x3fea1e7aed8eb8f6, 0x3d09e00000000000, + 0x3fea309bec4a2e27, 0x3d2ad80000000000, + 0x3fea42c980460a5d, 0xbd1af00000000000, + 0x3fea5503b23e259b, 0x3d0b600000000000, + 0x3fea674a8af46213, 0x3d38880000000000, + 0x3fea799e1330b3a7, 0x3d11200000000000, + 0x3fea8bfe53c12e8d, 0x3d06c00000000000, + 0x3fea9e6b5579fcd2, 0xbd29b80000000000, + 0x3feab0e521356fb8, 0x3d2b700000000000, + 0x3feac36bbfd3f381, 0x3cd9000000000000, + 0x3fead5ff3a3c2780, 0x3ce4000000000000, + 0x3feae89f995ad2a3, 0xbd2c900000000000, + 0x3feafb4ce622f367, 0x3d16500000000000, + 0x3feb0e07298db790, 0x3d2fd40000000000, + 0x3feb20ce6c9a89a9, 0x3d12700000000000, + 0x3feb33a2b84f1a4b, 0x3d4d470000000000, + 0x3feb468415b747e7, 0xbd38380000000000, + 0x3feb59728de5593a, 0x3c98000000000000, + 0x3feb6c6e29f1c56a, 0x3d0ad00000000000, + 0x3feb7f76f2fb5e50, 0x3cde800000000000, + 0x3feb928cf22749b2, 0xbd04c00000000000, + 0x3feba5b030a10603, 0xbd0d700000000000, + 0x3febb8e0b79a6f66, 0x3d0d900000000000, + 0x3febcc1e904bc1ff, 0x3d02a00000000000, + 0x3febdf69c3f3a16f, 
0xbd1f780000000000, + 0x3febf2c25bd71db8, 0xbd10a00000000000, + 0x3fec06286141b2e9, 0xbd11400000000000, + 0x3fec199bdd8552e0, 0x3d0be00000000000, + 0x3fec2d1cd9fa64ee, 0xbd09400000000000, + 0x3fec40ab5fffd02f, 0xbd0ed00000000000, + 0x3fec544778fafd15, 0x3d39660000000000, + 0x3fec67f12e57d0cb, 0xbd1a100000000000, + 0x3fec7ba88988c1b6, 0xbd58458000000000, + 0x3fec8f6d9406e733, 0xbd1a480000000000, + 0x3feca3405751c4df, 0x3ccb000000000000, + 0x3fecb720dcef9094, 0x3d01400000000000, + 0x3feccb0f2e6d1689, 0x3cf0200000000000, + 0x3fecdf0b555dc412, 0x3cf3600000000000, + 0x3fecf3155b5bab3b, 0xbd06900000000000, + 0x3fed072d4a0789bc, 0x3d09a00000000000, + 0x3fed1b532b08c8fa, 0xbd15e00000000000, + 0x3fed2f87080d8a85, 0x3d1d280000000000, + 0x3fed43c8eacaa203, 0x3d01a00000000000, + 0x3fed5818dcfba491, 0x3cdf000000000000, + 0x3fed6c76e862e6a1, 0xbd03a00000000000, + 0x3fed80e316c9834e, 0xbd0cd80000000000, + 0x3fed955d71ff6090, 0x3cf4c00000000000, + 0x3feda9e603db32ae, 0x3cff900000000000, + 0x3fedbe7cd63a8325, 0x3ce9800000000000, + 0x3fedd321f301b445, 0xbcf5200000000000, + 0x3fede7d5641c05bf, 0xbd1d700000000000, + 0x3fedfc97337b9aec, 0xbd16140000000000, + 0x3fee11676b197d5e, 0x3d0b480000000000, + 0x3fee264614f5a3e7, 0x3d40ce0000000000, + 0x3fee3b333b16ee5c, 0x3d0c680000000000, + 0x3fee502ee78b3fb4, 0xbd09300000000000, + 0x3fee653924676d68, 0xbce5000000000000, + 0x3fee7a51fbc74c44, 0xbd07f80000000000, + 0x3fee8f7977cdb726, 0xbcf3700000000000, + 0x3feea4afa2a490e8, 0x3ce5d00000000000, + 0x3feeb9f4867ccae4, 0x3d161a0000000000, + 0x3feecf482d8e680d, 0x3cf5500000000000, + 0x3feee4aaa2188514, 0x3cc6400000000000, + 0x3feefa1bee615a13, 0xbcee800000000000, + 0x3fef0f9c1cb64106, 0xbcfa880000000000, + 0x3fef252b376bb963, 0xbd2c900000000000, + 0x3fef3ac948dd7275, 0x3caa000000000000, + 0x3fef50765b6e4524, 0xbcf4f00000000000, + 0x3fef6632798844fd, 0x3cca800000000000, + 0x3fef7bfdad9cbe38, 0x3cfabc0000000000, + 0x3fef91d802243c82, 0xbcd4600000000000, + 0x3fefa7c1819e908e, 0xbd0b0c0000000000, + 
0x3fefbdba3692d511, 0xbcc0e00000000000, + 0x3fefd3c22b8f7194, 0xbd10de8000000000, + 0x3fefe9d96b2a23ee, 0x3cee430000000000, + 0x3ff0000000000000, 0x0, + 0x3ff00b1afa5abcbe, 0xbcb3400000000000, + 0x3ff0163da9fb3303, 0xbd12170000000000, + 0x3ff02168143b0282, 0x3cba400000000000, + 0x3ff02c9a3e77806c, 0x3cef980000000000, + 0x3ff037d42e11bbca, 0xbcc7400000000000, + 0x3ff04315e86e7f89, 0x3cd8300000000000, + 0x3ff04e5f72f65467, 0xbd1a3f0000000000, + 0x3ff059b0d315855a, 0xbd02840000000000, + 0x3ff0650a0e3c1f95, 0x3cf1600000000000, + 0x3ff0706b29ddf71a, 0x3d15240000000000, + 0x3ff07bd42b72a82d, 0xbce9a00000000000, + 0x3ff0874518759bd0, 0x3ce6400000000000, + 0x3ff092bdf66607c8, 0xbd00780000000000, + 0x3ff09e3ecac6f383, 0xbc98000000000000, + 0x3ff0a9c79b1f3930, 0x3cffa00000000000, + 0x3ff0b5586cf988fc, 0xbcfac80000000000, + 0x3ff0c0f145e46c8a, 0x3cd9c00000000000, + 0x3ff0cc922b724816, 0x3d05200000000000, + 0x3ff0d83b23395dd8, 0xbcfad00000000000, + 0x3ff0e3ec32d3d1f3, 0x3d1bac0000000000, + 0x3ff0efa55fdfa9a6, 0xbd04e80000000000, + 0x3ff0fb66affed2f0, 0xbd0d300000000000, + 0x3ff1073028d7234b, 0x3cf1500000000000, + 0x3ff11301d0125b5b, 0x3cec000000000000, + 0x3ff11edbab5e2af9, 0x3d16bc0000000000, + 0x3ff12abdc06c31d5, 0x3ce8400000000000, + 0x3ff136a814f2047d, 0xbd0ed00000000000, + 0x3ff1429aaea92de9, 0x3ce8e00000000000, + 0x3ff14e95934f3138, 0x3ceb400000000000, + 0x3ff15a98c8a58e71, 0x3d05300000000000, + 0x3ff166a45471c3df, 0x3d03380000000000, + 0x3ff172b83c7d5211, 0x3d28d40000000000, + 0x3ff17ed48695bb9f, 0xbd05d00000000000, + 0x3ff18af9388c8d93, 0xbd1c880000000000, + 0x3ff1972658375d66, 0x3d11f00000000000, + 0x3ff1a35beb6fcba7, 0x3d10480000000000, + 0x3ff1af99f81387e3, 0xbd47390000000000, + 0x3ff1bbe084045d54, 0x3d24e40000000000, + 0x3ff1c82f95281c43, 0xbd0a200000000000, + 0x3ff1d4873168b9b2, 0x3ce3800000000000, + 0x3ff1e0e75eb44031, 0x3ceac00000000000, + 0x3ff1ed5022fcd938, 0x3d01900000000000, + 0x3ff1f9c18438cdf7, 0xbd1b780000000000, + 0x3ff2063b88628d8f, 0x3d2d940000000000, 
+ 0x3ff212be3578a81e, 0x3cd8000000000000, + 0x3ff21f49917ddd41, 0x3d2b340000000000, + 0x3ff22bdda2791323, 0x3d19f80000000000, + 0x3ff2387a6e7561e7, 0xbd19c80000000000, + 0x3ff2451ffb821427, 0x3d02300000000000, + 0x3ff251ce4fb2a602, 0xbd13480000000000, + 0x3ff25e85711eceb0, 0x3d12700000000000, + 0x3ff26b4565e27d16, 0x3d11d00000000000, + 0x3ff2780e341de00f, 0x3d31ee0000000000, + 0x3ff284dfe1f5633e, 0xbd14c00000000000, + 0x3ff291ba7591bb30, 0xbd13d80000000000, + 0x3ff29e9df51fdf09, 0x3d08b00000000000, + 0x3ff2ab8a66d10e9b, 0xbd227c0000000000, + 0x3ff2b87fd0dada3a, 0x3d2a340000000000, + 0x3ff2c57e39771af9, 0xbd10800000000000, + 0x3ff2d285a6e402d9, 0xbd0ed00000000000, + 0x3ff2df961f641579, 0xbcf4200000000000, + 0x3ff2ecafa93e2ecf, 0xbd24980000000000, + 0x3ff2f9d24abd8822, 0xbd16300000000000, + 0x3ff306fe0a31b625, 0xbd32360000000000, + 0x3ff31432edeea50b, 0xbd70df8000000000, + 0x3ff32170fc4cd7b8, 0xbd22480000000000, + 0x3ff32eb83ba8e9a2, 0xbd25980000000000, + 0x3ff33c08b2641766, 0x3d1ed00000000000, + 0x3ff3496266e3fa27, 0xbcdc000000000000, + 0x3ff356c55f929f0f, 0xbd30d80000000000, + 0x3ff36431a2de88b9, 0x3d22c80000000000, + 0x3ff371a7373aaa39, 0x3d20600000000000, + 0x3ff37f26231e74fe, 0xbd16600000000000, + 0x3ff38cae6d05d838, 0xbd0ae00000000000, + 0x3ff39a401b713ec3, 0xbd44720000000000, + 0x3ff3a7db34e5a020, 0x3d08200000000000, + 0x3ff3b57fbfec6e95, 0x3d3e800000000000, + 0x3ff3c32dc313a8f2, 0x3cef800000000000, + 0x3ff3d0e544ede122, 0xbd17a00000000000, + 0x3ff3dea64c1234bb, 0x3d26300000000000, + 0x3ff3ec70df1c4ecc, 0xbd48a60000000000, + 0x3ff3fa4504ac7e8c, 0xbd3cdc0000000000, + 0x3ff40822c367a0bb, 0x3d25b80000000000, + 0x3ff4160a21f72e95, 0x3d1ec00000000000, + 0x3ff423fb27094646, 0xbd13600000000000, + 0x3ff431f5d950a920, 0x3d23980000000000, + 0x3ff43ffa3f84b9eb, 0x3cfa000000000000, + 0x3ff44e0860618919, 0xbcf6c00000000000, + 0x3ff45c2042a7d201, 0xbd0bc00000000000, + 0x3ff46a41ed1d0016, 0xbd12800000000000, + 0x3ff4786d668b3326, 0x3d30e00000000000, + 0x3ff486a2b5c13c00, 
0xbd2d400000000000, + 0x3ff494e1e192af04, 0x3d0c200000000000, + 0x3ff4a32af0d7d372, 0xbd1e500000000000, + 0x3ff4b17dea6db801, 0x3d07800000000000, + 0x3ff4bfdad53629e1, 0xbd13800000000000, + 0x3ff4ce41b817c132, 0x3d00800000000000, + 0x3ff4dcb299fddddb, 0x3d2c700000000000, + 0x3ff4eb2d81d8ab96, 0xbd1ce00000000000, + 0x3ff4f9b2769d2d02, 0x3d19200000000000, + 0x3ff508417f4531c1, 0xbd08c00000000000, + 0x3ff516daa2cf662a, 0xbcfa000000000000, + 0x3ff5257de83f51ea, 0x3d4a080000000000, + 0x3ff5342b569d4eda, 0xbd26d80000000000, + 0x3ff542e2f4f6ac1a, 0xbd32440000000000, + 0x3ff551a4ca5d94db, 0x3d483c0000000000, + 0x3ff56070dde9116b, 0x3d24b00000000000, + 0x3ff56f4736b529de, 0x3d415a0000000000, + 0x3ff57e27dbe2c40e, 0xbd29e00000000000, + 0x3ff58d12d497c76f, 0xbd23080000000000, + 0x3ff59c0827ff0b4c, 0x3d4dec0000000000, + 0x3ff5ab07dd485427, 0xbcc4000000000000, + 0x3ff5ba11fba87af4, 0x3d30080000000000, + 0x3ff5c9268a59460b, 0xbd26c80000000000, + 0x3ff5d84590998e3f, 0x3d469a0000000000, + 0x3ff5e76f15ad20e1, 0xbd1b400000000000, + 0x3ff5f6a320dcebca, 0x3d17700000000000, + 0x3ff605e1b976dcb8, 0x3d26f80000000000, + 0x3ff6152ae6cdf715, 0x3d01000000000000, + 0x3ff6247eb03a5531, 0xbd15d00000000000, + 0x3ff633dd1d1929b5, 0xbd12d00000000000, + 0x3ff6434634ccc313, 0xbcea800000000000, + 0x3ff652b9febc8efa, 0xbd28600000000000, + 0x3ff6623882553397, 0x3d71fe0000000000, + 0x3ff671c1c708328e, 0xbd37200000000000, + 0x3ff68155d44ca97e, 0x3ce6800000000000, + 0x3ff690f4b19e9471, 0xbd29780000000000, +]; + // exp2(x): compute the base 2 exponential of x // // Accuracy: Peak error < 0.503 ulp for normalized results. 
@@ -65,267 +326,6 @@ pub fn exp2(mut x: f64) -> f64 { let p4 = f64::from_bits(0x3f83b2ab88f70400); let p5 = f64::from_bits(0x3f55d88003875c74); - #[cfg_attr(rustfmt, rustfmt_skip)] - let tbl = [ - // exp2(z + eps) eps - f64::from_bits(0x3fe6a09e667f3d5d), f64::from_bits(0x3d39880000000000), - f64::from_bits(0x3fe6b052fa751744), f64::from_bits(0x3cd8000000000000), - f64::from_bits(0x3fe6c012750bd9fe), f64::from_bits(0xbd28780000000000), - f64::from_bits(0x3fe6cfdcddd476bf), f64::from_bits(0x3d1ec00000000000), - f64::from_bits(0x3fe6dfb23c651a29), f64::from_bits(0xbcd8000000000000), - f64::from_bits(0x3fe6ef9298593ae3), f64::from_bits(0xbcbc000000000000), - f64::from_bits(0x3fe6ff7df9519386), f64::from_bits(0xbd2fd80000000000), - f64::from_bits(0x3fe70f7466f42da3), f64::from_bits(0xbd2c880000000000), - f64::from_bits(0x3fe71f75e8ec5fc3), f64::from_bits(0x3d13c00000000000), - f64::from_bits(0x3fe72f8286eacf05), f64::from_bits(0xbd38300000000000), - f64::from_bits(0x3fe73f9a48a58152), f64::from_bits(0xbd00c00000000000), - f64::from_bits(0x3fe74fbd35d7ccfc), f64::from_bits(0x3d2f880000000000), - f64::from_bits(0x3fe75feb564267f1), f64::from_bits(0x3d03e00000000000), - f64::from_bits(0x3fe77024b1ab6d48), f64::from_bits(0xbd27d00000000000), - f64::from_bits(0x3fe780694fde5d38), f64::from_bits(0xbcdd000000000000), - f64::from_bits(0x3fe790b938ac1d00), f64::from_bits(0x3ce3000000000000), - f64::from_bits(0x3fe7a11473eb0178), f64::from_bits(0xbced000000000000), - f64::from_bits(0x3fe7b17b0976d060), f64::from_bits(0x3d20400000000000), - f64::from_bits(0x3fe7c1ed0130c133), f64::from_bits(0x3ca0000000000000), - f64::from_bits(0x3fe7d26a62ff8636), f64::from_bits(0xbd26900000000000), - f64::from_bits(0x3fe7e2f336cf4e3b), f64::from_bits(0xbd02e00000000000), - f64::from_bits(0x3fe7f3878491c3e8), f64::from_bits(0xbd24580000000000), - f64::from_bits(0x3fe80427543e1b4e), f64::from_bits(0x3d33000000000000), - f64::from_bits(0x3fe814d2add1071a), f64::from_bits(0x3d0f000000000000), - 
f64::from_bits(0x3fe82589994ccd7e), f64::from_bits(0xbd21c00000000000), - f64::from_bits(0x3fe8364c1eb942d0), f64::from_bits(0x3d29d00000000000), - f64::from_bits(0x3fe8471a4623cab5), f64::from_bits(0x3d47100000000000), - f64::from_bits(0x3fe857f4179f5bbc), f64::from_bits(0x3d22600000000000), - f64::from_bits(0x3fe868d99b4491af), f64::from_bits(0xbd32c40000000000), - f64::from_bits(0x3fe879cad931a395), f64::from_bits(0xbd23000000000000), - f64::from_bits(0x3fe88ac7d98a65b8), f64::from_bits(0xbd2a800000000000), - f64::from_bits(0x3fe89bd0a4785800), f64::from_bits(0xbced000000000000), - f64::from_bits(0x3fe8ace5422aa223), f64::from_bits(0x3d33280000000000), - f64::from_bits(0x3fe8be05bad619fa), f64::from_bits(0x3d42b40000000000), - f64::from_bits(0x3fe8cf3216b54383), f64::from_bits(0xbd2ed00000000000), - f64::from_bits(0x3fe8e06a5e08664c), f64::from_bits(0xbd20500000000000), - f64::from_bits(0x3fe8f1ae99157807), f64::from_bits(0x3d28280000000000), - f64::from_bits(0x3fe902fed0282c0e), f64::from_bits(0xbd1cb00000000000), - f64::from_bits(0x3fe9145b0b91ff96), f64::from_bits(0xbd05e00000000000), - f64::from_bits(0x3fe925c353aa2ff9), f64::from_bits(0x3cf5400000000000), - f64::from_bits(0x3fe93737b0cdc64a), f64::from_bits(0x3d17200000000000), - f64::from_bits(0x3fe948b82b5f98ae), f64::from_bits(0xbd09000000000000), - f64::from_bits(0x3fe95a44cbc852cb), f64::from_bits(0x3d25680000000000), - f64::from_bits(0x3fe96bdd9a766f21), f64::from_bits(0xbd36d00000000000), - f64::from_bits(0x3fe97d829fde4e2a), f64::from_bits(0xbd01000000000000), - f64::from_bits(0x3fe98f33e47a23a3), f64::from_bits(0x3d2d000000000000), - f64::from_bits(0x3fe9a0f170ca0604), f64::from_bits(0xbd38a40000000000), - f64::from_bits(0x3fe9b2bb4d53ff89), f64::from_bits(0x3d355c0000000000), - f64::from_bits(0x3fe9c49182a3f15b), f64::from_bits(0x3d26b80000000000), - f64::from_bits(0x3fe9d674194bb8c5), f64::from_bits(0xbcec000000000000), - f64::from_bits(0x3fe9e86319e3238e), f64::from_bits(0x3d17d00000000000), - 
f64::from_bits(0x3fe9fa5e8d07f302), f64::from_bits(0x3d16400000000000), - f64::from_bits(0x3fea0c667b5de54d), f64::from_bits(0xbcf5000000000000), - f64::from_bits(0x3fea1e7aed8eb8f6), f64::from_bits(0x3d09e00000000000), - f64::from_bits(0x3fea309bec4a2e27), f64::from_bits(0x3d2ad80000000000), - f64::from_bits(0x3fea42c980460a5d), f64::from_bits(0xbd1af00000000000), - f64::from_bits(0x3fea5503b23e259b), f64::from_bits(0x3d0b600000000000), - f64::from_bits(0x3fea674a8af46213), f64::from_bits(0x3d38880000000000), - f64::from_bits(0x3fea799e1330b3a7), f64::from_bits(0x3d11200000000000), - f64::from_bits(0x3fea8bfe53c12e8d), f64::from_bits(0x3d06c00000000000), - f64::from_bits(0x3fea9e6b5579fcd2), f64::from_bits(0xbd29b80000000000), - f64::from_bits(0x3feab0e521356fb8), f64::from_bits(0x3d2b700000000000), - f64::from_bits(0x3feac36bbfd3f381), f64::from_bits(0x3cd9000000000000), - f64::from_bits(0x3fead5ff3a3c2780), f64::from_bits(0x3ce4000000000000), - f64::from_bits(0x3feae89f995ad2a3), f64::from_bits(0xbd2c900000000000), - f64::from_bits(0x3feafb4ce622f367), f64::from_bits(0x3d16500000000000), - f64::from_bits(0x3feb0e07298db790), f64::from_bits(0x3d2fd40000000000), - f64::from_bits(0x3feb20ce6c9a89a9), f64::from_bits(0x3d12700000000000), - f64::from_bits(0x3feb33a2b84f1a4b), f64::from_bits(0x3d4d470000000000), - f64::from_bits(0x3feb468415b747e7), f64::from_bits(0xbd38380000000000), - f64::from_bits(0x3feb59728de5593a), f64::from_bits(0x3c98000000000000), - f64::from_bits(0x3feb6c6e29f1c56a), f64::from_bits(0x3d0ad00000000000), - f64::from_bits(0x3feb7f76f2fb5e50), f64::from_bits(0x3cde800000000000), - f64::from_bits(0x3feb928cf22749b2), f64::from_bits(0xbd04c00000000000), - f64::from_bits(0x3feba5b030a10603), f64::from_bits(0xbd0d700000000000), - f64::from_bits(0x3febb8e0b79a6f66), f64::from_bits(0x3d0d900000000000), - f64::from_bits(0x3febcc1e904bc1ff), f64::from_bits(0x3d02a00000000000), - f64::from_bits(0x3febdf69c3f3a16f), f64::from_bits(0xbd1f780000000000), - 
f64::from_bits(0x3febf2c25bd71db8), f64::from_bits(0xbd10a00000000000), - f64::from_bits(0x3fec06286141b2e9), f64::from_bits(0xbd11400000000000), - f64::from_bits(0x3fec199bdd8552e0), f64::from_bits(0x3d0be00000000000), - f64::from_bits(0x3fec2d1cd9fa64ee), f64::from_bits(0xbd09400000000000), - f64::from_bits(0x3fec40ab5fffd02f), f64::from_bits(0xbd0ed00000000000), - f64::from_bits(0x3fec544778fafd15), f64::from_bits(0x3d39660000000000), - f64::from_bits(0x3fec67f12e57d0cb), f64::from_bits(0xbd1a100000000000), - f64::from_bits(0x3fec7ba88988c1b6), f64::from_bits(0xbd58458000000000), - f64::from_bits(0x3fec8f6d9406e733), f64::from_bits(0xbd1a480000000000), - f64::from_bits(0x3feca3405751c4df), f64::from_bits(0x3ccb000000000000), - f64::from_bits(0x3fecb720dcef9094), f64::from_bits(0x3d01400000000000), - f64::from_bits(0x3feccb0f2e6d1689), f64::from_bits(0x3cf0200000000000), - f64::from_bits(0x3fecdf0b555dc412), f64::from_bits(0x3cf3600000000000), - f64::from_bits(0x3fecf3155b5bab3b), f64::from_bits(0xbd06900000000000), - f64::from_bits(0x3fed072d4a0789bc), f64::from_bits(0x3d09a00000000000), - f64::from_bits(0x3fed1b532b08c8fa), f64::from_bits(0xbd15e00000000000), - f64::from_bits(0x3fed2f87080d8a85), f64::from_bits(0x3d1d280000000000), - f64::from_bits(0x3fed43c8eacaa203), f64::from_bits(0x3d01a00000000000), - f64::from_bits(0x3fed5818dcfba491), f64::from_bits(0x3cdf000000000000), - f64::from_bits(0x3fed6c76e862e6a1), f64::from_bits(0xbd03a00000000000), - f64::from_bits(0x3fed80e316c9834e), f64::from_bits(0xbd0cd80000000000), - f64::from_bits(0x3fed955d71ff6090), f64::from_bits(0x3cf4c00000000000), - f64::from_bits(0x3feda9e603db32ae), f64::from_bits(0x3cff900000000000), - f64::from_bits(0x3fedbe7cd63a8325), f64::from_bits(0x3ce9800000000000), - f64::from_bits(0x3fedd321f301b445), f64::from_bits(0xbcf5200000000000), - f64::from_bits(0x3fede7d5641c05bf), f64::from_bits(0xbd1d700000000000), - f64::from_bits(0x3fedfc97337b9aec), f64::from_bits(0xbd16140000000000), - 
f64::from_bits(0x3fee11676b197d5e), f64::from_bits(0x3d0b480000000000), - f64::from_bits(0x3fee264614f5a3e7), f64::from_bits(0x3d40ce0000000000), - f64::from_bits(0x3fee3b333b16ee5c), f64::from_bits(0x3d0c680000000000), - f64::from_bits(0x3fee502ee78b3fb4), f64::from_bits(0xbd09300000000000), - f64::from_bits(0x3fee653924676d68), f64::from_bits(0xbce5000000000000), - f64::from_bits(0x3fee7a51fbc74c44), f64::from_bits(0xbd07f80000000000), - f64::from_bits(0x3fee8f7977cdb726), f64::from_bits(0xbcf3700000000000), - f64::from_bits(0x3feea4afa2a490e8), f64::from_bits(0x3ce5d00000000000), - f64::from_bits(0x3feeb9f4867ccae4), f64::from_bits(0x3d161a0000000000), - f64::from_bits(0x3feecf482d8e680d), f64::from_bits(0x3cf5500000000000), - f64::from_bits(0x3feee4aaa2188514), f64::from_bits(0x3cc6400000000000), - f64::from_bits(0x3feefa1bee615a13), f64::from_bits(0xbcee800000000000), - f64::from_bits(0x3fef0f9c1cb64106), f64::from_bits(0xbcfa880000000000), - f64::from_bits(0x3fef252b376bb963), f64::from_bits(0xbd2c900000000000), - f64::from_bits(0x3fef3ac948dd7275), f64::from_bits(0x3caa000000000000), - f64::from_bits(0x3fef50765b6e4524), f64::from_bits(0xbcf4f00000000000), - f64::from_bits(0x3fef6632798844fd), f64::from_bits(0x3cca800000000000), - f64::from_bits(0x3fef7bfdad9cbe38), f64::from_bits(0x3cfabc0000000000), - f64::from_bits(0x3fef91d802243c82), f64::from_bits(0xbcd4600000000000), - f64::from_bits(0x3fefa7c1819e908e), f64::from_bits(0xbd0b0c0000000000), - f64::from_bits(0x3fefbdba3692d511), f64::from_bits(0xbcc0e00000000000), - f64::from_bits(0x3fefd3c22b8f7194), f64::from_bits(0xbd10de8000000000), - f64::from_bits(0x3fefe9d96b2a23ee), f64::from_bits(0x3cee430000000000), - f64::from_bits(0x3ff0000000000000), f64::from_bits(0x0), - f64::from_bits(0x3ff00b1afa5abcbe), f64::from_bits(0xbcb3400000000000), - f64::from_bits(0x3ff0163da9fb3303), f64::from_bits(0xbd12170000000000), - f64::from_bits(0x3ff02168143b0282), f64::from_bits(0x3cba400000000000), - 
f64::from_bits(0x3ff02c9a3e77806c), f64::from_bits(0x3cef980000000000), - f64::from_bits(0x3ff037d42e11bbca), f64::from_bits(0xbcc7400000000000), - f64::from_bits(0x3ff04315e86e7f89), f64::from_bits(0x3cd8300000000000), - f64::from_bits(0x3ff04e5f72f65467), f64::from_bits(0xbd1a3f0000000000), - f64::from_bits(0x3ff059b0d315855a), f64::from_bits(0xbd02840000000000), - f64::from_bits(0x3ff0650a0e3c1f95), f64::from_bits(0x3cf1600000000000), - f64::from_bits(0x3ff0706b29ddf71a), f64::from_bits(0x3d15240000000000), - f64::from_bits(0x3ff07bd42b72a82d), f64::from_bits(0xbce9a00000000000), - f64::from_bits(0x3ff0874518759bd0), f64::from_bits(0x3ce6400000000000), - f64::from_bits(0x3ff092bdf66607c8), f64::from_bits(0xbd00780000000000), - f64::from_bits(0x3ff09e3ecac6f383), f64::from_bits(0xbc98000000000000), - f64::from_bits(0x3ff0a9c79b1f3930), f64::from_bits(0x3cffa00000000000), - f64::from_bits(0x3ff0b5586cf988fc), f64::from_bits(0xbcfac80000000000), - f64::from_bits(0x3ff0c0f145e46c8a), f64::from_bits(0x3cd9c00000000000), - f64::from_bits(0x3ff0cc922b724816), f64::from_bits(0x3d05200000000000), - f64::from_bits(0x3ff0d83b23395dd8), f64::from_bits(0xbcfad00000000000), - f64::from_bits(0x3ff0e3ec32d3d1f3), f64::from_bits(0x3d1bac0000000000), - f64::from_bits(0x3ff0efa55fdfa9a6), f64::from_bits(0xbd04e80000000000), - f64::from_bits(0x3ff0fb66affed2f0), f64::from_bits(0xbd0d300000000000), - f64::from_bits(0x3ff1073028d7234b), f64::from_bits(0x3cf1500000000000), - f64::from_bits(0x3ff11301d0125b5b), f64::from_bits(0x3cec000000000000), - f64::from_bits(0x3ff11edbab5e2af9), f64::from_bits(0x3d16bc0000000000), - f64::from_bits(0x3ff12abdc06c31d5), f64::from_bits(0x3ce8400000000000), - f64::from_bits(0x3ff136a814f2047d), f64::from_bits(0xbd0ed00000000000), - f64::from_bits(0x3ff1429aaea92de9), f64::from_bits(0x3ce8e00000000000), - f64::from_bits(0x3ff14e95934f3138), f64::from_bits(0x3ceb400000000000), - f64::from_bits(0x3ff15a98c8a58e71), f64::from_bits(0x3d05300000000000), - 
f64::from_bits(0x3ff166a45471c3df), f64::from_bits(0x3d03380000000000), - f64::from_bits(0x3ff172b83c7d5211), f64::from_bits(0x3d28d40000000000), - f64::from_bits(0x3ff17ed48695bb9f), f64::from_bits(0xbd05d00000000000), - f64::from_bits(0x3ff18af9388c8d93), f64::from_bits(0xbd1c880000000000), - f64::from_bits(0x3ff1972658375d66), f64::from_bits(0x3d11f00000000000), - f64::from_bits(0x3ff1a35beb6fcba7), f64::from_bits(0x3d10480000000000), - f64::from_bits(0x3ff1af99f81387e3), f64::from_bits(0xbd47390000000000), - f64::from_bits(0x3ff1bbe084045d54), f64::from_bits(0x3d24e40000000000), - f64::from_bits(0x3ff1c82f95281c43), f64::from_bits(0xbd0a200000000000), - f64::from_bits(0x3ff1d4873168b9b2), f64::from_bits(0x3ce3800000000000), - f64::from_bits(0x3ff1e0e75eb44031), f64::from_bits(0x3ceac00000000000), - f64::from_bits(0x3ff1ed5022fcd938), f64::from_bits(0x3d01900000000000), - f64::from_bits(0x3ff1f9c18438cdf7), f64::from_bits(0xbd1b780000000000), - f64::from_bits(0x3ff2063b88628d8f), f64::from_bits(0x3d2d940000000000), - f64::from_bits(0x3ff212be3578a81e), f64::from_bits(0x3cd8000000000000), - f64::from_bits(0x3ff21f49917ddd41), f64::from_bits(0x3d2b340000000000), - f64::from_bits(0x3ff22bdda2791323), f64::from_bits(0x3d19f80000000000), - f64::from_bits(0x3ff2387a6e7561e7), f64::from_bits(0xbd19c80000000000), - f64::from_bits(0x3ff2451ffb821427), f64::from_bits(0x3d02300000000000), - f64::from_bits(0x3ff251ce4fb2a602), f64::from_bits(0xbd13480000000000), - f64::from_bits(0x3ff25e85711eceb0), f64::from_bits(0x3d12700000000000), - f64::from_bits(0x3ff26b4565e27d16), f64::from_bits(0x3d11d00000000000), - f64::from_bits(0x3ff2780e341de00f), f64::from_bits(0x3d31ee0000000000), - f64::from_bits(0x3ff284dfe1f5633e), f64::from_bits(0xbd14c00000000000), - f64::from_bits(0x3ff291ba7591bb30), f64::from_bits(0xbd13d80000000000), - f64::from_bits(0x3ff29e9df51fdf09), f64::from_bits(0x3d08b00000000000), - f64::from_bits(0x3ff2ab8a66d10e9b), f64::from_bits(0xbd227c0000000000), - 
f64::from_bits(0x3ff2b87fd0dada3a), f64::from_bits(0x3d2a340000000000), - f64::from_bits(0x3ff2c57e39771af9), f64::from_bits(0xbd10800000000000), - f64::from_bits(0x3ff2d285a6e402d9), f64::from_bits(0xbd0ed00000000000), - f64::from_bits(0x3ff2df961f641579), f64::from_bits(0xbcf4200000000000), - f64::from_bits(0x3ff2ecafa93e2ecf), f64::from_bits(0xbd24980000000000), - f64::from_bits(0x3ff2f9d24abd8822), f64::from_bits(0xbd16300000000000), - f64::from_bits(0x3ff306fe0a31b625), f64::from_bits(0xbd32360000000000), - f64::from_bits(0x3ff31432edeea50b), f64::from_bits(0xbd70df8000000000), - f64::from_bits(0x3ff32170fc4cd7b8), f64::from_bits(0xbd22480000000000), - f64::from_bits(0x3ff32eb83ba8e9a2), f64::from_bits(0xbd25980000000000), - f64::from_bits(0x3ff33c08b2641766), f64::from_bits(0x3d1ed00000000000), - f64::from_bits(0x3ff3496266e3fa27), f64::from_bits(0xbcdc000000000000), - f64::from_bits(0x3ff356c55f929f0f), f64::from_bits(0xbd30d80000000000), - f64::from_bits(0x3ff36431a2de88b9), f64::from_bits(0x3d22c80000000000), - f64::from_bits(0x3ff371a7373aaa39), f64::from_bits(0x3d20600000000000), - f64::from_bits(0x3ff37f26231e74fe), f64::from_bits(0xbd16600000000000), - f64::from_bits(0x3ff38cae6d05d838), f64::from_bits(0xbd0ae00000000000), - f64::from_bits(0x3ff39a401b713ec3), f64::from_bits(0xbd44720000000000), - f64::from_bits(0x3ff3a7db34e5a020), f64::from_bits(0x3d08200000000000), - f64::from_bits(0x3ff3b57fbfec6e95), f64::from_bits(0x3d3e800000000000), - f64::from_bits(0x3ff3c32dc313a8f2), f64::from_bits(0x3cef800000000000), - f64::from_bits(0x3ff3d0e544ede122), f64::from_bits(0xbd17a00000000000), - f64::from_bits(0x3ff3dea64c1234bb), f64::from_bits(0x3d26300000000000), - f64::from_bits(0x3ff3ec70df1c4ecc), f64::from_bits(0xbd48a60000000000), - f64::from_bits(0x3ff3fa4504ac7e8c), f64::from_bits(0xbd3cdc0000000000), - f64::from_bits(0x3ff40822c367a0bb), f64::from_bits(0x3d25b80000000000), - f64::from_bits(0x3ff4160a21f72e95), f64::from_bits(0x3d1ec00000000000), - 
f64::from_bits(0x3ff423fb27094646), f64::from_bits(0xbd13600000000000), - f64::from_bits(0x3ff431f5d950a920), f64::from_bits(0x3d23980000000000), - f64::from_bits(0x3ff43ffa3f84b9eb), f64::from_bits(0x3cfa000000000000), - f64::from_bits(0x3ff44e0860618919), f64::from_bits(0xbcf6c00000000000), - f64::from_bits(0x3ff45c2042a7d201), f64::from_bits(0xbd0bc00000000000), - f64::from_bits(0x3ff46a41ed1d0016), f64::from_bits(0xbd12800000000000), - f64::from_bits(0x3ff4786d668b3326), f64::from_bits(0x3d30e00000000000), - f64::from_bits(0x3ff486a2b5c13c00), f64::from_bits(0xbd2d400000000000), - f64::from_bits(0x3ff494e1e192af04), f64::from_bits(0x3d0c200000000000), - f64::from_bits(0x3ff4a32af0d7d372), f64::from_bits(0xbd1e500000000000), - f64::from_bits(0x3ff4b17dea6db801), f64::from_bits(0x3d07800000000000), - f64::from_bits(0x3ff4bfdad53629e1), f64::from_bits(0xbd13800000000000), - f64::from_bits(0x3ff4ce41b817c132), f64::from_bits(0x3d00800000000000), - f64::from_bits(0x3ff4dcb299fddddb), f64::from_bits(0x3d2c700000000000), - f64::from_bits(0x3ff4eb2d81d8ab96), f64::from_bits(0xbd1ce00000000000), - f64::from_bits(0x3ff4f9b2769d2d02), f64::from_bits(0x3d19200000000000), - f64::from_bits(0x3ff508417f4531c1), f64::from_bits(0xbd08c00000000000), - f64::from_bits(0x3ff516daa2cf662a), f64::from_bits(0xbcfa000000000000), - f64::from_bits(0x3ff5257de83f51ea), f64::from_bits(0x3d4a080000000000), - f64::from_bits(0x3ff5342b569d4eda), f64::from_bits(0xbd26d80000000000), - f64::from_bits(0x3ff542e2f4f6ac1a), f64::from_bits(0xbd32440000000000), - f64::from_bits(0x3ff551a4ca5d94db), f64::from_bits(0x3d483c0000000000), - f64::from_bits(0x3ff56070dde9116b), f64::from_bits(0x3d24b00000000000), - f64::from_bits(0x3ff56f4736b529de), f64::from_bits(0x3d415a0000000000), - f64::from_bits(0x3ff57e27dbe2c40e), f64::from_bits(0xbd29e00000000000), - f64::from_bits(0x3ff58d12d497c76f), f64::from_bits(0xbd23080000000000), - f64::from_bits(0x3ff59c0827ff0b4c), f64::from_bits(0x3d4dec0000000000), - 
f64::from_bits(0x3ff5ab07dd485427), f64::from_bits(0xbcc4000000000000), - f64::from_bits(0x3ff5ba11fba87af4), f64::from_bits(0x3d30080000000000), - f64::from_bits(0x3ff5c9268a59460b), f64::from_bits(0xbd26c80000000000), - f64::from_bits(0x3ff5d84590998e3f), f64::from_bits(0x3d469a0000000000), - f64::from_bits(0x3ff5e76f15ad20e1), f64::from_bits(0xbd1b400000000000), - f64::from_bits(0x3ff5f6a320dcebca), f64::from_bits(0x3d17700000000000), - f64::from_bits(0x3ff605e1b976dcb8), f64::from_bits(0x3d26f80000000000), - f64::from_bits(0x3ff6152ae6cdf715), f64::from_bits(0x3d01000000000000), - f64::from_bits(0x3ff6247eb03a5531), f64::from_bits(0xbd15d00000000000), - f64::from_bits(0x3ff633dd1d1929b5), f64::from_bits(0xbd12d00000000000), - f64::from_bits(0x3ff6434634ccc313), f64::from_bits(0xbcea800000000000), - f64::from_bits(0x3ff652b9febc8efa), f64::from_bits(0xbd28600000000000), - f64::from_bits(0x3ff6623882553397), f64::from_bits(0x3d71fe0000000000), - f64::from_bits(0x3ff671c1c708328e), f64::from_bits(0xbd37200000000000), - f64::from_bits(0x3ff68155d44ca97e), f64::from_bits(0x3ce6800000000000), - f64::from_bits(0x3ff690f4b19e9471), f64::from_bits(0xbd29780000000000), - ]; - // double_t r, t, z; // uint32_t ix, i0; // union {double f; uint64_t i;} u = {x}; @@ -375,8 +375,8 @@ pub fn exp2(mut x: f64) -> f64 { let mut z = x - uf; /* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). 
*/ - let t = tbl[2 * i0 as usize]; /* exp2t[i0] */ - z -= tbl[2 * i0 as usize + 1]; /* eps[i0] */ + let t = f64::from_bits(TBL[2 * i0 as usize]); /* exp2t[i0] */ + z -= f64::from_bits(TBL[2 * i0 as usize + 1]); /* eps[i0] */ let r = t + t * z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * p5)))); scalbn(r, ki) diff --git a/libm/src/math/exp2f.rs b/libm/src/math/exp2f.rs index 947679a83..79929ce90 100644 --- a/libm/src/math/exp2f.rs +++ b/libm/src/math/exp2f.rs @@ -26,6 +26,25 @@ const TBLSIZE: usize = 16; +static EXP2FT: [u64; TBLSIZE] = [ + 0x3fe6a09e667f3bcd, + 0x3fe7a11473eb0187, + 0x3fe8ace5422aa0db, + 0x3fe9c49182a3f090, + 0x3feae89f995ad3ad, + 0x3fec199bdd85529c, + 0x3fed5818dcfba487, + 0x3feea4afa2a490da, + 0x3ff0000000000000, + 0x3ff0b5586cf9890f, + 0x3ff172b83c7d517b, + 0x3ff2387a6e756238, + 0x3ff306fe0a31b715, + 0x3ff3dea64c123422, + 0x3ff4bfdad5362a27, + 0x3ff5ab07dd485429, +]; + // exp2f(x): compute the base 2 exponential of x // // Accuracy: Peak error < 0.501 ulp; location of peak: -0.030110927. @@ -57,25 +76,6 @@ pub fn exp2f(mut x: f32) -> f32 { let p3 = f32::from_bits(0x3d6359a4); let p4 = f32::from_bits(0x3c1d964e); - let exp2ft: [f64; TBLSIZE] = [ - f64::from_bits(0x3fe6a09e667f3bcd), - f64::from_bits(0x3fe7a11473eb0187), - f64::from_bits(0x3fe8ace5422aa0db), - f64::from_bits(0x3fe9c49182a3f090), - f64::from_bits(0x3feae89f995ad3ad), - f64::from_bits(0x3fec199bdd85529c), - f64::from_bits(0x3fed5818dcfba487), - f64::from_bits(0x3feea4afa2a490da), - f64::from_bits(0x3ff0000000000000), - f64::from_bits(0x3ff0b5586cf9890f), - f64::from_bits(0x3ff172b83c7d517b), - f64::from_bits(0x3ff2387a6e756238), - f64::from_bits(0x3ff306fe0a31b715), - f64::from_bits(0x3ff3dea64c123422), - f64::from_bits(0x3ff4bfdad5362a27), - f64::from_bits(0x3ff5ab07dd485429), - ]; - // double_t t, r, z; // uint32_t ix, i0, k; @@ -121,7 +121,7 @@ pub fn exp2f(mut x: f32) -> f32 { uf -= redux; let z: f64 = (x - uf) as f64; /* Compute r = exp2(y) = exp2ft[i0] * p(z). 
*/ - let r: f64 = exp2ft[i0 as usize]; + let r: f64 = f64::from_bits(EXP2FT[i0 as usize]); let t: f64 = r as f64 * z; let r: f64 = r + t * (p1 as f64 + z * p2 as f64) + t * (z * z) * (p3 as f64 + z * p4 as f64); From 28b293877e8b891fa08a8dd2574c88ea7f069c30 Mon Sep 17 00:00:00 2001 From: C Jones Date: Sat, 14 Jul 2018 15:25:09 -0400 Subject: [PATCH 0075/1459] Add some extra notes to CONTRIBUTING --- libm/CONTRIBUTING.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/libm/CONTRIBUTING.md b/libm/CONTRIBUTING.md index 6f8e984f3..680c40e80 100644 --- a/libm/CONTRIBUTING.md +++ b/libm/CONTRIBUTING.md @@ -28,7 +28,7 @@ Check [PR #65] for an example. have any external dependencies (other than `core` itself). - Only use relative imports within the `math` directory / module, e.g. `use self::fabs::fabs` or -`use super::isnanf`. Absolute imports from core are OK, e.g. `use core::u64`. +`use super::k_cos`. Absolute imports from core are OK, e.g. `use core::u64`. - To reinterpret a float as an integer use the `to_bits` method. The MUSL code uses the `GET_FLOAT_WORD` macro, or a union, to do this operation. @@ -36,6 +36,13 @@ Check [PR #65] for an example. - To reinterpret an integer as a float use the `f32::from_bits` constructor. The MUSL code uses the `SET_FLOAT_WORD` macro, or a union, to do this operation. +- You may use other methods from core like `f64::is_nan`, etc. as appropriate. + +- If you're implementing one of the private double-underscore functions, take a look at the + "source" name in the comment at the top for an idea for alternate naming. For example, `__sin` + was renamed to `k_sin` after the FreeBSD source code naming. Do `use` these private functions in + `mod.rs`. + - You may encounter weird literals like `0x1p127f` in the MUSL code. These are hexadecimal floating point literals. Rust (the language) doesn't support these kind of literals. 
The best way I have found to deal with these literals is to turn them into their integer representation using the From 128d5763398ee48b8b5480302372242f08ee17cb Mon Sep 17 00:00:00 2001 From: Erik Date: Sat, 14 Jul 2018 15:21:23 -0400 Subject: [PATCH 0076/1459] implement fma --- libm/src/lib.rs | 2 - libm/src/math/fma.rs | 201 ++++++++++++++++++++++++++++++++ libm/src/math/mod.rs | 2 + libm/test-generator/src/main.rs | 2 +- 4 files changed, 204 insertions(+), 3 deletions(-) create mode 100644 libm/src/math/fma.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index bef466787..c3a200593 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -366,7 +366,6 @@ pub trait F64Ext: private::Sealed { #[cfg(todo)] fn signum(self) -> Self; - #[cfg(todo)] fn mul_add(self, a: Self, b: Self) -> Self; #[cfg(todo)] @@ -485,7 +484,6 @@ impl F64Ext for f64 { fabs(self) } - #[cfg(todo)] #[inline] fn mul_add(self, a: Self, b: Self) -> Self { fma(self, a, b) diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs new file mode 100644 index 000000000..99a27164a --- /dev/null +++ b/libm/src/math/fma.rs @@ -0,0 +1,201 @@ +use core::{f32, f64}; + +use super::scalbn; + +const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1; + +struct Num { + m: u64, + e: i32, + sign: i32, +} + +#[inline] +fn normalize(x: f64) -> Num { + let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 + + let mut ix: u64 = x.to_bits(); + let mut e: i32 = (ix >> 52) as i32; + let sign: i32 = e & 0x800; + e &= 0x7ff; + if e == 0 { + ix = (x * x1p63).to_bits(); + e = (ix >> 52) as i32 & 0x7ff; + e = if e != 0 { e - 63 } else { 0x800 }; + } + ix &= (1 << 52) - 1; + ix |= 1 << 52; + ix <<= 1; + e -= 0x3ff + 52 + 1; + Num { m: ix, e, sign } +} + +#[inline] +fn mul(x: u64, y: u64) -> (u64, u64) { + let t1: u64; + let t2: u64; + let t3: u64; + let xlo: u64 = x as u32 as u64; + let xhi: u64 = x >> 32; + let ylo: u64 = y as u32 as u64; + let yhi: u64 = y >> 32; + + t1 = xlo * ylo; + t2 = xlo * yhi + xhi * ylo; + t3 = 
xhi * yhi; + let lo = t1 + (t2 << 32); + let hi = t3 + (t2 >> 32) + (t1 > lo) as u64; + (hi, lo) +} + +#[inline] +pub fn fma(x: f64, y: f64, z: f64) -> f64 { + let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 + let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63 + + /* normalize so top 10bits and last bit are 0 */ + let nx = normalize(x); + let ny = normalize(y); + let nz = normalize(z); + + if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN { + return x * y + z; + } + if nz.e >= ZEROINFNAN { + if nz.e > ZEROINFNAN { + /* z==0 */ + return x * y + z; + } + return z; + } + + /* mul: r = x*y */ + let zhi: u64; + let zlo: u64; + let (mut rhi, mut rlo) = mul(nx.m, ny.m); + /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */ + + /* align exponents */ + let mut e: i32 = nx.e + ny.e; + let mut d: i32 = nz.e - e; + /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */ + if d > 0 { + if d < 64 { + zlo = nz.m << d; + zhi = nz.m >> 64 - d; + } else { + zlo = 0; + zhi = nz.m; + e = nz.e - 64; + d -= 64; + if d == 0 { + } else if d < 64 { + rlo = rhi << 64 - d | rlo >> d | ((rlo << 64 - d) != 0) as u64; + rhi = rhi >> d; + } else { + rlo = 1; + rhi = 0; + } + } + } else { + zhi = 0; + d = -d; + if d == 0 { + zlo = nz.m; + } else if d < 64 { + zlo = nz.m >> d | ((nz.m << 64 - d) != 0) as u64; + } else { + zlo = 1; + } + } + + /* add */ + let mut sign: i32 = nx.sign ^ ny.sign; + let samesign: bool = (sign ^ nz.sign) == 0; + let mut nonzero: i32 = 1; + if samesign { + /* r += z */ + rlo += zlo; + rhi += zhi + (rlo < zlo) as u64; + } else { + /* r -= z */ + let t = rlo; + rlo -= zlo; + rhi = rhi - zhi - (t < rlo) as u64; + if (rhi >> 63) != 0 { + rlo = (-(rlo as i64)) as u64; + rhi = (-(rhi as i64)) as u64 - (rlo != 0) as u64; + sign = (sign == 0) as i32; + } + nonzero = (rhi != 0) as i32; + } + + /* set rhi to top 63bit of the result (last bit is sticky) */ + if nonzero != 0 { + e += 64; + d = 
rhi.leading_zeros() as i32 - 1; + /* note: d > 0 */ + rhi = rhi << d | rlo >> 64 - d | ((rlo << d) != 0) as u64; + } else if rlo != 0 { + d = rlo.leading_zeros() as i32 - 1; + if d < 0 { + rhi = rlo >> 1 | (rlo & 1); + } else { + rhi = rlo << d; + } + } else { + /* exact +-0 */ + return x * y + z; + } + e -= d; + + /* convert to double */ + let mut i: i64 = rhi as i64; /* i is in [1<<62,(1<<63)-1] */ + if sign != 0 { + i = -i; + } + let mut r: f64 = i as f64; /* |r| is in [0x1p62,0x1p63] */ + + if e < -1022 - 62 { + /* result is subnormal before rounding */ + if e == -1022 - 63 { + let mut c: f64 = x1p63; + if sign != 0 { + c = -c; + } + if r == c { + /* min normal after rounding, underflow depends + on arch behaviour which can be imitated by + a double to float conversion */ + let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32; + return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64; + } + /* one bit is lost when scaled, add another top bit to + only round once at conversion if it is inexact */ + if (rhi << 53) != 0 { + i = (rhi >> 1 | (rhi & 1) | 1 << 62) as i64; + if sign != 0 { + i = -i; + } + r = i as f64; + r = 2. 
* r - c; /* remove top bit */ + + /* raise underflow portably, such that it + cannot be optimized away */ + { + let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r; + r += (tiny * tiny) * (r - r); + } + } + } else { + /* only round once when scaled */ + d = 10; + i = ((rhi >> d | ((rhi << 64 - d) != 0) as u64) << d) as i64; + if sign != 0 { + i = -i; + } + r = i as f64; + } + } + scalbn(r, e) +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 828e3cb1a..c9a34fae4 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -22,6 +22,7 @@ mod fdim; mod fdimf; mod floor; mod floorf; +mod fma; mod fmod; mod fmodf; mod hypot; @@ -61,6 +62,7 @@ pub use self::fdim::fdim; pub use self::fdimf::fdimf; pub use self::floor::floor; pub use self::floorf::floorf; +pub use self::fma::fma; pub use self::fmod::fmod; pub use self::fmodf::fmodf; pub use self::hypot::hypot; diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index e5b8fdc1d..f4b7cd7ca 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -732,7 +732,7 @@ f64f64_f64! { // With signature `fn(f64, f64, f64) -> f64` f64f64f64_f64! 
{ - // fma, + fma, } // With signature `fn(f64, i32) -> f64` From e49be42bc10c56e2f79e80a2554d942c6e4db643 Mon Sep 17 00:00:00 2001 From: Erik Date: Sat, 14 Jul 2018 14:49:22 -0400 Subject: [PATCH 0077/1459] allow some functions to be inlined --- libm/src/math/ceilf.rs | 1 + libm/src/math/exp2.rs | 1 + libm/src/math/exp2f.rs | 1 + libm/src/math/fdim.rs | 1 + libm/src/math/fdimf.rs | 1 + libm/src/math/hypot.rs | 1 + libm/src/math/log1p.rs | 1 + libm/src/math/log1pf.rs | 1 + libm/src/math/log2.rs | 1 + libm/src/math/log2f.rs | 1 + libm/src/math/round.rs | 1 + libm/src/math/roundf.rs | 1 + 12 files changed, 12 insertions(+) diff --git a/libm/src/math/ceilf.rs b/libm/src/math/ceilf.rs index b4f58bfb8..16bffb300 100644 --- a/libm/src/math/ceilf.rs +++ b/libm/src/math/ceilf.rs @@ -1,5 +1,6 @@ use core::f32; +#[inline] pub fn ceilf(x: f32) -> f32 { let mut ui = x.to_bits(); let e = (((ui >> 23) & 0xff) - 0x7f) as i32; diff --git a/libm/src/math/exp2.rs b/libm/src/math/exp2.rs index 61bfd1015..0ab119896 100644 --- a/libm/src/math/exp2.rs +++ b/libm/src/math/exp2.rs @@ -318,6 +318,7 @@ static TBL: [u64; TBLSIZE * 2] = [ // // Gal, S. and Bachelis, B. An Accurate Elementary Mathematical Library // for the IEEE Floating Point Standard. TOMS 17(1), 26-46 (1991). +#[inline] pub fn exp2(mut x: f64) -> f64 { let redux = f64::from_bits(0x4338000000000000) / TBLSIZE as f64; let p1 = f64::from_bits(0x3fe62e42fefa39ef); diff --git a/libm/src/math/exp2f.rs b/libm/src/math/exp2f.rs index 79929ce90..a3f6db8c5 100644 --- a/libm/src/math/exp2f.rs +++ b/libm/src/math/exp2f.rs @@ -69,6 +69,7 @@ static EXP2FT: [u64; TBLSIZE] = [ // // Tang, P. Table-driven Implementation of the Exponential Function // in IEEE Floating-Point Arithmetic. TOMS 15(2), 144-157 (1989). 
+#[inline] pub fn exp2f(mut x: f32) -> f32 { let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32; let p1 = f32::from_bits(0x3f317218); diff --git a/libm/src/math/fdim.rs b/libm/src/math/fdim.rs index 2b277eab0..1daae4ebc 100644 --- a/libm/src/math/fdim.rs +++ b/libm/src/math/fdim.rs @@ -1,5 +1,6 @@ use core::f64; +#[inline] pub fn fdim(x: f64, y: f64) -> f64 { if x.is_nan() { x diff --git a/libm/src/math/fdimf.rs b/libm/src/math/fdimf.rs index 44bf2d680..953e0c8df 100644 --- a/libm/src/math/fdimf.rs +++ b/libm/src/math/fdimf.rs @@ -1,5 +1,6 @@ use core::f32; +#[inline] pub fn fdimf(x: f32, y: f32) -> f32 { if x.is_nan() { x diff --git a/libm/src/math/hypot.rs b/libm/src/math/hypot.rs index 7ad1baf79..f011415fd 100644 --- a/libm/src/math/hypot.rs +++ b/libm/src/math/hypot.rs @@ -4,6 +4,7 @@ use super::sqrt; const SPLIT: f64 = 134217728. + 1.; // 0x1p27 + 1 === (2 ^ 27) + 1 +#[inline] fn sq(x: f64) -> (f64, f64) { let xh: f64; let xl: f64; diff --git a/libm/src/math/log1p.rs b/libm/src/math/log1p.rs index 7f7a5125b..f42669dee 100644 --- a/libm/src/math/log1p.rs +++ b/libm/src/math/log1p.rs @@ -65,6 +65,7 @@ const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ +#[inline] pub fn log1p(x: f64) -> f64 { let mut ui: u64 = x.to_bits(); let hfsq: f64; diff --git a/libm/src/math/log1pf.rs b/libm/src/math/log1pf.rs index c8e242259..e6e1c14c8 100644 --- a/libm/src/math/log1pf.rs +++ b/libm/src/math/log1pf.rs @@ -20,6 +20,7 @@ const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ +#[inline] pub fn log1pf(x: f32) -> f32 { let mut ui: u32 = x.to_bits(); let hfsq: f32; diff --git a/libm/src/math/log2.rs b/libm/src/math/log2.rs index f6640d296..35eb9bf72 100644 --- a/libm/src/math/log2.rs +++ b/libm/src/math/log2.rs @@ -29,6 
+29,7 @@ const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ +#[inline] pub fn log2(mut x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 diff --git a/libm/src/math/log2f.rs b/libm/src/math/log2f.rs index c007ff9b0..8684b142f 100644 --- a/libm/src/math/log2f.rs +++ b/libm/src/math/log2f.rs @@ -23,6 +23,7 @@ const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ +#[inline] pub fn log2f(mut x: f32) -> f32 { let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs index 73d431c51..1a6e75448 100644 --- a/libm/src/math/round.rs +++ b/libm/src/math/round.rs @@ -2,6 +2,7 @@ use core::f64; const TOINT: f64 = 1.0 / f64::EPSILON; +#[inline] pub fn round(mut x: f64) -> f64 { let (f, i) = (x, x.to_bits()); let e: u64 = i >> 52 & 0x7ff; diff --git a/libm/src/math/roundf.rs b/libm/src/math/roundf.rs index bd2488fa9..7dd79557a 100644 --- a/libm/src/math/roundf.rs +++ b/libm/src/math/roundf.rs @@ -2,6 +2,7 @@ use core::f32; const TOINT: f32 = 1.0 / f32::EPSILON; +#[inline] pub fn roundf(mut x: f32) -> f32 { let i = x.to_bits(); let e: u32 = i >> 23 & 0xff; From 689755fbab2ef77d57d54998c76f6401b458f2a5 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sat, 14 Jul 2018 22:41:48 +0200 Subject: [PATCH 0078/1459] rustfmt --- libm/src/math/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index b2c01f651..d5f4e0d54 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -90,6 +90,4 @@ mod k_sinf; mod rem_pio2_large; mod rem_pio2f; -use self::{ - k_cosf::k_cosf, k_sinf::k_sinf, rem_pio2_large::rem_pio2_large, rem_pio2f::rem_pio2f, -}; +use self::{k_cosf::k_cosf, 
k_sinf::k_sinf, rem_pio2_large::rem_pio2_large, rem_pio2f::rem_pio2f}; From 795dd8b0123e25feb936b770e8cc4ffe358f7b92 Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Sat, 14 Jul 2018 18:57:12 +0300 Subject: [PATCH 0079/1459] acosf asinf atanf expm1f sinf tanf --- libm/src/lib.rs | 12 ---- libm/src/math/acosf.rs | 59 +++++++++++++++++ libm/src/math/asinf.rs | 52 +++++++++++++++ libm/src/math/atanf.rs | 95 +++++++++++++++++++++++++++ libm/src/math/expm1f.rs | 112 ++++++++++++++++++++++++++++++++ libm/src/math/k_tanf.rs | 35 ++++++++++ libm/src/math/mod.rs | 18 ++++- libm/src/math/sinf.rs | 77 ++++++++++++++++++++++ libm/src/math/tanf.rs | 62 ++++++++++++++++++ libm/test-generator/src/main.rs | 11 ++-- 10 files changed, 515 insertions(+), 18 deletions(-) create mode 100644 libm/src/math/acosf.rs create mode 100644 libm/src/math/asinf.rs create mode 100644 libm/src/math/atanf.rs create mode 100644 libm/src/math/expm1f.rs create mode 100644 libm/src/math/k_tanf.rs create mode 100644 libm/src/math/sinf.rs create mode 100644 libm/src/math/tanf.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 27925e806..df7ee813f 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -84,21 +84,16 @@ pub trait F32Ext: private::Sealed { fn hypot(self, other: Self) -> Self; - #[cfg(todo)] fn sin(self) -> Self; fn cos(self) -> Self; - #[cfg(todo)] fn tan(self) -> Self; - #[cfg(todo)] fn asin(self) -> Self; - #[cfg(todo)] fn acos(self) -> Self; - #[cfg(todo)] fn atan(self) -> Self; #[cfg(todo)] @@ -110,7 +105,6 @@ pub trait F32Ext: private::Sealed { (self.sin(), self.cos()) } - #[cfg(todo)] fn exp_m1(self) -> Self; fn ln_1p(self) -> Self; @@ -248,7 +242,6 @@ impl F32Ext for f32 { hypotf(self, other) } - #[cfg(todo)] #[inline] fn sin(self) -> Self { sinf(self) @@ -259,25 +252,21 @@ impl F32Ext for f32 { cosf(self) } - #[cfg(todo)] #[inline] fn tan(self) -> Self { tanf(self) } - #[cfg(todo)] #[inline] fn asin(self) -> Self { asinf(self) } - #[cfg(todo)] #[inline] fn acos(self) -> Self { 
acosf(self) } - #[cfg(todo)] #[inline] fn atan(self) -> Self { atanf(self) @@ -289,7 +278,6 @@ impl F32Ext for f32 { atan2f(self, other) } - #[cfg(todo)] #[inline] fn exp_m1(self) -> Self { expm1f(self) diff --git a/libm/src/math/acosf.rs b/libm/src/math/acosf.rs new file mode 100644 index 000000000..bbe29c17c --- /dev/null +++ b/libm/src/math/acosf.rs @@ -0,0 +1,59 @@ +use super::sqrtf::sqrtf; + +const PIO2_HI: f32 = 1.5707962513e+00; /* 0x3fc90fda */ +const PIO2_LO: f32 = 7.5497894159e-08; /* 0x33a22168 */ +const P_S0: f32 = 1.6666586697e-01; +const P_S1: f32 = -4.2743422091e-02; +const P_S2: f32 = -8.6563630030e-03; +const Q_S1: f32 = -7.0662963390e-01; + +fn r(z: f32) -> f32 { + let p = z * (P_S0 + z * (P_S1 + z * P_S2)); + let q = 1. + z * Q_S1; + p / q +} + +#[inline] +pub fn acosf(x: f32) -> f32 { + let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120) + + let z: f32; + let w: f32; + let s: f32; + + let mut hx = x.to_bits(); + let ix = hx & 0x7fffffff; + /* |x| >= 1 or nan */ + if ix >= 0x3f800000 { + if ix == 0x3f800000 { + if (hx >> 31) != 0 { + return 2. * PIO2_HI + x1p_120; + } + return 0.; + } + return 0. / (x - x); + } + /* |x| < 0.5 */ + if ix < 0x3f000000 { + if ix <= 0x32800000 { + /* |x| < 2**-26 */ + return PIO2_HI + x1p_120; + } + return PIO2_HI - (x - (PIO2_LO - x * r(x * x))); + } + /* x < -0.5 */ + if (hx >> 31) != 0 { + z = (1. + x) * 0.5; + s = sqrtf(z); + w = r(z) * s - PIO2_LO; + return 2. * (PIO2_HI - (s + w)); + } + /* x > 0.5 */ + z = (1. - x) * 0.5; + s = sqrtf(z); + hx = s.to_bits(); + let df = f32::from_bits(hx & 0xfffff000); + let c = (z - df * df) / (s + df); + w = r(z) * s + c; + 2. 
* (df + w) +} diff --git a/libm/src/math/asinf.rs b/libm/src/math/asinf.rs new file mode 100644 index 000000000..597be4cb7 --- /dev/null +++ b/libm/src/math/asinf.rs @@ -0,0 +1,52 @@ +use super::fabsf::fabsf; +use super::sqrt::sqrt; + +const PIO2: f64 = 1.570796326794896558e+00; + +/* coefficients for R(x^2) */ +const P_S0: f32 = 1.6666586697e-01; +const P_S1: f32 = -4.2743422091e-02; +const P_S2: f32 = -8.6563630030e-03; +const Q_S1: f32 = -7.0662963390e-01; + +fn r(z: f32) -> f32 { + let p = z * (P_S0 + z * (P_S1 + z * P_S2)); + let q = 1. + z * Q_S1; + p / q +} + +#[inline] +pub fn asinf(mut x: f32) -> f32 { + let x1p_120 = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ (-120) + + let hx = x.to_bits(); + let ix = hx & 0x7fffffff; + + if ix >= 0x3f800000 { + /* |x| >= 1 */ + if ix == 0x3f800000 { + /* |x| == 1 */ + return ((x as f64) * PIO2 + x1p_120) as f32; /* asin(+-1) = +-pi/2 with inexact */ + } + return 0. / (x - x); /* asin(|x|>1) is NaN */ + } + + if ix < 0x3f000000 { + /* |x| < 0.5 */ + /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */ + if (ix < 0x39800000) && (ix >= 0x00800000) { + return x; + } + return x + x * r(x * x); + } + + /* 1 > |x| >= 0.5 */ + let z = (1. - fabsf(x)) * 0.5; + let s = sqrt(z as f64); + x = (PIO2 - 2. 
* (s + s * (r(z) as f64))) as f32; + if (hx >> 31) != 0 { + -x + } else { + x + } +} diff --git a/libm/src/math/atanf.rs b/libm/src/math/atanf.rs new file mode 100644 index 000000000..01c41f4ce --- /dev/null +++ b/libm/src/math/atanf.rs @@ -0,0 +1,95 @@ +use super::fabsf; + +const ATAN_HI: [f32; 4] = [ + 4.6364760399e-01, /* atan(0.5)hi 0x3eed6338 */ + 7.8539812565e-01, /* atan(1.0)hi 0x3f490fda */ + 9.8279368877e-01, /* atan(1.5)hi 0x3f7b985e */ + 1.5707962513e+00, /* atan(inf)hi 0x3fc90fda */ +]; + +const ATAN_LO: [f32; 4] = [ + 5.0121582440e-09, /* atan(0.5)lo 0x31ac3769 */ + 3.7748947079e-08, /* atan(1.0)lo 0x33222168 */ + 3.4473217170e-08, /* atan(1.5)lo 0x33140fb4 */ + 7.5497894159e-08, /* atan(inf)lo 0x33a22168 */ +]; + +const A_T: [f32; 5] = [ + 3.3333328366e-01, + -1.9999158382e-01, + 1.4253635705e-01, + -1.0648017377e-01, + 6.1687607318e-02, +]; + +#[inline] +pub fn atanf(mut x: f32) -> f32 { + let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120) + + let z: f32; + + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + if ix >= 0x4c800000 { + /* if |x| >= 2**26 */ + if x.is_nan() { + return x; + } + z = ATAN_HI[3] + x1p_120; + return if sign { -z } else { z }; + } + let id = if ix < 0x3ee00000 { + /* |x| < 0.4375 */ + if ix < 0x39800000 { + /* |x| < 2**-12 */ + if ix < 0x00800000 { + /* raise underflow for subnormal x */ + force_eval!(x * x); + } + return x; + } + -1 + } else { + x = fabsf(x); + if ix < 0x3f980000 { + /* |x| < 1.1875 */ + if ix < 0x3f300000 { + /* 7/16 <= |x| < 11/16 */ + x = (2. * x - 1.) / (2. + x); + 0 + } else { + /* 11/16 <= |x| < 19/16 */ + x = (x - 1.) / (x + 1.); + 1 + } + } else { + if ix < 0x401c0000 { + /* |x| < 2.4375 */ + x = (x - 1.5) / (1. + 1.5 * x); + 2 + } else { + /* 2.4375 <= |x| < 2**26 */ + x = -1. 
/ x; + 3 + } + } + }; + /* end of argument reduction */ + z = x * x; + let w = z * z; + /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */ + let s1 = z * (A_T[0] + w * (A_T[2] + w * A_T[4])); + let s2 = w * (A_T[1] + w * A_T[3]); + if id < 0 { + return x - x * (s1 + s2); + } + let id = id as usize; + let z = ATAN_HI[id] - ((x * (s1 + s2) - ATAN_LO[id]) - x); + if sign { + -z + } else { + z + } +} diff --git a/libm/src/math/expm1f.rs b/libm/src/math/expm1f.rs new file mode 100644 index 000000000..011e09b69 --- /dev/null +++ b/libm/src/math/expm1f.rs @@ -0,0 +1,112 @@ +const O_THRESHOLD: f32 = 8.8721679688e+01; /* 0x42b17180 */ +const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */ +const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */ +const INV_LN2: f32 = 1.4426950216e+00; /* 0x3fb8aa3b */ +/* + * Domain [-0.34568, 0.34568], range ~[-6.694e-10, 6.696e-10]: + * |6 / x * (1 + 2 * (1 / (exp(x) - 1) - 1 / x)) - q(x)| < 2**-30.04 + * Scaled coefficients: Qn_here = 2**n * Qn_for_q (see s_expm1.c): + */ +const Q1: f32 = -3.3333212137e-2; /* -0x888868.0p-28 */ +const Q2: f32 = 1.5807170421e-3; /* 0xcf3010.0p-33 */ + +#[inline] +pub fn expm1f(mut x: f32) -> f32 { + let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 + + let mut hx = x.to_bits(); + let sign = (hx >> 31) != 0; + hx &= 0x7fffffff; + + /* filter out huge and non-finite argument */ + if hx >= 0x4195b844 { + /* if |x|>=27*ln2 */ + if hx > 0x7f800000 { + /* NaN */ + return x; + } + if sign { + return -1.; + } + if x > O_THRESHOLD { + x *= x1p127; + return x; + } + } + + let k: i32; + let hi: f32; + let lo: f32; + let mut c = 0f32; + /* argument reduction */ + if hx > 0x3eb17218 { + /* if |x| > 0.5 ln2 */ + if hx < 0x3F851592 { + /* and |x| < 1.5 ln2 */ + if !sign { + hi = x - LN2_HI; + lo = LN2_LO; + k = 1; + } else { + hi = x + LN2_HI; + lo = -LN2_LO; + k = -1; + } + } else { + k = (INV_LN2 * x + (if sign { -0.5 } else { 0.5 })) as i32; + let t = k as f32; + hi = x - t * LN2_HI; /* 
t*ln2_hi is exact here */ + lo = t * LN2_LO; + } + x = hi - lo; + c = (hi - x) - lo; + } else if hx < 0x33000000 { + /* when |x|<2**-25, return x */ + if hx < 0x00800000 { + force_eval!(x * x); + } + return x; + } else { + k = 0; + } + + /* x is now in primary range */ + let hfx = 0.5 * x; + let hxs = x * hfx; + let r1 = 1. + hxs * (Q1 + hxs * Q2); + let t = 3. - r1 * hfx; + let mut e = hxs * ((r1 - t) / (6. - x * t)); + if k == 0 { + /* c is 0 */ + return x - (x * e - hxs); + } + e = x * (e - c) - c; + e -= hxs; + /* exp(x) ~ 2^k (x_reduced - e + 1) */ + if k == -1 { + return 0.5 * (x - e) - 0.5; + } + if k == 1 { + if x < -0.25 { + return -2. * (e - (x + 0.5)); + } + return 1. + 2. * (x - e); + } + let twopk = f32::from_bits(((0x7f + k) << 23) as u32); /* 2^k */ + if (k < 0) || (k > 56) { + /* suffice to return exp(x)-1 */ + let mut y = x - e + 1.; + if k == 128 { + y = y * 2. * x1p127; + } else { + y = y * twopk; + } + return y - 1.; + } + let uf = f32::from_bits(((0x7f - k) << 23) as u32); /* 2^-k */ + if k < 23 { + (x - e + (1. - uf)) * twopk + } else { + (x - (e + uf) + 1.) * twopk + } +} diff --git a/libm/src/math/k_tanf.rs b/libm/src/math/k_tanf.rs new file mode 100644 index 000000000..db2e0caa7 --- /dev/null +++ b/libm/src/math/k_tanf.rs @@ -0,0 +1,35 @@ +/* |tan(x)/x - t(x)| < 2**-25.5 (~[-2e-08, 2e-08]). */ +const T: [f64; 6] = [ + 0.333331395030791399758, /* 0x15554d3418c99f.0p-54 */ + 0.133392002712976742718, /* 0x1112fd38999f72.0p-55 */ + 0.0533812378445670393523, /* 0x1b54c91d865afe.0p-57 */ + 0.0245283181166547278873, /* 0x191df3908c33ce.0p-58 */ + 0.00297435743359967304927, /* 0x185dadfcecf44e.0p-61 */ + 0.00946564784943673166728, /* 0x1362b9bf971bcd.0p-59 */ +]; + +#[inline] +pub(crate) fn k_tanf(x: f64, odd: bool) -> f32 { + let z = x * x; + /* + * Split up the polynomial into small independent terms to give + * opportunities for parallel evaluation. The chosen splitting is + * micro-optimized for Athlons (XP, X64). 
It costs 2 multiplications + * relative to Horner's method on sequential machines. + * + * We add the small terms from lowest degree up for efficiency on + * non-sequential machines (the lowest degree terms tend to be ready + * earlier). Apart from this, we don't care about order of + * operations, and don't need to care since we have precision to + * spare. However, the chosen splitting is good for accuracy too, + * and would give results as accurate as Horner's method if the + * small terms were added from highest degree down. + */ + let mut r = T[4] + z * T[5]; + let t = T[2] + z * T[3]; + let w = z * z; + let s = z * x; + let u = T[0] + z * T[1]; + r = (x + s * u) + (s * w) * (t + w * r); + (if odd { -1. / r } else { r }) as f32 +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index c903d3787..792d05623 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -7,6 +7,9 @@ macro_rules! force_eval { } mod acos; +mod acosf; +mod asinf; +mod atanf; mod cbrt; mod cbrtf; mod ceil; @@ -17,6 +20,7 @@ mod exp2; mod exp2f; mod expf; mod expm1; +mod expm1f; mod fabs; mod fabsf; mod fdim; @@ -41,13 +45,18 @@ mod round; mod roundf; mod scalbn; mod scalbnf; +mod sinf; mod sqrt; mod sqrtf; +mod tanf; mod trunc; mod truncf; // Use separated imports instead of {}-grouped imports for easier merging.
pub use self::acos::acos; +pub use self::acosf::acosf; +pub use self::asinf::asinf; +pub use self::atanf::atanf; pub use self::cbrt::cbrt; pub use self::cbrtf::cbrtf; pub use self::ceil::ceil; @@ -58,6 +67,7 @@ pub use self::exp2::exp2; pub use self::exp2f::exp2f; pub use self::expf::expf; pub use self::expm1::expm1; +pub use self::expm1f::expm1f; pub use self::fabs::fabs; pub use self::fabsf::fabsf; pub use self::fdim::fdim; @@ -82,14 +92,20 @@ pub use self::round::round; pub use self::roundf::roundf; pub use self::scalbn::scalbn; pub use self::scalbnf::scalbnf; +pub use self::sinf::sinf; pub use self::sqrt::sqrt; pub use self::sqrtf::sqrtf; +pub use self::tanf::tanf; pub use self::trunc::trunc; pub use self::truncf::truncf; mod k_cosf; mod k_sinf; +mod k_tanf; mod rem_pio2_large; mod rem_pio2f; -use self::{k_cosf::k_cosf, k_sinf::k_sinf, rem_pio2_large::rem_pio2_large, rem_pio2f::rem_pio2f}; +use self::{ + k_cosf::k_cosf, k_sinf::k_sinf, k_tanf::k_tanf, rem_pio2_large::rem_pio2_large, + rem_pio2f::rem_pio2f, +}; diff --git a/libm/src/math/sinf.rs b/libm/src/math/sinf.rs new file mode 100644 index 000000000..09f62cddc --- /dev/null +++ b/libm/src/math/sinf.rs @@ -0,0 +1,77 @@ +use super::{k_cosf, k_sinf, rem_pio2f}; + +use core::f64::consts::FRAC_PI_2; + +/* Small multiples of pi/2 rounded to double precision. */ +const S1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ +const S2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ +const S3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ +const S4_PIO2: f64 = 4. 
* FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ + +#[inline] +pub fn sinf(x: f32) -> f32 { + let x64 = x as f64; + + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + if ix <= 0x3f490fda { + /* |x| ~<= pi/4 */ + if ix < 0x39800000 { + /* |x| < 2**-12 */ + /* raise inexact if x!=0 and underflow if subnormal */ + force_eval!(if ix < 0x00800000 { + x / x1p120 + } else { + x + x1p120 + }); + return x; + } + return k_sinf(x64); + } + if ix <= 0x407b53d1 { + /* |x| ~<= 5*pi/4 */ + if ix <= 0x4016cbe3 { + /* |x| ~<= 3pi/4 */ + if sign { + return -k_cosf(x64 + S1_PIO2); + } else { + return k_cosf(x64 - S1_PIO2); + } + } + return k_sinf(if sign { + -(x64 + S2_PIO2) + } else { + -(x64 - S2_PIO2) + }); + } + if ix <= 0x40e231d5 { + /* |x| ~<= 9*pi/4 */ + if ix <= 0x40afeddf { + /* |x| ~<= 7*pi/4 */ + if sign { + return k_cosf(x64 + S3_PIO2); + } else { + return -k_cosf(x64 - S3_PIO2); + } + } + return k_sinf(if sign { x64 + S4_PIO2 } else { x64 - S4_PIO2 }); + } + + /* sin(Inf or NaN) is NaN */ + if ix >= 0x7f800000 { + return x - x; + } + + /* general argument reduction needed */ + let (n, y) = rem_pio2f(x); + match n & 3 { + 0 => k_sinf(y), + 1 => k_cosf(y), + 2 => return k_sinf(-y), + _ => -k_cosf(y), + } +} diff --git a/libm/src/math/tanf.rs b/libm/src/math/tanf.rs new file mode 100644 index 000000000..6bfbe06c1 --- /dev/null +++ b/libm/src/math/tanf.rs @@ -0,0 +1,62 @@ +use super::{k_tanf, rem_pio2f}; + +use core::f64::consts::FRAC_PI_2; + +/* Small multiples of pi/2 rounded to double precision. */ +const T1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ +const T2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ +const T3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ +const T4_PIO2: f64 = 4. 
* FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ + +#[inline] +pub fn tanf(x: f32) -> f32 { + let x64 = x as f64; + + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + if ix <= 0x3f490fda { + /* |x| ~<= pi/4 */ + if ix < 0x39800000 { + /* |x| < 2**-12 */ + /* raise inexact if x!=0 and underflow if subnormal */ + force_eval!(if ix < 0x00800000 { + x / x1p120 + } else { + x + x1p120 + }); + return x; + } + return k_tanf(x64, false); + } + if ix <= 0x407b53d1 { + /* |x| ~<= 5*pi/4 */ + if ix <= 0x4016cbe3 { + /* |x| ~<= 3pi/4 */ + return k_tanf(if sign { x64 + T1_PIO2 } else { x64 - T1_PIO2 }, true); + } else { + return k_tanf(if sign { x64 + T2_PIO2 } else { x64 - T2_PIO2 }, false); + } + } + if ix <= 0x40e231d5 { + /* |x| ~<= 9*pi/4 */ + if ix <= 0x40afeddf { + /* |x| ~<= 7*pi/4 */ + return k_tanf(if sign { x64 + T3_PIO2 } else { x64 - T3_PIO2 }, true); + } else { + return k_tanf(if sign { x64 + T4_PIO2 } else { x64 - T4_PIO2 }, false); + } + } + + /* tan(Inf or NaN) is NaN */ + if ix >= 0x7f800000 { + return x - x; + } + + /* argument reduction */ + let (n, y) = rem_pio2f(x); + k_tanf(y, n & 1 != 0) +} diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index aa50f57cd..0f9360650 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -651,25 +651,26 @@ fn main() -> Result<(), Box> { // With signature `fn(f32) -> f32` f32_f32! 
{ - // acosf, + acosf, floorf, truncf, - // asinf, - // atanf, + asinf, + atanf, cbrtf, cosf, ceilf, // coshf, exp2f, expf, + expm1f, log10f, log1pf, log2f, logf, roundf, - // sinf, + sinf, // sinhf, - // tanf, + tanf, // tanhf, fabsf, sqrtf, From 42aadee952f9676ffadd0e1209d329e65bd5469e Mon Sep 17 00:00:00 2001 From: vjackson725 Date: Sat, 14 Jul 2018 23:10:26 +1000 Subject: [PATCH 0080/1459] add an implemetation of asin Fixes rust-lang/libm#7 Additonally adds inline functions for macros dealing with low and high words of f64s to the root module. Also, the muslc code used a hexdecimal float. This shouldn't be needed because Rust implements floating point parsing well. --- libm/src/math/asin.rs | 113 ++++++++++++++++++++++++++++++++ libm/src/math/mod.rs | 28 ++++++++ libm/test-generator/src/main.rs | 2 +- 3 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 libm/src/math/asin.rs diff --git a/libm/src/math/asin.rs b/libm/src/math/asin.rs new file mode 100644 index 000000000..720169bdc --- /dev/null +++ b/libm/src/math/asin.rs @@ -0,0 +1,113 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_asin.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* asin(x) + * Method : + * Since asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ... 
+ * we approximate asin(x) on [0,0.5] by + * asin(x) = x + x*x^2*R(x^2) + * where + * R(x^2) is a rational approximation of (asin(x)-x)/x^3 + * and its remez error is bounded by + * |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75) + * + * For x in [0.5,1] + * asin(x) = pi/2-2*asin(sqrt((1-x)/2)) + * Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2; + * then for x>0.98 + * asin(x) = pi/2 - 2*(s+s*z*R(z)) + * = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo) + * For x<=0.98, let pio4_hi = pio2_hi/2, then + * f = hi part of s; + * c = sqrt(z) - f = (z-f*f)/(s+f) ...f+c=sqrt(z) + * and + * asin(x) = pi/2 - 2*(s+s*z*R(z)) + * = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo) + * = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c)) + * + * Special cases: + * if x is NaN, return x itself; + * if |x|>1, return NaN with invalid signal. + * + */ + +use super::{fabs, get_high_word, get_low_word, sqrt, with_set_low_word}; + +const PIO2_HI: f64 = 1.57079632679489655800e+00; /* 0x3FF921FB, 0x54442D18 */ +const PIO2_LO: f64 = 6.12323399573676603587e-17; /* 0x3C91A626, 0x33145C07 */ +/* coefficients for R(x^2) */ +const P_S0: f64 = 1.66666666666666657415e-01; /* 0x3FC55555, 0x55555555 */ +const P_S1: f64 = -3.25565818622400915405e-01; /* 0xBFD4D612, 0x03EB6F7D */ +const P_S2: f64 = 2.01212532134862925881e-01; /* 0x3FC9C155, 0x0E884455 */ +const P_S3: f64 = -4.00555345006794114027e-02; /* 0xBFA48228, 0xB5688F3B */ +const P_S4: f64 = 7.91534994289814532176e-04; /* 0x3F49EFE0, 0x7501B288 */ +const P_S5: f64 = 3.47933107596021167570e-05; /* 0x3F023DE1, 0x0DFDF709 */ +const Q_S1: f64 = -2.40339491173441421878e+00; /* 0xC0033A27, 0x1C8A2D4B */ +const Q_S2: f64 = 2.02094576023350569471e+00; /* 0x40002AE5, 0x9C598AC8 */ +const Q_S3: f64 = -6.88283971605453293030e-01; /* 0xBFE6066C, 0x1B8D0159 */ +const Q_S4: f64 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ + +fn comp_r(z: f64) -> f64 { + let p = z * (P_S0 + z * (P_S1 + z * (P_S2 + z * (P_S3 + z * (P_S4 + z * P_S5))))); + let q = 1.0 + z * (Q_S1 + z 
* (Q_S2 + z * (Q_S3 + z * Q_S4))); + return p / q; +} + +pub fn asin(mut x: f64) -> f64 { + let z: f64; + let r: f64; + let s: f64; + let hx: u32; + let ix: u32; + + hx = get_high_word(x); + ix = hx & 0x7fffffff; + /* |x| >= 1 or nan */ + if ix >= 0x3ff00000 { + let lx: u32; + lx = get_low_word(x); + if (ix - 0x3ff00000 | lx) == 0 { + /* asin(1) = +-pi/2 with inexact */ + return x * PIO2_HI + f64::from_bits(0x3870000000000000); + } else { + return 0.0 / (x - x); + } + } + /* |x| < 0.5 */ + if ix < 0x3fe00000 { + /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */ + if ix < 0x3e500000 && ix >= 0x00100000 { + return x; + } else { + return x + x * comp_r(x * x); + } + } + /* 1 > |x| >= 0.5 */ + z = (1.0 - fabs(x)) * 0.5; + s = sqrt(z); + r = comp_r(z); + if ix >= 0x3fef3333 { + /* if |x| > 0.975 */ + x = PIO2_HI - (2. * (s + s * r) - PIO2_LO); + } else { + let f: f64; + let c: f64; + /* f+c = sqrt(z) */ + f = with_set_low_word(s, 0); + c = (z - f * f) / (s + f); + x = 0.5 * PIO2_HI - (2.0 * s * r - (PIO2_LO - 2.0 * c) - (0.5 * PIO2_HI - 2.0 * f)); + } + if hx >> 31 != 0 { + return -x; + } else { + return x; + } +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 792d05623..42c596857 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -8,6 +8,7 @@ macro_rules! force_eval { mod acos; mod acosf; +mod asin; mod asinf; mod atanf; mod cbrt; @@ -55,6 +56,7 @@ mod truncf; // Use separated imports instead of {}-grouped imports for easier merging. 
pub use self::acos::acos; pub use self::acosf::acosf; +pub use self::asin::asin; pub use self::asinf::asinf; pub use self::atanf::atanf; pub use self::cbrt::cbrt; @@ -109,3 +111,29 @@ use self::{ k_cosf::k_cosf, k_sinf::k_sinf, k_tanf::k_tanf, rem_pio2_large::rem_pio2_large, rem_pio2f::rem_pio2f, }; + +#[inline] +pub fn get_high_word(x: f64) -> u32 { + (x.to_bits() >> 32) as u32 +} + +#[inline] +pub fn get_low_word(x: f64) -> u32 { + x.to_bits() as u32 +} + +#[inline] +pub fn with_set_high_word(f: f64, hi: u32) -> f64 { + let mut tmp = f.to_bits(); + tmp &= 0x00000000_ffffffff; + tmp |= (hi as u64) << 32; + f64::from_bits(tmp) +} + +#[inline] +pub fn with_set_low_word(f: f64, lo: u32) -> f64 { + let mut tmp = f.to_bits(); + tmp &= 0xffffffff_00000000; + tmp |= lo as u64; + f64::from_bits(tmp) +} diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 0f9360650..b639cf11b 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -698,7 +698,7 @@ f32i32_f32! { // With signature `fn(f64) -> f64` f64_f64! { acos, - // asin, + asin, // atan, cbrt, ceil, From ac8d7a81340073cc278338cd40dca39cb6f55f90 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Sat, 14 Jul 2018 16:58:24 -0500 Subject: [PATCH 0081/1459] v0.1.1 --- libm/CHANGELOG.md | 38 +++++++++++++++++++++++++++++++++++++- libm/Cargo.toml | 2 +- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index c3e74a814..3a496527b 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -5,8 +5,44 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
## [Unreleased] +## [v0.1.1] - 2018-07-14 + +### Added + +- acos +- acosf +- asin +- asinf +- atanf +- cbrt +- cbrtf +- ceil +- ceilf +- cosf +- exp +- exp2 +- exp2f +- expm1 +- expm1f +- fdim +- fdimf +- floorf +- fma +- fmod +- log +- log2 +- log10 +- log10f +- log1p +- log1pf +- log2f +- roundf +- sinf +- tanf + ## v0.1.0 - 2018-07-13 - Initial release -[Unreleased]: https://github.com/japaric/libm/compare/v0.1.0...HEAD +[Unreleased]: https://github.com/japaric/libm/compare/v0.1.1...HEAD +[v0.1.1]: https://github.com/japaric/libm/compare/v0.1.0...v0.1.1 diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 5a1ae4a6c..a82fc99a7 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -7,7 +7,7 @@ keywords = ["libm", "math"] license = "MIT OR Apache-2.0" name = "libm" repository = "https://github.com/japaric/libm" -version = "0.1.0" +version = "0.1.1" [workspace] members = ["cb", "test-generator"] \ No newline at end of file From e33af9dcc7375a2ad7ecc79fb8cb2d34d6736908 Mon Sep 17 00:00:00 2001 From: C Jones Date: Fri, 13 Jul 2018 22:10:30 -0400 Subject: [PATCH 0082/1459] Implement part of sin/cos with quadrant selection unimplemented --- libm/src/lib.rs | 4 - libm/src/math/cos.rs | 73 +++++++++ libm/src/math/mod.rs | 14 +- libm/src/math/sin.rs | 80 ++++++++++ libm/src/math/trig_common.rs | 289 +++++++++++++++++++++++++++++++++++ 5 files changed, 452 insertions(+), 8 deletions(-) create mode 100644 libm/src/math/cos.rs create mode 100644 libm/src/math/sin.rs create mode 100644 libm/src/math/trig_common.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index df7ee813f..0b9efeeb3 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -386,10 +386,8 @@ pub trait F64Ext: private::Sealed { fn hypot(self, other: Self) -> Self; - #[cfg(todo)] fn sin(self) -> Self; - #[cfg(todo)] fn cos(self) -> Self; #[cfg(todo)] @@ -548,13 +546,11 @@ impl F64Ext for f64 { hypot(self, other) } - #[cfg(todo)] #[inline] fn sin(self) -> Self { sin(self) } - #[cfg(todo)] #[inline] fn cos(self) -> 
Self { cos(self) diff --git a/libm/src/math/cos.rs b/libm/src/math/cos.rs new file mode 100644 index 000000000..91412c648 --- /dev/null +++ b/libm/src/math/cos.rs @@ -0,0 +1,73 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_cos.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use math::trig_common::{_cos, _sin, rem_pio2}; + +// cos(x) +// Return cosine function of x. +// +// kernel function: +// __sin ... sine function on [-pi/4,pi/4] +// __cos ... cosine function on [-pi/4,pi/4] +// __rem_pio2 ... argument reduction routine +// +// Method. +// Let S,C and T denote the sin, cos and tan respectively on +// [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 +// in [-pi/4 , +pi/4], and let n = k mod 4. +// We have +// +// n sin(x) cos(x) tan(x) +// ---------------------------------------------------------- +// 0 S C T +// 1 C -S -1/T +// 2 -S -C T +// 3 -C S -1/T +// ---------------------------------------------------------- +// +// Special cases: +// Let trig be any of sin, cos, or tan. 
+// trig(+-INF) is NaN, with signals; +// trig(NaN) is that NaN; +// +// Accuracy: +// TRIG(x) returns trig(x) nearly rounded +// +pub fn cos(x: f64) -> f64 { + let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; + + /* |x| ~< pi/4 */ + if ix <= 0x3fe921fb { + if ix < 0x3e46a09e { + /* if x < 2**-27 * sqrt(2) */ + /* raise inexact if x != 0 */ + if x as i32 == 0 { + return 1.0; + } + } + return _cos(x, 0.0); + } + + /* cos(Inf or NaN) is NaN */ + if ix >= 0x7ff00000 { + return x - x; + } + + /* argument reduction needed */ + let (n, y0, y1) = rem_pio2(x); + match n & 3 { + 0 => _cos(y0, y1), + 1 => -_sin(y0, y1, 1), + 2 => -_cos(y0, y1), + _ => _sin(y0, y1, 1), + } +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 42c596857..a657da810 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -15,6 +15,7 @@ mod cbrt; mod cbrtf; mod ceil; mod ceilf; +mod cos; mod cosf; mod exp; mod exp2; @@ -46,6 +47,7 @@ mod round; mod roundf; mod scalbn; mod scalbnf; +mod sin; mod sinf; mod sqrt; mod sqrtf; @@ -63,6 +65,7 @@ pub use self::cbrt::cbrt; pub use self::cbrtf::cbrtf; pub use self::ceil::ceil; pub use self::ceilf::ceilf; +pub use self::cos::cos; pub use self::cosf::cosf; pub use self::exp::exp; pub use self::exp2::exp2; @@ -94,6 +97,7 @@ pub use self::round::round; pub use self::roundf::roundf; pub use self::scalbn::scalbn; pub use self::scalbnf::scalbnf; +pub use self::sin::sin; pub use self::sinf::sinf; pub use self::sqrt::sqrt; pub use self::sqrtf::sqrtf; @@ -106,11 +110,13 @@ mod k_sinf; mod k_tanf; mod rem_pio2_large; mod rem_pio2f; +mod trig_common; -use self::{ - k_cosf::k_cosf, k_sinf::k_sinf, k_tanf::k_tanf, rem_pio2_large::rem_pio2_large, - rem_pio2f::rem_pio2f, -}; +use self::k_cosf::k_cosf; +use self::k_sinf::k_sinf; +use self::k_tanf::k_tanf; +use self::rem_pio2_large::rem_pio2_large; +use self::rem_pio2f::rem_pio2f; #[inline] pub fn get_high_word(x: f64) -> u32 { diff --git a/libm/src/math/sin.rs b/libm/src/math/sin.rs new file mode 
100644 index 000000000..0717c1ae3 --- /dev/null +++ b/libm/src/math/sin.rs @@ -0,0 +1,80 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use core::f64; + +use math::trig_common::{_cos, _sin, rem_pio2}; + +// sin(x) +// Return sine function of x. +// +// kernel function: +// __sin ... sine function on [-pi/4,pi/4] +// __cos ... cose function on [-pi/4,pi/4] +// __rem_pio2 ... argument reduction routine +// +// Method. +// Let S,C and T denote the sin, cos and tan respectively on +// [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 +// in [-pi/4 , +pi/4], and let n = k mod 4. +// We have +// +// n sin(x) cos(x) tan(x) +// ---------------------------------------------------------- +// 0 S C T +// 1 C -S -1/T +// 2 -S -C T +// 3 -C S -1/T +// ---------------------------------------------------------- +// +// Special cases: +// Let trig be any of sin, cos, or tan. +// trig(+-INF) is NaN, with signals; +// trig(NaN) is that NaN; +// +// Accuracy: +// TRIG(x) returns trig(x) nearly rounded +pub fn sin(x: f64) -> f64 { + let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 + + /* High word of x. 
*/ + let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; + + /* |x| ~< pi/4 */ + if ix <= 0x3fe921fb { + if ix < 0x3e500000 { + /* |x| < 2**-26 */ + /* raise inexact if x != 0 and underflow if subnormal*/ + if ix < 0x00100000 { + force_eval!(x / x1p120); + } else { + force_eval!(x + x1p120); + } + return x; + } + return _sin(x, 0.0, 0); + } + + /* sin(Inf or NaN) is NaN */ + if ix >= 0x7ff00000 { + return x - x; + } + + /* argument reduction needed */ + let (n, y0, y1) = rem_pio2(x); + match n & 3 { + 0 => _sin(y0, y1, 1), + 1 => _cos(y0, y1), + 2 => -_sin(y0, y1, 1), + _ => -_cos(y0, y1), + } +} diff --git a/libm/src/math/trig_common.rs b/libm/src/math/trig_common.rs new file mode 100644 index 000000000..59e75e2dd --- /dev/null +++ b/libm/src/math/trig_common.rs @@ -0,0 +1,289 @@ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +/* origin: FreeBSD /usr/src/lib/msun/src/k_sin.c */ + +const S1: f64 = -1.66666666666666324348e-01; /* 0xBFC55555, 0x55555549 */ +const S2: f64 = 8.33333333332248946124e-03; /* 0x3F811111, 0x1110F8A6 */ +const S3: f64 = -1.98412698298579493134e-04; /* 0xBF2A01A0, 0x19C161D5 */ +const S4: f64 = 2.75573137070700676789e-06; /* 0x3EC71DE3, 0x57B1FE7D */ +const S5: f64 = -2.50507602534068634195e-08; /* 0xBE5AE5E6, 0x8A2B9CEB */ +const S6: f64 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ + +// kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 +// Input x is assumed to be bounded by ~pi/4 in magnitude. +// Input y is the tail of x. +// Input iy indicates whether y is 0. (if iy=0, y assume to be 0). +// +// Algorithm +// 1. 
Since sin(-x) = -sin(x), we need only to consider positive x. +// 2. Callers must return sin(-0) = -0 without calling here since our +// odd polynomial is not evaluated in a way that preserves -0. +// Callers may do the optimization sin(x) ~ x for tiny x. +// 3. sin(x) is approximated by a polynomial of degree 13 on +// [0,pi/4] +// 3 13 +// sin(x) ~ x + S1*x + ... + S6*x +// where +// +// |sin(x) 2 4 6 8 10 12 | -58 +// |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 +// | x | +// +// 4. sin(x+y) = sin(x) + sin'(x')*y +// ~ sin(x) + (1-x*x/2)*y +// For better accuracy, let +// 3 2 2 2 2 +// r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) +// then 3 2 +// sin(x) = x + (S1*x + (x *(r-y/2)+y)) +pub fn _sin(x: f64, y: f64, iy: i32) -> f64 { + let z = x * x; + let w = z * z; + let r = S2 + z * (S3 + z * S4) + z * w * (S5 + z * S6); + let v = z * x; + if iy == 0 { + x + v * (S1 + z * r) + } else { + x - ((z * (0.5 * y - v * r) - y) - v * S1) + } +} + +/* origin: FreeBSD /usr/src/lib/msun/src/k_cos.c */ +const C1: f64 = 4.16666666666666019037e-02; /* 0x3FA55555, 0x5555554C */ +const C2: f64 = -1.38888888888741095749e-03; /* 0xBF56C16C, 0x16C15177 */ +const C3: f64 = 2.48015872894767294178e-05; /* 0x3EFA01A0, 0x19CB1590 */ +const C4: f64 = -2.75573143513906633035e-07; /* 0xBE927E4F, 0x809C52AD */ +const C5: f64 = 2.08757232129817482790e-09; /* 0x3E21EE9E, 0xBDB4B1C4 */ +const C6: f64 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ + +// kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 +// Input x is assumed to be bounded by ~pi/4 in magnitude. +// Input y is the tail of x. +// +// Algorithm +// 1. Since cos(-x) = cos(x), we need only to consider positive x. +// 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. +// 3. cos(x) is approximated by a polynomial of degree 14 on +// [0,pi/4] +// 4 14 +// cos(x) ~ 1 - x*x/2 + C1*x + ... 
+ C6*x +// where the remez error is +// +// | 2 4 6 8 10 12 14 | -58 +// |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 +// | | +// +// 4 6 8 10 12 14 +// 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then +// cos(x) ~ 1 - x*x/2 + r +// since cos(x+y) ~ cos(x) - sin(x)*y +// ~ cos(x) - x*y, +// a correction term is necessary in cos(x) and hence +// cos(x+y) = 1 - (x*x/2 - (r - x*y)) +// For better accuracy, rearrange to +// cos(x+y) ~ w + (tmp + (r-x*y)) +// where w = 1 - x*x/2 and tmp is a tiny correction term +// (1 - x*x/2 == w + tmp exactly in infinite precision). +// The exactness of w + tmp in infinite precision depends on w +// and tmp having the same precision as x. If they have extra +// precision due to compiler bugs, then the extra precision is +// only good provided it is retained in all terms of the final +// expression for cos(). Retention happens in all cases tested +// under FreeBSD, so don't pessimize things by forcibly clipping +// any extra precision in w. +pub fn _cos(x: f64, y: f64) -> f64 { + let z = x * x; + let w = z * z; + let r = z * (C1 + z * (C2 + z * C3)) + w * w * (C4 + z * (C5 + z * C6)); + let hz = 0.5 * z; + let w = 1.0 - hz; + w + (((1.0 - w) - hz) + (z * r - x * y)) +} + +// origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c +// Optimized by Bruce D. Evans. */ + +// #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 +// #define EPS DBL_EPSILON +const EPS: f64 = 2.2204460492503131e-16; +// #elif FLT_EVAL_METHOD==2 +// #define EPS LDBL_EPSILON +// #endif + +// TODO: Support FLT_EVAL_METHOD? 
+ +#[allow(unused, non_upper_case_globals)] +const toint: f64 = 1.5 / EPS; +/// 53 bits of 2/pi +#[allow(unused, non_upper_case_globals)] +const invpio2: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ +/// first 33 bits of pi/2 +#[allow(unused, non_upper_case_globals)] +const pio2_1: f64 = 1.57079632673412561417e+00; /* 0x3FF921FB, 0x54400000 */ +/// pi/2 - pio2_1 +#[allow(unused, non_upper_case_globals)] +const pio2_1t: f64 = 6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */ +/// second 33 bits of pi/2 +#[allow(unused, non_upper_case_globals)] +const pio2_2: f64 = 6.07710050630396597660e-11; /* 0x3DD0B461, 0x1A600000 */ +/// pi/2 - (pio2_1+pio2_2) +#[allow(unused, non_upper_case_globals)] +const pio2_2t: f64 = 2.02226624879595063154e-21; /* 0x3BA3198A, 0x2E037073 */ +/// third 33 bits of pi/2 +#[allow(unused, non_upper_case_globals)] +const pio2_3: f64 = 2.02226624871116645580e-21; /* 0x3BA3198A, 0x2E000000 */ +/// pi/2 - (pio2_1+pio2_2+pio2_3) +#[allow(unused, non_upper_case_globals)] +const pio2_3t: f64 = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ + +/* __rem_pio2(x,y) + * + * return the remainder of x rem pi/2 in y[0]+y[1] + * use __rem_pio2_large() for large x + */ + +/* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ +/* +{ + union {double f; uint64_t i;} u = {x}; + double_t z,w,t,r,fn; + double tx[3],ty[2]; + uint32_t ix; + int sign, n, ex, ey, i; + + sign = u.i>>63; + ix = u.i>>32 & 0x7fffffff; + if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */ + if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */ + goto medium; /* cancellation -- use medium case */ + if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */ + if (!sign) { + z = x - pio2_1; /* one round good to 85 bits */ + y[0] = z - pio2_1t; + y[1] = (z-y[0]) - pio2_1t; + return 1; + } else { + z = x + pio2_1; + y[0] = z + pio2_1t; + y[1] = (z-y[0]) + pio2_1t; + return -1; + } + } else { + if (!sign) { + z = x - 2*pio2_1; + y[0] = z - 2*pio2_1t; + y[1] = 
(z-y[0]) - 2*pio2_1t; + return 2; + } else { + z = x + 2*pio2_1; + y[0] = z + 2*pio2_1t; + y[1] = (z-y[0]) + 2*pio2_1t; + return -2; + } + } + } + if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */ + if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */ + if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */ + goto medium; + if (!sign) { + z = x - 3*pio2_1; + y[0] = z - 3*pio2_1t; + y[1] = (z-y[0]) - 3*pio2_1t; + return 3; + } else { + z = x + 3*pio2_1; + y[0] = z + 3*pio2_1t; + y[1] = (z-y[0]) + 3*pio2_1t; + return -3; + } + } else { + if (ix == 0x401921fb) /* |x| ~= 4pi/2 */ + goto medium; + if (!sign) { + z = x - 4*pio2_1; + y[0] = z - 4*pio2_1t; + y[1] = (z-y[0]) - 4*pio2_1t; + return 4; + } else { + z = x + 4*pio2_1; + y[0] = z + 4*pio2_1t; + y[1] = (z-y[0]) + 4*pio2_1t; + return -4; + } + } + } + if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */ +medium: + /* rint(x/(pi/2)), Assume round-to-nearest. */ + fn = (double_t)x*invpio2 + toint - toint; + n = (int32_t)fn; + r = x - fn*pio2_1; + w = fn*pio2_1t; /* 1st round, good to 85 bits */ + y[0] = r - w; + u.f = y[0]; + ey = u.i>>52 & 0x7ff; + ex = ix>>20; + if (ex - ey > 16) { /* 2nd round, good to 118 bits */ + t = r; + w = fn*pio2_2; + r = t - w; + w = fn*pio2_2t - ((t-r)-w); + y[0] = r - w; + u.f = y[0]; + ey = u.i>>52 & 0x7ff; + if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */ + t = r; + w = fn*pio2_3; + r = t - w; + w = fn*pio2_3t - ((t-r)-w); + y[0] = r - w; + } + } + y[1] = (r - y[0]) - w; + return n; + } + /* + * all other (large) arguments + */ + if (ix >= 0x7ff00000) { /* x is inf or NaN */ + y[0] = y[1] = x - x; + return 0; + } + /* set z = scalbn(|x|,-ilogb(x)+23) */ + u.f = x; + u.i &= (uint64_t)-1>>12; + u.i |= (uint64_t)(0x3ff + 23)<<52; + z = u.f; + for (i=0; i < 2; i++) { + tx[i] = (double)(int32_t)z; + z = (z-tx[i])*0x1p24; + } + tx[i] = z; + /* skip zero terms, first term is non-zero */ + while (tx[i] == 0.0) + i--; + n = __rem_pio2_large(tx,ty,(int)(ix>>20)-(0x3ff+23),i+1,1); + if (sign) 
{ + y[0] = -ty[0]; + y[1] = -ty[1]; + return -n; + } + y[0] = ty[0]; + y[1] = ty[1]; + return n; +} +*/ + +pub fn rem_pio2(_x: f64) -> (i32, f64, f64) { + unimplemented!() +} From 85ffabe58267fec370f62711f5d738e1adc132ed Mon Sep 17 00:00:00 2001 From: C Jones Date: Fri, 13 Jul 2018 23:03:21 -0400 Subject: [PATCH 0083/1459] Convert rem_pio2 code, split up modules --- libm/src/math/cos.rs | 39 +++-- libm/src/math/exp2.rs | 2 +- libm/src/math/k_cos.rs | 62 +++++++ libm/src/math/k_sin.rs | 57 +++++++ libm/src/math/mod.rs | 10 +- libm/src/math/rem_pio2.rs | 187 +++++++++++++++++++++ libm/src/math/sin.rs | 41 +++-- libm/src/math/trig_common.rs | 289 -------------------------------- libm/test-generator/src/main.rs | 4 +- 9 files changed, 356 insertions(+), 335 deletions(-) create mode 100644 libm/src/math/k_cos.rs create mode 100644 libm/src/math/k_sin.rs create mode 100644 libm/src/math/rem_pio2.rs delete mode 100644 libm/src/math/trig_common.rs diff --git a/libm/src/math/cos.rs b/libm/src/math/cos.rs index 91412c648..e6e9b3736 100644 --- a/libm/src/math/cos.rs +++ b/libm/src/math/cos.rs @@ -1,24 +1,23 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/s_cos.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ +// origin: FreeBSD /usr/src/lib/msun/src/s_cos.c */ +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. 
+// ==================================================== -use math::trig_common::{_cos, _sin, rem_pio2}; +use super::{k_cos, k_sin, rem_pio2}; // cos(x) // Return cosine function of x. // // kernel function: -// __sin ... sine function on [-pi/4,pi/4] -// __cos ... cosine function on [-pi/4,pi/4] -// __rem_pio2 ... argument reduction routine +// k_sin ... sine function on [-pi/4,pi/4] +// k_cos ... cosine function on [-pi/4,pi/4] +// rem_pio2 ... argument reduction routine // // Method. // Let S,C and T denote the sin, cos and tan respectively on @@ -54,7 +53,7 @@ pub fn cos(x: f64) -> f64 { return 1.0; } } - return _cos(x, 0.0); + return k_cos(x, 0.0); } /* cos(Inf or NaN) is NaN */ @@ -65,9 +64,9 @@ pub fn cos(x: f64) -> f64 { /* argument reduction needed */ let (n, y0, y1) = rem_pio2(x); match n & 3 { - 0 => _cos(y0, y1), - 1 => -_sin(y0, y1, 1), - 2 => -_cos(y0, y1), - _ => _sin(y0, y1, 1), + 0 => k_cos(y0, y1), + 1 => -k_sin(y0, y1, 1), + 2 => -k_cos(y0, y1), + _ => k_sin(y0, y1, 1), } } diff --git a/libm/src/math/exp2.rs b/libm/src/math/exp2.rs index 0ab119896..3952e9300 100644 --- a/libm/src/math/exp2.rs +++ b/libm/src/math/exp2.rs @@ -24,7 +24,7 @@ // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF // SUCH DAMAGE. -use super::scalbn::scalbn; +use super::scalbn; const TBLSIZE: usize = 256; diff --git a/libm/src/math/k_cos.rs b/libm/src/math/k_cos.rs new file mode 100644 index 000000000..693950d1d --- /dev/null +++ b/libm/src/math/k_cos.rs @@ -0,0 +1,62 @@ +// origin: FreeBSD /usr/src/lib/msun/src/k_cos.c +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunSoft, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. 
+// ==================================================== + +const C1: f64 = 4.16666666666666019037e-02; /* 0x3FA55555, 0x5555554C */ +const C2: f64 = -1.38888888888741095749e-03; /* 0xBF56C16C, 0x16C15177 */ +const C3: f64 = 2.48015872894767294178e-05; /* 0x3EFA01A0, 0x19CB1590 */ +const C4: f64 = -2.75573143513906633035e-07; /* 0xBE927E4F, 0x809C52AD */ +const C5: f64 = 2.08757232129817482790e-09; /* 0x3E21EE9E, 0xBDB4B1C4 */ +const C6: f64 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ + +// kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 +// Input x is assumed to be bounded by ~pi/4 in magnitude. +// Input y is the tail of x. +// +// Algorithm +// 1. Since cos(-x) = cos(x), we need only to consider positive x. +// 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. +// 3. cos(x) is approximated by a polynomial of degree 14 on +// [0,pi/4] +// 4 14 +// cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x +// where the remez error is +// +// | 2 4 6 8 10 12 14 | -58 +// |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 +// | | +// +// 4 6 8 10 12 14 +// 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then +// cos(x) ~ 1 - x*x/2 + r +// since cos(x+y) ~ cos(x) - sin(x)*y +// ~ cos(x) - x*y, +// a correction term is necessary in cos(x) and hence +// cos(x+y) = 1 - (x*x/2 - (r - x*y)) +// For better accuracy, rearrange to +// cos(x+y) ~ w + (tmp + (r-x*y)) +// where w = 1 - x*x/2 and tmp is a tiny correction term +// (1 - x*x/2 == w + tmp exactly in infinite precision). +// The exactness of w + tmp in infinite precision depends on w +// and tmp having the same precision as x. If they have extra +// precision due to compiler bugs, then the extra precision is +// only good provided it is retained in all terms of the final +// expression for cos(). Retention happens in all cases tested +// under FreeBSD, so don't pessimize things by forcibly clipping +// any extra precision in w. 
+#[inline] +pub fn k_cos(x: f64, y: f64) -> f64 { + let z = x * x; + let w = z * z; + let r = z * (C1 + z * (C2 + z * C3)) + w * w * (C4 + z * (C5 + z * C6)); + let hz = 0.5 * z; + let w = 1.0 - hz; + w + (((1.0 - w) - hz) + (z * r - x * y)) +} diff --git a/libm/src/math/k_sin.rs b/libm/src/math/k_sin.rs new file mode 100644 index 000000000..3e07c3594 --- /dev/null +++ b/libm/src/math/k_sin.rs @@ -0,0 +1,57 @@ +// origin: FreeBSD /usr/src/lib/msun/src/k_sin.c +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunSoft, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== + +const S1: f64 = -1.66666666666666324348e-01; /* 0xBFC55555, 0x55555549 */ +const S2: f64 = 8.33333333332248946124e-03; /* 0x3F811111, 0x1110F8A6 */ +const S3: f64 = -1.98412698298579493134e-04; /* 0xBF2A01A0, 0x19C161D5 */ +const S4: f64 = 2.75573137070700676789e-06; /* 0x3EC71DE3, 0x57B1FE7D */ +const S5: f64 = -2.50507602534068634195e-08; /* 0xBE5AE5E6, 0x8A2B9CEB */ +const S6: f64 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ + +// kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 +// Input x is assumed to be bounded by ~pi/4 in magnitude. +// Input y is the tail of x. +// Input iy indicates whether y is 0. (if iy=0, y assume to be 0). +// +// Algorithm +// 1. Since sin(-x) = -sin(x), we need only to consider positive x. +// 2. Callers must return sin(-0) = -0 without calling here since our +// odd polynomial is not evaluated in a way that preserves -0. +// Callers may do the optimization sin(x) ~ x for tiny x. +// 3. sin(x) is approximated by a polynomial of degree 13 on +// [0,pi/4] +// 3 13 +// sin(x) ~ x + S1*x + ... 
+ S6*x +// where +// +// |sin(x) 2 4 6 8 10 12 | -58 +// |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 +// | x | +// +// 4. sin(x+y) = sin(x) + sin'(x')*y +// ~ sin(x) + (1-x*x/2)*y +// For better accuracy, let +// 3 2 2 2 2 +// r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) +// then 3 2 +// sin(x) = x + (S1*x + (x *(r-y/2)+y)) +#[inline] +pub fn k_sin(x: f64, y: f64, iy: i32) -> f64 { + let z = x * x; + let w = z * z; + let r = S2 + z * (S3 + z * S4) + z * w * (S5 + z * S6); + let v = z * x; + if iy == 0 { + x + v * (S1 + z * r) + } else { + x - ((z * (0.5 * y - v * r) - y) - v * S1) + } +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index a657da810..7b3d9abee 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -6,6 +6,7 @@ macro_rules! force_eval { }; } +// Public modules mod acos; mod acosf; mod asin; @@ -105,16 +106,23 @@ pub use self::tanf::tanf; pub use self::trunc::trunc; pub use self::truncf::truncf; +// Private modules +mod k_cos; mod k_cosf; +mod k_sin; mod k_sinf; mod k_tanf; +mod rem_pio2; mod rem_pio2_large; mod rem_pio2f; -mod trig_common; +// Private re-imports +use self::k_cos::k_cos; use self::k_cosf::k_cosf; +use self::k_sin::k_sin; use self::k_sinf::k_sinf; use self::k_tanf::k_tanf; +use self::rem_pio2::rem_pio2; use self::rem_pio2_large::rem_pio2_large; use self::rem_pio2f::rem_pio2f; diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs new file mode 100644 index 000000000..68db7056b --- /dev/null +++ b/libm/src/math/rem_pio2.rs @@ -0,0 +1,187 @@ +// origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. 
+// ==================================================== +// +// Optimized by Bruce D. Evans. */ + +use super::rem_pio2_large; + +// #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 +// #define EPS DBL_EPSILON +const EPS: f64 = 2.2204460492503131e-16; +// #elif FLT_EVAL_METHOD==2 +// #define EPS LDBL_EPSILON +// #endif + +// TODO: Support FLT_EVAL_METHOD? + +const TO_INT: f64 = 1.5 / EPS; +/// 53 bits of 2/pi +const INV_PIO2: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ +/// first 33 bits of pi/2 +const PIO2_1: f64 = 1.57079632673412561417e+00; /* 0x3FF921FB, 0x54400000 */ +/// pi/2 - PIO2_1 +const PIO2_1T: f64 = 6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */ +/// second 33 bits of pi/2 +const PIO2_2: f64 = 6.07710050630396597660e-11; /* 0x3DD0B461, 0x1A600000 */ +/// pi/2 - (PIO2_1+PIO2_2) +const PIO2_2T: f64 = 2.02226624879595063154e-21; /* 0x3BA3198A, 0x2E037073 */ +/// third 33 bits of pi/2 +const PIO2_3: f64 = 2.02226624871116645580e-21; /* 0x3BA3198A, 0x2E000000 */ +/// pi/2 - (PIO2_1+PIO2_2+PIO2_3) +const PIO2_3T: f64 = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ + +// return the remainder of x rem pi/2 in y[0]+y[1] +// use rem_pio2_large() for large x +// +// caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ +pub fn rem_pio2(x: f64) -> (i32, f64, f64) { + let x1p24 = f64::from_bits(0x7041); + + let sign = (f64::to_bits(x) >> 63) as i32; + let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; + + fn medium(x: f64, ix: u32) -> (i32, f64, f64) { + /* rint(x/(pi/2)), Assume round-to-nearest. 
*/ + let f_n = x as f64 * INV_PIO2 + TO_INT - TO_INT; + let n = f_n as i32; + let mut r = x - f_n * PIO2_1; + let mut w = f_n * PIO2_1T; /* 1st round, good to 85 bits */ + let mut y0 = r - w; + let ui = f64::to_bits(y0); + let ey = (ui >> 52) as i32 & 0x7ff; + let ex = (ix >> 20) as i32; + if ex - ey > 16 { + /* 2nd round, good to 118 bits */ + let t = r; + w = f_n * PIO2_2; + r = t - w; + w = f_n * PIO2_2T - ((t - r) - w); + y0 = r - w; + let ey = (f64::to_bits(y0) >> 52) as i32 & 0x7ff; + if ex - ey > 49 { + /* 3rd round, good to 151 bits, covers all cases */ + let t = r; + w = f_n * PIO2_3; + r = t - w; + w = f_n * PIO2_3T - ((t - r) - w); + y0 = r - w; + } + } + let y1 = (r - y0) - w; + return (n, y0, y1); + } + + if ix <= 0x400f6a7a { + /* |x| ~<= 5pi/4 */ + if (ix & 0xfffff) == 0x921fb { + /* |x| ~= pi/2 or 2pi/2 */ + medium(x, ix); /* cancellation -- use medium case */ + } + if ix <= 0x4002d97c { + /* |x| ~<= 3pi/4 */ + if sign == 0 { + let z = x - PIO2_1; /* one round good to 85 bits */ + let y0 = z - PIO2_1T; + let y1 = (z - y0) - PIO2_1T; + return (1, y0, y1); + } else { + let z = x + PIO2_1; + let y0 = z + PIO2_1T; + let y1 = (z - y0) + PIO2_1T; + return (-1, y0, y1); + } + } else { + if sign == 0 { + let z = x - 2.0 * PIO2_1; + let y0 = z - 2.0 * PIO2_1T; + let y1 = (z - y0) - 2.0 * PIO2_1T; + return (2, y0, y1); + } else { + let z = x + 2.0 * PIO2_1; + let y0 = z + 2.0 * PIO2_1T; + let y1 = (z - y0) + 2.0 * PIO2_1T; + return (-2, y0, y1); + } + } + } + if ix <= 0x401c463b { + /* |x| ~<= 9pi/4 */ + if ix <= 0x4015fdbc { + /* |x| ~<= 7pi/4 */ + if ix == 0x4012d97c { + /* |x| ~= 3pi/2 */ + return medium(x, ix); + } + if sign == 0 { + let z = x - 3.0 * PIO2_1; + let y0 = z - 3.0 * PIO2_1T; + let y1 = (z - y0) - 3.0 * PIO2_1T; + return (3, y0, y1); + } else { + let z = x + 3.0 * PIO2_1; + let y0 = z + 3.0 * PIO2_1T; + let y1 = (z - y0) + 3.0 * PIO2_1T; + return (-3, y0, y1); + } + } else { + if ix == 0x401921fb { + /* |x| ~= 4pi/2 */ + return medium(x, ix); 
+ } + if sign == 0 { + let z = x - 4.0 * PIO2_1; + let y0 = z - 4.0 * PIO2_1T; + let y1 = (z - y0) - 4.0 * PIO2_1T; + return (4, y0, y1); + } else { + let z = x + 4.0 * PIO2_1; + let y0 = z + 4.0 * PIO2_1T; + let y1 = (z - y0) + 4.0 * PIO2_1T; + return (-4, y0, y1); + } + } + } + if ix < 0x413921fb { + /* |x| ~< 2^20*(pi/2), medium size */ + return medium(x, ix); + } + /* + * all other (large) arguments + */ + if ix >= 0x7ff00000 { + /* x is inf or NaN */ + let y0 = x - x; + let y1 = y0; + return (0, y0, y1); + } + /* set z = scalbn(|x|,-ilogb(x)+23) */ + let mut ui = f64::to_bits(x); + ui &= (!1) >> 12; + ui |= (0x3ff + 23) << 52; + let mut z = f64::from_bits(ui); + let mut tx = [0.0; 3]; + for i in 0..2 { + tx[i] = z as i32 as f64; + z = (z - tx[i]) * x1p24; + } + tx[2] = z; + /* skip zero terms, first term is non-zero */ + let mut i = 2; + while tx[i] == 0.0 { + i -= 1; + } + let mut ty = [0.0; 3]; + let n = rem_pio2_large(&tx[..=i], &mut ty, ((ix >> 20) - (0x3ff + 23)) as i32, 1); + if sign != 0 { + return (-n, -ty[0], -ty[1]); + } + return (n, ty[0], ty[1]); +} diff --git a/libm/src/math/sin.rs b/libm/src/math/sin.rs index 0717c1ae3..13eb30248 100644 --- a/libm/src/math/sin.rs +++ b/libm/src/math/sin.rs @@ -1,26 +1,23 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -use core::f64; +// origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */ +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. 
+// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== -use math::trig_common::{_cos, _sin, rem_pio2}; +use super::{k_cos, k_sin, rem_pio2}; // sin(x) // Return sine function of x. // // kernel function: -// __sin ... sine function on [-pi/4,pi/4] -// __cos ... cose function on [-pi/4,pi/4] -// __rem_pio2 ... argument reduction routine +// k_sin ... sine function on [-pi/4,pi/4] +// k_cos ... cose function on [-pi/4,pi/4] +// rem_pio2 ... argument reduction routine // // Method. // Let S,C and T denote the sin, cos and tan respectively on @@ -61,7 +58,7 @@ pub fn sin(x: f64) -> f64 { } return x; } - return _sin(x, 0.0, 0); + return k_sin(x, 0.0, 0); } /* sin(Inf or NaN) is NaN */ @@ -72,9 +69,9 @@ pub fn sin(x: f64) -> f64 { /* argument reduction needed */ let (n, y0, y1) = rem_pio2(x); match n & 3 { - 0 => _sin(y0, y1, 1), - 1 => _cos(y0, y1), - 2 => -_sin(y0, y1, 1), - _ => -_cos(y0, y1), + 0 => k_sin(y0, y1, 1), + 1 => k_cos(y0, y1), + 2 => -k_sin(y0, y1, 1), + _ => -k_cos(y0, y1), } } diff --git a/libm/src/math/trig_common.rs b/libm/src/math/trig_common.rs deleted file mode 100644 index 59e75e2dd..000000000 --- a/libm/src/math/trig_common.rs +++ /dev/null @@ -1,289 +0,0 @@ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. 
- * ==================================================== - */ - -/* origin: FreeBSD /usr/src/lib/msun/src/k_sin.c */ - -const S1: f64 = -1.66666666666666324348e-01; /* 0xBFC55555, 0x55555549 */ -const S2: f64 = 8.33333333332248946124e-03; /* 0x3F811111, 0x1110F8A6 */ -const S3: f64 = -1.98412698298579493134e-04; /* 0xBF2A01A0, 0x19C161D5 */ -const S4: f64 = 2.75573137070700676789e-06; /* 0x3EC71DE3, 0x57B1FE7D */ -const S5: f64 = -2.50507602534068634195e-08; /* 0xBE5AE5E6, 0x8A2B9CEB */ -const S6: f64 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ - -// kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 -// Input x is assumed to be bounded by ~pi/4 in magnitude. -// Input y is the tail of x. -// Input iy indicates whether y is 0. (if iy=0, y assume to be 0). -// -// Algorithm -// 1. Since sin(-x) = -sin(x), we need only to consider positive x. -// 2. Callers must return sin(-0) = -0 without calling here since our -// odd polynomial is not evaluated in a way that preserves -0. -// Callers may do the optimization sin(x) ~ x for tiny x. -// 3. sin(x) is approximated by a polynomial of degree 13 on -// [0,pi/4] -// 3 13 -// sin(x) ~ x + S1*x + ... + S6*x -// where -// -// |sin(x) 2 4 6 8 10 12 | -58 -// |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 -// | x | -// -// 4. 
sin(x+y) = sin(x) + sin'(x')*y -// ~ sin(x) + (1-x*x/2)*y -// For better accuracy, let -// 3 2 2 2 2 -// r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) -// then 3 2 -// sin(x) = x + (S1*x + (x *(r-y/2)+y)) -pub fn _sin(x: f64, y: f64, iy: i32) -> f64 { - let z = x * x; - let w = z * z; - let r = S2 + z * (S3 + z * S4) + z * w * (S5 + z * S6); - let v = z * x; - if iy == 0 { - x + v * (S1 + z * r) - } else { - x - ((z * (0.5 * y - v * r) - y) - v * S1) - } -} - -/* origin: FreeBSD /usr/src/lib/msun/src/k_cos.c */ -const C1: f64 = 4.16666666666666019037e-02; /* 0x3FA55555, 0x5555554C */ -const C2: f64 = -1.38888888888741095749e-03; /* 0xBF56C16C, 0x16C15177 */ -const C3: f64 = 2.48015872894767294178e-05; /* 0x3EFA01A0, 0x19CB1590 */ -const C4: f64 = -2.75573143513906633035e-07; /* 0xBE927E4F, 0x809C52AD */ -const C5: f64 = 2.08757232129817482790e-09; /* 0x3E21EE9E, 0xBDB4B1C4 */ -const C6: f64 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ - -// kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 -// Input x is assumed to be bounded by ~pi/4 in magnitude. -// Input y is the tail of x. -// -// Algorithm -// 1. Since cos(-x) = cos(x), we need only to consider positive x. -// 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. -// 3. cos(x) is approximated by a polynomial of degree 14 on -// [0,pi/4] -// 4 14 -// cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x -// where the remez error is -// -// | 2 4 6 8 10 12 14 | -58 -// |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 -// | | -// -// 4 6 8 10 12 14 -// 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then -// cos(x) ~ 1 - x*x/2 + r -// since cos(x+y) ~ cos(x) - sin(x)*y -// ~ cos(x) - x*y, -// a correction term is necessary in cos(x) and hence -// cos(x+y) = 1 - (x*x/2 - (r - x*y)) -// For better accuracy, rearrange to -// cos(x+y) ~ w + (tmp + (r-x*y)) -// where w = 1 - x*x/2 and tmp is a tiny correction term -// (1 - x*x/2 == w + tmp exactly in infinite precision). 
-// The exactness of w + tmp in infinite precision depends on w -// and tmp having the same precision as x. If they have extra -// precision due to compiler bugs, then the extra precision is -// only good provided it is retained in all terms of the final -// expression for cos(). Retention happens in all cases tested -// under FreeBSD, so don't pessimize things by forcibly clipping -// any extra precision in w. -pub fn _cos(x: f64, y: f64) -> f64 { - let z = x * x; - let w = z * z; - let r = z * (C1 + z * (C2 + z * C3)) + w * w * (C4 + z * (C5 + z * C6)); - let hz = 0.5 * z; - let w = 1.0 - hz; - w + (((1.0 - w) - hz) + (z * r - x * y)) -} - -// origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c -// Optimized by Bruce D. Evans. */ - -// #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 -// #define EPS DBL_EPSILON -const EPS: f64 = 2.2204460492503131e-16; -// #elif FLT_EVAL_METHOD==2 -// #define EPS LDBL_EPSILON -// #endif - -// TODO: Support FLT_EVAL_METHOD? - -#[allow(unused, non_upper_case_globals)] -const toint: f64 = 1.5 / EPS; -/// 53 bits of 2/pi -#[allow(unused, non_upper_case_globals)] -const invpio2: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ -/// first 33 bits of pi/2 -#[allow(unused, non_upper_case_globals)] -const pio2_1: f64 = 1.57079632673412561417e+00; /* 0x3FF921FB, 0x54400000 */ -/// pi/2 - pio2_1 -#[allow(unused, non_upper_case_globals)] -const pio2_1t: f64 = 6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */ -/// second 33 bits of pi/2 -#[allow(unused, non_upper_case_globals)] -const pio2_2: f64 = 6.07710050630396597660e-11; /* 0x3DD0B461, 0x1A600000 */ -/// pi/2 - (pio2_1+pio2_2) -#[allow(unused, non_upper_case_globals)] -const pio2_2t: f64 = 2.02226624879595063154e-21; /* 0x3BA3198A, 0x2E037073 */ -/// third 33 bits of pi/2 -#[allow(unused, non_upper_case_globals)] -const pio2_3: f64 = 2.02226624871116645580e-21; /* 0x3BA3198A, 0x2E000000 */ -/// pi/2 - (pio2_1+pio2_2+pio2_3) -#[allow(unused, non_upper_case_globals)] -const 
pio2_3t: f64 = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ - -/* __rem_pio2(x,y) - * - * return the remainder of x rem pi/2 in y[0]+y[1] - * use __rem_pio2_large() for large x - */ - -/* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ -/* -{ - union {double f; uint64_t i;} u = {x}; - double_t z,w,t,r,fn; - double tx[3],ty[2]; - uint32_t ix; - int sign, n, ex, ey, i; - - sign = u.i>>63; - ix = u.i>>32 & 0x7fffffff; - if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */ - if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */ - goto medium; /* cancellation -- use medium case */ - if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */ - if (!sign) { - z = x - pio2_1; /* one round good to 85 bits */ - y[0] = z - pio2_1t; - y[1] = (z-y[0]) - pio2_1t; - return 1; - } else { - z = x + pio2_1; - y[0] = z + pio2_1t; - y[1] = (z-y[0]) + pio2_1t; - return -1; - } - } else { - if (!sign) { - z = x - 2*pio2_1; - y[0] = z - 2*pio2_1t; - y[1] = (z-y[0]) - 2*pio2_1t; - return 2; - } else { - z = x + 2*pio2_1; - y[0] = z + 2*pio2_1t; - y[1] = (z-y[0]) + 2*pio2_1t; - return -2; - } - } - } - if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */ - if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */ - if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */ - goto medium; - if (!sign) { - z = x - 3*pio2_1; - y[0] = z - 3*pio2_1t; - y[1] = (z-y[0]) - 3*pio2_1t; - return 3; - } else { - z = x + 3*pio2_1; - y[0] = z + 3*pio2_1t; - y[1] = (z-y[0]) + 3*pio2_1t; - return -3; - } - } else { - if (ix == 0x401921fb) /* |x| ~= 4pi/2 */ - goto medium; - if (!sign) { - z = x - 4*pio2_1; - y[0] = z - 4*pio2_1t; - y[1] = (z-y[0]) - 4*pio2_1t; - return 4; - } else { - z = x + 4*pio2_1; - y[0] = z + 4*pio2_1t; - y[1] = (z-y[0]) + 4*pio2_1t; - return -4; - } - } - } - if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */ -medium: - /* rint(x/(pi/2)), Assume round-to-nearest. 
*/ - fn = (double_t)x*invpio2 + toint - toint; - n = (int32_t)fn; - r = x - fn*pio2_1; - w = fn*pio2_1t; /* 1st round, good to 85 bits */ - y[0] = r - w; - u.f = y[0]; - ey = u.i>>52 & 0x7ff; - ex = ix>>20; - if (ex - ey > 16) { /* 2nd round, good to 118 bits */ - t = r; - w = fn*pio2_2; - r = t - w; - w = fn*pio2_2t - ((t-r)-w); - y[0] = r - w; - u.f = y[0]; - ey = u.i>>52 & 0x7ff; - if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */ - t = r; - w = fn*pio2_3; - r = t - w; - w = fn*pio2_3t - ((t-r)-w); - y[0] = r - w; - } - } - y[1] = (r - y[0]) - w; - return n; - } - /* - * all other (large) arguments - */ - if (ix >= 0x7ff00000) { /* x is inf or NaN */ - y[0] = y[1] = x - x; - return 0; - } - /* set z = scalbn(|x|,-ilogb(x)+23) */ - u.f = x; - u.i &= (uint64_t)-1>>12; - u.i |= (uint64_t)(0x3ff + 23)<<52; - z = u.f; - for (i=0; i < 2; i++) { - tx[i] = (double)(int32_t)z; - z = (z-tx[i])*0x1p24; - } - tx[i] = z; - /* skip zero terms, first term is non-zero */ - while (tx[i] == 0.0) - i--; - n = __rem_pio2_large(tx,ty,(int)(ix>>20)-(0x3ff+23),i+1,1); - if (sign) { - y[0] = -ty[0]; - y[1] = -ty[1]; - return -n; - } - y[0] = ty[0]; - y[1] = ty[1]; - return n; -} -*/ - -pub fn rem_pio2(_x: f64) -> (i32, f64, f64) { - unimplemented!() -} diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index b639cf11b..c8e35cea7 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -702,7 +702,7 @@ f64_f64! { // atan, cbrt, ceil, - // cos, + cos, // cosh, exp, exp2, @@ -713,7 +713,7 @@ f64_f64! 
{ log1p, log2, round, - // sin, + sin, // sinh, sqrt, // tan, From 5a57b33d7cbdf6870bbb087e6616df5d469ec03a Mon Sep 17 00:00:00 2001 From: C Jones Date: Sat, 14 Jul 2018 18:40:52 -0400 Subject: [PATCH 0084/1459] Fix x1p24 constant --- libm/src/math/rem_pio2.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 68db7056b..47bab6e65 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -43,7 +43,7 @@ const PIO2_3T: f64 = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ // // caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ pub fn rem_pio2(x: f64) -> (i32, f64, f64) { - let x1p24 = f64::from_bits(0x7041); + let x1p24 = f64::from_bits(0x4170000000000000); let sign = (f64::to_bits(x) >> 63) as i32; let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; From a77b2302e9e1367c6160c0bf98500f1494feeb71 Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Sun, 15 Jul 2018 02:06:20 +0300 Subject: [PATCH 0085/1459] coshf tanhf and atan2f --- libm/src/lib.rs | 6 --- libm/src/math/atan2f.rs | 71 +++++++++++++++++++++++++++++++++ libm/src/math/coshf.rs | 33 +++++++++++++++ libm/src/math/k_expo2f.rs | 14 +++++++ libm/src/math/mod.rs | 11 ++++- libm/src/math/tanhf.rs | 39 ++++++++++++++++++ libm/test-generator/src/main.rs | 6 +-- 7 files changed, 169 insertions(+), 11 deletions(-) create mode 100644 libm/src/math/atan2f.rs create mode 100644 libm/src/math/coshf.rs create mode 100644 libm/src/math/k_expo2f.rs create mode 100644 libm/src/math/tanhf.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index df7ee813f..969e7c45b 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -96,7 +96,6 @@ pub trait F32Ext: private::Sealed { fn atan(self) -> Self; - #[cfg(todo)] fn atan2(self, other: Self) -> Self; #[cfg(todo)] @@ -112,10 +111,8 @@ pub trait F32Ext: private::Sealed { #[cfg(todo)] fn sinh(self) -> Self; - #[cfg(todo)] fn cosh(self) -> Self; - #[cfg(todo)] fn 
tanh(self) -> Self; #[cfg(todo)] @@ -272,7 +269,6 @@ impl F32Ext for f32 { atanf(self) } - #[cfg(todo)] #[inline] fn atan2(self, other: Self) -> Self { atan2f(self, other) @@ -294,13 +290,11 @@ impl F32Ext for f32 { sinhf(self) } - #[cfg(todo)] #[inline] fn cosh(self) -> Self { coshf(self) } - #[cfg(todo)] #[inline] fn tanh(self) -> Self { tanhf(self) diff --git a/libm/src/math/atan2f.rs b/libm/src/math/atan2f.rs new file mode 100644 index 000000000..a232ffdd6 --- /dev/null +++ b/libm/src/math/atan2f.rs @@ -0,0 +1,71 @@ +use super::atanf; +use super::fabsf; + +const PI: f32 = 3.1415927410e+00; /* 0x40490fdb */ +const PI_LO: f32 = -8.7422776573e-08; /* 0xb3bbbd2e */ + +#[inline] +pub fn atan2f(y: f32, x: f32) -> f32 { + if x.is_nan() || y.is_nan() { + return x + y; + } + let mut ix = x.to_bits(); + let mut iy = y.to_bits(); + + if ix == 0x3f800000 { + /* x=1.0 */ + return atanf(y); + } + let m = ((iy >> 31) & 1) | ((ix >> 30) & 2); /* 2*sign(x)+sign(y) */ + ix &= 0x7fffffff; + iy &= 0x7fffffff; + + /* when y = 0 */ + if iy == 0 { + return match m { + 0 | 1 => y, /* atan(+-0,+anything)=+-0 */ + 2 => PI, /* atan(+0,-anything) = pi */ + 3 | _ => -PI, /* atan(-0,-anything) =-pi */ + }; + } + /* when x = 0 */ + if ix == 0 { + return if m & 1 != 0 { -PI / 2. } else { PI / 2. }; + } + /* when x is INF */ + if ix == 0x7f800000 { + return if iy == 0x7f800000 { + match m { + 0 => PI / 4., /* atan(+INF,+INF) */ + 1 => -PI / 4., /* atan(-INF,+INF) */ + 2 => 3. * PI / 4., /* atan(+INF,-INF)*/ + 3 | _ => -3. * PI / 4., /* atan(-INF,-INF)*/ + } + } else { + match m { + 0 => 0., /* atan(+...,+INF) */ + 1 => -0., /* atan(-...,+INF) */ + 2 => PI, /* atan(+...,-INF) */ + 3 | _ => -PI, /* atan(-...,-INF) */ + } + }; + } + /* |y/x| > 0x1p26 */ + if (ix + (26 << 23) < iy) || (iy == 0x7f800000) { + return if m & 1 != 0 { -PI / 2. } else { PI / 2. }; + } + + /* z = atan(|y/x|) with correct underflow */ + let z = if (m & 2 != 0) && (iy + (26 << 23) < ix) { + /*|y/x| < 0x1p-26, x < 0 */ + 0. 
+ } else { + atanf(fabsf(y / x)) + }; + match m { + 0 => z, /* atan(+,+) */ + 1 => -z, /* atan(-,+) */ + 2 => PI - (z - PI_LO), /* atan(+,-) */ + _ => (z - PI_LO) - PI, /* case 3 */ /* atan(-,-) */ + } +} diff --git a/libm/src/math/coshf.rs b/libm/src/math/coshf.rs new file mode 100644 index 000000000..bd468f5da --- /dev/null +++ b/libm/src/math/coshf.rs @@ -0,0 +1,33 @@ +use super::expf; +use super::expm1f; +use super::k_expo2f; + +#[inline] +pub fn coshf(mut x: f32) -> f32 { + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + /* |x| */ + let mut ix = x.to_bits(); + ix &= 0x7fffffff; + x = f32::from_bits(ix); + let w = ix; + + /* |x| < log(2) */ + if w < 0x3f317217 { + if w < (0x3f800000 - (12 << 23)) { + force_eval!(x + x1p120); + return 1.; + } + let t = expm1f(x); + return 1. + t * t / (2. * (1. + t)); + } + + /* |x| < log(FLT_MAX) */ + if w < 0x42b17217 { + let t = expf(x); + return 0.5 * (t + 1. / t); + } + + /* |x| > log(FLT_MAX) or nan */ + k_expo2f(x) +} diff --git a/libm/src/math/k_expo2f.rs b/libm/src/math/k_expo2f.rs new file mode 100644 index 000000000..031a0bdd0 --- /dev/null +++ b/libm/src/math/k_expo2f.rs @@ -0,0 +1,14 @@ +use super::expf; + +/* k is such that k*ln2 has minimal relative error and x - kln2 > log(FLT_MIN) */ +const K: i32 = 235; +const K_LN2: f32 = 162.89; /* 0x1.45c778p+7f */ + +/* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ +#[inline] +pub(crate) fn k_expo2f(x: f32) -> f32 { + /* note that k is odd and scale*scale overflows */ + let scale = f32::from_bits(((0x7f + K / 2) as u32) << 23); + /* exp(x - k ln2) * 2**(k-1) */ + expf(x - K_LN2) * scale * scale +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 42c596857..1ac547257 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -10,12 +10,14 @@ mod acos; mod acosf; mod asin; mod asinf; +mod atan2f; mod atanf; mod cbrt; mod cbrtf; mod ceil; mod ceilf; mod cosf; +mod coshf; mod exp; mod exp2; mod exp2f; 
@@ -50,6 +52,7 @@ mod sinf; mod sqrt; mod sqrtf; mod tanf; +mod tanhf; mod trunc; mod truncf; @@ -58,12 +61,14 @@ pub use self::acos::acos; pub use self::acosf::acosf; pub use self::asin::asin; pub use self::asinf::asinf; +pub use self::atan2f::atan2f; pub use self::atanf::atanf; pub use self::cbrt::cbrt; pub use self::cbrtf::cbrtf; pub use self::ceil::ceil; pub use self::ceilf::ceilf; pub use self::cosf::cosf; +pub use self::coshf::coshf; pub use self::exp::exp; pub use self::exp2::exp2; pub use self::exp2f::exp2f; @@ -98,18 +103,20 @@ pub use self::sinf::sinf; pub use self::sqrt::sqrt; pub use self::sqrtf::sqrtf; pub use self::tanf::tanf; +pub use self::tanhf::tanhf; pub use self::trunc::trunc; pub use self::truncf::truncf; mod k_cosf; +mod k_expo2f; mod k_sinf; mod k_tanf; mod rem_pio2_large; mod rem_pio2f; use self::{ - k_cosf::k_cosf, k_sinf::k_sinf, k_tanf::k_tanf, rem_pio2_large::rem_pio2_large, - rem_pio2f::rem_pio2f, + k_cosf::k_cosf, k_expo2f::k_expo2f, k_sinf::k_sinf, k_tanf::k_tanf, + rem_pio2_large::rem_pio2_large, rem_pio2f::rem_pio2f, }; #[inline] diff --git a/libm/src/math/tanhf.rs b/libm/src/math/tanhf.rs new file mode 100644 index 000000000..98a1b60c2 --- /dev/null +++ b/libm/src/math/tanhf.rs @@ -0,0 +1,39 @@ +use super::expm1f; + +#[inline] +pub fn tanhf(mut x: f32) -> f32 { + /* x = |x| */ + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + x = f32::from_bits(ix); + let w = ix; + + let tt = if w > 0x3f0c9f54 { + /* |x| > log(3)/2 ~= 0.5493 or nan */ + if w > 0x41200000 { + /* |x| > 10 */ + 1. + 0. / x + } else { + let t = expm1f(2. * x); + 1. - 2. / (t + 2.) + } + } else if w > 0x3e82c578 { + /* |x| > log(5/3)/2 ~= 0.2554 */ + let t = expm1f(2. * x); + t / (t + 2.) + } else if w >= 0x00800000 { + /* |x| >= 0x1p-126 */ + let t = expm1f(-2. * x); + -t / (t + 2.) 
+ } else { + /* |x| is subnormal */ + force_eval!(x * x); + x + }; + if sign { + -tt + } else { + tt + } +} diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index b639cf11b..1c261b2eb 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -659,7 +659,7 @@ f32_f32! { cbrtf, cosf, ceilf, - // coshf, + coshf, exp2f, expf, expm1f, @@ -671,14 +671,14 @@ f32_f32! { sinf, // sinhf, tanf, - // tanhf, + tanhf, fabsf, sqrtf, } // With signature `fn(f32, f32) -> f32` f32f32_f32! { - // atan2f, + atan2f, fdimf, hypotf, fmodf, From 969dd065e6744ef572fce595bb16e61d4b436e3d Mon Sep 17 00:00:00 2001 From: C Jones Date: Sat, 14 Jul 2018 19:23:01 -0400 Subject: [PATCH 0086/1459] Implement sinh This also adds expo2 for the __expo2 function, and combine_words() to replace the INSERT_WORDS macro. Closes rust-lang/libm#35 --- libm/src/lib.rs | 2 -- libm/src/math/expo2.rs | 13 +++++++++ libm/src/math/mod.rs | 9 +++++++ libm/src/math/sinh.rs | 48 +++++++++++++++++++++++++++++++++ libm/test-generator/src/main.rs | 2 +- 5 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 libm/src/math/expo2.rs create mode 100644 libm/src/math/sinh.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 0b9efeeb3..ee864a36c 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -414,7 +414,6 @@ pub trait F64Ext: private::Sealed { fn ln_1p(self) -> Self; - #[cfg(todo)] fn sinh(self) -> Self; #[cfg(todo)] @@ -595,7 +594,6 @@ impl F64Ext for f64 { log1p(self) } - #[cfg(todo)] #[inline] fn sinh(self) -> Self { sinh(self) diff --git a/libm/src/math/expo2.rs b/libm/src/math/expo2.rs new file mode 100644 index 000000000..8a121b0a2 --- /dev/null +++ b/libm/src/math/expo2.rs @@ -0,0 +1,13 @@ +use super::{combine_words, exp}; + +/* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */ +pub(crate) fn expo2(x: f64) -> f64 { + /* k is such that k*ln2 has minimal relative error and x - kln2 > log(DBL_MIN) */ + const K: i32 
= 2043; + let kln2 = f64::from_bits(0x40962066151add8b); + + /* note that k is odd and scale*scale overflows */ + let scale = combine_words(((0x3ff + K / 2) as u32) << 20, 0); + /* exp(x - k ln2) * 2**(k-1) */ + return exp(x - kln2) * scale * scale; +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 7b3d9abee..19d8ad58d 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -50,6 +50,7 @@ mod scalbn; mod scalbnf; mod sin; mod sinf; +mod sinh; mod sqrt; mod sqrtf; mod tanf; @@ -100,6 +101,7 @@ pub use self::scalbn::scalbn; pub use self::scalbnf::scalbnf; pub use self::sin::sin; pub use self::sinf::sinf; +pub use self::sinh::sinh; pub use self::sqrt::sqrt; pub use self::sqrtf::sqrtf; pub use self::tanf::tanf; @@ -107,6 +109,7 @@ pub use self::trunc::trunc; pub use self::truncf::truncf; // Private modules +mod expo2; mod k_cos; mod k_cosf; mod k_sin; @@ -117,6 +120,7 @@ mod rem_pio2_large; mod rem_pio2f; // Private re-imports +use self::expo2::expo2; use self::k_cos::k_cos; use self::k_cosf::k_cosf; use self::k_sin::k_sin; @@ -151,3 +155,8 @@ pub fn with_set_low_word(f: f64, lo: u32) -> f64 { tmp |= lo as u64; f64::from_bits(tmp) } + +#[inline] +fn combine_words(hi: u32, lo: u32) -> f64 { + f64::from_bits((hi as u64) << 32 | lo as u64) +} diff --git a/libm/src/math/sinh.rs b/libm/src/math/sinh.rs new file mode 100644 index 000000000..057987171 --- /dev/null +++ b/libm/src/math/sinh.rs @@ -0,0 +1,48 @@ +use super::{expm1, expo2}; + +// sinh(x) = (exp(x) - 1/exp(x))/2 +// = (exp(x)-1 + (exp(x)-1)/exp(x))/2 +// = x + x^3/6 + o(x^5) +// +pub fn sinh(x: f64) -> f64 { + // union {double f; uint64_t i;} u = {.f = x}; + // uint32_t w; + // double t, h, absx; + + let mut uf: f64 = x; + let mut ui: u64 = f64::to_bits(uf); + let w: u32; + let t: f64; + let mut h: f64; + let absx: f64; + + h = 0.5; + if ui >> 63 != 0 { + h = -h; + } + /* |x| */ + ui &= !1 / 2; + uf = f64::from_bits(ui); + absx = uf; + w = (ui >> 32) as u32; + + /* |x| < log(DBL_MAX) */ + 
if w < 0x40862e42 { + t = expm1(absx); + if w < 0x3ff00000 { + if w < 0x3ff00000 - (26 << 20) { + /* note: inexact and underflow are raised by expm1 */ + /* note: this branch avoids spurious underflow */ + return x; + } + return h * (2.0 * t - t * t / (t + 1.0)); + } + /* note: |x|>log(0x1p26)+eps could be just h*exp(x) */ + return h * (t + t / (t + 1.0)); + } + + /* |x| > log(DBL_MAX) or nan */ + /* note: the result is stored to handle overflow */ + t = 2.0 * h * expo2(absx); + return t; +} diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index c8e35cea7..758db9f52 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -714,7 +714,7 @@ f64_f64! { log2, round, sin, - // sinh, + sinh, sqrt, // tan, // tanh, From 04f5528c90475febf1a1153528c21e526d98ef7a Mon Sep 17 00:00:00 2001 From: C Jones Date: Sat, 14 Jul 2018 16:08:41 -0400 Subject: [PATCH 0087/1459] Implement tan Also includes implementing the private k_tan function. Closes rust-lang/libm#36 --- libm/src/lib.rs | 2 - libm/src/math/k_tan.rs | 105 ++++++++++++++++++++++++++++++++ libm/src/math/mod.rs | 4 ++ libm/src/math/tan.rs | 69 +++++++++++++++++++++ libm/test-generator/src/main.rs | 2 +- 5 files changed, 179 insertions(+), 3 deletions(-) create mode 100644 libm/src/math/k_tan.rs create mode 100644 libm/src/math/tan.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 0b9efeeb3..e6f286a9e 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -390,7 +390,6 @@ pub trait F64Ext: private::Sealed { fn cos(self) -> Self; - #[cfg(todo)] fn tan(self) -> Self; #[cfg(todo)] @@ -556,7 +555,6 @@ impl F64Ext for f64 { cos(self) } - #[cfg(todo)] #[inline] fn tan(self) -> Self { tan(self) diff --git a/libm/src/math/k_tan.rs b/libm/src/math/k_tan.rs new file mode 100644 index 000000000..b0dd317a2 --- /dev/null +++ b/libm/src/math/k_tan.rs @@ -0,0 +1,105 @@ +// origin: FreeBSD /usr/src/lib/msun/src/k_tan.c */ +// +// 
==================================================== +// Copyright 2004 Sun Microsystems, Inc. All Rights Reserved. +// +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== + +// kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 +// Input x is assumed to be bounded by ~pi/4 in magnitude. +// Input y is the tail of x. +// Input odd indicates whether tan (if odd = 0) or -1/tan (if odd = 1) is returned. +// +// Algorithm +// 1. Since tan(-x) = -tan(x), we need only to consider positive x. +// 2. Callers must return tan(-0) = -0 without calling here since our +// odd polynomial is not evaluated in a way that preserves -0. +// Callers may do the optimization tan(x) ~ x for tiny x. +// 3. tan(x) is approximated by a odd polynomial of degree 27 on +// [0,0.67434] +// 3 27 +// tan(x) ~ x + T1*x + ... + T13*x +// where +// +// |tan(x) 2 4 26 | -59.2 +// |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2 +// | x | +// +// Note: tan(x+y) = tan(x) + tan'(x)*y +// ~ tan(x) + (1+x*x)*y +// Therefore, for better accuracy in computing tan(x+y), let +// 3 2 2 2 2 +// r = x *(T2+x *(T3+x *(...+x *(T12+x *T13)))) +// then +// 3 2 +// tan(x+y) = x + (T1*x + (x *(r+y)+y)) +// +// 4. 
For x in [0.67434,pi/4], let y = pi/4 - x, then +// tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y)) +// = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y))) +static T: [f64; 13] = [ + 3.33333333333334091986e-01, /* 3FD55555, 55555563 */ + 1.33333333333201242699e-01, /* 3FC11111, 1110FE7A */ + 5.39682539762260521377e-02, /* 3FABA1BA, 1BB341FE */ + 2.18694882948595424599e-02, /* 3F9664F4, 8406D637 */ + 8.86323982359930005737e-03, /* 3F8226E3, E96E8493 */ + 3.59207910759131235356e-03, /* 3F6D6D22, C9560328 */ + 1.45620945432529025516e-03, /* 3F57DBC8, FEE08315 */ + 5.88041240820264096874e-04, /* 3F4344D8, F2F26501 */ + 2.46463134818469906812e-04, /* 3F3026F7, 1A8D1068 */ + 7.81794442939557092300e-05, /* 3F147E88, A03792A6 */ + 7.14072491382608190305e-05, /* 3F12B80F, 32F0A7E9 */ + -1.85586374855275456654e-05, /* BEF375CB, DB605373 */ + 2.59073051863633712884e-05, /* 3EFB2A70, 74BF7AD4 */ +]; +const PIO4: f64 = 7.85398163397448278999e-01; /* 3FE921FB, 54442D18 */ +const PIO4_LO: f64 = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */ + +pub(crate) fn k_tan(mut x: f64, mut y: f64, odd: i32) -> f64 { + let hx = (f64::to_bits(x) >> 32) as u32; + let big = (hx & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */ + if big { + let sign = hx >> 31; + if sign != 0 { + x = -x; + y = -y; + } + x = (PIO4 - x) + (PIO4_LO - y); + y = 0.0; + } + let z = x * x; + let w = z * z; + /* + * Break x^5*(T[1]+x^2*T[2]+...) 
into + * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) + + * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12])) + */ + let r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11])))); + let v = z * (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12]))))); + let s = z * x; + let r = y + z * (s * (r + v) + y) + s * T[0]; + let w = x + r; + if big { + let sign = hx >> 31; + let s = 1.0 - 2.0 * odd as f64; + let v = s - 2.0 * (x + (r - w * w / (w + s))); + return if sign != 0 { -v } else { v }; + } + if odd == 0 { + return w; + } + /* -1.0/(x+r) has up to 2ulp error, so compute it accurately */ + let w0 = zero_low_word(w); + let v = r - (w0 - x); /* w0+v = r+x */ + let a = -1.0 / w; + let a0 = zero_low_word(a); + a0 + a * (1.0 + a0 * w0 + a0 * v) +} + +#[inline] +fn zero_low_word(x: f64) -> f64 { + f64::from_bits(f64::to_bits(x) & 0xFFFF_FFFF_0000_0000) +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 7b3d9abee..a6ce52394 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -52,6 +52,7 @@ mod sin; mod sinf; mod sqrt; mod sqrtf; +mod tan; mod tanf; mod trunc; mod truncf; @@ -102,6 +103,7 @@ pub use self::sin::sin; pub use self::sinf::sinf; pub use self::sqrt::sqrt; pub use self::sqrtf::sqrtf; +pub use self::tan::tan; pub use self::tanf::tanf; pub use self::trunc::trunc; pub use self::truncf::truncf; @@ -111,6 +113,7 @@ mod k_cos; mod k_cosf; mod k_sin; mod k_sinf; +mod k_tan; mod k_tanf; mod rem_pio2; mod rem_pio2_large; @@ -121,6 +124,7 @@ use self::k_cos::k_cos; use self::k_cosf::k_cosf; use self::k_sin::k_sin; use self::k_sinf::k_sinf; +use self::k_tan::k_tan; use self::k_tanf::k_tanf; use self::rem_pio2::rem_pio2; use self::rem_pio2_large::rem_pio2_large; diff --git a/libm/src/math/tan.rs b/libm/src/math/tan.rs new file mode 100644 index 000000000..92bbb6221 --- /dev/null +++ b/libm/src/math/tan.rs @@ -0,0 +1,69 @@ +// origin: FreeBSD /usr/src/lib/msun/src/s_tan.c */ +// +// ==================================================== +// 
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== + +use super::{k_tan, rem_pio2}; + +// tan(x) +// Return tangent function of x. +// +// kernel function: +// k_tan ... tangent function on [-pi/4,pi/4] +// rem_pio2 ... argument reduction routine +// +// Method. +// Let S,C and T denote the sin, cos and tan respectively on +// [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 +// in [-pi/4 , +pi/4], and let n = k mod 4. +// We have +// +// n sin(x) cos(x) tan(x) +// ---------------------------------------------------------- +// 0 S C T +// 1 C -S -1/T +// 2 -S -C T +// 3 -C S -1/T +// ---------------------------------------------------------- +// +// Special cases: +// Let trig be any of sin, cos, or tan. +// trig(+-INF) is NaN, with signals; +// trig(NaN) is that NaN; +// +// Accuracy: +// TRIG(x) returns trig(x) nearly rounded +pub fn tan(x: f64) -> f64 { + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; + /* |x| ~< pi/4 */ + if ix <= 0x3fe921fb { + if ix < 0x3e400000 { + /* |x| < 2**-27 */ + /* raise inexact if x!=0 and underflow if subnormal */ + force_eval!(if ix < 0x00100000 { + x / x1p120 as f64 + } else { + x + x1p120 as f64 + }); + return x; + } + return k_tan(x, 0.0, 0); + } + + /* tan(Inf or NaN) is NaN */ + if ix >= 0x7ff00000 { + return x - x; + } + + /* argument reduction */ + let (n, y0, y1) = rem_pio2(x); + k_tan(y0, y1, n & 1) +} diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index c8e35cea7..5d47cb2a4 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -716,7 +716,7 @@ f64_f64! 
{ sin, // sinh, sqrt, - // tan, + tan, // tanh, trunc, fabs, From 7050af437d92479fd4781271496a3419b4ddbc4d Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Sun, 15 Jul 2018 07:01:26 +0300 Subject: [PATCH 0088/1459] sinf, fix constant in k_expo2f --- libm/src/lib.rs | 2 -- libm/src/math/k_expo2f.rs | 4 ++-- libm/src/math/mod.rs | 2 ++ libm/src/math/sinhf.rs | 30 ++++++++++++++++++++++++++++++ libm/test-generator/src/main.rs | 2 +- 5 files changed, 35 insertions(+), 5 deletions(-) create mode 100644 libm/src/math/sinhf.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 1ed078734..45213a25f 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -108,7 +108,6 @@ pub trait F32Ext: private::Sealed { fn ln_1p(self) -> Self; - #[cfg(todo)] fn sinh(self) -> Self; fn cosh(self) -> Self; @@ -284,7 +283,6 @@ impl F32Ext for f32 { log1pf(self) } - #[cfg(todo)] #[inline] fn sinh(self) -> Self { sinhf(self) diff --git a/libm/src/math/k_expo2f.rs b/libm/src/math/k_expo2f.rs index 031a0bdd0..e2eaa4e6b 100644 --- a/libm/src/math/k_expo2f.rs +++ b/libm/src/math/k_expo2f.rs @@ -2,13 +2,13 @@ use super::expf; /* k is such that k*ln2 has minimal relative error and x - kln2 > log(FLT_MIN) */ const K: i32 = 235; -const K_LN2: f32 = 162.89; /* 0x1.45c778p+7f */ /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ #[inline] pub(crate) fn k_expo2f(x: f32) -> f32 { + let k_ln2 = f32::from_bits(0x4322e3bc); /* note that k is odd and scale*scale overflows */ let scale = f32::from_bits(((0x7f + K / 2) as u32) << 23); /* exp(x - k ln2) * 2**(k-1) */ - expf(x - K_LN2) * scale * scale + expf(x - k_ln2) * scale * scale } diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index a29f2fb0a..eda6b5b72 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -53,6 +53,7 @@ mod scalbnf; mod sin; mod sinf; mod sinh; +mod sinhf; mod sqrt; mod sqrtf; mod tan; @@ -108,6 +109,7 @@ pub use self::scalbnf::scalbnf; pub use self::sin::sin; pub use 
self::sinf::sinf; pub use self::sinh::sinh; +pub use self::sinhf::sinhf; pub use self::sqrt::sqrt; pub use self::sqrtf::sqrtf; pub use self::tan::tan; diff --git a/libm/src/math/sinhf.rs b/libm/src/math/sinhf.rs new file mode 100644 index 000000000..90c4b9312 --- /dev/null +++ b/libm/src/math/sinhf.rs @@ -0,0 +1,30 @@ +use super::expm1f; +use super::k_expo2f; + +#[inline] +pub fn sinhf(x: f32) -> f32 { + let mut h = 0.5f32; + let mut ix = x.to_bits(); + if (ix >> 31) != 0 { + h = -h; + } + /* |x| */ + ix &= 0x7fffffff; + let absx = f32::from_bits(ix); + let w = ix; + + /* |x| < log(FLT_MAX) */ + if w < 0x42b17217 { + let t = expm1f(absx); + if w < 0x3f800000 { + if w < (0x3f800000 - (12 << 23)) { + return x; + } + return h * (2. * t - t * t / (t + 1.)); + } + return h * (t + t / (t + 1.)); + } + + /* |x| > logf(FLT_MAX) or nan */ + 2. * h * k_expo2f(absx) +} diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 6708891f3..7caada4ef 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -669,7 +669,7 @@ f32_f32! 
{ logf, roundf, sinf, - // sinhf, + sinhf, tanf, tanhf, fabsf, From c5d03a8db7c89999281676dcd7272b7d93c8d621 Mon Sep 17 00:00:00 2001 From: Joseph Ryan Date: Sun, 15 Jul 2018 01:17:46 -0500 Subject: [PATCH 0089/1459] implement atan2 --- libm/src/lib.rs | 2 - libm/src/math/atan2.rs | 72 +++++++++++++++++++++++++++++++++ libm/src/math/mod.rs | 2 + libm/test-generator/src/main.rs | 2 +- 4 files changed, 75 insertions(+), 3 deletions(-) create mode 100644 libm/src/math/atan2.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 45213a25f..e7abce540 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -392,7 +392,6 @@ pub trait F64Ext: private::Sealed { #[cfg(todo)] fn atan(self) -> Self; - #[cfg(todo)] fn atan2(self, other: Self) -> Self; #[cfg(todo)] @@ -568,7 +567,6 @@ impl F64Ext for f64 { atan(self) } - #[cfg(todo)] #[inline] fn atan2(self, other: Self) -> Self { atan2(self, other) diff --git a/libm/src/math/atan2.rs b/libm/src/math/atan2.rs new file mode 100644 index 000000000..324b1e937 --- /dev/null +++ b/libm/src/math/atan2.rs @@ -0,0 +1,72 @@ +use super::atan; +use super::fabs; + +const PI: f64 = 3.1415926535897931160E+00; /* 0x400921FB, 0x54442D18 */ +const PI_LO: f64 = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */ + +#[inline] +pub fn atan2(y: f64, x: f64) -> f64 { + if x.is_nan() || y.is_nan() { + return x + y; + } + let mut ix = (x.to_bits() >> 32) as u32; + let lx = x.to_bits() as u32; + let mut iy = (y.to_bits() >> 32) as u32; + let ly = y.to_bits() as u32; + if (ix - 0x3ff00000 | lx) == 0 { + /* x = 1.0 */ + return atan(y); + } + let m = ((iy >> 31) & 1) | ((ix >> 30) & 2); /* 2*sign(x)+sign(y) */ + ix &= 0x7fffffff; + iy &= 0x7fffffff; + + /* when y = 0 */ + if (iy | ly) == 0 { + return match m { + 0 | 1 => y, /* atan(+-0,+anything)=+-0 */ + 2 => PI, /* atan(+0,-anything) = PI */ + _ => -PI, /* atan(-0,-anything) =-PI */ + }; + } + /* when x = 0 */ + if (ix | lx) == 0 { + return if m & 1 != 0 { -PI / 2.0 } else { PI / 2.0 }; + } + /* when 
x is INF */ + if ix == 0x7ff00000 { + if iy == 0x7ff00000 { + return match m { + 0 => PI / 4.0, /* atan(+INF,+INF) */ + 1 => -PI / 4.0, /* atan(-INF,+INF) */ + 2 => 3.0 * PI / 4.0, /* atan(+INF,-INF) */ + _ => -3.0 * PI / 4.0, /* atan(-INF,-INF) */ + }; + } else { + return match m { + 0 => 0.0, /* atan(+...,+INF) */ + 1 => -0.0, /* atan(-...,+INF) */ + 2 => PI, /* atan(+...,-INF) */ + _ => -PI, /* atan(-...,-INF) */ + }; + } + } + /* |y/x| > 0x1p64 */ + if ix + (64 << 20) < iy || iy == 0x7ff00000 { + return if m & 1 != 0 { -PI / 2.0 } else { PI / 2.0 }; + } + + /* z = atan(|y/x|) without spurious underflow */ + let z = if (m & 2 != 0) && iy + (64 << 20) < ix { + /* |y/x| < 0x1p-64, x<0 */ + 0.0 + } else { + atan(fabs(y / x)) + }; + match m { + 0 => z, /* atan(+,+) */ + 1 => -z, /* atan(-,+) */ + 2 => PI - (z - PI_LO), /* atan(+,-) */ + _ => (z - PI_LO) - PI, /* atan(-,-) */ + } +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index eda6b5b72..07032dcfb 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -11,6 +11,7 @@ mod acos; mod acosf; mod asin; mod asinf; +mod atan2; mod atan2f; mod atanf; mod cbrt; @@ -67,6 +68,7 @@ pub use self::acos::acos; pub use self::acosf::acosf; pub use self::asin::asin; pub use self::asinf::asinf; +pub use self::atan2::atan2; pub use self::atan2f::atan2f; pub use self::atanf::atanf; pub use self::cbrt::cbrt; diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 7caada4ef..e65f0f69b 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -724,7 +724,7 @@ f64_f64! { // With signature `fn(f64, f64) -> f64` f64f64_f64! 
{ - // atan2, + atan2, fdim, fmod, hypot, From 636815e43954bb1d087024b36a2619df590678c3 Mon Sep 17 00:00:00 2001 From: C Jones Date: Sat, 14 Jul 2018 19:42:47 -0400 Subject: [PATCH 0090/1459] Implement tanh Closes rust-lang/libm#37 --- libm/src/lib.rs | 2 -- libm/src/math/mod.rs | 2 ++ libm/src/math/tanh.rs | 52 +++++++++++++++++++++++++++++++++ libm/test-generator/src/main.rs | 2 +- 4 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 libm/src/math/tanh.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 45213a25f..1eea30667 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -410,7 +410,6 @@ pub trait F64Ext: private::Sealed { #[cfg(todo)] fn cosh(self) -> Self; - #[cfg(todo)] fn tanh(self) -> Self; #[cfg(todo)] @@ -595,7 +594,6 @@ impl F64Ext for f64 { cosh(self) } - #[cfg(todo)] #[inline] fn tanh(self) -> Self { tanh(self) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index eda6b5b72..5fc787c82 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -58,6 +58,7 @@ mod sqrt; mod sqrtf; mod tan; mod tanf; +mod tanh; mod tanhf; mod trunc; mod truncf; @@ -114,6 +115,7 @@ pub use self::sqrt::sqrt; pub use self::sqrtf::sqrtf; pub use self::tan::tan; pub use self::tanf::tanf; +pub use self::tanh::tanh; pub use self::tanhf::tanhf; pub use self::trunc::trunc; pub use self::truncf::truncf; diff --git a/libm/src/math/tanh.rs b/libm/src/math/tanh.rs new file mode 100644 index 000000000..657d7e0e2 --- /dev/null +++ b/libm/src/math/tanh.rs @@ -0,0 +1,52 @@ +use super::expm1; + +/* tanh(x) = (exp(x) - exp(-x))/(exp(x) + exp(-x)) + * = (exp(2*x) - 1)/(exp(2*x) - 1 + 2) + * = (1 - exp(-2*x))/(exp(-2*x) - 1 + 2) + */ +pub fn tanh(mut x: f64) -> f64 { + let mut uf: f64 = x; + let mut ui: u64 = f64::to_bits(uf); + + let w: u32; + let sign: bool; + let mut t: f64; + + /* x = |x| */ + sign = ui >> 63 != 0; + ui &= !1 / 2; + uf = f64::from_bits(ui); + x = uf; + w = (ui >> 32) as u32; + + if w > 0x3fe193ea { + /* |x| > log(3)/2 ~= 0.5493 or nan 
*/ + if w > 0x40340000 { + /* |x| > 20 or nan */ + /* note: this branch avoids raising overflow */ + t = 1.0 - 0.0 / x; + } else { + t = expm1(2.0 * x); + t = 1.0 - 2.0 / (t + 2.0); + } + } else if w > 0x3fd058ae { + /* |x| > log(5/3)/2 ~= 0.2554 */ + t = expm1(2.0 * x); + t = t / (t + 2.0); + } else if w >= 0x00100000 { + /* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */ + t = expm1(-2.0 * x); + t = -t / (t + 2.0); + } else { + /* |x| is subnormal */ + /* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */ + force_eval!(x as f32); + t = x; + } + + if sign { + -t + } else { + t + } +} diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 7caada4ef..1d619cd92 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -717,7 +717,7 @@ f64_f64! { sinh, sqrt, tan, - // tanh, + tanh, trunc, fabs, } From 261f7423e794698c75d9511e83423d8057c92859 Mon Sep 17 00:00:00 2001 From: Erik Date: Sat, 14 Jul 2018 15:22:41 -0400 Subject: [PATCH 0091/1459] implement fmaf --- libm/src/lib.rs | 2 - libm/src/math/fmaf.rs | 94 +++++++++++++++++++++++++++++++++ libm/src/math/mod.rs | 2 + libm/test-generator/src/main.rs | 2 +- 4 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 libm/src/math/fmaf.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 45213a25f..f0fdd464c 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -52,7 +52,6 @@ pub trait F32Ext: private::Sealed { #[cfg(todo)] fn signum(self) -> Self; - #[cfg(todo)] fn mul_add(self, a: Self, b: Self) -> Self; #[cfg(todo)] @@ -161,7 +160,6 @@ impl F32Ext for f32 { fabsf(self) } - #[cfg(todo)] #[inline] fn mul_add(self, a: Self, b: Self) -> Self { fmaf(self, a, b) diff --git a/libm/src/math/fmaf.rs b/libm/src/math/fmaf.rs new file mode 100644 index 000000000..70d2c54a2 --- /dev/null +++ b/libm/src/math/fmaf.rs @@ -0,0 +1,94 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */ +/*- + * Copyright (c) 2005-2011 David Schultz 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +use core::f32; +use core::ptr::read_volatile; + +/* + * Fused multiply-add: Compute x * y + z with a single rounding error. + * + * A double has more than twice as much precision than a float, so + * direct double-precision arithmetic suffices, except where double + * rounding occurs. + */ +#[inline] +pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 { + let xy: f64; + let mut result: f64; + let mut ui: u64; + let e: i32; + + xy = x as f64 * y as f64; + result = xy + z as f64; + ui = result.to_bits(); + e = (ui >> 52) as i32 & 0x7ff; + /* Common case: The double precision result is fine. 
*/ + if ( + /* not a halfway case */ + ui & 0x1fffffff) != 0x10000000 || + /* NaN */ + e == 0x7ff || + /* exact */ + (result - xy == z as f64 && result - z as f64 == xy) || + /* not round-to-nearest */ + fegetround() != FE_TONEAREST + { + /* + underflow may not be raised correctly, example: + fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) + */ + if e < 0x3ff - 126 && e >= 0x3ff - 149 && fetestexcept(FE_INEXACT) { + feclearexcept(FE_INEXACT); + /* TODO: gcc and clang bug workaround */ + let vz: f32 = unsafe { read_volatile(&z) }; + result = xy + vz as f64; + if fetestexcept(FE_INEXACT) { + feraiseexcept(FE_UNDERFLOW); + } else { + feraiseexcept(FE_INEXACT); + } + } + z = result as f32; + return z; + } + + /* + * If result is inexact, and exactly halfway between two float values, + * we need to adjust the low-order bit in the direction of the error. + */ + fesetround(FE_TOWARDZERO); + let vxy: f64 = unsafe { read_volatile(&xy) }; /* XXX work around gcc CSE bug */ + let mut adjusted_result: f64 = vxy + z as f64; + fesetround(FE_TONEAREST); + if result == adjusted_result { + ui = adjusted_result.to_bits(); + ui += 1; + adjusted_result = f64::from_bits(ui); + } + z = adjusted_result as f32; + z +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index eda6b5b72..ad9aead7c 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -33,6 +33,7 @@ mod fdimf; mod floor; mod floorf; mod fma; +mod fmaf; mod fmod; mod fmodf; mod hypot; @@ -89,6 +90,7 @@ pub use self::fdimf::fdimf; pub use self::floor::floor; pub use self::floorf::floorf; pub use self::fma::fma; +pub use self::fmaf::fmaf; pub use self::fmod::fmod; pub use self::fmodf::fmodf; pub use self::hypot::hypot; diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 7caada4ef..a2a301d13 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -687,7 +687,7 @@ f32f32_f32! { // With signature `fn(f32, f32, f32) -> f32` f32f32f32_f32! 
{ - // fmaf, + fmaf, } // With signature `fn(f32, i32) -> f32` From 098e3a230e1297d5150f377cc333bf6139ac24ba Mon Sep 17 00:00:00 2001 From: Erik Date: Sun, 15 Jul 2018 13:14:29 -0400 Subject: [PATCH 0092/1459] add dummy fenv implementation --- libm/src/math/fenv.rs | 33 +++++++++++++++++++++++++++++++++ libm/src/math/fmaf.rs | 14 ++++++++++---- libm/src/math/mod.rs | 1 + 3 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 libm/src/math/fenv.rs diff --git a/libm/src/math/fenv.rs b/libm/src/math/fenv.rs new file mode 100644 index 000000000..652e60324 --- /dev/null +++ b/libm/src/math/fenv.rs @@ -0,0 +1,33 @@ +// src: musl/src/fenv/fenv.c +/* Dummy functions for archs lacking fenv implementation */ + +pub(crate) const FE_UNDERFLOW: i32 = 0; +pub(crate) const FE_INEXACT: i32 = 0; + +pub(crate) const FE_TONEAREST: i32 = 0; +pub(crate) const FE_TOWARDZERO: i32 = 0; + +#[inline] +pub(crate) fn feclearexcept(_mask: i32) -> i32 { + 0 +} + +#[inline] +pub(crate) fn feraiseexcept(_mask: i32) -> i32 { + 0 +} + +#[inline] +pub(crate) fn fetestexcept(_mask: i32) -> i32 { + 0 +} + +#[inline] +pub(crate) fn fegetround() -> i32 { + FE_TONEAREST +} + +#[inline] +pub(crate) fn fesetround(_r: i32) -> i32 { + 0 +} diff --git a/libm/src/math/fmaf.rs b/libm/src/math/fmaf.rs index 70d2c54a2..25b04fc23 100644 --- a/libm/src/math/fmaf.rs +++ b/libm/src/math/fmaf.rs @@ -28,6 +28,11 @@ use core::f32; use core::ptr::read_volatile; +use super::fenv::{ + feclearexcept, fegetround, feraiseexcept, fesetround, fetestexcept, FE_INEXACT, FE_TONEAREST, + FE_TOWARDZERO, FE_UNDERFLOW, +}; + /* * Fused multiply-add: Compute x * y + z with a single rounding error. 
* @@ -61,12 +66,12 @@ pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 { underflow may not be raised correctly, example: fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) */ - if e < 0x3ff - 126 && e >= 0x3ff - 149 && fetestexcept(FE_INEXACT) { + if e < 0x3ff - 126 && e >= 0x3ff - 149 && fetestexcept(FE_INEXACT) != 0 { feclearexcept(FE_INEXACT); - /* TODO: gcc and clang bug workaround */ + // prevent `xy + vz` from being CSE'd with `xy + z` above let vz: f32 = unsafe { read_volatile(&z) }; result = xy + vz as f64; - if fetestexcept(FE_INEXACT) { + if fetestexcept(FE_INEXACT) != 0 { feraiseexcept(FE_UNDERFLOW); } else { feraiseexcept(FE_INEXACT); @@ -81,7 +86,8 @@ pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 { * we need to adjust the low-order bit in the direction of the error. */ fesetround(FE_TOWARDZERO); - let vxy: f64 = unsafe { read_volatile(&xy) }; /* XXX work around gcc CSE bug */ + // prevent `vxy + z` from being CSE'd with `xy + z` above + let vxy: f64 = unsafe { read_volatile(&xy) }; let mut adjusted_result: f64 = vxy + z as f64; fesetround(FE_TONEAREST); if result == adjusted_result { diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index ad9aead7c..92db426bf 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -122,6 +122,7 @@ pub use self::truncf::truncf; // Private modules mod expo2; +mod fenv; mod k_cos; mod k_cosf; mod k_expo2f; From 3fecdd6a9044ac2623c345f6557bd568dd26a56e Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Sun, 15 Jul 2018 13:24:09 -0500 Subject: [PATCH 0093/1459] update CHANGELOG; make utility functions private --- libm/CHANGELOG.md | 13 +++++++++++++ libm/src/math/mod.rs | 9 +++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index 3a496527b..107813b08 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -5,6 +5,19 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
## [Unreleased] +### Added + +- atan2f +- cos +- coshf +- fmaf +- sin +- sinh +- sinhf +- tan +- tanh +- tanhf + ## [v0.1.1] - 2018-07-14 ### Added diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index c6a3424d8..7fa425207 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -150,17 +150,18 @@ use self::rem_pio2_large::rem_pio2_large; use self::rem_pio2f::rem_pio2f; #[inline] -pub fn get_high_word(x: f64) -> u32 { +fn get_high_word(x: f64) -> u32 { (x.to_bits() >> 32) as u32 } #[inline] -pub fn get_low_word(x: f64) -> u32 { +fn get_low_word(x: f64) -> u32 { x.to_bits() as u32 } +#[allow(dead_code)] #[inline] -pub fn with_set_high_word(f: f64, hi: u32) -> f64 { +fn with_set_high_word(f: f64, hi: u32) -> f64 { let mut tmp = f.to_bits(); tmp &= 0x00000000_ffffffff; tmp |= (hi as u64) << 32; @@ -168,7 +169,7 @@ pub fn with_set_high_word(f: f64, hi: u32) -> f64 { } #[inline] -pub fn with_set_low_word(f: f64, lo: u32) -> f64 { +fn with_set_low_word(f: f64, lo: u32) -> f64 { let mut tmp = f.to_bits(); tmp &= 0xffffffff_00000000; tmp |= lo as u64; From a9fd742d44848a50ade061f055b9d3e21ea9f065 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Sun, 15 Jul 2018 14:50:00 -0500 Subject: [PATCH 0094/1459] add more copyright notices --- libm/src/math/acosf.rs | 15 +++++++++++++++ libm/src/math/asinf.rs | 15 +++++++++++++++ libm/src/math/atanf.rs | 15 +++++++++++++++ libm/src/math/cosf.rs | 16 ++++++++++++++++ libm/src/math/expm1.rs | 12 ++++++++++++ libm/src/math/expm1f.rs | 15 +++++++++++++++ libm/src/math/k_cosf.rs | 16 ++++++++++++++++ libm/src/math/k_sinf.rs | 16 ++++++++++++++++ libm/src/math/k_tanf.rs | 11 +++++++++++ libm/src/math/rem_pio2_large.rs | 12 ++++++++++++ libm/src/math/rem_pio2f.rs | 16 ++++++++++++++++ libm/src/math/sinf.rs | 16 ++++++++++++++++ libm/src/math/tanf.rs | 16 ++++++++++++++++ 13 files changed, 191 insertions(+) diff --git a/libm/src/math/acosf.rs b/libm/src/math/acosf.rs index bbe29c17c..ff07fe935 100644 --- 
a/libm/src/math/acosf.rs +++ b/libm/src/math/acosf.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_acosf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + use super::sqrtf::sqrtf; const PIO2_HI: f32 = 1.5707962513e+00; /* 0x3fc90fda */ diff --git a/libm/src/math/asinf.rs b/libm/src/math/asinf.rs index 597be4cb7..86944bb00 100644 --- a/libm/src/math/asinf.rs +++ b/libm/src/math/asinf.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_asinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + use super::fabsf::fabsf; use super::sqrt::sqrt; diff --git a/libm/src/math/atanf.rs b/libm/src/math/atanf.rs index 01c41f4ce..b05152e2b 100644 --- a/libm/src/math/atanf.rs +++ b/libm/src/math/atanf.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_atanf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. 
+ * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + use super::fabsf; const ATAN_HI: [f32; 4] = [ diff --git a/libm/src/math/cosf.rs b/libm/src/math/cosf.rs index 79df97e35..23faacdc2 100644 --- a/libm/src/math/cosf.rs +++ b/libm/src/math/cosf.rs @@ -1,3 +1,19 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_cosf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + use super::{k_cosf, k_sinf, rem_pio2f}; use core::f64::consts::FRAC_PI_2; diff --git a/libm/src/math/expm1.rs b/libm/src/math/expm1.rs index 48082cd74..a00396524 100644 --- a/libm/src/math/expm1.rs +++ b/libm/src/math/expm1.rs @@ -1,3 +1,15 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_expm1.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + use core::f64; const O_THRESHOLD: f64 = 7.09782712893383973096e+02; /* 0x40862E42, 0xFEFA39EF */ diff --git a/libm/src/math/expm1f.rs b/libm/src/math/expm1f.rs index 011e09b69..8f581733a 100644 --- a/libm/src/math/expm1f.rs +++ b/libm/src/math/expm1f.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_expm1f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + const O_THRESHOLD: f32 = 8.8721679688e+01; /* 0x42b17180 */ const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */ const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */ diff --git a/libm/src/math/k_cosf.rs b/libm/src/math/k_cosf.rs index 83d13b2e9..75579ef9b 100644 --- a/libm/src/math/k_cosf.rs +++ b/libm/src/math/k_cosf.rs @@ -1,3 +1,19 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_cosf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Debugged and optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + /* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). 
*/ const C0: f64 = -0.499999997251031003120; /* -0x1ffffffd0c5e81.0p-54 */ const C1: f64 = 0.0416666233237390631894; /* 0x155553e1053a42.0p-57 */ diff --git a/libm/src/math/k_sinf.rs b/libm/src/math/k_sinf.rs index bb2183afc..bb519c9c0 100644 --- a/libm/src/math/k_sinf.rs +++ b/libm/src/math/k_sinf.rs @@ -1,3 +1,19 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_sinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + /* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). */ const S1: f64 = -0.166666666416265235595; /* -0x15555554cbac77.0p-55 */ const S2: f64 = 0.0083333293858894631756; /* 0x111110896efbb2.0p-59 */ diff --git a/libm/src/math/k_tanf.rs b/libm/src/math/k_tanf.rs index db2e0caa7..6d3da1dcc 100644 --- a/libm/src/math/k_tanf.rs +++ b/libm/src/math/k_tanf.rs @@ -1,3 +1,14 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_tan.c */ +/* + * ==================================================== + * Copyright 2004 Sun Microsystems, Inc. All Rights Reserved. + * + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + /* |tan(x)/x - t(x)| < 2**-25.5 (~[-2e-08, 2e-08]). 
*/ const T: [f64; 6] = [ 0.333331395030791399758, /* 0x15554d3418c99f.0p-54 */ diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index 52b47279c..462eb0345 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -1,3 +1,15 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_rem_pio2.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + use super::floor; use super::scalbn; diff --git a/libm/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs index 73ec3775d..e2368d93d 100644 --- a/libm/src/math/rem_pio2f.rs +++ b/libm/src/math/rem_pio2f.rs @@ -1,3 +1,19 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Debugged and optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + use super::rem_pio2_large; use core::f64; diff --git a/libm/src/math/sinf.rs b/libm/src/math/sinf.rs index 09f62cddc..c9b02bcdc 100644 --- a/libm/src/math/sinf.rs +++ b/libm/src/math/sinf.rs @@ -1,3 +1,19 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. 
+ */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + use super::{k_cosf, k_sinf, rem_pio2f}; use core::f64::consts::FRAC_PI_2; diff --git a/libm/src/math/tanf.rs b/libm/src/math/tanf.rs index 6bfbe06c1..15a462d4e 100644 --- a/libm/src/math/tanf.rs +++ b/libm/src/math/tanf.rs @@ -1,3 +1,19 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_tanf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + use super::{k_tanf, rem_pio2f}; use core::f64::consts::FRAC_PI_2; From fb73f40fbb76fa1c209e5906658f7e607fe5556d Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Sun, 15 Jul 2018 16:00:09 -0500 Subject: [PATCH 0095/1459] inline more functions; add more methods to F{32,64}Ext --- libm/src/lib.rs | 56 +++++++++++---------------------- libm/src/math/acosf.rs | 1 + libm/src/math/asin.rs | 2 ++ libm/src/math/asinf.rs | 1 + libm/src/math/cos.rs | 1 + libm/src/math/expm1.rs | 2 +- libm/src/math/expo2.rs | 3 +- libm/src/math/fenv.rs | 18 +++++------ libm/src/math/k_cosf.rs | 2 +- libm/src/math/k_expo2f.rs | 2 +- libm/src/math/k_sinf.rs | 2 +- libm/src/math/k_tan.rs | 3 +- libm/src/math/k_tanf.rs | 2 +- libm/src/math/rem_pio2.rs | 2 ++ libm/src/math/rem_pio2_large.rs | 2 +- libm/src/math/rem_pio2f.rs | 2 +- libm/src/math/sin.rs | 1 + libm/src/math/sinh.rs | 1 + libm/src/math/tan.rs | 1 + libm/src/math/tanh.rs | 1 + 20 files changed, 49 insertions(+), 56 deletions(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 644091bb7..bd4bff98d 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -14,18 +14,19 @@ mod math; -#[cfg(todo)] use core::{f32, f64}; pub use math::*; /// Approximate equality with 1 ULP of tolerance #[doc(hidden)] +#[inline] pub fn _eqf(a: u32, b: u32) -> bool { (a as i32).wrapping_sub(b as i32).abs() <= 1 } #[doc(hidden)] +#[inline] pub fn _eq(a: u64, b: u64) -> bool { (a as i64).wrapping_sub(b as i64).abs() <= 1 } @@ -33,7 +34,7 @@ pub fn _eq(a: u64, b: u64) -> bool { /// Math support for `f32` /// /// This trait is sealed and cannot be implemented outside of `libm`. 
-pub trait F32Ext: private::Sealed { +pub trait F32Ext: private::Sealed + Sized { fn floor(self) -> Self; fn ceil(self) -> Self; @@ -44,20 +45,17 @@ pub trait F32Ext: private::Sealed { fn fdim(self, rhs: Self) -> Self; - #[cfg(todo)] fn fract(self) -> Self; fn abs(self) -> Self; - #[cfg(todo)] - fn signum(self) -> Self; + // NOTE depends on unstable intrinsics::copysignf32 + // fn signum(self) -> Self; fn mul_add(self, a: Self, b: Self) -> Self; - #[cfg(todo)] fn div_euc(self, rhs: Self) -> Self; - #[cfg(todo)] fn mod_euc(self, rhs: Self) -> Self; // NOTE depends on unstable intrinsics::powif32 @@ -97,9 +95,11 @@ pub trait F32Ext: private::Sealed { fn atan2(self, other: Self) -> Self; - #[cfg(todo)] #[inline] - fn sin_cos(self) -> (Self, Self) { + fn sin_cos(self) -> (Self, Self) + where + Self: Copy, + { (self.sin(), self.cos()) } @@ -113,13 +113,10 @@ pub trait F32Ext: private::Sealed { fn tanh(self) -> Self; - #[cfg(todo)] fn asinh(self) -> Self; - #[cfg(todo)] fn acosh(self) -> Self; - #[cfg(todo)] fn atanh(self) -> Self; } @@ -149,7 +146,6 @@ impl F32Ext for f32 { fdimf(self, rhs) } - #[cfg(todo)] #[inline] fn fract(self) -> Self { self - self.trunc() @@ -165,7 +161,6 @@ impl F32Ext for f32 { fmaf(self, a, b) } - #[cfg(todo)] #[inline] fn div_euc(self, rhs: Self) -> Self { let q = (self / rhs).trunc(); @@ -175,7 +170,6 @@ impl F32Ext for f32 { q } - #[cfg(todo)] #[inline] fn mod_euc(self, rhs: f32) -> f32 { let r = self % rhs; @@ -296,7 +290,6 @@ impl F32Ext for f32 { tanhf(self) } - #[cfg(todo)] #[inline] fn asinh(self) -> Self { if self == f32::NEG_INFINITY { @@ -306,7 +299,6 @@ impl F32Ext for f32 { } } - #[cfg(todo)] #[inline] fn acosh(self) -> Self { match self { @@ -315,7 +307,6 @@ impl F32Ext for f32 { } } - #[cfg(todo)] #[inline] fn atanh(self) -> Self { 0.5 * ((2.0 * self) / (1.0 - self)).ln_1p() @@ -325,7 +316,7 @@ impl F32Ext for f32 { /// Math support for `f64` /// /// This trait is sealed and cannot be implemented outside of `libm`. 
-pub trait F64Ext: private::Sealed { +pub trait F64Ext: private::Sealed + Sized { fn floor(self) -> Self; fn ceil(self) -> Self; @@ -336,20 +327,17 @@ pub trait F64Ext: private::Sealed { fn fdim(self, rhs: Self) -> Self; - #[cfg(todo)] fn fract(self) -> Self; fn abs(self) -> Self; - #[cfg(todo)] - fn signum(self) -> Self; + // NOTE depends on unstable intrinsics::copysignf64 + // fn signum(self) -> Self; fn mul_add(self, a: Self, b: Self) -> Self; - #[cfg(todo)] fn div_euc(self, rhs: Self) -> Self; - #[cfg(todo)] fn mod_euc(self, rhs: Self) -> Self; // NOTE depends on unstable intrinsics::powif64 @@ -382,7 +370,6 @@ pub trait F64Ext: private::Sealed { fn tan(self) -> Self; - #[cfg(todo)] fn asin(self) -> Self; fn acos(self) -> Self; @@ -393,9 +380,11 @@ pub trait F64Ext: private::Sealed { #[cfg(todo)] fn atan2(self, other: Self) -> Self; - #[cfg(todo)] #[inline] - fn sin_cos(self) -> (Self, Self) { + fn sin_cos(self) -> (Self, Self) + where + Self: Copy, + { (self.sin(), self.cos()) } @@ -410,13 +399,10 @@ pub trait F64Ext: private::Sealed { fn tanh(self) -> Self; - #[cfg(todo)] fn asinh(self) -> Self; - #[cfg(todo)] fn acosh(self) -> Self; - #[cfg(todo)] fn atanh(self) -> Self; } @@ -445,7 +431,7 @@ impl F64Ext for f64 { fn fdim(self, rhs: Self) -> Self { fdim(self, rhs) } - #[cfg(todo)] + #[inline] fn fract(self) -> Self { self - self.trunc() @@ -461,7 +447,6 @@ impl F64Ext for f64 { fma(self, a, b) } - #[cfg(todo)] #[inline] fn div_euc(self, rhs: Self) -> Self { let q = (self / rhs).trunc(); @@ -471,9 +456,8 @@ impl F64Ext for f64 { q } - #[cfg(todo)] #[inline] - fn mod_euc(self, rhs: f32) -> f32 { + fn mod_euc(self, rhs: f64) -> f64 { let r = self % rhs; if r < 0.0 { r + rhs.abs() @@ -548,7 +532,6 @@ impl F64Ext for f64 { tan(self) } - #[cfg(todo)] #[inline] fn asin(self) -> Self { asin(self) @@ -597,7 +580,6 @@ impl F64Ext for f64 { tanh(self) } - #[cfg(todo)] #[inline] fn asinh(self) -> Self { if self == f64::NEG_INFINITY { @@ -607,7 +589,6 @@ impl F64Ext for 
f64 { } } - #[cfg(todo)] #[inline] fn acosh(self) -> Self { match self { @@ -616,7 +597,6 @@ impl F64Ext for f64 { } } - #[cfg(todo)] #[inline] fn atanh(self) -> Self { 0.5 * ((2.0 * self) / (1.0 - self)).ln_1p() diff --git a/libm/src/math/acosf.rs b/libm/src/math/acosf.rs index ff07fe935..469601cab 100644 --- a/libm/src/math/acosf.rs +++ b/libm/src/math/acosf.rs @@ -22,6 +22,7 @@ const P_S1: f32 = -4.2743422091e-02; const P_S2: f32 = -8.6563630030e-03; const Q_S1: f32 = -7.0662963390e-01; +#[inline] fn r(z: f32) -> f32 { let p = z * (P_S0 + z * (P_S1 + z * P_S2)); let q = 1. + z * Q_S1; diff --git a/libm/src/math/asin.rs b/libm/src/math/asin.rs index 720169bdc..a0bb4918c 100644 --- a/libm/src/math/asin.rs +++ b/libm/src/math/asin.rs @@ -55,12 +55,14 @@ const Q_S2: f64 = 2.02094576023350569471e+00; /* 0x40002AE5, 0x9C598AC8 */ const Q_S3: f64 = -6.88283971605453293030e-01; /* 0xBFE6066C, 0x1B8D0159 */ const Q_S4: f64 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ +#[inline] fn comp_r(z: f64) -> f64 { let p = z * (P_S0 + z * (P_S1 + z * (P_S2 + z * (P_S3 + z * (P_S4 + z * P_S5))))); let q = 1.0 + z * (Q_S1 + z * (Q_S2 + z * (Q_S3 + z * Q_S4))); return p / q; } +#[inline] pub fn asin(mut x: f64) -> f64 { let z: f64; let r: f64; diff --git a/libm/src/math/asinf.rs b/libm/src/math/asinf.rs index 86944bb00..79c85d81d 100644 --- a/libm/src/math/asinf.rs +++ b/libm/src/math/asinf.rs @@ -24,6 +24,7 @@ const P_S1: f32 = -4.2743422091e-02; const P_S2: f32 = -8.6563630030e-03; const Q_S1: f32 = -7.0662963390e-01; +#[inline] fn r(z: f32) -> f32 { let p = z * (P_S0 + z * (P_S1 + z * P_S2)); let q = 1. 
+ z * Q_S1; diff --git a/libm/src/math/cos.rs b/libm/src/math/cos.rs index e6e9b3736..df16b5c36 100644 --- a/libm/src/math/cos.rs +++ b/libm/src/math/cos.rs @@ -41,6 +41,7 @@ use super::{k_cos, k_sin, rem_pio2}; // Accuracy: // TRIG(x) returns trig(x) nearly rounded // +#[inline] pub fn cos(x: f64) -> f64 { let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; diff --git a/libm/src/math/expm1.rs b/libm/src/math/expm1.rs index a00396524..9da064ee7 100644 --- a/libm/src/math/expm1.rs +++ b/libm/src/math/expm1.rs @@ -23,7 +23,7 @@ const Q3: f64 = -7.93650757867487942473e-05; /* BF14CE19 9EAADBB7 */ const Q4: f64 = 4.00821782732936239552e-06; /* 3ED0CFCA 86E65239 */ const Q5: f64 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ -#[allow(warnings)] +#[inline] pub fn expm1(mut x: f64) -> f64 { let hi: f64; let lo: f64; diff --git a/libm/src/math/expo2.rs b/libm/src/math/expo2.rs index 8a121b0a2..39f9815c4 100644 --- a/libm/src/math/expo2.rs +++ b/libm/src/math/expo2.rs @@ -1,7 +1,8 @@ use super::{combine_words, exp}; /* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */ -pub(crate) fn expo2(x: f64) -> f64 { +#[inline] +pub fn expo2(x: f64) -> f64 { /* k is such that k*ln2 has minimal relative error and x - kln2 > log(DBL_MIN) */ const K: i32 = 2043; let kln2 = f64::from_bits(0x40962066151add8b); diff --git a/libm/src/math/fenv.rs b/libm/src/math/fenv.rs index 652e60324..63bb20368 100644 --- a/libm/src/math/fenv.rs +++ b/libm/src/math/fenv.rs @@ -1,33 +1,33 @@ // src: musl/src/fenv/fenv.c /* Dummy functions for archs lacking fenv implementation */ -pub(crate) const FE_UNDERFLOW: i32 = 0; -pub(crate) const FE_INEXACT: i32 = 0; +pub const FE_UNDERFLOW: i32 = 0; +pub const FE_INEXACT: i32 = 0; -pub(crate) const FE_TONEAREST: i32 = 0; -pub(crate) const FE_TOWARDZERO: i32 = 0; +pub const FE_TONEAREST: i32 = 0; +pub const FE_TOWARDZERO: i32 = 0; #[inline] -pub(crate) fn feclearexcept(_mask: i32) -> i32 { +pub fn feclearexcept(_mask: i32) -> i32 
{ 0 } #[inline] -pub(crate) fn feraiseexcept(_mask: i32) -> i32 { +pub fn feraiseexcept(_mask: i32) -> i32 { 0 } #[inline] -pub(crate) fn fetestexcept(_mask: i32) -> i32 { +pub fn fetestexcept(_mask: i32) -> i32 { 0 } #[inline] -pub(crate) fn fegetround() -> i32 { +pub fn fegetround() -> i32 { FE_TONEAREST } #[inline] -pub(crate) fn fesetround(_r: i32) -> i32 { +pub fn fesetround(_r: i32) -> i32 { 0 } diff --git a/libm/src/math/k_cosf.rs b/libm/src/math/k_cosf.rs index 75579ef9b..4aa10c0f0 100644 --- a/libm/src/math/k_cosf.rs +++ b/libm/src/math/k_cosf.rs @@ -21,7 +21,7 @@ const C2: f64 = -0.00138867637746099294692; /* -0x16c087e80f1e27.0p-62 */ const C3: f64 = 0.0000243904487962774090654; /* 0x199342e0ee5069.0p-68 */ #[inline] -pub(crate) fn k_cosf(x: f64) -> f32 { +pub fn k_cosf(x: f64) -> f32 { let z = x * x; let w = z * z; let r = C2 + z * C3; diff --git a/libm/src/math/k_expo2f.rs b/libm/src/math/k_expo2f.rs index e2eaa4e6b..ec2a2c5e2 100644 --- a/libm/src/math/k_expo2f.rs +++ b/libm/src/math/k_expo2f.rs @@ -5,7 +5,7 @@ const K: i32 = 235; /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ #[inline] -pub(crate) fn k_expo2f(x: f32) -> f32 { +pub fn k_expo2f(x: f32) -> f32 { let k_ln2 = f32::from_bits(0x4322e3bc); /* note that k is odd and scale*scale overflows */ let scale = f32::from_bits(((0x7f + K / 2) as u32) << 23); diff --git a/libm/src/math/k_sinf.rs b/libm/src/math/k_sinf.rs index bb519c9c0..1c5f5f98a 100644 --- a/libm/src/math/k_sinf.rs +++ b/libm/src/math/k_sinf.rs @@ -21,7 +21,7 @@ const S3: f64 = -0.000198393348360966317347; /* -0x1a00f9e2cae774.0p-65 */ const S4: f64 = 0.0000027183114939898219064; /* 0x16cd878c3b46a7.0p-71 */ #[inline] -pub(crate) fn k_sinf(x: f64) -> f32 { +pub fn k_sinf(x: f64) -> f32 { let z = x * x; let w = z * z; let r = S3 + z * S4; diff --git a/libm/src/math/k_tan.rs b/libm/src/math/k_tan.rs index b0dd317a2..e9ba21499 100644 --- a/libm/src/math/k_tan.rs +++ b/libm/src/math/k_tan.rs @@ -58,7 
+58,8 @@ static T: [f64; 13] = [ const PIO4: f64 = 7.85398163397448278999e-01; /* 3FE921FB, 54442D18 */ const PIO4_LO: f64 = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */ -pub(crate) fn k_tan(mut x: f64, mut y: f64, odd: i32) -> f64 { +#[inline] +pub fn k_tan(mut x: f64, mut y: f64, odd: i32) -> f64 { let hx = (f64::to_bits(x) >> 32) as u32; let big = (hx & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */ if big { diff --git a/libm/src/math/k_tanf.rs b/libm/src/math/k_tanf.rs index 6d3da1dcc..b9ccf2570 100644 --- a/libm/src/math/k_tanf.rs +++ b/libm/src/math/k_tanf.rs @@ -20,7 +20,7 @@ const T: [f64; 6] = [ ]; #[inline] -pub(crate) fn k_tanf(x: f64, odd: bool) -> f32 { +pub fn k_tanf(x: f64, odd: bool) -> f32 { let z = x * x; /* * Split up the polynomial into small independent terms to give diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 47bab6e65..6e655e7d4 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -42,12 +42,14 @@ const PIO2_3T: f64 = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ // use rem_pio2_large() for large x // // caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ +#[inline] pub fn rem_pio2(x: f64) -> (i32, f64, f64) { let x1p24 = f64::from_bits(0x4170000000000000); let sign = (f64::to_bits(x) >> 63) as i32; let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; + #[inline] fn medium(x: f64, ix: u32) -> (i32, f64, f64) { /* rint(x/(pi/2)), Assume round-to-nearest. */ let f_n = x as f64 * INV_PIO2 + TO_INT - TO_INT; diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index 462eb0345..745b700a5 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -222,7 +222,7 @@ const PIO2: [f64; 8] = [ /// more accurately, = 0 mod 8 ). Thus the number of operations are /// independent of the exponent of the input. 
#[inline] -pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { +pub fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24 let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24) diff --git a/libm/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs index e2368d93d..5e7a7d439 100644 --- a/libm/src/math/rem_pio2f.rs +++ b/libm/src/math/rem_pio2f.rs @@ -32,7 +32,7 @@ const PIO2_1T: f64 = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ /// use double precision for everything except passing x /// use __rem_pio2_large() for large x #[inline] -pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) { +pub fn rem_pio2f(x: f32) -> (i32, f64) { let x64 = x as f64; let mut tx: [f64; 1] = [0.]; diff --git a/libm/src/math/sin.rs b/libm/src/math/sin.rs index 13eb30248..e749094e6 100644 --- a/libm/src/math/sin.rs +++ b/libm/src/math/sin.rs @@ -40,6 +40,7 @@ use super::{k_cos, k_sin, rem_pio2}; // // Accuracy: // TRIG(x) returns trig(x) nearly rounded +#[inline] pub fn sin(x: f64) -> f64 { let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 diff --git a/libm/src/math/sinh.rs b/libm/src/math/sinh.rs index 057987171..684e8e309 100644 --- a/libm/src/math/sinh.rs +++ b/libm/src/math/sinh.rs @@ -4,6 +4,7 @@ use super::{expm1, expo2}; // = (exp(x)-1 + (exp(x)-1)/exp(x))/2 // = x + x^3/6 + o(x^5) // +#[inline] pub fn sinh(x: f64) -> f64 { // union {double f; uint64_t i;} u = {.f = x}; // uint32_t w; diff --git a/libm/src/math/tan.rs b/libm/src/math/tan.rs index 92bbb6221..5a5f178a5 100644 --- a/libm/src/math/tan.rs +++ b/libm/src/math/tan.rs @@ -39,6 +39,7 @@ use super::{k_tan, rem_pio2}; // // Accuracy: // TRIG(x) returns trig(x) nearly rounded +#[inline] pub fn tan(x: f64) -> f64 { let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 diff --git a/libm/src/math/tanh.rs b/libm/src/math/tanh.rs index 
657d7e0e2..1c3dd0be4 100644 --- a/libm/src/math/tanh.rs +++ b/libm/src/math/tanh.rs @@ -4,6 +4,7 @@ use super::expm1; * = (exp(2*x) - 1)/(exp(2*x) - 1 + 2) * = (1 - exp(-2*x))/(exp(-2*x) - 1 + 2) */ +#[inline] pub fn tanh(mut x: f64) -> f64 { let mut uf: f64 = x; let mut ui: u64 = f64::to_bits(uf); From 1f4d0d4842d43dd786aee3c6ccb344f11a13bc5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Kirch?= Date: Sun, 15 Jul 2018 19:49:12 -0300 Subject: [PATCH 0096/1459] implement cosh --- libm/src/lib.rs | 2 -- libm/src/math/cosh.rs | 34 ++++++++++++++++++++++++++++++++++ libm/src/math/k_expo2.rs | 14 ++++++++++++++ libm/src/math/mod.rs | 4 ++++ 4 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 libm/src/math/cosh.rs create mode 100644 libm/src/math/k_expo2.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 45213a25f..3a1785357 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -407,7 +407,6 @@ pub trait F64Ext: private::Sealed { fn sinh(self) -> Self; - #[cfg(todo)] fn cosh(self) -> Self; #[cfg(todo)] @@ -589,7 +588,6 @@ impl F64Ext for f64 { sinh(self) } - #[cfg(todo)] #[inline] fn cosh(self) -> Self { cosh(self) diff --git a/libm/src/math/cosh.rs b/libm/src/math/cosh.rs new file mode 100644 index 000000000..54c0a61dc --- /dev/null +++ b/libm/src/math/cosh.rs @@ -0,0 +1,34 @@ +use super::exp; +use super::expm1; +use super::k_expo2; + +#[inline] +pub fn cosh(mut x: f64) -> f64 { + /* |x| */ + let mut ix = x.to_bits(); + ix &= 0x7fffffffffffffff; + x = f64::from_bits(ix); + let w = ix >> 32; + let w = w as u32; + + /* |x| < log(2) */ + if w < 0x3fe62e42 { + if w < 0x3ff00000 - (26 << 20) { + let x1p120 = f64::from_bits(0x4770000000000000); + force_eval!(x + x1p120); + return 1.; + } + let t = expm1(x); // exponential minus 1 + return 1. + t * t / (2. * (1. + t)); + } + + /* |x| < log(DBL_MAX) */ + if w < 0x40862e42 { + let t = exp(x); + /* note: if x>log(0x1p26) then the 1/t is not needed */ + return 0.5 * (t + 1. 
/ t); + } + + /* |x| > log(DBL_MAX) or nan */ + k_expo2(x) +} diff --git a/libm/src/math/k_expo2.rs b/libm/src/math/k_expo2.rs new file mode 100644 index 000000000..7a4045dfc --- /dev/null +++ b/libm/src/math/k_expo2.rs @@ -0,0 +1,14 @@ +use super::exp; + +/* k is such that k*ln2 has minimal relative error and x - kln2 > log(FLT_MIN) */ +const K: i64 = 2043; + +/* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ +#[inline] +pub(crate) fn k_expo2(x: f64) -> f64 { + let k_ln2 = f64::from_bits(0x40962066151add8b); + /* note that k is odd and scale*scale overflows */ + let scale = f64::from_bits(((0x3ff + K / 2) as u64) << 20); + /* exp(x - k ln2) * 2**(k-1) */ + exp(x - k_ln2) * scale * scale +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index eda6b5b72..3b3822711 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -19,6 +19,7 @@ mod ceil; mod ceilf; mod cos; mod cosf; +mod cosh; mod coshf; mod exp; mod exp2; @@ -75,6 +76,7 @@ pub use self::ceil::ceil; pub use self::ceilf::ceilf; pub use self::cos::cos; pub use self::cosf::cosf; +pub use self::cosh::cosh; pub use self::coshf::coshf; pub use self::exp::exp; pub use self::exp2::exp2; @@ -122,6 +124,7 @@ pub use self::truncf::truncf; mod expo2; mod k_cos; mod k_cosf; +mod k_expo2; mod k_expo2f; mod k_sin; mod k_sinf; @@ -135,6 +138,7 @@ mod rem_pio2f; use self::expo2::expo2; use self::k_cos::k_cos; use self::k_cosf::k_cosf; +use self::k_expo2::k_expo2; use self::k_expo2f::k_expo2f; use self::k_sin::k_sin; use self::k_sinf::k_sinf; From 7648d1b2409a7feb92b3bf07b563df83916ddbb4 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Sun, 15 Jul 2018 18:13:09 -0500 Subject: [PATCH 0097/1459] enable tests for cosh --- libm/test-generator/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 7caada4ef..897f16d6c 100644 --- a/libm/test-generator/src/main.rs +++ 
b/libm/test-generator/src/main.rs @@ -703,7 +703,7 @@ f64_f64! { cbrt, ceil, cos, - // cosh, + cosh, exp, exp2, expm1, From acd36c13e434bf6a4c3d59f5465bc817697c494f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Kirch?= Date: Sun, 15 Jul 2018 21:35:08 -0300 Subject: [PATCH 0098/1459] unused cast removed --- libm/src/math/cosh.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/libm/src/math/cosh.rs b/libm/src/math/cosh.rs index 54c0a61dc..f3f7fbfbe 100644 --- a/libm/src/math/cosh.rs +++ b/libm/src/math/cosh.rs @@ -9,7 +9,6 @@ pub fn cosh(mut x: f64) -> f64 { ix &= 0x7fffffffffffffff; x = f64::from_bits(ix); let w = ix >> 32; - let w = w as u32; /* |x| < log(2) */ if w < 0x3fe62e42 { From cf56ba2864846f4e77abda64990daed069a1b10f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Kirch?= Date: Sun, 15 Jul 2018 21:36:22 -0300 Subject: [PATCH 0099/1459] words insertion on k_expo2 --- libm/src/math/k_expo2.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libm/src/math/k_expo2.rs b/libm/src/math/k_expo2.rs index 7a4045dfc..e295c7a53 100644 --- a/libm/src/math/k_expo2.rs +++ b/libm/src/math/k_expo2.rs @@ -1,14 +1,14 @@ use super::exp; /* k is such that k*ln2 has minimal relative error and x - kln2 > log(FLT_MIN) */ -const K: i64 = 2043; +const K: i32 = 2043; /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ #[inline] pub(crate) fn k_expo2(x: f64) -> f64 { let k_ln2 = f64::from_bits(0x40962066151add8b); /* note that k is odd and scale*scale overflows */ - let scale = f64::from_bits(((0x3ff + K / 2) as u64) << 20); + let scale = f64::from_bits(((((0x3ff + K / 2) as u32) << 20) as u64) << 32); /* exp(x - k ln2) * 2**(k-1) */ exp(x - k_ln2) * scale * scale } From f2e56aa98e0cfbec353ba1b3eb9b3f6bd52094d3 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Mon, 16 Jul 2018 13:03:40 -0500 Subject: [PATCH 0100/1459] test edge cases --- libm/ci/script.sh | 5 -- libm/test-generator/Cargo.toml | 1 + 
libm/test-generator/src/main.rs | 92 ++++++++++++++++++++++++--------- 3 files changed, 70 insertions(+), 28 deletions(-) diff --git a/libm/ci/script.sh b/libm/ci/script.sh index f2a294b48..cf37ac1ca 100644 --- a/libm/ci/script.sh +++ b/libm/ci/script.sh @@ -15,11 +15,6 @@ main() { # generate tests cargo run --package test-generator --target x86_64-unknown-linux-musl - if cargo fmt --version >/dev/null 2>&1; then - # nicer syntax error messages (if any) - cargo fmt - fi - # run tests cross test --target $TARGET --release diff --git a/libm/test-generator/Cargo.toml b/libm/test-generator/Cargo.toml index f45d173b4..b810d9daf 100644 --- a/libm/test-generator/Cargo.toml +++ b/libm/test-generator/Cargo.toml @@ -6,3 +6,4 @@ publish = false [dependencies] rand = "0.5.3" +itertools = "0.7.8" diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 7c48031c1..e22e20186 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -4,13 +4,15 @@ // NOTE usually the only thing you need to do to test a new math function is to add it to one of the // macro invocations found in the bottom of this file. +#[macro_use] +extern crate itertools; extern crate rand; use std::error::Error; use std::fmt::Write as _0; use std::fs::{self, File}; use std::io::Write as _1; -use std::{i16, u16, u32, u64, u8}; +use std::{f32, f64, i16, u16, u32, u64, u8}; use rand::{Rng, SeedableRng, XorShiftRng}; @@ -34,6 +36,30 @@ fn f64(rng: &mut XorShiftRng) -> f64 { f64::from_bits(sign + exponent + mantissa) } +const EDGE_CASES32: &[f32] = &[ + -0., + 0., + f32::EPSILON, + f32::INFINITY, + f32::MAX, + f32::MIN, + f32::MIN_POSITIVE, + f32::NAN, + f32::NEG_INFINITY, +]; + +const EDGE_CASES64: &[f64] = &[ + -0., + 0., + f64::EPSILON, + f64::INFINITY, + f64::MAX, + f64::MIN, + f64::MIN_POSITIVE, + f64::NAN, + f64::NEG_INFINITY, +]; + // fn(f32) -> f32 macro_rules! f32_f32 { ($($intr:ident,)*) => { @@ -45,8 +71,9 @@ macro_rules! 
f32_f32 { $( let mut cases = String::new(); - for _ in 0..NTESTS { - let inp = f32(rng); + + // random inputs + for inp in EDGE_CASES32.iter().cloned().chain((0..NTESTS).map(|_| f32(rng))) { let out = unsafe { $intr(inp) }; let inp = inp.to_bits(); @@ -112,11 +139,17 @@ macro_rules! f32f32_f32 { $(fn $intr(_: f32, _: f32) -> f32;)* } + let mut rng2 = rng.clone(); + let mut rng3 = rng.clone(); $( let mut cases = String::new(); - for _ in 0..NTESTS { - let i1 = f32(rng); - let i2 = f32(rng); + for (i1, i2) in iproduct!( + EDGE_CASES32.iter().cloned(), + EDGE_CASES32.iter().cloned() + ).chain(EDGE_CASES32.iter().map(|i1| (*i1, f32(rng)))) + .chain(EDGE_CASES32.iter().map(|i2| (f32(&mut rng2), *i2))) + .chain((0..NTESTS).map(|_| (f32(&mut rng3), f32(&mut rng3)))) + { let out = unsafe { $intr(i1, i2) }; let i1 = i1.to_bits(); @@ -186,12 +219,16 @@ macro_rules! f32f32f32_f32 { $(fn $intr(_: f32, _: f32, _: f32) -> f32;)* } + let mut rng2 = rng.clone(); $( let mut cases = String::new(); - for _ in 0..NTESTS { - let i1 = f32(rng); - let i2 = f32(rng); - let i3 = f32(rng); + for (i1, i2, i3) in iproduct!( + EDGE_CASES32.iter().cloned(), + EDGE_CASES32.iter().cloned(), + EDGE_CASES32.iter().cloned() + ).chain(EDGE_CASES32.iter().map(|i1| (*i1, f32(rng), f32(rng)))) + .chain((0..NTESTS).map(|_| (f32(&mut rng2), f32(&mut rng2), f32(&mut rng2)))) + { let out = unsafe { $intr(i1, i2, i3) }; let i1 = i1.to_bits(); @@ -266,10 +303,10 @@ macro_rules! f32i32_f32 { $(fn $intr(_: f32, _: i32) -> f32;)* } + let mut rng2 = rng.clone(); $( let mut cases = String::new(); - for _ in 0..NTESTS { - let i1 = f32(rng); + for i1 in EDGE_CASES32.iter().cloned().chain((0..NTESTS).map(|_| f32(&mut rng2))) { let i2 = rng.gen_range(i16::MIN, i16::MAX); let out = unsafe { $intr(i1, i2 as i32) }; @@ -342,8 +379,7 @@ macro_rules! 
f64_f64 { $( let mut cases = String::new(); - for _ in 0..NTESTS { - let inp = f64(rng); + for inp in EDGE_CASES64.iter().cloned().chain((0..NTESTS).map(|_| f64(rng))) { let out = unsafe { $intr(inp) }; let inp = inp.to_bits(); @@ -412,11 +448,17 @@ macro_rules! f64f64_f64 { $(fn $intr(_: f64, _: f64) -> f64;)* } + let mut rng2 = rng.clone(); + let mut rng3 = rng.clone(); $( let mut cases = String::new(); - for _ in 0..NTESTS { - let i1 = f64(rng); - let i2 = f64(rng); + for (i1, i2) in iproduct!( + EDGE_CASES64.iter().cloned(), + EDGE_CASES64.iter().cloned() + ).chain(EDGE_CASES64.iter().map(|i1| (*i1, f64(rng)))) + .chain(EDGE_CASES64.iter().map(|i2| (f64(&mut rng2), *i2))) + .chain((0..NTESTS).map(|_| (f64(&mut rng3), f64(&mut rng3)))) + { let out = unsafe { $intr(i1, i2) }; let i1 = i1.to_bits(); @@ -485,12 +527,16 @@ macro_rules! f64f64f64_f64 { $(fn $intr(_: f64, _: f64, _: f64) -> f64;)* } + let mut rng2 = rng.clone(); $( let mut cases = String::new(); - for _ in 0..NTESTS { - let i1 = f64(rng); - let i2 = f64(rng); - let i3 = f64(rng); + for (i1, i2, i3) in iproduct!( + EDGE_CASES64.iter().cloned(), + EDGE_CASES64.iter().cloned(), + EDGE_CASES64.iter().cloned() + ).chain(EDGE_CASES64.iter().map(|i1| (*i1, f64(rng), f64(rng)))) + .chain((0..NTESTS).map(|_| (f64(&mut rng2), f64(&mut rng2), f64(&mut rng2)))) + { let out = unsafe { $intr(i1, i2, i3) }; let i1 = i1.to_bits(); @@ -565,10 +611,10 @@ macro_rules! 
f64i32_f64 { $(fn $intr(_: f64, _: i32) -> f64;)* } + let mut rng2 = rng.clone(); $( let mut cases = String::new(); - for _ in 0..NTESTS { - let i1 = f64(rng); + for i1 in EDGE_CASES64.iter().cloned().chain((0..NTESTS).map(|_| f64(&mut rng2))) { let i2 = rng.gen_range(i16::MIN, i16::MAX); let out = unsafe { $intr(i1, i2 as i32) }; From 83fb3fc40b43d784baecc92ffab95eaf99ce6627 Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Mon, 16 Jul 2018 17:56:14 +0100 Subject: [PATCH 0101/1459] Implement atan --- libm/src/lib.rs | 2 - libm/src/math/atan.rs | 170 ++++++++++++++++++++++++++++++++ libm/src/math/mod.rs | 2 + libm/test-generator/src/main.rs | 2 +- 4 files changed, 173 insertions(+), 3 deletions(-) create mode 100644 libm/src/math/atan.rs diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 38ebeac41..0e1ed61d2 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -374,7 +374,6 @@ pub trait F64Ext: private::Sealed + Sized { fn acos(self) -> Self; - #[cfg(todo)] fn atan(self) -> Self; #[cfg(todo)] @@ -541,7 +540,6 @@ impl F64Ext for f64 { acos(self) } - #[cfg(todo)] #[inline] fn atan(self) -> Self { atan(self) diff --git a/libm/src/math/atan.rs b/libm/src/math/atan.rs new file mode 100644 index 000000000..d057af6d6 --- /dev/null +++ b/libm/src/math/atan.rs @@ -0,0 +1,170 @@ +/* atan(x) + * Method + * 1. Reduce x to positive by atan(x) = -atan(-x). + * 2. According to the integer k=4t+0.25 chopped, t=x, the argument + * is further reduced to one of the following intervals and the + * arctangent of t is evaluated by the corresponding formula: + * + * [0,7/16] atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...) + * [7/16,11/16] atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) ) + * [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) ) + * [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) ) + * [39/16,INF] atan(x) = atan(INF) + atan( -1/t ) + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. 
The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + */ + +use super::fabs; +use core::f64; + +const ATANHI: [f64; 4] = [ + 4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */ + 7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */ + 9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */ + 1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */ +]; + +const ATANLO: [f64; 4] = [ + 2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */ + 3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */ + 1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */ + 6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */ +]; + +const AT: [f64; 11] = [ + 3.33333333333329318027e-01, /* 0x3FD55555, 0x5555550D */ + -1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */ + 1.42857142725034663711e-01, /* 0x3FC24924, 0x920083FF */ + -1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */ + 9.09088713343650656196e-02, /* 0x3FB745CD, 0xC54C206E */ + -7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */ + 6.66107313738753120669e-02, /* 0x3FB10D66, 0xA0D03D51 */ + -5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */ + 4.97687799461593236017e-02, /* 0x3FA97B4B, 0x24760DEB */ + -3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */ + 1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */ +]; + +#[inline] +pub fn atan(x: f64) -> f64 { + let mut x = x; + let mut ix = (x.to_bits() >> 32) as u32; + let sign = ix >> 31; + ix &= 0x7fff_ffff; + if ix >= 0x4410_0000 { + if x.is_nan() { + return x; + } + + let z = ATANHI[3] + f64::from_bits(0x0380_0000); // 0x1p-120f + return if sign != 0 { -z } else { z }; + } + + let id = if ix < 0x3fdc_0000 { + /* |x| < 0.4375 */ + if ix < 0x3e40_0000 { + /* |x| < 2^-27 */ + if ix < 0x0010_0000 { + /* raise underflow for subnormal 
x */ + force_eval!(x as f32); + } + + return x; + } + + -1 + } else { + x = fabs(x); + if ix < 0x3ff30000 { + /* |x| < 1.1875 */ + if ix < 0x3fe60000 { + /* 7/16 <= |x| < 11/16 */ + x = (2. * x - 1.) / (2. + x); + 0 + } else { + /* 11/16 <= |x| < 19/16 */ + x = (x - 1.) / (x + 1.); + 1 + } + } else { + if ix < 0x40038000 { + /* |x| < 2.4375 */ + x = (x - 1.5) / (1. + 1.5 * x); + 2 + } else { + /* 2.4375 <= |x| < 2^66 */ + x = -1. / x; + 3 + } + } + }; + + let z = x * x; + let w = z * z; + /* break sum from i=0 to 10 AT[i]z**(i+1) into odd and even poly */ + let s1 = z * (AT[0] + w * (AT[2] + w * (AT[4] + w * (AT[6] + w * (AT[8] + w * AT[10]))))); + let s2 = w * (AT[1] + w * (AT[3] + w * (AT[5] + w * (AT[7] + w * AT[9])))); + + if id < 0 { + return x - x * (s1 + s2); + } + + let z = ATANHI[id as usize] - (x * (s1 + s2) - ATANLO[id as usize] - x); + + if sign != 0 { + -z + } else { + z + } +} + +#[cfg(test)] +mod tests { + use super::atan; + use core::f64; + + #[test] + fn sanity_check() { + for (input, answer) in [ + (3.0_f64.sqrt() / 3.0, f64::consts::FRAC_PI_6), + (1.0, f64::consts::FRAC_PI_4), + (3.0_f64.sqrt(), f64::consts::FRAC_PI_3), + (-3.0_f64.sqrt() / 3.0, -f64::consts::FRAC_PI_6), + (-1.0, -f64::consts::FRAC_PI_4), + (-3.0_f64.sqrt(), -f64::consts::FRAC_PI_3), + ].iter() + { + assert!( + (atan(*input) - answer) / answer < 1e-5, + "\natan({:.4}/16) = {:.4}, actual: {}", + input * 16.0, + answer, + atan(*input) + ); + } + } + + #[test] + fn zero() { + assert_eq!(atan(0.0), 0.0); + } + + #[test] + fn infinity() { + assert_eq!(atan(f64::INFINITY), f64::consts::FRAC_PI_2); + } + + #[test] + fn minus_infinity() { + assert_eq!(atan(f64::NEG_INFINITY), -f64::consts::FRAC_PI_2); + } + + #[test] + fn nan() { + assert!(atan(f64::NAN).is_nan()); + } +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 352e1d37b..64ab3faa7 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -11,6 +11,7 @@ mod acos; mod acosf; mod asin; mod asinf; +mod atan; mod 
atan2f; mod atanf; mod cbrt; @@ -70,6 +71,7 @@ pub use self::acos::acos; pub use self::acosf::acosf; pub use self::asin::asin; pub use self::asinf::asinf; +pub use self::atan::atan; pub use self::atan2f::atan2f; pub use self::atanf::atanf; pub use self::cbrt::cbrt; diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index e22e20186..a3efd0769 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -745,7 +745,7 @@ f32i32_f32! { f64_f64! { acos, asin, - // atan, + atan, cbrt, ceil, cos, From 58db1cd358b3a40f74a4c796d50d919645c620d9 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Mon, 16 Jul 2018 18:46:59 -0500 Subject: [PATCH 0102/1459] fix rebase error --- libm/src/math/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 3f6fa3211..f3f533ddd 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -10,6 +10,7 @@ macro_rules! force_eval { mod acos; mod acosf; mod asin; +mod asinf; mod atan2; mod atan2f; mod atanf; From 0cc3b6bc6a017838068f9aa96e93a9128f6d1331 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Mon, 16 Jul 2018 18:54:19 -0500 Subject: [PATCH 0103/1459] fix another rebase error --- libm/src/math/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index f3f533ddd..49f5059dc 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -11,6 +11,7 @@ mod acos; mod acosf; mod asin; mod asinf; +mod atan; mod atan2; mod atan2f; mod atanf; From d431acc08269c600a20d9f79b30cc8cb764908ba Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Mon, 16 Jul 2018 20:03:30 -0500 Subject: [PATCH 0104/1459] cargo fmt --- libm/src/math/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 49f5059dc..754efa9f6 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -72,8 +72,8 @@ pub use self::acos::acos; pub use 
self::acosf::acosf; pub use self::asin::asin; pub use self::asinf::asinf; -pub use self::atan2::atan2; pub use self::atan::atan; +pub use self::atan2::atan2; pub use self::atan2f::atan2f; pub use self::atanf::atanf; pub use self::cbrt::cbrt; From abb0506e38c7980ab52bba6800fb2292b5c811ae Mon Sep 17 00:00:00 2001 From: Joseph Ryan Date: Mon, 16 Jul 2018 21:18:38 -0500 Subject: [PATCH 0105/1459] Add unit tests for atan2 --- libm/src/math/atan2.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/libm/src/math/atan2.rs b/libm/src/math/atan2.rs index 324b1e937..1816bca5c 100644 --- a/libm/src/math/atan2.rs +++ b/libm/src/math/atan2.rs @@ -70,3 +70,14 @@ pub fn atan2(y: f64, x: f64) -> f64 { _ => (z - PI_LO) - PI, /* atan(-,-) */ } } + +#[test] +fn sanity_check() { + assert_eq!(atan2(0.0, 1.0), 0.0); + assert_eq!(atan2(0.0, -1.0), PI); + assert_eq!(atan2(-0.0, -1.0), -PI); + assert_eq!(atan2(3.0, 2.0), atan(3.0/2.0)); + assert_eq!(atan2(2.0, -1.0), atan(2.0/-1.0) + PI); + assert_eq!(atan2(-2.0, -1.0), atan(-2.0/-1.0) - PI); +} + From f75394bbd6df8653ad32c5897c7c867617798e74 Mon Sep 17 00:00:00 2001 From: Joseph Ryan Date: Mon, 16 Jul 2018 21:22:35 -0500 Subject: [PATCH 0106/1459] rustfmt --- libm/src/math/atan2.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libm/src/math/atan2.rs b/libm/src/math/atan2.rs index 1816bca5c..a719b6d63 100644 --- a/libm/src/math/atan2.rs +++ b/libm/src/math/atan2.rs @@ -76,8 +76,7 @@ fn sanity_check() { assert_eq!(atan2(0.0, 1.0), 0.0); assert_eq!(atan2(0.0, -1.0), PI); assert_eq!(atan2(-0.0, -1.0), -PI); - assert_eq!(atan2(3.0, 2.0), atan(3.0/2.0)); - assert_eq!(atan2(2.0, -1.0), atan(2.0/-1.0) + PI); - assert_eq!(atan2(-2.0, -1.0), atan(-2.0/-1.0) - PI); + assert_eq!(atan2(3.0, 2.0), atan(3.0 / 2.0)); + assert_eq!(atan2(2.0, -1.0), atan(2.0 / -1.0) + PI); + assert_eq!(atan2(-2.0, -1.0), atan(-2.0 / -1.0) - PI); } - From 5097847fbee6ea8eea1a6bfed5d7795b1baa7f88 Mon Sep 17 00:00:00 2001 From: Rahul 
Butani Date: Tue, 17 Jul 2018 14:56:11 -0500 Subject: [PATCH 0107/1459] pow! --- libm/src/math/pow.rs | 395 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 395 insertions(+) create mode 100644 libm/src/math/pow.rs diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs new file mode 100644 index 000000000..3d0769b34 --- /dev/null +++ b/libm/src/math/pow.rs @@ -0,0 +1,395 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_pow.c */ +/* + * ==================================================== + * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. + * + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* pow(x,y) return x**y + * + * n + * Method: Let x = 2 * (1+f) + * 1. Compute and return log2(x) in two pieces: + * log2(x) = w1 + w2, + * where w1 has 53-24 = 29 bit trailing zeros. + * 2. Perform y*log2(x) = n+y' by simulating muti-precision + * arithmetic, where |y'|<=0.5. + * 3. Return x**y = 2**n*exp(y'*log2) + * + * Special cases: + * 1. (anything) ** 0 is 1 + * 2. 1 ** (anything) is 1 + * 3. (anything except 1) ** NAN is NAN + * 4. NAN ** (anything except 0) is NAN + * 5. +-(|x| > 1) ** +INF is +INF + * 6. +-(|x| > 1) ** -INF is +0 + * 7. +-(|x| < 1) ** +INF is +0 + * 8. +-(|x| < 1) ** -INF is +INF + * 9. -1 ** +-INF is 1 + * 10. +0 ** (+anything except 0, NAN) is +0 + * 11. -0 ** (+anything except 0, NAN, odd integer) is +0 + * 12. +0 ** (-anything except 0, NAN) is +INF, raise divbyzero + * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF, raise divbyzero + * 14. -0 ** (+odd integer) is -0 + * 15. -0 ** (-odd integer) is -INF, raise divbyzero + * 16. +INF ** (+anything except 0,NAN) is +INF + * 17. +INF ** (-anything except 0,NAN) is +0 + * 18. -INF ** (+odd integer) is -INF + * 19. -INF ** (anything) = -0 ** (-anything), (anything except odd integer) + * 20. 
(anything) ** 1 is (anything) + * 21. (anything) ** -1 is 1/(anything) + * 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) + * 23. (-anything except 0 and inf) ** (non-integer) is NAN + * + * Accuracy: + * pow(x,y) returns x**y nearly rounded. In particular + * pow(integer,integer) + * always returns the correct integer provided it is + * representable. + * + * Constants : + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + */ + +// #include "libm.h" + +/* Concerns: + * - Some constants are shared; DRY? + * - FLT_EVAL_METHOD: the others sidestep this (epsilon or just always true in the case of hypot (#71)) + */ + +use super::{fabs, scalbn, sqrt, with_set_low_word, with_set_high_word, get_high_word}; + +const BP: [f64; 2] = [1.0, 1.5]; +const DP_H: [f64; 2] = [0.0, 5.84962487220764160156e-01]; /* 0x3fe2b803_40000000 */ +const DP_L: [f64; 2] = [0.0, 1.35003920212974897128e-08]; /* 0x3E4CFDEB, 0x43CFD006 */ +const TWO53: f64 = 9007199254740992.0; /* 0x43400000_00000000 */ +const HUGE: f64 = 1.0e300; +const TINY: f64 = 1.0e-300; + +// poly coefs for (3/2)*(log(x)-2s-2/3*s**3: +const L1: f64 = 5.99999999999994648725e-01; /* 0x3fe33333_33333303 */ +const L2: f64 = 4.28571428578550184252e-01; /* 0x3fdb6db6_db6fabff */ +const L3: f64 = 3.33333329818377432918e-01; /* 0x3fd55555_518f264d */ +const L4: f64 = 2.72728123808534006489e-01; /* 0x3fd17460_a91d4101 */ +const L5: f64 = 2.30660745775561754067e-01; /* 0x3fcd864a_93c9db65 */ +const L6: f64 = 2.06975017800338417784e-01; /* 0x3fca7e28_4a454eef */ +const P1: f64 = 1.66666666666666019037e-01; /* 0x3fc55555_5555553e */ +const P2: f64 = -2.77777777770155933842e-03; /* 0xbf66c16c_16bebd93 */ +const P3: f64 = 6.61375632143793436117e-05; /* 0x3f11566a_af25de2c */ +const P4: f64 = -1.65339022054652515390e-06; /* 
0xbebbbd41_c5d26bf1 */ +const P5: f64 = 4.13813679705723846039e-08; /* 0x3e663769_72bea4d0 */ +const LG2: f64 = 6.93147180559945286227e-01; /* 0x3fe62e42_fefa39ef */ +const LG2_H: f64 = 6.93147182464599609375e-01; /* 0x3fe62e43_00000000 */ +const LG2_L: f64 = -1.90465429995776804525e-09; /* 0xbe205c61_0ca86c39 */ +const OVT: f64 = 8.0085662595372944372e-017; /* -(1024-log2(ovfl+.5ulp)) */ +const CP: f64 = 9.61796693925975554329e-01; /* 0x3feec709_dc3a03fd =2/(3ln2) */ +const CP_H: f64 = 9.61796700954437255859e-01; /* 0x3feec709_e0000000 =(float)cp */ +const CP_L: f64 = -7.02846165095275826516e-09; /* 0xbe3e2fe0_145b01f5 =tail of cp_h*/ +const IVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547_652b82fe =1/ln2 */ +const IVLN2_H: f64 = 1.44269502162933349609e+00; /* 0x3ff71547_60000000 =24b 1/ln2*/ +const IVLN2_L: f64 = 1.92596299112661746887e-08; /* 0x3e54ae0b_f85ddf44 =1/ln2 tail*/ + +#[inline] +pub fn pow(x: f64, y: f64) -> f64 { + let t1: f64; + let t2: f64; + + let (hx, lx): (i32, u32) = ((x.to_bits() >> 32) as i32, x.to_bits() as u32); + let (hy, ly): (i32, u32) = ((y.to_bits() >> 32) as i32, y.to_bits() as u32); + + let mut ix: i32 = (hx & 0x7fffffff) as i32; + let iy: i32 = (hy & 0x7fffffff) as i32; + + /* x**0 = 1, even if x is NaN */ + if ((iy as u32) | ly) == 0 { + return 1.0; + } + + /* 1**y = 1, even if y is NaN */ + if hx == 0x3ff00000 && lx == 0 { + return 1.0; + } + + /* NaN if either arg is NaN */ + if ix > 0x7ff00000 || (ix == 0x7ff00000 && lx != 0) || + iy > 0x7ff00000 || (iy == 0x7ff00000 && ly != 0) { + return x + y; + } + + /* determine if y is an odd int when x < 0 + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ... 
y is an even int + */ + let mut yisint: i32 = 0; + let mut k: i32; + let mut j: i32; + if hx < 0 { + if iy >= 0x43400000 { + yisint = 2; /* even integer y */ + } else if iy >= 0x3ff00000 { + k = (iy >> 20) - 0x3ff; /* exponent */ + + if k > 20 { + j = (ly >> (52 - k)) as i32; + + if (j << (52 - k)) == (ly as i32) { + yisint = 2 - (j & 1); + } + } else if ly == 0 { + j = iy >> (20 - k); + + if (j << (20 - k)) == iy { + yisint = 2 - (j & 1); + } + } + } + } + + if ly == 0 { + /* special value of y */ + if iy == 0x7ff00000 { + /* y is +-inf */ + return if ((ix - 0x3ff00000) | (lx as i32)) == 0 { + /* (-1)**+-inf is 1 */ + 1.0 + } else if ix >= 0x3ff00000 { + /* (|x|>1)**+-inf = inf,0 */ + if hy >= 0 { y } else { 0.0 } + } else { + /* (|x|<1)**+-inf = 0,inf */ + if hy >= 0 { 0.0 } else { -y } + }; + } + + if iy == 0x3ff00000 { + /* y is +-1 */ + return if hy >= 0 { x } else { 1.0 / x }; + } + + if hy == 0x40000000 { + /* y is 2 */ + return x * x; + } + + if hy == 0x3fe00000 { + /* y is 0.5 */ + if hx >= 0 { + /* x >= +0 */ + return sqrt(x); + } + } + } + + let mut ax: f64 = fabs(x); + if lx == 0 { + /* special value of x */ + if ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000 { + /* x is +-0,+-inf,+-1 */ + let mut z: f64 = ax; + + if hy < 0 { + /* z = (1/|x|) */ + z = 1.0 / z; + } + + if hx < 0 { + if ((ix-0x3ff00000)|yisint) == 0 { + z = (z - z) / (z - z); /* (-1)**non-int is NaN */ + } else if yisint == 1 { + z = -z; /* (x<0)**odd = -(|x|**odd) */ + } + } + + return z; + } + } + + let mut s: f64 = 1.0; /* sign of result */ + if hx < 0 { + if yisint == 0 { + /* (x<0)**(non-int) is NaN */ + return (x - x) / (x - x); + } + + if yisint == 1 { + /* (x<0)**(odd int) */ + s = -1.0; + } + } + + /* |y| is HUGE */ + if iy > 0x41e00000 { + /* if |y| > 2**31 */ + if iy > 0x43f00000 { + /* if |y| > 2**64, must o/uflow */ + if ix <= 0x3fefffff { + return if hy < 0 { HUGE * HUGE } else { TINY * TINY }; + } + + if ix >= 0x3ff00000 { + return if hy > 0 { HUGE * HUGE } else { TINY * 
TINY }; + } + } + + /* over/underflow if x is not close to one */ + if ix < 0x3fefffff { + return if hy < 0 { s * HUGE * HUGE } else { s * TINY * TINY }; + } + if ix > 0x3ff00000 { + return if hy > 0 { s * HUGE * HUGE } else { s * TINY * TINY }; + } + + /* now |1-x| is TINY <= 2**-20, suffice to compute + log(x) by x-x^2/2+x^3/3-x^4/4 */ + let t: f64 = ax - 1.0; /* t has 20 trailing zeros */ + let w: f64 = (t * t) * (0.5 - t * (0.3333333333333333333333 - t * 0.25)); + let u: f64 = IVLN2_H * t; /* ivln2_h has 21 sig. bits */ + let v: f64 = t * IVLN2_L - w * IVLN2; + t1 = with_set_low_word(u + v, 0); + t2 = v - (t1 - u); + } else { + // double ss,s2,s_h,s_l,t_h,t_l; + let mut n: i32 = 0; + + if ix < 0x00100000 { + /* take care subnormal number */ + ax *= TWO53; + n -= 53; + ix = get_high_word(ax) as i32; + } + + n += (ix >> 20) - 0x3ff; + j = ix & 0x000fffff; + + /* determine interval */ + let k: i32; + ix = j | 0x3ff00000; /* normalize ix */ + if j <= 0x3988E { + /* |x|<sqrt(3/2) */ + k = 0; + } else if j < 0xBB67A { + /* |x|<sqrt(3) */ + k = 1; + } else { + k = 0; + n += 1; + ix -= 0x00100000; + } + ax = with_set_high_word(ax, ix as u32); + + /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ + let u: f64 = ax - BP[k as usize]; /* bp[0]=1.0, bp[1]=1.5 */ + let v: f64 = 1.0 / (ax + BP[k as usize]); + let ss: f64 = u * v; + let s_h = with_set_low_word(ss, 0); + + /* t_h=ax+bp[k] High */ + let t_h: f64 = with_set_high_word(0.0, ((ix as u32 >> 1) | 0x20000000) + 0x00080000 + ((k as u32) << 18)); + let t_l: f64 = ax - (t_h - BP[k as usize]); + let s_l: f64 = v * ((u - s_h * t_h) - s_h * t_l); + + /* compute log(ax) */ + let s2: f64 = ss * ss; + let mut r: f64 = s2 * s2 * (L1 + s2 * (L2 + s2 *(L3 + s2 *(L4 + s2 *(L5 + s2 * L6))))); + r += s_l * (s_h + ss); + let s2: f64 = s_h * s_h; + let t_h: f64 = with_set_low_word(3.0 + s2 + r, 0); + let t_l: f64 = r - ((t_h - 3.0) - s2); + + /* u+v = ss*(1+...) */ + let u: f64 = s_h * t_h; + let v: f64 = s_l * t_h + t_l * ss; + + /* 2/(3log2)*(ss+...)
*/ + let p_h: f64 = with_set_low_word(u + v, 0); + let p_l = v - (p_h-u); + let z_h: f64 = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */ + let z_l: f64 = CP_L * p_h + p_l * CP + DP_L[k as usize]; + + /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */ + let t: f64 = n as f64; + t1 = with_set_low_word(((z_h + z_l) + DP_H[k as usize]) + t, 0); + t2 = z_l - (((t1 - t) - DP_H[k as usize]) - z_h); + } + + /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ + let y1: f64 = with_set_low_word(y, 0); + let p_l: f64 = (y - y1) * t1 + y * t2; + let mut p_h: f64 = y1 * t1; + let z: f64 = p_l + p_h; + let mut j: i32 = (z.to_bits() >> 32) as i32; + let i: i32 = z.to_bits() as i32; + // let (j, i): (i32, i32) = ((z.to_bits() >> 32) as i32, z.to_bits() as i32); + + if j >= 0x40900000 { + /* z >= 1024 */ + if (j - 0x40900000) | i != 0 { + /* if z > 1024 */ + return s * HUGE * HUGE; /* overflow */ + } + + if p_l + OVT > z - p_h { + return s * HUGE * HUGE; /* overflow */ + } + } else if (j & 0x7fffffff) >= 0x4090cc00 { + /* z <= -1075 */ + // FIXME: instead of abs(j) use unsigned j + + if (((j as u32) - 0xc090cc00) | (i as u32)) != 0 { + /* z < -1075 */ + return s * TINY * TINY; /* underflow */ + } + + if p_l <= z - p_h { + return s * TINY * TINY; /* underflow */ + } + } + + /* compute 2**(p_h+p_l) */ + let i: i32 = j & (0x7fffffff as i32); + k = (i >> 20) - 0x3ff; + let mut n: i32 = 0; + + if i > 0x3fe00000 { + /* if |z| > 0.5, set n = [z+0.5] */ + n = j + (0x00100000 >> (k + 1)); + k = ((n&0x7fffffff) >> 20) - 0x3ff; /* new k for n */ + let t: f64 = with_set_high_word(0.0, (n & !(0x000fffff >> k)) as u32); + n = ((n & 0x000fffff) | 0x00100000) >> (20 - k); + if j < 0 { + n = -n; + } + p_h -= t; + } + + let t: f64 = with_set_low_word(p_l + p_h, 0); + let u: f64 = t * LG2_H; + let v: f64 = (p_l - (t - p_h)) * LG2 + t * LG2_L; + let mut z: f64 = u + v; + let w: f64 = v - (z - u); + let t: f64 = z * z; + let t1: f64 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); + let 
r: f64 = (z * t1) / (t1 - 2.0) - (w + z*w); + z = 1.0 - (r - z); + j = get_high_word(z) as i32; + j += n << 20; + + if (j >> 20) <= 0 { + /* subnormal output */ + z = scalbn(z,n); + } else { + z = with_set_high_word(z, j as u32); + } + + return s*z; +} From d9aee5bcd5af015189c6d6ac549873f4b8df94a7 Mon Sep 17 00:00:00 2001 From: Rahul Butani Date: Tue, 17 Jul 2018 15:03:39 -0500 Subject: [PATCH 0108/1459] Enable tests and expose --- libm/src/lib.rs | 2 -- libm/src/math/mod.rs | 3 ++- libm/test-generator/src/main.rs | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 1837b9c1f..06991effe 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -343,7 +343,6 @@ pub trait F64Ext: private::Sealed + Sized { // NOTE depends on unstable intrinsics::powif64 // fn powi(self, n: i32) -> Self; - #[cfg(todo)] fn powf(self, n: Self) -> Self; fn sqrt(self) -> Self; @@ -463,7 +462,6 @@ impl F64Ext for f64 { } } - #[cfg(todo)] #[inline] fn powf(self, n: Self) -> Self { pow(self, n) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 754efa9f6..6dd362d00 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -50,6 +50,7 @@ mod log2; mod log2f; mod logf; mod powf; +mod pow; mod round; mod roundf; mod scalbn; @@ -111,6 +112,7 @@ pub use self::log2::log2; pub use self::log2f::log2f; pub use self::logf::logf; pub use self::powf::powf; +pub use self::pow::pow; pub use self::round::round; pub use self::roundf::roundf; pub use self::scalbn::scalbn; @@ -167,7 +169,6 @@ fn get_low_word(x: f64) -> u32 { x.to_bits() as u32 } -#[allow(dead_code)] #[inline] fn with_set_high_word(f: f64, hi: u32) -> f64 { let mut tmp = f.to_bits(); diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs index 09a9cb8f5..4c4e420a2 100644 --- a/libm/test-generator/src/main.rs +++ b/libm/test-generator/src/main.rs @@ -774,7 +774,7 @@ f64f64_f64! 
{ fdim, fmod, hypot, - // pow, + pow, } // With signature `fn(f64, f64, f64) -> f64` From 6360a501bd97b4cb765e8ae8c7bd5074757ef575 Mon Sep 17 00:00:00 2001 From: Rahul Butani Date: Tue, 17 Jul 2018 15:06:50 -0500 Subject: [PATCH 0109/1459] rustfmt'ed + some clean up --- libm/src/math/mod.rs | 4 +- libm/src/math/pow.rs | 239 +++++++++++++++++++++++-------------------- 2 files changed, 129 insertions(+), 114 deletions(-) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 6dd362d00..752a5991a 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -49,8 +49,8 @@ mod log1pf; mod log2; mod log2f; mod logf; -mod powf; mod pow; +mod powf; mod round; mod roundf; mod scalbn; @@ -111,8 +111,8 @@ pub use self::log1pf::log1pf; pub use self::log2::log2; pub use self::log2f::log2f; pub use self::logf::logf; -pub use self::powf::powf; pub use self::pow::pow; +pub use self::powf::powf; pub use self::round::round; pub use self::roundf::roundf; pub use self::scalbn::scalbn; diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 3d0769b34..69c086b0f 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -8,93 +8,87 @@ * is preserved. * ==================================================== */ -/* pow(x,y) return x**y - * - * n - * Method: Let x = 2 * (1+f) - * 1. Compute and return log2(x) in two pieces: - * log2(x) = w1 + w2, - * where w1 has 53-24 = 29 bit trailing zeros. - * 2. Perform y*log2(x) = n+y' by simulating muti-precision - * arithmetic, where |y'|<=0.5. - * 3. Return x**y = 2**n*exp(y'*log2) - * - * Special cases: - * 1. (anything) ** 0 is 1 - * 2. 1 ** (anything) is 1 - * 3. (anything except 1) ** NAN is NAN - * 4. NAN ** (anything except 0) is NAN - * 5. +-(|x| > 1) ** +INF is +INF - * 6. +-(|x| > 1) ** -INF is +0 - * 7. +-(|x| < 1) ** +INF is +0 - * 8. +-(|x| < 1) ** -INF is +INF - * 9. -1 ** +-INF is 1 - * 10. +0 ** (+anything except 0, NAN) is +0 - * 11. -0 ** (+anything except 0, NAN, odd integer) is +0 - * 12. 
+0 ** (-anything except 0, NAN) is +INF, raise divbyzero - * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF, raise divbyzero - * 14. -0 ** (+odd integer) is -0 - * 15. -0 ** (-odd integer) is -INF, raise divbyzero - * 16. +INF ** (+anything except 0,NAN) is +INF - * 17. +INF ** (-anything except 0,NAN) is +0 - * 18. -INF ** (+odd integer) is -INF - * 19. -INF ** (anything) = -0 ** (-anything), (anything except odd integer) - * 20. (anything) ** 1 is (anything) - * 21. (anything) ** -1 is 1/(anything) - * 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) - * 23. (-anything except 0 and inf) ** (non-integer) is NAN - * - * Accuracy: - * pow(x,y) returns x**y nearly rounded. In particular - * pow(integer,integer) - * always returns the correct integer provided it is - * representable. - * - * Constants : - * The hexadecimal values are the intended ones for the following - * constants. The decimal values may be used, provided that the - * compiler will convert from decimal to binary accurately enough - * to produce the hexadecimal values shown. - */ - -// #include "libm.h" -/* Concerns: - * - Some constants are shared; DRY? - * - FLT_EVAL_METHOD: the others sidestep this (epsilon or just always true in the case of hypot (#71)) - */ - -use super::{fabs, scalbn, sqrt, with_set_low_word, with_set_high_word, get_high_word}; - -const BP: [f64; 2] = [1.0, 1.5]; +/// pow(x,y) return x**y +/// +/// n +/// Method: Let x = 2 * (1+f) +/// 1. Compute and return log2(x) in two pieces: +/// log2(x) = w1 + w2, +/// where w1 has 53-24 = 29 bit trailing zeros. +/// 2. Perform y*log2(x) = n+y' by simulating muti-precision +/// arithmetic, where |y'|<=0.5. +/// 3. Return x**y = 2**n*exp(y'*log2) +/// +/// Special cases: +/// 1. (anything) ** 0 is 1 +/// 2. 1 ** (anything) is 1 +/// 3. (anything except 1) ** NAN is NAN +/// 4. NAN ** (anything except 0) is NAN +/// 5. +-(|x| > 1) ** +INF is +INF +/// 6. +-(|x| > 1) ** -INF is +0 +/// 7. 
+-(|x| < 1) ** +INF is +0 +/// 8. +-(|x| < 1) ** -INF is +INF +/// 9. -1 ** +-INF is 1 +/// 10. +0 ** (+anything except 0, NAN) is +0 +/// 11. -0 ** (+anything except 0, NAN, odd integer) is +0 +/// 12. +0 ** (-anything except 0, NAN) is +INF, raise divbyzero +/// 13. -0 ** (-anything except 0, NAN, odd integer) is +INF, raise divbyzero +/// 14. -0 ** (+odd integer) is -0 +/// 15. -0 ** (-odd integer) is -INF, raise divbyzero +/// 16. +INF ** (+anything except 0,NAN) is +INF +/// 17. +INF ** (-anything except 0,NAN) is +0 +/// 18. -INF ** (+odd integer) is -INF +/// 19. -INF ** (anything) = -0 ** (-anything), (anything except odd integer) +/// 20. (anything) ** 1 is (anything) +/// 21. (anything) ** -1 is 1/(anything) +/// 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) +/// 23. (-anything except 0 and inf) ** (non-integer) is NAN +/// +/// Accuracy: +/// pow(x,y) returns x**y nearly rounded. In particular +/// pow(integer,integer) +/// always returns the correct integer provided it is +/// representable. +/// +/// Constants : +/// The hexadecimal values are the intended ones for the following +/// constants. The decimal values may be used, provided that the +/// compiler will convert from decimal to binary accurately enough +/// to produce the hexadecimal values shown. 
+/// + +use super::{fabs, get_high_word, scalbn, sqrt, with_set_high_word, with_set_low_word}; + +const BP: [f64; 2] = [1.0, 1.5]; const DP_H: [f64; 2] = [0.0, 5.84962487220764160156e-01]; /* 0x3fe2b803_40000000 */ const DP_L: [f64; 2] = [0.0, 1.35003920212974897128e-08]; /* 0x3E4CFDEB, 0x43CFD006 */ -const TWO53: f64 = 9007199254740992.0; /* 0x43400000_00000000 */ -const HUGE: f64 = 1.0e300; -const TINY: f64 = 1.0e-300; +const TWO53: f64 = 9007199254740992.0; /* 0x43400000_00000000 */ +const HUGE: f64 = 1.0e300; +const TINY: f64 = 1.0e-300; // poly coefs for (3/2)*(log(x)-2s-2/3*s**3: -const L1: f64 = 5.99999999999994648725e-01; /* 0x3fe33333_33333303 */ -const L2: f64 = 4.28571428578550184252e-01; /* 0x3fdb6db6_db6fabff */ -const L3: f64 = 3.33333329818377432918e-01; /* 0x3fd55555_518f264d */ -const L4: f64 = 2.72728123808534006489e-01; /* 0x3fd17460_a91d4101 */ -const L5: f64 = 2.30660745775561754067e-01; /* 0x3fcd864a_93c9db65 */ -const L6: f64 = 2.06975017800338417784e-01; /* 0x3fca7e28_4a454eef */ -const P1: f64 = 1.66666666666666019037e-01; /* 0x3fc55555_5555553e */ +const L1: f64 = 5.99999999999994648725e-01; /* 0x3fe33333_33333303 */ +const L2: f64 = 4.28571428578550184252e-01; /* 0x3fdb6db6_db6fabff */ +const L3: f64 = 3.33333329818377432918e-01; /* 0x3fd55555_518f264d */ +const L4: f64 = 2.72728123808534006489e-01; /* 0x3fd17460_a91d4101 */ +const L5: f64 = 2.30660745775561754067e-01; /* 0x3fcd864a_93c9db65 */ +const L6: f64 = 2.06975017800338417784e-01; /* 0x3fca7e28_4a454eef */ +const P1: f64 = 1.66666666666666019037e-01; /* 0x3fc55555_5555553e */ const P2: f64 = -2.77777777770155933842e-03; /* 0xbf66c16c_16bebd93 */ -const P3: f64 = 6.61375632143793436117e-05; /* 0x3f11566a_af25de2c */ +const P3: f64 = 6.61375632143793436117e-05; /* 0x3f11566a_af25de2c */ const P4: f64 = -1.65339022054652515390e-06; /* 0xbebbbd41_c5d26bf1 */ -const P5: f64 = 4.13813679705723846039e-08; /* 0x3e663769_72bea4d0 */ -const LG2: f64 = 6.93147180559945286227e-01; /* 
0x3fe62e42_fefa39ef */ -const LG2_H: f64 = 6.93147182464599609375e-01; /* 0x3fe62e43_00000000 */ -const LG2_L: f64 = -1.90465429995776804525e-09; /* 0xbe205c61_0ca86c39 */ -const OVT: f64 = 8.0085662595372944372e-017; /* -(1024-log2(ovfl+.5ulp)) */ -const CP: f64 = 9.61796693925975554329e-01; /* 0x3feec709_dc3a03fd =2/(3ln2) */ -const CP_H: f64 = 9.61796700954437255859e-01; /* 0x3feec709_e0000000 =(float)cp */ -const CP_L: f64 = -7.02846165095275826516e-09; /* 0xbe3e2fe0_145b01f5 =tail of cp_h*/ -const IVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547_652b82fe =1/ln2 */ -const IVLN2_H: f64 = 1.44269502162933349609e+00; /* 0x3ff71547_60000000 =24b 1/ln2*/ -const IVLN2_L: f64 = 1.92596299112661746887e-08; /* 0x3e54ae0b_f85ddf44 =1/ln2 tail*/ +const P5: f64 = 4.13813679705723846039e-08; /* 0x3e663769_72bea4d0 */ +const LG2: f64 = 6.93147180559945286227e-01; /* 0x3fe62e42_fefa39ef */ +const LG2_H: f64 = 6.93147182464599609375e-01; /* 0x3fe62e43_00000000 */ +const LG2_L: f64 = -1.90465429995776804525e-09; /* 0xbe205c61_0ca86c39 */ +const OVT: f64 = 8.0085662595372944372e-017; /* -(1024-log2(ovfl+.5ulp)) */ +const CP: f64 = 9.61796693925975554329e-01; /* 0x3feec709_dc3a03fd =2/(3ln2) */ +const CP_H: f64 = 9.61796700954437255859e-01; /* 0x3feec709_e0000000 =(float)cp */ +const CP_L: f64 = -7.02846165095275826516e-09; /* 0xbe3e2fe0_145b01f5 =tail of cp_h*/ +const IVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547_652b82fe =1/ln2 */ +const IVLN2_H: f64 = 1.44269502162933349609e+00; /* 0x3ff71547_60000000 =24b 1/ln2*/ +const IVLN2_L: f64 = 1.92596299112661746887e-08; /* 0x3e54ae0b_f85ddf44 =1/ln2 tail*/ #[inline] pub fn pow(x: f64, y: f64) -> f64 { @@ -103,7 +97,7 @@ pub fn pow(x: f64, y: f64) -> f64 { let (hx, lx): (i32, u32) = ((x.to_bits() >> 32) as i32, x.to_bits() as u32); let (hy, ly): (i32, u32) = ((y.to_bits() >> 32) as i32, y.to_bits() as u32); - + let mut ix: i32 = (hx & 0x7fffffff) as i32; let iy: i32 = (hy & 0x7fffffff) as i32; @@ -118,9 +112,12 @@ pub 
fn pow(x: f64, y: f64) -> f64 { } /* NaN if either arg is NaN */ - if ix > 0x7ff00000 || (ix == 0x7ff00000 && lx != 0) || - iy > 0x7ff00000 || (iy == 0x7ff00000 && ly != 0) { - return x + y; + if ix > 0x7ff00000 + || (ix == 0x7ff00000 && lx != 0) + || iy > 0x7ff00000 + || (iy == 0x7ff00000 && ly != 0) + { + return x + y; } /* determine if y is an odd int when x < 0 @@ -136,16 +133,16 @@ pub fn pow(x: f64, y: f64) -> f64 { yisint = 2; /* even integer y */ } else if iy >= 0x3ff00000 { k = (iy >> 20) - 0x3ff; /* exponent */ - + if k > 20 { j = (ly >> (52 - k)) as i32; - + if (j << (52 - k)) == (ly as i32) { yisint = 2 - (j & 1); } } else if ly == 0 { j = iy >> (20 - k); - + if (j << (20 - k)) == iy { yisint = 2 - (j & 1); } @@ -156,16 +153,25 @@ pub fn pow(x: f64, y: f64) -> f64 { if ly == 0 { /* special value of y */ if iy == 0x7ff00000 { - /* y is +-inf */ + /* y is +-inf */ + return if ((ix - 0x3ff00000) | (lx as i32)) == 0 { /* (-1)**+-inf is 1 */ 1.0 } else if ix >= 0x3ff00000 { /* (|x|>1)**+-inf = inf,0 */ - if hy >= 0 { y } else { 0.0 } + if hy >= 0 { + y + } else { + 0.0 + } } else { /* (|x|<1)**+-inf = 0,inf */ - if hy >= 0 { 0.0 } else { -y } + if hy >= 0 { + 0.0 + } else { + -y + } }; } @@ -194,14 +200,14 @@ pub fn pow(x: f64, y: f64) -> f64 { if ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000 { /* x is +-0,+-inf,+-1 */ let mut z: f64 = ax; - + if hy < 0 { /* z = (1/|x|) */ z = 1.0 / z; } if hx < 0 { - if ((ix-0x3ff00000)|yisint) == 0 { + if ((ix - 0x3ff00000) | yisint) == 0 { z = (z - z) / (z - z); /* (-1)**non-int is NaN */ } else if yisint == 1 { z = -z; /* (x<0)**odd = -(|x|**odd) */ @@ -241,17 +247,25 @@ pub fn pow(x: f64, y: f64) -> f64 { /* over/underflow if x is not close to one */ if ix < 0x3fefffff { - return if hy < 0 { s * HUGE * HUGE } else { s * TINY * TINY }; + return if hy < 0 { + s * HUGE * HUGE + } else { + s * TINY * TINY + }; } if ix > 0x3ff00000 { - return if hy > 0 { s * HUGE * HUGE } else { s * TINY * TINY }; + return if hy > 0 { + s 
* HUGE * HUGE + } else { + s * TINY * TINY + }; } /* now |1-x| is TINY <= 2**-20, suffice to compute log(x) by x-x^2/2+x^3/3-x^4/4 */ - let t: f64 = ax - 1.0; /* t has 20 trailing zeros */ + let t: f64 = ax - 1.0; /* t has 20 trailing zeros */ let w: f64 = (t * t) * (0.5 - t * (0.3333333333333333333333 - t * 0.25)); - let u: f64 = IVLN2_H * t; /* ivln2_h has 21 sig. bits */ + let u: f64 = IVLN2_H * t; /* ivln2_h has 21 sig. bits */ let v: f64 = t * IVLN2_L - w * IVLN2; t1 = with_set_low_word(u + v, 0); t2 = v - (t1 - u); @@ -262,8 +276,8 @@ pub fn pow(x: f64, y: f64) -> f64 { if ix < 0x00100000 { /* take care subnormal number */ ax *= TWO53; - n -= 53; - ix = get_high_word(ax) as i32; + n -= 53; + ix = get_high_word(ax) as i32; } n += (ix >> 20) - 0x3ff; @@ -271,12 +285,11 @@ pub fn pow(x: f64, y: f64) -> f64 { /* determine interval */ let k: i32; - ix = j | 0x3ff00000; /* normalize ix */ + ix = j | 0x3ff00000; /* normalize ix */ if j <= 0x3988E { /* |x| f64 { /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ let u: f64 = ax - BP[k as usize]; /* bp[0]=1.0, bp[1]=1.5 */ let v: f64 = 1.0 / (ax + BP[k as usize]); - let ss: f64 = u * v; + let ss: f64 = u * v; let s_h = with_set_low_word(ss, 0); /* t_h=ax+bp[k] High */ - let t_h: f64 = with_set_high_word(0.0, - ((ix as u32 >> 1) | 0x20000000) + 0x00080000 + ((k as u32) << 18)); + let t_h: f64 = with_set_high_word( + 0.0, + ((ix as u32 >> 1) | 0x20000000) + 0x00080000 + ((k as u32) << 18), + ); let t_l: f64 = ax - (t_h - BP[k as usize]); let s_l: f64 = v * ((u - s_h * t_h) - s_h * t_l); /* compute log(ax) */ let s2: f64 = ss * ss; - let mut r: f64 = s2 * s2 * (L1 + s2 * (L2 + s2 *(L3 + s2 *(L4 + s2 *(L5 + s2 * L6))))); + let mut r: f64 = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6))))); r += s_l * (s_h + ss); let s2: f64 = s_h * s_h; let t_h: f64 = with_set_low_word(3.0 + s2 + r, 0); @@ -312,7 +327,7 @@ pub fn pow(x: f64, y: f64) -> f64 { /* 2/(3log2)*(ss+...) 
*/ let p_h: f64 = with_set_low_word(u + v, 0); - let p_l = v - (p_h-u); + let p_l = v - (p_h - u); let z_h: f64 = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */ let z_l: f64 = CP_L * p_h + p_l * CP + DP_L[k as usize]; @@ -323,10 +338,10 @@ pub fn pow(x: f64, y: f64) -> f64 { } /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ - let y1: f64 = with_set_low_word(y, 0); + let y1: f64 = with_set_low_word(y, 0); let p_l: f64 = (y - y1) * t1 + y * t2; let mut p_h: f64 = y1 * t1; - let z: f64 = p_l + p_h; + let z: f64 = p_l + p_h; let mut j: i32 = (z.to_bits() >> 32) as i32; let i: i32 = z.to_bits() as i32; // let (j, i): (i32, i32) = ((z.to_bits() >> 32) as i32, z.to_bits() as i32); @@ -363,7 +378,7 @@ pub fn pow(x: f64, y: f64) -> f64 { if i > 0x3fe00000 { /* if |z| > 0.5, set n = [z+0.5] */ n = j + (0x00100000 >> (k + 1)); - k = ((n&0x7fffffff) >> 20) - 0x3ff; /* new k for n */ + k = ((n & 0x7fffffff) >> 20) - 0x3ff; /* new k for n */ let t: f64 = with_set_high_word(0.0, (n & !(0x000fffff >> k)) as u32); n = ((n & 0x000fffff) | 0x00100000) >> (20 - k); if j < 0 { @@ -379,17 +394,17 @@ pub fn pow(x: f64, y: f64) -> f64 { let w: f64 = v - (z - u); let t: f64 = z * z; let t1: f64 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); - let r: f64 = (z * t1) / (t1 - 2.0) - (w + z*w); + let r: f64 = (z * t1) / (t1 - 2.0) - (w + z * w); z = 1.0 - (r - z); j = get_high_word(z) as i32; j += n << 20; if (j >> 20) <= 0 { /* subnormal output */ - z = scalbn(z,n); + z = scalbn(z, n); } else { z = with_set_high_word(z, j as u32); } - return s*z; + return s * z; } From 86a972737fc6122b105549e3ccbc43a8405c23d2 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Tue, 17 Jul 2018 19:46:12 -0500 Subject: [PATCH 0110/1459] cargo fmt --- libm/src/math/pow.rs | 97 ++++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 49 deletions(-) diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 69c086b0f..329b3955d 100644 --- a/libm/src/math/pow.rs +++ 
b/libm/src/math/pow.rs @@ -9,55 +9,54 @@ * ==================================================== */ -/// pow(x,y) return x**y -/// -/// n -/// Method: Let x = 2 * (1+f) -/// 1. Compute and return log2(x) in two pieces: -/// log2(x) = w1 + w2, -/// where w1 has 53-24 = 29 bit trailing zeros. -/// 2. Perform y*log2(x) = n+y' by simulating muti-precision -/// arithmetic, where |y'|<=0.5. -/// 3. Return x**y = 2**n*exp(y'*log2) -/// -/// Special cases: -/// 1. (anything) ** 0 is 1 -/// 2. 1 ** (anything) is 1 -/// 3. (anything except 1) ** NAN is NAN -/// 4. NAN ** (anything except 0) is NAN -/// 5. +-(|x| > 1) ** +INF is +INF -/// 6. +-(|x| > 1) ** -INF is +0 -/// 7. +-(|x| < 1) ** +INF is +0 -/// 8. +-(|x| < 1) ** -INF is +INF -/// 9. -1 ** +-INF is 1 -/// 10. +0 ** (+anything except 0, NAN) is +0 -/// 11. -0 ** (+anything except 0, NAN, odd integer) is +0 -/// 12. +0 ** (-anything except 0, NAN) is +INF, raise divbyzero -/// 13. -0 ** (-anything except 0, NAN, odd integer) is +INF, raise divbyzero -/// 14. -0 ** (+odd integer) is -0 -/// 15. -0 ** (-odd integer) is -INF, raise divbyzero -/// 16. +INF ** (+anything except 0,NAN) is +INF -/// 17. +INF ** (-anything except 0,NAN) is +0 -/// 18. -INF ** (+odd integer) is -INF -/// 19. -INF ** (anything) = -0 ** (-anything), (anything except odd integer) -/// 20. (anything) ** 1 is (anything) -/// 21. (anything) ** -1 is 1/(anything) -/// 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) -/// 23. (-anything except 0 and inf) ** (non-integer) is NAN -/// -/// Accuracy: -/// pow(x,y) returns x**y nearly rounded. In particular -/// pow(integer,integer) -/// always returns the correct integer provided it is -/// representable. -/// -/// Constants : -/// The hexadecimal values are the intended ones for the following -/// constants. The decimal values may be used, provided that the -/// compiler will convert from decimal to binary accurately enough -/// to produce the hexadecimal values shown. 
-/// - +// pow(x,y) return x**y +// +// n +// Method: Let x = 2 * (1+f) +// 1. Compute and return log2(x) in two pieces: +// log2(x) = w1 + w2, +// where w1 has 53-24 = 29 bit trailing zeros. +// 2. Perform y*log2(x) = n+y' by simulating muti-precision +// arithmetic, where |y'|<=0.5. +// 3. Return x**y = 2**n*exp(y'*log2) +// +// Special cases: +// 1. (anything) ** 0 is 1 +// 2. 1 ** (anything) is 1 +// 3. (anything except 1) ** NAN is NAN +// 4. NAN ** (anything except 0) is NAN +// 5. +-(|x| > 1) ** +INF is +INF +// 6. +-(|x| > 1) ** -INF is +0 +// 7. +-(|x| < 1) ** +INF is +0 +// 8. +-(|x| < 1) ** -INF is +INF +// 9. -1 ** +-INF is 1 +// 10. +0 ** (+anything except 0, NAN) is +0 +// 11. -0 ** (+anything except 0, NAN, odd integer) is +0 +// 12. +0 ** (-anything except 0, NAN) is +INF, raise divbyzero +// 13. -0 ** (-anything except 0, NAN, odd integer) is +INF, raise divbyzero +// 14. -0 ** (+odd integer) is -0 +// 15. -0 ** (-odd integer) is -INF, raise divbyzero +// 16. +INF ** (+anything except 0,NAN) is +INF +// 17. +INF ** (-anything except 0,NAN) is +0 +// 18. -INF ** (+odd integer) is -INF +// 19. -INF ** (anything) = -0 ** (-anything), (anything except odd integer) +// 20. (anything) ** 1 is (anything) +// 21. (anything) ** -1 is 1/(anything) +// 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) +// 23. (-anything except 0 and inf) ** (non-integer) is NAN +// +// Accuracy: +// pow(x,y) returns x**y nearly rounded. In particular +// pow(integer,integer) +// always returns the correct integer provided it is +// representable. +// +// Constants : +// The hexadecimal values are the intended ones for the following +// constants. The decimal values may be used, provided that the +// compiler will convert from decimal to binary accurately enough +// to produce the hexadecimal values shown. 
+// use super::{fabs, get_high_word, scalbn, sqrt, with_set_high_word, with_set_low_word}; const BP: [f64; 2] = [1.0, 1.5]; From 7e6e26489187f2548e63c672f1758aa95b7f3ae0 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Tue, 17 Jul 2018 20:04:33 -0500 Subject: [PATCH 0111/1459] update changelog; add more copyright notices --- libm/CHANGELOG.md | 15 +++++++++++++++ libm/src/math/atan.rs | 11 +++++++++++ libm/src/math/atan2.rs | 39 +++++++++++++++++++++++++++++++++++++++ libm/src/math/atan2f.rs | 15 +++++++++++++++ 4 files changed, 80 insertions(+) diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index 107813b08..d37468685 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -7,14 +7,29 @@ This project adheres to [Semantic Versioning](http://semver.org/). ### Added +- acosf +- asin +- asinf +- atan +- atan2 - atan2f +- atanf - cos +- cosf +- cosh - coshf +- exp2 +- expm1 +- expm1f +- expo2 - fmaf +- pow - sin +- sinf - sinh - sinhf - tan +- tanf - tanh - tanhf diff --git a/libm/src/math/atan.rs b/libm/src/math/atan.rs index d057af6d6..47a2951c6 100644 --- a/libm/src/math/atan.rs +++ b/libm/src/math/atan.rs @@ -1,3 +1,14 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_atan.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ /* atan(x) * Method * 1. Reduce x to positive by atan(x) = -atan(-x). diff --git a/libm/src/math/atan2.rs b/libm/src/math/atan2.rs index a719b6d63..a91ddd84d 100644 --- a/libm/src/math/atan2.rs +++ b/libm/src/math/atan2.rs @@ -1,3 +1,42 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_atan2.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. 
All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + */ +/* atan2(y,x) + * Method : + * 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x). + * 2. Reduce x to positive by (if x and y are unexceptional): + * ARG (x+iy) = arctan(y/x) ... if x > 0, + * ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0, + * + * Special cases: + * + * ATAN2((anything), NaN ) is NaN; + * ATAN2(NAN , (anything) ) is NaN; + * ATAN2(+-0, +(anything but NaN)) is +-0 ; + * ATAN2(+-0, -(anything but NaN)) is +-pi ; + * ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2; + * ATAN2(+-(anything but INF and NaN), +INF) is +-0 ; + * ATAN2(+-(anything but INF and NaN), -INF) is +-pi; + * ATAN2(+-INF,+INF ) is +-pi/4 ; + * ATAN2(+-INF,-INF ) is +-3pi/4; + * ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2; + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + */ + use super::atan; use super::fabs; diff --git a/libm/src/math/atan2f.rs b/libm/src/math/atan2f.rs index a232ffdd6..211a992a0 100644 --- a/libm/src/math/atan2f.rs +++ b/libm/src/math/atan2f.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_atan2f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + use super::atanf; use super::fabsf; From 634144a6c7b30744ea74231a3555b6c907b207b1 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Wed, 18 Jul 2018 11:48:47 -0500 Subject: [PATCH 0112/1459] v0.1.2 --- libm/CHANGELOG.md | 5 ++++- libm/Cargo.toml | 2 +- libm/src/lib.rs | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index d37468685..1cc396455 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -5,6 +5,8 @@ This project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] +## [v0.1.2] - 2018-07-18 + ### Added - acosf @@ -72,5 +74,6 @@ This project adheres to [Semantic Versioning](http://semver.org/). - Initial release -[Unreleased]: https://github.com/japaric/libm/compare/v0.1.1...HEAD +[Unreleased]: https://github.com/japaric/libm/compare/v0.1.2...HEAD +[v0.1.2]: https://github.com/japaric/libm/compare/v0.1.1...v0.1.2 [v0.1.1]: https://github.com/japaric/libm/compare/v0.1.0...v0.1.1 diff --git a/libm/Cargo.toml b/libm/Cargo.toml index a82fc99a7..11c59a2db 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -7,7 +7,7 @@ keywords = ["libm", "math"] license = "MIT OR Apache-2.0" name = "libm" repository = "https://github.com/japaric/libm" -version = "0.1.1" +version = "0.1.2" [workspace] members = ["cb", "test-generator"] \ No newline at end of file diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 06991effe..5121cbfba 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -1,4 +1,4 @@ -//! Port of MUSL's libm to Rust +//! libm in pure Rust //! //! # Usage //! 
From a4f9931bb9d79e5f245ed8d84b82207e6b9e4d50 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Sat, 21 Jul 2018 12:01:49 -0500 Subject: [PATCH 0113/1459] README: all the math functions we needed are now supported --- libm/README.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/libm/README.md b/libm/README.md index 02de9765a..24b816c35 100644 --- a/libm/README.md +++ b/libm/README.md @@ -33,10 +33,7 @@ fn foo(x: f32) { } ``` -Not all the math functions are available at the moment. Check the [API docs] to learn what's -currently supported. - -[API docs]: https://docs.rs/libm +The API documentation can be found [here](https://docs.rs/libm). ## Contributing From 5dc6c27e5c7238da6fbe9b32291ec8e3120e635f Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Wed, 25 Jul 2018 13:16:10 -0500 Subject: [PATCH 0114/1459] omit bounds check in release mode this eliminates panicking branches in the optimized version of the functions. We keep the bounds checks when running the test suite to check that we never do an out of bounds access. This commit also adds a "must link" test that ensures that future changes in our implementation won't add panicking branches. 
closes rust-lang/libm#129 --- libm/Cargo.toml | 4 ++ libm/ci/script.sh | 14 +++- libm/examples/no-panic.rs | 115 ++++++++++++++++++++++++++++++++ libm/src/math/atan.rs | 2 +- libm/src/math/mod.rs | 44 ++++++++++++ libm/src/math/rem_pio2.rs | 2 +- libm/src/math/rem_pio2_large.rs | 104 ++++++++++++++++------------- 7 files changed, 233 insertions(+), 52 deletions(-) create mode 100644 libm/examples/no-panic.rs diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 11c59a2db..8a2ba7447 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -9,5 +9,9 @@ name = "libm" repository = "https://github.com/japaric/libm" version = "0.1.2" +[features] +# only used to run our test suite +checked = [] + [workspace] members = ["cb", "test-generator"] \ No newline at end of file diff --git a/libm/ci/script.sh b/libm/ci/script.sh index cf37ac1ca..71d9e0882 100644 --- a/libm/ci/script.sh +++ b/libm/ci/script.sh @@ -12,11 +12,21 @@ main() { # check that we can source import libm into compiler-builtins cargo check --package cb + # test that the functions don't contain invocations of `panic!` + case $TARGET in + armv7-unknown-linux-gnueabihf) + cross build --release --target $TARGET --example no-panic + ;; + esac + + # run unit tests + cross test --lib --features checked --target $TARGET --release + # generate tests cargo run --package test-generator --target x86_64-unknown-linux-musl - # run tests - cross test --target $TARGET --release + # run generated tests + cross test --tests --features checked --target $TARGET --release # TODO need to fix overflow issues (cf. issue #4) # cross test --target $TARGET diff --git a/libm/examples/no-panic.rs b/libm/examples/no-panic.rs new file mode 100644 index 000000000..fb79f99af --- /dev/null +++ b/libm/examples/no-panic.rs @@ -0,0 +1,115 @@ +#![feature(lang_items)] +#![feature(panic_implementation)] +#![no_main] +#![no_std] + +extern crate libm; + +use core::panic::PanicInfo; +use core::ptr; + +macro_rules! 
force_eval { + ($e:expr) => { + unsafe { + core::ptr::read_volatile(&$e); + } + }; +} + +#[no_mangle] +pub fn main() { + force_eval!(libm::acos(random())); + force_eval!(libm::acosf(random())); + force_eval!(libm::asin(random())); + force_eval!(libm::asinf(random())); + force_eval!(libm::atan(random())); + force_eval!(libm::atan2(random(), random())); + force_eval!(libm::atan2f(random(), random())); + force_eval!(libm::atanf(random())); + force_eval!(libm::cbrt(random())); + force_eval!(libm::cbrtf(random())); + force_eval!(libm::ceil(random())); + force_eval!(libm::ceilf(random())); + force_eval!(libm::cos(random())); + force_eval!(libm::cosf(random())); + force_eval!(libm::cosh(random())); + force_eval!(libm::coshf(random())); + force_eval!(libm::exp(random())); + force_eval!(libm::exp2(random())); + force_eval!(libm::exp2f(random())); + force_eval!(libm::expf(random())); + force_eval!(libm::expm1(random())); + force_eval!(libm::expm1f(random())); + force_eval!(libm::fabs(random())); + force_eval!(libm::fabsf(random())); + force_eval!(libm::fdim(random(), random())); + force_eval!(libm::fdimf(random(), random())); + force_eval!(libm::floor(random())); + force_eval!(libm::floorf(random())); + force_eval!(libm::fma(random(), random(), random())); + force_eval!(libm::fmaf(random(), random(), random())); + force_eval!(libm::fmod(random(), random())); + force_eval!(libm::fmodf(random(), random())); + force_eval!(libm::hypot(random(), random())); + force_eval!(libm::hypotf(random(), random())); + force_eval!(libm::log(random())); + force_eval!(libm::log2(random())); + force_eval!(libm::log10(random())); + force_eval!(libm::log10f(random())); + force_eval!(libm::log1p(random())); + force_eval!(libm::log1pf(random())); + force_eval!(libm::log2f(random())); + force_eval!(libm::logf(random())); + force_eval!(libm::pow(random(), random())); + force_eval!(libm::powf(random(), random())); + force_eval!(libm::round(random())); + force_eval!(libm::roundf(random())); + 
force_eval!(libm::scalbn(random(), random())); + force_eval!(libm::scalbnf(random(), random())); + force_eval!(libm::sin(random())); + force_eval!(libm::sinf(random())); + force_eval!(libm::sinh(random())); + force_eval!(libm::sinhf(random())); + force_eval!(libm::sqrt(random())); + force_eval!(libm::sqrtf(random())); + force_eval!(libm::tan(random())); + force_eval!(libm::tanf(random())); + force_eval!(libm::tanh(random())); + force_eval!(libm::tanhf(random())); + force_eval!(libm::trunc(random())); + force_eval!(libm::truncf(random())); +} + +fn random<T>() -> T +where + T: Copy, +{ + unsafe { + static mut X: usize = 0; + X += 8; + ptr::read_volatile(X as *const T) + } +} + +#[panic_implementation] +#[no_mangle] +pub fn panic(_info: &PanicInfo) -> ! { + // loop {} + extern "C" { + fn thou_shalt_not_panic() -> !; + } + + unsafe { thou_shalt_not_panic() } +} + +#[link(name = "c")] +extern "C" {} + +#[lang = "eh_personality"] +fn eh() {} + +#[no_mangle] +pub extern "C" fn __aeabi_unwind_cpp_pr0() {} + +#[no_mangle] +pub extern "C" fn __aeabi_unwind_cpp_pr1() {} diff --git a/libm/src/math/atan.rs b/libm/src/math/atan.rs index 47a2951c6..cf6a62a54 100644 --- a/libm/src/math/atan.rs +++ b/libm/src/math/atan.rs @@ -124,7 +124,7 @@ pub fn atan(x: f64) -> f64 { return x - x * (s1 + s2); } - let z = ATANHI[id as usize] - (x * (s1 + s2) - ATANLO[id as usize] - x); + let z = i!(ATANHI, id as usize) - (x * (s1 + s2) - i!(ATANLO, id as usize) - x); if sign != 0 { -z diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 752a5991a..f663ae674 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -6,6 +6,50 @@ macro_rules! force_eval { }; } +#[cfg(not(feature = "checked"))] +macro_rules!
i { + ($array:expr, $index:expr) => { + unsafe { *$array.get_unchecked($index) } + }; + ($array:expr, $index:expr, =, $rhs:expr) => { + unsafe { *$array.get_unchecked_mut($index) = $rhs; } + }; + ($array:expr, $index:expr, +=, $rhs:expr) => { + unsafe { *$array.get_unchecked_mut($index) += $rhs; } + }; + ($array:expr, $index:expr, -=, $rhs:expr) => { + unsafe { *$array.get_unchecked_mut($index) -= $rhs; } + }; + ($array:expr, $index:expr, &=, $rhs:expr) => { + unsafe { *$array.get_unchecked_mut($index) &= $rhs; } + }; + ($array:expr, $index:expr, ==, $rhs:expr) => { + unsafe { *$array.get_unchecked_mut($index) == $rhs } + }; +} + +#[cfg(feature = "checked")] +macro_rules! i { + ($array:expr, $index:expr) => { + *$array.get($index).unwrap() + }; + ($array:expr, $index:expr, =, $rhs:expr) => { + *$array.get_mut($index).unwrap() = $rhs; + }; + ($array:expr, $index:expr, -=, $rhs:expr) => { + *$array.get_mut($index).unwrap() -= $rhs; + }; + ($array:expr, $index:expr, +=, $rhs:expr) => { + *$array.get_mut($index).unwrap() += $rhs; + }; + ($array:expr, $index:expr, &=, $rhs:expr) => { + *$array.get_mut($index).unwrap() &= $rhs; + }; + ($array:expr, $index:expr, ==, $rhs:expr) => { + *$array.get_mut($index).unwrap() == $rhs + }; +} + // Public modules mod acos; mod acosf; diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 6e655e7d4..5c1685877 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -177,7 +177,7 @@ pub fn rem_pio2(x: f64) -> (i32, f64, f64) { tx[2] = z; /* skip zero terms, first term is non-zero */ let mut i = 2; - while tx[i] == 0.0 { + while i != 0 && tx[i] == 0.0 { i -= 1; } let mut ty = [0.0; 3]; diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index 745b700a5..f44520931 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -1,3 +1,4 @@ +#![allow(unused_unsafe)] /* origin: FreeBSD /usr/src/lib/msun/src/k_rem_pio2.c */ /* * 
==================================================== @@ -257,17 +258,21 @@ pub fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { let mut j = (jv - jx) as i32; let m = jx + jk; for i in 0..=m { - f[i] = if j < 0 { 0. } else { IPIO2[j as usize] as f64 }; - j += 1 + i!(f, i, =, if j < 0 { + 0. + } else { + i!(IPIO2, j as usize) as f64 + }); + j += 1; } /* compute q[0],q[1],...q[jk] */ for i in 0..=jk { fw = 0f64; for j in 0..=jx { - fw += x[j] * f[jx + i - j]; + fw += i!(x, j) * i!(f, jx + i - j); } - q[i] = fw; + i!(q, i, =, fw); } let mut jz = jk; @@ -275,11 +280,11 @@ pub fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { 'recompute: loop { /* distill q[] into iq[] reversingly */ let mut i = 0i32; - z = q[jz]; + z = i!(q, jz); for j in (1..=jz).rev() { fw = (x1p_24 * z) as i32 as f64; - iq[i as usize] = (z - x1p24 * fw) as i32; - z = q[j - 1] + fw; + i!(iq, i as usize, =, (z - x1p24 * fw) as i32); + z = i!(q, j - 1) + fw; i += 1; } @@ -291,12 +296,12 @@ pub fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { ih = 0; if q0 > 0 { /* need iq[jz-1] to determine n */ - i = iq[jz - 1] >> (24 - q0); + i = i!(iq, jz - 1) >> (24 - q0); n += i; - iq[jz - 1] -= i << (24 - q0); - ih = iq[jz - 1] >> (23 - q0); + i!(iq, jz - 1, -=, i << (24 - q0)); + ih = i!(iq, jz - 1) >> (23 - q0); } else if q0 == 0 { - ih = iq[jz - 1] >> 23; + ih = i!(iq, jz - 1) >> 23; } else if z >= 0.5 { ih = 2; } @@ -307,24 +312,24 @@ pub fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { let mut carry = 0i32; for i in 0..jz { /* compute 1-q */ - let j = iq[i]; + let j = i!(iq, i); if carry == 0 { if j != 0 { carry = 1; - iq[i] = 0x1000000 - j; + i!(iq, i, =, 0x1000000 - j); } } else { - iq[i] = 0xffffff - j; + i!(iq, i, =, 0xffffff - j); } } if q0 > 0 { /* rare case: chance is 1 in 12 */ match q0 { 1 => { - iq[jz - 1] &= 0x7fffff; + i!(iq, jz - 1, &=, 0x7fffff); } 2 => { - iq[jz - 1] &= 0x3fffff; + i!(iq, jz - 
1, &=, 0x3fffff); } _ => {} } @@ -341,23 +346,23 @@ pub fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { if z == 0. { let mut j = 0; for i in (jk..=jz - 1).rev() { - j |= iq[i]; + j |= i!(iq, i); } if j == 0 { /* need recomputation */ let mut k = 1; - while iq[jk - k] == 0 { + while i!(iq, jk - k, ==, 0) { k += 1; /* k = no. of terms needed */ } for i in (jz + 1)..=(jz + k) { /* add q[jz+1] to q[jz+k] */ - f[jx + i] = IPIO2[jv + i] as f64; + i!(f, jx + i, =, i!(IPIO2, jv + i) as f64); fw = 0f64; for j in 0..=jx { - fw += x[j] * f[jx + i - j]; + fw += i!(x, j) * i!(f, jx + i - j); } - q[i] = fw; + i!(q, i, =, fw); } jz += k; continue 'recompute; @@ -371,7 +376,7 @@ pub fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { if z == 0. { jz -= 1; q0 -= 24; - while iq[jz] == 0 { + while i!(iq, jz) == 0 { jz -= 1; q0 -= 24; } @@ -380,19 +385,19 @@ pub fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { z = scalbn(z, -q0); if z >= x1p24 { fw = (x1p_24 * z) as i32 as f64; - iq[jz] = (z - x1p24 * fw) as i32; + i!(iq, jz, =, (z - x1p24 * fw) as i32); jz += 1; q0 += 24; - iq[jz] = fw as i32; + i!(iq, jz, =, fw as i32); } else { - iq[jz] = z as i32; + i!(iq, jz, =, z as i32); } } /* convert integer "bit" chunk to floating-point value */ fw = scalbn(1., q0); for i in (0..=jz).rev() { - q[i] = fw * (iq[i] as f64); + i!(q, i, =, fw * (i!(iq, i) as f64)); fw *= x1p_24; } @@ -401,10 +406,10 @@ pub fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { fw = 0f64; let mut k = 0; while (k <= jp) && (k <= jz - i) { - fw += PIO2[k] * q[i + k]; + fw += i!(PIO2, k) * i!(q, i + k); k += 1; } - fq[jz - i] = fw; + i!(fq, jz - i, =, fw); } /* compress fq[] into y[] */ @@ -412,51 +417,54 @@ pub fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { 0 => { fw = 0f64; for i in (0..=jz).rev() { - fw += fq[i]; + fw += i!(fq, i); } - y[0] = if ih == 0 { fw } else { -fw }; + i!(y, 0, =, if 
ih == 0 { fw } else { -fw }); } 1 | 2 => { fw = 0f64; for i in (0..=jz).rev() { - fw += fq[i]; + fw += i!(fq, i); } // TODO: drop excess precision here once double_t is used fw = fw as f64; - y[0] = if ih == 0 { fw } else { -fw }; - fw = fq[0] - fw; + i!(y, 0, =, if ih == 0 { fw } else { -fw }); + fw = i!(fq, 0) - fw; for i in 1..=jz { - fw += fq[i]; + fw += i!(fq, i); } - y[1] = if ih == 0 { fw } else { -fw }; + i!(y, 1, =, if ih == 0 { fw } else { -fw }); } 3 => { /* painful */ for i in (1..=jz).rev() { - fw = fq[i - 1] + fq[i]; - fq[i] += fq[i - 1] - fw; - fq[i - 1] = fw; + fw = i!(fq, i - 1) + i!(fq, i); + i!(fq, i, +=, i!(fq, i - 1) - fw); + i!(fq, i - 1, =, fw); } for i in (2..=jz).rev() { - fw = fq[i - 1] + fq[i]; - fq[i] += fq[i - 1] - fw; - fq[i - 1] = fw; + fw = i!(fq, i - 1) + i!(fq, i); + i!(fq, i, +=, i!(fq, i - 1) - fw); + i!(fq, i - 1, =, fw); } fw = 0f64; for i in (2..=jz).rev() { - fw += fq[i]; + fw += i!(fq, i); } if ih == 0 { - y[0] = fq[0]; - y[1] = fq[1]; - y[2] = fw; + i!(y, 0, =, i!(fq, 0)); + i!(y, 1, =, i!(fq, 1)); + i!(y, 2, =, fw); } else { - y[0] = -fq[0]; - y[1] = -fq[1]; - y[2] = -fw; + i!(y, 0, =, -i!(fq, 0)); + i!(y, 1, =, -i!(fq, 1)); + i!(y, 2, =, -fw); } } + #[cfg(feature = "checked")] _ => unreachable!(), + #[cfg(not(feature = "checked"))] + _ => {}, } n & 7 } From bcee021da9e2d275fc6d2e54b025727cdd9cfe00 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Wed, 25 Jul 2018 14:48:49 -0500 Subject: [PATCH 0115/1459] ci: add nightly build job to run the no-panic test --- libm/.travis.yml | 3 +++ libm/ci/script.sh | 13 ++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/libm/.travis.yml b/libm/.travis.yml index 223b586ea..922273e09 100644 --- a/libm/.travis.yml +++ b/libm/.travis.yml @@ -17,6 +17,9 @@ matrix: - env: TARGET=x86_64-unknown-linux-gnu - env: TARGET=cargo-fmt rust: beta + # no-panic link test + - env: TARGET=armv7-unknown-linux-gnueabihf + rust: nightly before_install: set -e diff --git
a/libm/ci/script.sh b/libm/ci/script.sh index 71d9e0882..5156047df 100644 --- a/libm/ci/script.sh +++ b/libm/ci/script.sh @@ -6,19 +6,18 @@ main() { return fi + # test that the functions don't contain invocations of `panic!` + if [ $TRAVIS_RUST_VERSION ]; then + cross build --release --target $TARGET --example no-panic + return + fi + # quick check cargo check # check that we can source import libm into compiler-builtins cargo check --package cb - # test that the functions don't contain invocations of `panic!` - case $TARGET in - armv7-unknown-linux-gnueabihf) - cross build --release --target $TARGET --example no-panic - ;; - esac - # run unit tests cross test --lib --features checked --target $TARGET --release From 5efcf71e774c7fffd37c3677d669d34756765916 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Wed, 25 Jul 2018 14:49:14 -0500 Subject: [PATCH 0116/1459] cargo fmt --- libm/src/math/mod.rs | 36 ++++++++++++++++++++------------- libm/src/math/rem_pio2_large.rs | 2 +- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index f663ae674..da34fb4ce 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -11,19 +11,27 @@ macro_rules! 
i { ($array:expr, $index:expr) => { unsafe { *$array.get_unchecked($index) } }; - ($array:expr, $index:expr, =, $rhs:expr) => { - unsafe { *$array.get_unchecked_mut($index) = $rhs; } + ($array:expr, $index:expr, = , $rhs:expr) => { + unsafe { + *$array.get_unchecked_mut($index) = $rhs; + } }; - ($array:expr, $index:expr, +=, $rhs:expr) => { - unsafe { *$array.get_unchecked_mut($index) += $rhs; } + ($array:expr, $index:expr, += , $rhs:expr) => { + unsafe { + *$array.get_unchecked_mut($index) += $rhs; + } }; - ($array:expr, $index:expr, -=, $rhs:expr) => { - unsafe { *$array.get_unchecked_mut($index) -= $rhs; } + ($array:expr, $index:expr, -= , $rhs:expr) => { + unsafe { + *$array.get_unchecked_mut($index) -= $rhs; + } }; - ($array:expr, $index:expr, &=, $rhs:expr) => { - unsafe { *$array.get_unchecked_mut($index) &= $rhs; } + ($array:expr, $index:expr, &= , $rhs:expr) => { + unsafe { + *$array.get_unchecked_mut($index) &= $rhs; + } }; - ($array:expr, $index:expr, ==, $rhs:expr) => { + ($array:expr, $index:expr, == , $rhs:expr) => { unsafe { *$array.get_unchecked_mut($index) == $rhs } }; } @@ -33,19 +41,19 @@ macro_rules! 
i { ($array:expr, $index:expr) => { *$array.get($index).unwrap() }; - ($array:expr, $index:expr, =, $rhs:expr) => { + ($array:expr, $index:expr, = , $rhs:expr) => { *$array.get_mut($index).unwrap() = $rhs; }; - ($array:expr, $index:expr, -=, $rhs:expr) => { + ($array:expr, $index:expr, -= , $rhs:expr) => { *$array.get_mut($index).unwrap() -= $rhs; }; - ($array:expr, $index:expr, +=, $rhs:expr) => { + ($array:expr, $index:expr, += , $rhs:expr) => { *$array.get_mut($index).unwrap() += $rhs; }; - ($array:expr, $index:expr, &=, $rhs:expr) => { + ($array:expr, $index:expr, &= , $rhs:expr) => { *$array.get_mut($index).unwrap() &= $rhs; }; - ($array:expr, $index:expr, ==, $rhs:expr) => { + ($array:expr, $index:expr, == , $rhs:expr) => { *$array.get_mut($index).unwrap() == $rhs }; } diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index f44520931..4d9146af9 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -464,7 +464,7 @@ pub fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { #[cfg(feature = "checked")] _ => unreachable!(), #[cfg(not(feature = "checked"))] - _ => {}, + _ => {} } n & 7 } From 725f8031f89bcbe0aaf0c94aacb3f4e95493aca2 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Wed, 25 Jul 2018 15:02:10 -0500 Subject: [PATCH 0117/1459] ci: fix if condition --- libm/ci/script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/ci/script.sh b/libm/ci/script.sh index 5156047df..bb19e23d8 100644 --- a/libm/ci/script.sh +++ b/libm/ci/script.sh @@ -7,7 +7,7 @@ main() { fi # test that the functions don't contain invocations of `panic!` - if [ $TRAVIS_RUST_VERSION ]; then + if [ $TRAVIS_RUST_VERSION = nightly ]; then cross build --release --target $TARGET --example no-panic return fi From 4dd99f627913eb0c17a9fa4d88afe6bc96ccc42b Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 27 Jul 2018 00:11:06 -0500 Subject: [PATCH 0118/1459] add newlib support to the 
test generator --- libm/.gitignore | 3 + libm/.travis.yml | 35 +- libm/Cargo.toml | 11 +- libm/ci/install.sh | 3 + libm/ci/script.sh | 17 +- .../Cargo.toml | 6 +- libm/input-generator/src/main.rs | 189 +++++ libm/math/.cargo/config | 11 + libm/math/Cargo.toml | 8 + libm/math/Cross.toml | 2 + libm/musl-generator/Cargo.toml | 9 + libm/musl-generator/src/macros.rs | 191 +++++ libm/musl-generator/src/main.rs | 97 +++ libm/newlib-generator/Cargo.toml | 7 + libm/newlib-generator/src/macros.rs | 245 ++++++ libm/newlib-generator/src/main.rs | 32 + libm/shared/Cargo.toml | 7 + libm/shared/src/lib.rs | 471 +++++++++++ libm/src/lib.rs | 28 +- libm/test-generator/README.md | 8 - libm/test-generator/src/main.rs | 788 ------------------ 21 files changed, 1343 insertions(+), 825 deletions(-) rename libm/{test-generator => input-generator}/Cargo.toml (55%) create mode 100644 libm/input-generator/src/main.rs create mode 100644 libm/math/.cargo/config create mode 100644 libm/math/Cargo.toml create mode 100644 libm/math/Cross.toml create mode 100644 libm/musl-generator/Cargo.toml create mode 100644 libm/musl-generator/src/macros.rs create mode 100644 libm/musl-generator/src/main.rs create mode 100644 libm/newlib-generator/Cargo.toml create mode 100644 libm/newlib-generator/src/macros.rs create mode 100644 libm/newlib-generator/src/main.rs create mode 100644 libm/shared/Cargo.toml create mode 100644 libm/shared/src/lib.rs delete mode 100644 libm/test-generator/README.md delete mode 100644 libm/test-generator/src/main.rs diff --git a/libm/.gitignore b/libm/.gitignore index 6db0ab6ef..39950911a 100644 --- a/libm/.gitignore +++ b/libm/.gitignore @@ -1,5 +1,8 @@ **/*.rs.bk .#* +/bin +/math/src +/math/target /target /tests Cargo.lock diff --git a/libm/.travis.yml b/libm/.travis.yml index 922273e09..b3beecb09 100644 --- a/libm/.travis.yml +++ b/libm/.travis.yml @@ -5,21 +5,29 @@ sudo: required matrix: include: - env: TARGET=aarch64-unknown-linux-gnu - - env: 
TARGET=armv7-unknown-linux-gnueabihf - - env: TARGET=i686-unknown-linux-gnu - - env: TARGET=mips-unknown-linux-gnu - - env: TARGET=mips64-unknown-linux-gnuabi64 - - env: TARGET=mips64el-unknown-linux-gnuabi64 - - env: TARGET=mipsel-unknown-linux-gnu - - env: TARGET=powerpc-unknown-linux-gnu - - env: TARGET=powerpc64-unknown-linux-gnu - - env: TARGET=powerpc64le-unknown-linux-gnu + rust: nightly + # - env: TARGET=armv7-unknown-linux-gnueabihf + # rust: nightly + # - env: TARGET=i686-unknown-linux-gnu + # rust: nightly + # - env: TARGET=mips-unknown-linux-gnu + # rust: nightly + # - env: TARGET=mips64-unknown-linux-gnuabi64 + # rust: nightly + # - env: TARGET=mips64el-unknown-linux-gnuabi64 + # rust: nightly + # - env: TARGET=mipsel-unknown-linux-gnu + # rust: nightly + # - env: TARGET=powerpc-unknown-linux-gnu + # rust: nightly + # - env: TARGET=powerpc64-unknown-linux-gnu + # rust: nightly + # - env: TARGET=powerpc64le-unknown-linux-gnu + # rust: nightly - env: TARGET=x86_64-unknown-linux-gnu - - env: TARGET=cargo-fmt - rust: beta - # no-panic link test - - env: TARGET=armv7-unknown-linux-gnueabihf rust: nightly + # - env: TARGET=cargo-fmt + # rust: beta before_install: set -e @@ -27,6 +35,7 @@ install: - bash ci/install.sh script: + - export PATH=$HOME/.local/bin:$PATH - bash ci/script.sh after_script: set +e diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 8a2ba7447..cedf8d267 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -14,4 +14,13 @@ version = "0.1.2" checked = [] [workspace] -members = ["cb", "test-generator"] \ No newline at end of file +members = [ + "cb", + "input-generator", + "musl-generator", + "newlib-generator", + "shared", +] + +[dev-dependencies] +shared = { path = "shared" } diff --git a/libm/ci/install.sh b/libm/ci/install.sh index 4d9552d23..7f73c5fea 100644 --- a/libm/ci/install.sh +++ b/libm/ci/install.sh @@ -15,6 +15,9 @@ main() { if [ $TARGET != x86_64-unknown-linux-gnu ]; then rustup target add $TARGET fi + + mkdir -p 
~/.local/bin + curl -L https://github.com/japaric/qemu-bin/raw/master/14.04/qemu-arm-2.12.0 > ~/.local/bin/qemu-arm } main diff --git a/libm/ci/script.sh b/libm/ci/script.sh index bb19e23d8..59e5a5f37 100644 --- a/libm/ci/script.sh +++ b/libm/ci/script.sh @@ -7,10 +7,11 @@ main() { fi # test that the functions don't contain invocations of `panic!` - if [ $TRAVIS_RUST_VERSION = nightly ]; then - cross build --release --target $TARGET --example no-panic - return - fi + case $TARGET in + armv7-unknown-linux-gnueabihf) + cross build --release --target $TARGET --example no-panic + ;; + esac # quick check cargo check @@ -18,12 +19,14 @@ main() { # check that we can source import libm into compiler-builtins cargo check --package cb + # generate tests + cargo run -p input-generator --target x86_64-unknown-linux-musl + cargo run -p musl-generator --target x86_64-unknown-linux-musl + cargo run -p newlib-generator + # run unit tests cross test --lib --features checked --target $TARGET --release - # generate tests - cargo run --package test-generator --target x86_64-unknown-linux-musl - # run generated tests cross test --tests --features checked --target $TARGET --release diff --git a/libm/test-generator/Cargo.toml b/libm/input-generator/Cargo.toml similarity index 55% rename from libm/test-generator/Cargo.toml rename to libm/input-generator/Cargo.toml index b810d9daf..fef2558a8 100644 --- a/libm/test-generator/Cargo.toml +++ b/libm/input-generator/Cargo.toml @@ -1,9 +1,7 @@ [package] -name = "test-generator" +name = "input-generator" version = "0.1.0" authors = ["Jorge Aparicio "] -publish = false [dependencies] -rand = "0.5.3" -itertools = "0.7.8" +rand = "0.5.4" diff --git a/libm/input-generator/src/main.rs b/libm/input-generator/src/main.rs new file mode 100644 index 000000000..b4a6ad142 --- /dev/null +++ b/libm/input-generator/src/main.rs @@ -0,0 +1,189 @@ +extern crate rand; + +use std::collections::BTreeSet; +use std::error::Error; +use std::fs::{self, File}; +use 
std::io::Write; + +use rand::{RngCore, SeedableRng, XorShiftRng}; + +const NTESTS: usize = 10_000; + +fn main() -> Result<(), Box> { + let mut rng = XorShiftRng::from_rng(&mut rand::thread_rng())?; + + fs::remove_dir_all("bin").ok(); + fs::create_dir_all("bin/input")?; + fs::create_dir_all("bin/output")?; + + f32(&mut rng)?; + f32f32(&mut rng)?; + f32f32f32(&mut rng)?; + f32i16(&mut rng)?; + f64(&mut rng)?; + f64f64(&mut rng)?; + f64f64f64(&mut rng)?; + f64i16(&mut rng)?; + + Ok(()) +} + +fn f32(rng: &mut XorShiftRng) -> Result<(), Box> { + let mut set = BTreeSet::new(); + + while set.len() < NTESTS { + let f = f32::from_bits(rng.next_u32()); + + if f.is_nan() { + continue; + } + + set.insert(f.to_bits()); + } + + let mut f = File::create("bin/input/f32")?; + for i in set { + f.write_all(&i.to_bytes())?; + } + + Ok(()) +} + +fn f32f32(rng: &mut XorShiftRng) -> Result<(), Box> { + let mut f = File::create("bin/input/f32f32")?; + let mut i = 0; + while i < NTESTS { + let x0 = f32::from_bits(rng.next_u32()); + let x1 = f32::from_bits(rng.next_u32()); + + if x0.is_nan() || x1.is_nan() { + continue; + } + + i += 1; + f.write_all(&x0.to_bits().to_bytes())?; + f.write_all(&x1.to_bits().to_bytes())?; + } + + Ok(()) +} + +fn f32i16(rng: &mut XorShiftRng) -> Result<(), Box> { + let mut f = File::create("bin/input/f32i16")?; + let mut i = 0; + while i < NTESTS { + let x0 = f32::from_bits(rng.next_u32()); + let x1 = rng.next_u32() as i16; + + if x0.is_nan() { + continue; + } + + i += 1; + f.write_all(&x0.to_bits().to_bytes())?; + f.write_all(&x1.to_bytes())?; + } + + Ok(()) +} + +fn f32f32f32(rng: &mut XorShiftRng) -> Result<(), Box> { + let mut f = File::create("bin/input/f32f32f32")?; + let mut i = 0; + while i < NTESTS { + let x0 = f32::from_bits(rng.next_u32()); + let x1 = f32::from_bits(rng.next_u32()); + let x2 = f32::from_bits(rng.next_u32()); + + if x0.is_nan() || x1.is_nan() || x2.is_nan() { + continue; + } + + i += 1; + f.write_all(&x0.to_bits().to_bytes())?; + 
f.write_all(&x1.to_bits().to_bytes())?; + f.write_all(&x2.to_bits().to_bytes())?; + } + + Ok(()) +} + +fn f64(rng: &mut XorShiftRng) -> Result<(), Box> { + let mut set = BTreeSet::new(); + + while set.len() < NTESTS { + let f = f64::from_bits(rng.next_u64()); + + if f.is_nan() { + continue; + } + + set.insert(f.to_bits()); + } + + let mut f = File::create("bin/input/f64")?; + for i in set { + f.write_all(&i.to_bytes())?; + } + + Ok(()) +} + +fn f64f64(rng: &mut XorShiftRng) -> Result<(), Box> { + let mut f = File::create("bin/input/f64f64")?; + let mut i = 0; + while i < NTESTS { + let x0 = f64::from_bits(rng.next_u64()); + let x1 = f64::from_bits(rng.next_u64()); + + if x0.is_nan() || x1.is_nan() { + continue; + } + + i += 1; + f.write_all(&x0.to_bits().to_bytes())?; + f.write_all(&x1.to_bits().to_bytes())?; + } + + Ok(()) +} + +fn f64f64f64(rng: &mut XorShiftRng) -> Result<(), Box> { + let mut f = File::create("bin/input/f64f64f64")?; + let mut i = 0; + while i < NTESTS { + let x0 = f64::from_bits(rng.next_u64()); + let x1 = f64::from_bits(rng.next_u64()); + let x2 = f64::from_bits(rng.next_u64()); + + if x0.is_nan() || x1.is_nan() || x2.is_nan() { + continue; + } + + i += 1; + f.write_all(&x0.to_bits().to_bytes())?; + f.write_all(&x1.to_bits().to_bytes())?; + f.write_all(&x2.to_bits().to_bytes())?; + } + + Ok(()) +} + +fn f64i16(rng: &mut XorShiftRng) -> Result<(), Box> { + let mut f = File::create("bin/input/f64i16")?; + let mut i = 0; + while i < NTESTS { + let x0 = f64::from_bits(rng.next_u64()); + let x1 = rng.next_u32() as i16; + + if x0.is_nan() { + continue; + } + + i += 1; + f.write_all(&x0.to_bits().to_bytes())?; + f.write_all(&x1.to_bytes())?; + } + + Ok(()) +} diff --git a/libm/math/.cargo/config b/libm/math/.cargo/config new file mode 100644 index 000000000..be79c453a --- /dev/null +++ b/libm/math/.cargo/config @@ -0,0 +1,11 @@ +[target.thumbv7em-none-eabi] +rustflags = [ + "-C", "link-arg=-Wl,-Tlink.x", + "-C", "link-arg=-nostartfiles", + "-C", 
"link-arg=-mthumb", + "-C", "link-arg=-march=armv7e-m", + "-C", "link-arg=-mfloat-abi=soft", +] + +[build] +target = "thumbv7em-none-eabi" \ No newline at end of file diff --git a/libm/math/Cargo.toml b/libm/math/Cargo.toml new file mode 100644 index 000000000..5bca038a9 --- /dev/null +++ b/libm/math/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "math" +version = "0.0.0" + +[dependencies] +qemu-arm-rt = { git = "https://github.com/japaric/qemu-arm-rt" } + +[workspace] \ No newline at end of file diff --git a/libm/math/Cross.toml b/libm/math/Cross.toml new file mode 100644 index 000000000..471770b52 --- /dev/null +++ b/libm/math/Cross.toml @@ -0,0 +1,2 @@ +[target.thumbv7em-none-eabi] +xargo = false \ No newline at end of file diff --git a/libm/musl-generator/Cargo.toml b/libm/musl-generator/Cargo.toml new file mode 100644 index 000000000..0564f3536 --- /dev/null +++ b/libm/musl-generator/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "musl-generator" +version = "0.1.0" +authors = ["Jorge Aparicio "] + +[dependencies] +lazy_static = "1.0.2" +shared = { path = "../shared" } +libm = { path = ".." } diff --git a/libm/musl-generator/src/macros.rs b/libm/musl-generator/src/macros.rs new file mode 100644 index 000000000..16ba99d64 --- /dev/null +++ b/libm/musl-generator/src/macros.rs @@ -0,0 +1,191 @@ +macro_rules! f32 { + ($($fun:ident,)+) => {{ + $( + // check type signature + let _: fn(f32) -> f32 = libm::$fun; + let mut $fun = File::create(concat!("bin/output/musl.", stringify!($fun)))?; + )+ + + for x in shared::F32.iter() { + $( + let y = unsafe { + extern "C" { + fn $fun(_: f32) -> f32; + } + + $fun(*x) + }; + + $fun.write_all(&y.to_bits().to_bytes())?; + )+ + } + }}; +} + +macro_rules! 
f32f32 { + ($($fun:ident,)+) => {{ + $( + // check type signature + let _: fn(f32, f32) -> f32 = libm::$fun; + let mut $fun = File::create(concat!("bin/output/musl.", stringify!($fun)))?; + )+ + + for (x0, x1) in shared::F32F32.iter() { + $( + let y = unsafe { + extern "C" { + fn $fun(_: f32, _: f32) -> f32; + } + + $fun(*x0, *x1) + }; + + $fun.write_all(&y.to_bits().to_bytes())?; + )+ + } + }}; +} + +macro_rules! f32f32f32 { + ($($fun:ident,)+) => {{ + $( + // check type signature + let _: fn(f32, f32, f32) -> f32 = libm::$fun; + let mut $fun = File::create(concat!("bin/output/musl.", stringify!($fun)))?; + )+ + + for (x0, x1, x2) in shared::F32F32F32.iter() { + $( + let y = unsafe { + extern "C" { + fn $fun(_: f32, _: f32, _: f32) -> f32; + } + + $fun(*x0, *x1, *x2) + }; + + $fun.write_all(&y.to_bits().to_bytes())?; + )+ + } + }}; +} + +macro_rules! f32i32 { + ($($fun:ident,)+) => {{ + $( + // check type signature + let _: fn(f32, i32) -> f32 = libm::$fun; + let mut $fun = File::create(concat!("bin/output/musl.", stringify!($fun)))?; + )+ + + for (x0, x1) in shared::F32I32.iter() { + $( + let y = unsafe { + extern "C" { + fn $fun(_: f32, _: i32) -> f32; + } + + $fun(*x0, *x1 as i32) + }; + + $fun.write_all(&y.to_bits().to_bytes())?; + )+ + } + }}; +} + +macro_rules! f64 { + ($($fun:ident,)+) => {{ + $( + // check type signature + let _: fn(f64) -> f64 = libm::$fun; + let mut $fun = File::create(concat!("bin/output/musl.", stringify!($fun)))?; + )+ + + for x in shared::F64.iter() { + $( + let y = unsafe { + extern "C" { + fn $fun(_: f64) -> f64; + } + + $fun(*x) + }; + + $fun.write_all(&y.to_bits().to_bytes())?; + )+ + } + }}; +} + +macro_rules! 
f64f64 { + ($($fun:ident,)+) => {{ + $( + // check type signature + let _: fn(f64, f64) -> f64 = libm::$fun; + let mut $fun = File::create(concat!("bin/output/musl.", stringify!($fun)))?; + )+ + + for (x0, x1) in shared::F64F64.iter() { + $( + let y = unsafe { + extern "C" { + fn $fun(_: f64, _: f64) -> f64; + } + + $fun(*x0, *x1) + }; + + $fun.write_all(&y.to_bits().to_bytes())?; + )+ + } + }}; +} + +macro_rules! f64f64f64 { + ($($fun:ident,)+) => {{ + $( + // check type signature + let _: fn(f64, f64, f64) -> f64 = libm::$fun; + let mut $fun = File::create(concat!("bin/output/musl.", stringify!($fun)))?; + )+ + + for (x0, x1, x2) in shared::F64F64F64.iter() { + $( + let y = unsafe { + extern "C" { + fn $fun(_: f64, _: f64, _: f64) -> f64; + } + + $fun(*x0, *x1, *x2) + }; + + $fun.write_all(&y.to_bits().to_bytes())?; + )+ + } + }}; +} + +macro_rules! f64i32 { + ($($fun:ident,)+) => {{ + $( + // check type signature + let _: fn(f64, i32) -> f64 = libm::$fun; + let mut $fun = File::create(concat!("bin/output/musl.", stringify!($fun)))?; + )+ + + for (x0, x1) in shared::F64I32.iter() { + $( + let y = unsafe { + extern "C" { + fn $fun(_: f64, _: i32) -> f64; + } + + $fun(*x0, *x1 as i32) + }; + + $fun.write_all(&y.to_bits().to_bytes())?; + )+ + } + }}; +} diff --git a/libm/musl-generator/src/main.rs b/libm/musl-generator/src/main.rs new file mode 100644 index 000000000..6e57e856d --- /dev/null +++ b/libm/musl-generator/src/main.rs @@ -0,0 +1,97 @@ +extern crate libm; +extern crate shared; + +use std::error::Error; +use std::fs::File; +use std::io::Write; + +#[macro_use] +mod macros; + +fn main() -> Result<(), Box> { + f32! { + acosf, + asinf, + atanf, + cbrtf, + ceilf, + cosf, + coshf, + exp2f, + expf, + expm1f, + fabsf, + floorf, + log10f, + log1pf, + log2f, + logf, + roundf, + sinf, + sinhf, + sqrtf, + tanf, + tanhf, + truncf, + } + + f32f32! { + atan2f, + fdimf, + fmodf, + hypotf, + powf, + } + + f32i32! { + scalbnf, + } + + f32f32f32! { + fmaf, + } + + f64! 
{ + acos, + asin, + atan, + cbrt, + ceil, + cos, + cosh, + exp, + exp2, + expm1, + fabs, + floor, + log, + log10, + log1p, + log2, + round, + sin, + sinh, + sqrt, + tan, + tanh, + trunc, + } + + f64f64! { + atan2, + fdim, + fmod, + hypot, + pow, + } + + f64i32! { + scalbn, + } + + f64f64f64! { + fma, + } + + Ok(()) +} diff --git a/libm/newlib-generator/Cargo.toml b/libm/newlib-generator/Cargo.toml new file mode 100644 index 000000000..5766cb4b7 --- /dev/null +++ b/libm/newlib-generator/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "newlib-generator" +version = "0.1.0" +authors = ["Jorge Aparicio "] + +[dependencies] +shared = { path = "../shared" } diff --git a/libm/newlib-generator/src/macros.rs b/libm/newlib-generator/src/macros.rs new file mode 100644 index 000000000..84315a777 --- /dev/null +++ b/libm/newlib-generator/src/macros.rs @@ -0,0 +1,245 @@ +macro_rules! f32 { + ($($fun:ident,)+) => { + $( + let fun = stringify!($fun); + + fs::create_dir_all("math/src")?; + + let main = format!(" +#![no_main] +#![no_std] + +#[macro_use] +extern crate qemu_arm_rt as rt; + +use core::u32; + +use rt::{{io, process}}; + +entry!(main); + +fn main() {{ + run().unwrap_or_else(|e| {{ + eprintln!(\"error: {{}}\", e); + process::exit(1); + }}) +}} + +fn run() -> Result<(), usize> {{ + #[link(name = \"m\")] + extern \"C\" {{ + fn {0}(_: f32) -> f32; + }} + + let mut buf = [0; 4]; + while let Ok(()) = io::Stdin.read_exact(&mut buf) {{ + let x = f32::from_bits(u32::from_bytes(buf)); + let y = unsafe {{ {0}(x) }}; + + io::Stdout.write_all(&y.to_bits().to_bytes())?; + }} + + Ok(()) +}} + +#[no_mangle] +pub fn __errno() -> *mut i32 {{ + static mut ERRNO: i32 = 0; + unsafe {{ &mut ERRNO }} +}} +", fun); + + File::create("math/src/main.rs")?.write_all(main.as_bytes())?; + + assert!( + Command::new("cross") + .args(&["build", "--target", "thumbv7em-none-eabi", "--release"]) + .current_dir("math") + .status()? 
+ .success() + ); + + let mut qemu = Command::new("qemu-arm") + .arg("math/target/thumbv7em-none-eabi/release/math") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn()?; + + qemu.stdin.as_mut().take().unwrap().write_all(F32)?; + + let output = qemu.wait_with_output()?; + + File::create(concat!("bin/output/newlib.", stringify!($fun)))? + .write_all(&output.stdout)?; + )+ + } +} + +macro_rules! f32f32 { + ($($fun:ident,)+) => { + $( + let fun = stringify!($fun); + + fs::create_dir_all("math/src")?; + + let main = format!(" +#![no_main] +#![no_std] + +#[macro_use] +extern crate qemu_arm_rt as rt; + +use core::u32; + +use rt::{{io, process}}; + +entry!(main); + +fn main() {{ + run().unwrap_or_else(|e| {{ + eprintln!(\"error: {{}}\", e); + process::exit(1); + }}) +}} + +fn run() -> Result<(), usize> {{ + #[link(name = \"m\")] + extern \"C\" {{ + fn {0}(_: f32, _: f32) -> f32; + }} + + let mut chunk = [0; 8]; + while let Ok(()) = io::Stdin.read_exact(&mut chunk) {{ + let mut buf = [0; 4]; + buf.copy_from_slice(&chunk[..4]); + let x0 = f32::from_bits(u32::from_bytes(buf)); + + buf.copy_from_slice(&chunk[4..]); + let x1 = f32::from_bits(u32::from_bytes(buf)); + + let y = unsafe {{ {0}(x0, x1) }}; + + io::Stdout.write_all(&y.to_bits().to_bytes())?; + }} + + Ok(()) +}} + +#[no_mangle] +pub fn __errno() -> *mut i32 {{ + static mut ERRNO: i32 = 0; + unsafe {{ &mut ERRNO }} +}} +", fun); + + File::create("math/src/main.rs")?.write_all(main.as_bytes())?; + + assert!( + Command::new("cross") + .args(&["build", "--target", "thumbv7em-none-eabi", "--release"]) + .current_dir("math") + .status()? + .success() + ); + + let mut qemu = Command::new("qemu-arm") + .arg("math/target/thumbv7em-none-eabi/release/math") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn()?; + + qemu.stdin.as_mut().take().unwrap().write_all(F32)?; + + let output = qemu.wait_with_output()?; + + File::create(concat!("bin/output/newlib.", stringify!($fun)))? 
+ .write_all(&output.stdout)?; + )+ + } +} + +macro_rules! f32f32f32 { + ($($fun:ident,)+) => { + $( + let fun = stringify!($fun); + + fs::create_dir_all("math/src")?; + + let main = format!(" +#![no_main] +#![no_std] + +#[macro_use] +extern crate qemu_arm_rt as rt; + +use core::u32; + +use rt::{{io, process}}; + +entry!(main); + +fn main() {{ + run().unwrap_or_else(|e| {{ + eprintln!(\"error: {{}}\", e); + process::exit(1); + }}) +}} + +fn run() -> Result<(), usize> {{ + #[link(name = \"m\")] + extern \"C\" {{ + fn {0}(_: f32, _: f32, _: f32) -> f32; + }} + + let mut chunk = [0; 12]; + while let Ok(()) = io::Stdin.read_exact(&mut chunk) {{ + let mut buf = [0; 4]; + buf.copy_from_slice(&chunk[..4]); + let x0 = f32::from_bits(u32::from_bytes(buf)); + + buf.copy_from_slice(&chunk[4..8]); + let x1 = f32::from_bits(u32::from_bytes(buf)); + + buf.copy_from_slice(&chunk[8..]); + let x2 = f32::from_bits(u32::from_bytes(buf)); + + let y = unsafe {{ {0}(x0, x1, x2) }}; + + io::Stdout.write_all(&y.to_bits().to_bytes())?; + }} + + Ok(()) +}} + +#[no_mangle] +pub fn __errno() -> *mut i32 {{ + static mut ERRNO: i32 = 0; + unsafe {{ &mut ERRNO }} +}} +", fun); + + File::create("math/src/main.rs")?.write_all(main.as_bytes())?; + + assert!( + Command::new("cross") + .args(&["build", "--target", "thumbv7em-none-eabi", "--release"]) + .current_dir("math") + .status()? + .success() + ); + + let mut qemu = Command::new("qemu-arm") + .arg("math/target/thumbv7em-none-eabi/release/math") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn()?; + + qemu.stdin.as_mut().take().unwrap().write_all(F32)?; + + let output = qemu.wait_with_output()?; + + File::create(concat!("bin/output/newlib.", stringify!($fun)))? 
+ .write_all(&output.stdout)?; + )+ + } +} diff --git a/libm/newlib-generator/src/main.rs b/libm/newlib-generator/src/main.rs new file mode 100644 index 000000000..52a97cabb --- /dev/null +++ b/libm/newlib-generator/src/main.rs @@ -0,0 +1,32 @@ +extern crate shared; + +use std::error::Error; +use std::fs::{self, File}; +use std::io::Write; +use std::process::{Command, Stdio}; + +#[macro_use] +mod macros; + +fn main() -> Result<(), Box> { + const F32: &[u8] = include_bytes!("../../bin/input/f32"); + + f32! { + asinf, + cbrtf, + cosf, + exp2f, + sinf, + tanf, + } + + f32f32! { + hypotf, + } + + f32f32f32! { + fmaf, + } + + Ok(()) +} diff --git a/libm/shared/Cargo.toml b/libm/shared/Cargo.toml new file mode 100644 index 000000000..d77823781 --- /dev/null +++ b/libm/shared/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "shared" +version = "0.1.0" +authors = ["Jorge Aparicio "] + +[dependencies] +lazy_static = "1.0.2" diff --git a/libm/shared/src/lib.rs b/libm/shared/src/lib.rs new file mode 100644 index 000000000..84676f94f --- /dev/null +++ b/libm/shared/src/lib.rs @@ -0,0 +1,471 @@ +#![feature(exact_chunks)] + +#[macro_use] +extern crate lazy_static; + +lazy_static! 
{ + pub static ref F32: Vec = { + let bytes = include_bytes!("../../bin/input/f32"); + + bytes + .exact_chunks(4) + .map(|chunk| { + let mut buf = [0; 4]; + buf.copy_from_slice(chunk); + f32::from_bits(u32::from_le(u32::from_bytes(buf))) + }) + .collect() + }; + pub static ref F32F32: Vec<(f32, f32)> = { + let bytes = include_bytes!("../../bin/input/f32f32"); + + bytes + .exact_chunks(8) + .map(|chunk| { + let mut x0 = [0; 4]; + let mut x1 = [0; 4]; + x0.copy_from_slice(&chunk[..4]); + x1.copy_from_slice(&chunk[4..]); + + ( + f32::from_bits(u32::from_le(u32::from_bytes(x0))), + f32::from_bits(u32::from_le(u32::from_bytes(x1))), + ) + }) + .collect() + }; + pub static ref F32F32F32: Vec<(f32, f32, f32)> = { + let bytes = include_bytes!("../../bin/input/f32f32f32"); + + bytes + .exact_chunks(12) + .map(|chunk| { + let mut x0 = [0; 4]; + let mut x1 = [0; 4]; + let mut x2 = [0; 4]; + x0.copy_from_slice(&chunk[..4]); + x1.copy_from_slice(&chunk[4..8]); + x2.copy_from_slice(&chunk[8..]); + + ( + f32::from_bits(u32::from_le(u32::from_bytes(x0))), + f32::from_bits(u32::from_le(u32::from_bytes(x1))), + f32::from_bits(u32::from_le(u32::from_bytes(x2))), + ) + }) + .collect() + }; + pub static ref F32I32: Vec<(f32, i32)> = { + let bytes = include_bytes!("../../bin/input/f32i16"); + + bytes + .exact_chunks(6) + .map(|chunk| { + let mut x0 = [0; 4]; + let mut x1 = [0; 2]; + x0.copy_from_slice(&chunk[..4]); + x1.copy_from_slice(&chunk[4..]); + + ( + f32::from_bits(u32::from_le(u32::from_bytes(x0))), + i16::from_le(i16::from_bytes(x1)) as i32, + ) + }) + .collect() + }; + pub static ref F64: Vec = { + let bytes = include_bytes!("../../bin/input/f64"); + + bytes + .exact_chunks(8) + .map(|chunk| { + let mut buf = [0; 8]; + buf.copy_from_slice(chunk); + f64::from_bits(u64::from_le(u64::from_bytes(buf))) + }) + .collect() + }; + pub static ref F64F64: Vec<(f64, f64)> = { + let bytes = include_bytes!("../../bin/input/f64f64"); + + bytes + .exact_chunks(16) + .map(|chunk| { + let mut 
x0 = [0; 8]; + let mut x1 = [0; 8]; + x0.copy_from_slice(&chunk[..8]); + x1.copy_from_slice(&chunk[8..]); + + ( + f64::from_bits(u64::from_le(u64::from_bytes(x0))), + f64::from_bits(u64::from_le(u64::from_bytes(x1))), + ) + }) + .collect() + }; + pub static ref F64F64F64: Vec<(f64, f64, f64)> = { + let bytes = include_bytes!("../../bin/input/f64f64f64"); + + bytes + .exact_chunks(24) + .map(|chunk| { + let mut x0 = [0; 8]; + let mut x1 = [0; 8]; + let mut x2 = [0; 8]; + x0.copy_from_slice(&chunk[..8]); + x1.copy_from_slice(&chunk[8..16]); + x2.copy_from_slice(&chunk[16..]); + + ( + f64::from_bits(u64::from_le(u64::from_bytes(x0))), + f64::from_bits(u64::from_le(u64::from_bytes(x1))), + f64::from_bits(u64::from_le(u64::from_bytes(x2))), + ) + }) + .collect() + }; + pub static ref F64I32: Vec<(f64, i32)> = { + let bytes = include_bytes!("../../bin/input/f64i16"); + + bytes + .exact_chunks(10) + .map(|chunk| { + let mut x0 = [0; 8]; + let mut x1 = [0; 2]; + x0.copy_from_slice(&chunk[..8]); + x1.copy_from_slice(&chunk[8..]); + + ( + f64::from_bits(u64::from_le(u64::from_bytes(x0))), + i16::from_le(i16::from_bytes(x1)) as i32, + ) + }) + .collect() + }; +} + +#[macro_export] +macro_rules! 
f32 { + ($lib:expr, $($fun:ident),+) => { + $( + #[test] + fn $fun() { + let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) + .exact_chunks(4) + .map(|chunk| { + let mut buf = [0; 4]; + buf.copy_from_slice(chunk); + f32::from_bits(u32::from_le(u32::from_bytes(buf))) + }) + .collect::>(); + + for (input, expected) in $crate::F32.iter().zip(&expected) { + if let Ok(output) = panic::catch_unwind(|| libm::$fun(*input)) { + if let Err(error) = libm::_eqf(output, *expected) { + panic!( + "INPUT: {:#x}, OUTPUT: {:#x}, EXPECTED: {:#x}, ERROR: {}", + input.to_bits(), + output.to_bits(), + expected.to_bits(), + error + ); + } + } else { + panic!( + "INPUT: {:#x}, OUTPUT: PANIC!, EXPECTED: {:#x}", + input.to_bits(), + expected.to_bits() + ); + } + } + } + )+ + } +} + +#[macro_export] +macro_rules! f32f32 { + ($lib:expr, $($fun:ident),+) => { + $( + #[test] + fn $fun() { + let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) + .exact_chunks(4) + .map(|chunk| { + let mut buf = [0; 4]; + buf.copy_from_slice(chunk); + f32::from_bits(u32::from_le(u32::from_bytes(buf))) + }) + .collect::>(); + + for ((i0, i1), expected) in $crate::F32F32.iter().zip(&expected) { + if let Ok(output) = panic::catch_unwind(|| libm::$fun(*i0, *i1)) { + if let Err(error) = libm::_eqf(output, *expected) { + panic!( + "INPUT: ({:#x}, {:#x}), OUTPUT: {:#x}, EXPECTED: {:#x}, ERROR: {}", + i0.to_bits(), + i1.to_bits(), + output.to_bits(), + expected.to_bits(), + error + ); + } + } else { + panic!( + "INPUT: ({:#x}, {:#x}), OUTPUT: PANIC!, EXPECTED: {:#x}", + i0.to_bits(), + i1.to_bits(), + expected.to_bits() + ); + } + } + } + )+ + } +} + +#[macro_export] +macro_rules! 
f32f32f32 { + ($lib:expr, $($fun:ident),+) => { + $( + #[test] + fn $fun() { + let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) + .exact_chunks(4) + .map(|chunk| { + let mut buf = [0; 4]; + buf.copy_from_slice(chunk); + f32::from_bits(u32::from_le(u32::from_bytes(buf))) + }) + .collect::>(); + + for ((i0, i1, i2), expected) in $crate::F32F32F32.iter().zip(&expected) { + if let Ok(output) = panic::catch_unwind(|| libm::$fun(*i0, *i1, *i2)) { + if let Err(error) = libm::_eqf(output, *expected) { + panic!( + "INPUT: ({:#x}, {:#x}, {:#x}), OUTPUT: {:#x}, EXPECTED: {:#x}, ERROR: {}", + i0.to_bits(), + i1.to_bits(), + i2.to_bits(), + output.to_bits(), + expected.to_bits(), + error + ); + } + } else { + panic!( + "INPUT: ({:#x}, {:#x}), OUTPUT: PANIC!, EXPECTED: {:#x}", + i0.to_bits(), + i1.to_bits(), + expected.to_bits() + ); + } + } + } + )+ + } +} + +#[macro_export] +macro_rules! f32i32 { + ($lib:expr, $($fun:ident),+) => { + $( + #[test] + fn $fun() { + let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) + .exact_chunks(4) + .map(|chunk| { + let mut buf = [0; 4]; + buf.copy_from_slice(chunk); + f32::from_bits(u32::from_le(u32::from_bytes(buf))) + }) + .collect::>(); + + for ((i0, i1), expected) in $crate::F32I32.iter().zip(&expected) { + if let Ok(output) = panic::catch_unwind(|| libm::$fun(*i0, *i1)) { + if let Err(error) = libm::_eqf(output, *expected) { + panic!( + "INPUT: ({:#x}, {:#x}), OUTPUT: {:#x}, EXPECTED: {:#x}, ERROR: {}", + i0.to_bits(), + i1, + output.to_bits(), + expected.to_bits(), + error + ); + } + } else { + panic!( + "INPUT: ({:#x}, {:#x}), OUTPUT: PANIC!, EXPECTED: {:#x}", + i0.to_bits(), + i1, + expected.to_bits() + ); + } + } + } + )+ + } +} + +#[macro_export] +macro_rules! 
f64 { + ($lib:expr, $($fun:ident),+) => { + $( + #[test] + fn $fun() { + let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) + .exact_chunks(8) + .map(|chunk| { + let mut buf = [0; 8]; + buf.copy_from_slice(chunk); + f64::from_bits(u64::from_le(u64::from_bytes(buf))) + }) + .collect::>(); + + for (input, expected) in shared::F64.iter().zip(&expected) { + if let Ok(output) = panic::catch_unwind(|| libm::$fun(*input)) { + if let Err(error) = libm::_eq(output, *expected) { + panic!( + "INPUT: {:#x}, OUTPUT: {:#x}, EXPECTED: {:#x}, ERROR: {}", + input.to_bits(), + output.to_bits(), + expected.to_bits(), + error + ); + } + } else { + panic!( + "INPUT: {:#x}, OUTPUT: PANIC!, EXPECTED: {:#x}", + input.to_bits(), + expected.to_bits() + ); + } + } + } + )+ + } +} + +#[macro_export] +macro_rules! f64f64 { + ($lib:expr, $($fun:ident),+) => { + $( + #[test] + fn $fun() { + let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) + .exact_chunks(8) + .map(|chunk| { + let mut buf = [0; 8]; + buf.copy_from_slice(chunk); + f64::from_bits(u64::from_le(u64::from_bytes(buf))) + }) + .collect::>(); + + for ((i0, i1), expected) in shared::F64F64.iter().zip(&expected) { + if let Ok(output) = panic::catch_unwind(|| libm::$fun(*i0, *i1)) { + if let Err(error) = libm::_eq(output, *expected) { + panic!( + "INPUT: ({:#x}, {:#x}), OUTPUT: {:#x}, EXPECTED: {:#x}, ERROR: {}", + i0.to_bits(), + i1.to_bits(), + output.to_bits(), + expected.to_bits(), + error + ); + } + } else { + panic!( + "INPUT: ({:#x}, {:#x}), OUTPUT: PANIC!, EXPECTED: {:#x}", + i0.to_bits(), + i1.to_bits(), + expected.to_bits() + ); + } + } + } + )+ + } +} + +#[macro_export] +macro_rules! 
f64f64f64 { + ($lib:expr, $($fun:ident),+) => { + $( + #[test] + fn $fun() { + let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) + .exact_chunks(8) + .map(|chunk| { + let mut buf = [0; 8]; + buf.copy_from_slice(chunk); + f64::from_bits(u64::from_le(u64::from_bytes(buf))) + }) + .collect::>(); + + for ((i0, i1, i2), expected) in shared::F64F64F64.iter().zip(&expected) { + if let Ok(output) = panic::catch_unwind(|| libm::$fun(*i0, *i1, *i2)) { + if let Err(error) = libm::_eq(output, *expected) { + panic!( + "INPUT: ({:#x}, {:#x}, {:#x}), OUTPUT: {:#x}, EXPECTED: {:#x}, ERROR: {}", + i0.to_bits(), + i1.to_bits(), + i2.to_bits(), + output.to_bits(), + expected.to_bits(), + error + ); + } + } else { + panic!( + "INPUT: ({:#x}, {:#x}), OUTPUT: PANIC!, EXPECTED: {:#x}", + i0.to_bits(), + i1.to_bits(), + expected.to_bits() + ); + } + } + } + )+ + } +} + +#[macro_export] +macro_rules! f64i32 { + ($lib:expr, $($fun:ident),+) => { + $( + #[test] + fn $fun() { + let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) + .exact_chunks(8) + .map(|chunk| { + let mut buf = [0; 8]; + buf.copy_from_slice(chunk); + f64::from_bits(u64::from_le(u64::from_bytes(buf))) + }) + .collect::>(); + + for ((i0, i1), expected) in shared::F64I32.iter().zip(&expected) { + if let Ok(output) = panic::catch_unwind(|| libm::$fun(*i0, *i1)) { + if let Err(error) = libm::_eq(output, *expected) { + panic!( + "INPUT: ({:#x}, {:#x}), OUTPUT: {:#x}, EXPECTED: {:#x}, ERROR: {}", + i0.to_bits(), + i1, + output.to_bits(), + expected.to_bits(), + error + ); + } + } else { + panic!( + "INPUT: ({:#x}, {:#x}), OUTPUT: PANIC!, EXPECTED: {:#x}", + i0.to_bits(), + i1, + expected.to_bits() + ); + } + } + } + )+ + } +} diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 5121cbfba..627c6443e 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -21,14 +21,34 @@ pub use math::*; /// Approximate equality with 1 ULP of tolerance #[doc(hidden)] #[inline] -pub 
fn _eqf(a: u32, b: u32) -> bool { - (a as i32).wrapping_sub(b as i32).abs() <= 1 +pub fn _eqf(a: f32, b: f32) -> Result<(), u32> { + if a.is_nan() && b.is_nan() { + Ok(()) + } else { + let err = (a.to_bits() as i32).wrapping_sub(b.to_bits() as i32).abs(); + + if err <= 1 { + Ok(()) + } else { + Err(err as u32) + } + } } #[doc(hidden)] #[inline] -pub fn _eq(a: u64, b: u64) -> bool { - (a as i64).wrapping_sub(b as i64).abs() <= 1 +pub fn _eq(a: f64, b: f64) -> Result<(), u64> { + if a.is_nan() && b.is_nan() { + Ok(()) + } else { + let err = (a.to_bits() as i64).wrapping_sub(b.to_bits() as i64).abs(); + + if err <= 1 { + Ok(()) + } else { + Err(err as u64) + } + } } /// Math support for `f32` diff --git a/libm/test-generator/README.md b/libm/test-generator/README.md deleted file mode 100644 index cbacd88f1..000000000 --- a/libm/test-generator/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# `test-generator` - -This is a tool to generate test cases for the `libm` crate. - -The generator randomly creates inputs for each math function, then proceeds to compute the -expected output for the given function by running the MUSL *C implementation* of the function and -finally it packs the test cases as a Cargo test file. For this reason, this generator **must** -always be compiled for the `x86_64-unknown-linux-musl` target. diff --git a/libm/test-generator/src/main.rs b/libm/test-generator/src/main.rs deleted file mode 100644 index 4c4e420a2..000000000 --- a/libm/test-generator/src/main.rs +++ /dev/null @@ -1,788 +0,0 @@ -// NOTE we intentionally avoid using the `quote` crate here because it doesn't work with the -// `x86_64-unknown-linux-musl` target. - -// NOTE usually the only thing you need to do to test a new math function is to add it to one of the -// macro invocations found in the bottom of this file. 
- -#[macro_use] -extern crate itertools; -extern crate rand; - -use std::error::Error; -use std::fmt::Write as _0; -use std::fs::{self, File}; -use std::io::Write as _1; -use std::{f32, f64, i16, u16, u32, u64, u8}; - -use rand::{Rng, SeedableRng, XorShiftRng}; - -// Number of test cases to generate -const NTESTS: usize = 10_000; - -// TODO tweak these functions to generate edge cases (zero, infinity, NaN) more often -fn f32(rng: &mut XorShiftRng) -> f32 { - let sign = if rng.gen_bool(0.5) { 1 << 31 } else { 0 }; - let exponent = (rng.gen_range(0, u8::MAX) as u32) << 23; - let mantissa = rng.gen_range(0, u32::MAX) & ((1 << 23) - 1); - - f32::from_bits(sign + exponent + mantissa) -} - -fn f64(rng: &mut XorShiftRng) -> f64 { - let sign = if rng.gen_bool(0.5) { 1 << 63 } else { 0 }; - let exponent = (rng.gen_range(0, u16::MAX) as u64 & ((1 << 11) - 1)) << 52; - let mantissa = rng.gen_range(0, u64::MAX) & ((1 << 52) - 1); - - f64::from_bits(sign + exponent + mantissa) -} - -const EDGE_CASES32: &[f32] = &[ - -0., - 0., - f32::EPSILON, - f32::INFINITY, - f32::MAX, - f32::MIN, - f32::MIN_POSITIVE, - f32::NAN, - f32::NEG_INFINITY, -]; - -const EDGE_CASES64: &[f64] = &[ - -0., - 0., - f64::EPSILON, - f64::INFINITY, - f64::MAX, - f64::MIN, - f64::MIN_POSITIVE, - f64::NAN, - f64::NEG_INFINITY, -]; - -// fn(f32) -> f32 -macro_rules! 
f32_f32 { - ($($intr:ident,)*) => { - fn f32_f32(rng: &mut XorShiftRng) -> Result<(), Box> { - // MUSL C implementation of the function to test - extern "C" { - $(fn $intr(_: f32) -> f32;)* - } - - $( - let mut cases = String::new(); - - // random inputs - for inp in EDGE_CASES32.iter().cloned().chain((0..NTESTS).map(|_| f32(rng))) { - let out = unsafe { $intr(inp) }; - - let inp = inp.to_bits(); - let out = out.to_bits(); - - write!(cases, "({}, {})", inp, out).unwrap(); - cases.push(','); - } - - let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; - write!(f, " - #![deny(warnings)] - - extern crate libm; - - use std::panic; - - #[test] - fn {0}() {{ - const CASES: &[(u32, u32)] = &[ - {1} - ]; - - for case in CASES {{ - let (inp, expected) = *case; - - if let Ok(outf) = - panic::catch_unwind(|| libm::{0}(f32::from_bits(inp))) - {{ - let outi = outf.to_bits(); - - if !((outf.is_nan() && f32::from_bits(expected).is_nan()) - || libm::_eqf(outi, expected)) - {{ - panic!( - \"input: {{}}, output: {{}}, expected: {{}}\", - inp, outi, expected, - ); - }} - }} else {{ - panic!( - \"input: {{}}, output: PANIC, expected: {{}}\", - inp, expected, - ); - }} - }} - }} -", - stringify!($intr), - cases)?; - )* - - Ok(()) - } - } -} - -// fn(f32, f32) -> f32 -macro_rules! 
f32f32_f32 { - ($($intr:ident,)*) => { - fn f32f32_f32(rng: &mut XorShiftRng) -> Result<(), Box> { - extern "C" { - $(fn $intr(_: f32, _: f32) -> f32;)* - } - - let mut rng2 = rng.clone(); - let mut rng3 = rng.clone(); - $( - let mut cases = String::new(); - for (i1, i2) in iproduct!( - EDGE_CASES32.iter().cloned(), - EDGE_CASES32.iter().cloned() - ).chain(EDGE_CASES32.iter().map(|i1| (*i1, f32(rng)))) - .chain(EDGE_CASES32.iter().map(|i2| (f32(&mut rng2), *i2))) - .chain((0..NTESTS).map(|_| (f32(&mut rng3), f32(&mut rng3)))) - { - let out = unsafe { $intr(i1, i2) }; - - let i1 = i1.to_bits(); - let i2 = i2.to_bits(); - let out = out.to_bits(); - - write!(cases, "(({}, {}), {})", i1, i2, out).unwrap(); - cases.push(','); - } - - let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; - write!(f, " - #![deny(warnings)] - - extern crate libm; - - use std::panic; - - #[test] - fn {0}() {{ - const CASES: &[((u32, u32), u32)] = &[ - {1} - ]; - - for case in CASES {{ - let ((i1, i2), expected) = *case; - - if let Ok(outf) = panic::catch_unwind(|| {{ - libm::{0}(f32::from_bits(i1), f32::from_bits(i2)) - }}) {{ - let outi = outf.to_bits(); - - if !((outf.is_nan() && f32::from_bits(expected).is_nan()) - || libm::_eqf(outi, expected)) - {{ - panic!( - \"input: {{:?}}, output: {{}}, expected: {{}}\", - (i1, i2), - outi, - expected, - ); - }} - }} else {{ - panic!( - \"input: {{:?}}, output: PANIC, expected: {{}}\", - (i1, i2), - expected, - ); - }} - }} - }} -", - stringify!($intr), - cases)?; - )* - - Ok(()) - } - }; -} - -// fn(f32, f32, f32) -> f32 -macro_rules! 
f32f32f32_f32 { - ($($intr:ident,)*) => { - fn f32f32f32_f32(rng: &mut XorShiftRng) -> Result<(), Box> { - extern "C" { - $(fn $intr(_: f32, _: f32, _: f32) -> f32;)* - } - - let mut rng2 = rng.clone(); - $( - let mut cases = String::new(); - for (i1, i2, i3) in iproduct!( - EDGE_CASES32.iter().cloned(), - EDGE_CASES32.iter().cloned(), - EDGE_CASES32.iter().cloned() - ).chain(EDGE_CASES32.iter().map(|i1| (*i1, f32(rng), f32(rng)))) - .chain((0..NTESTS).map(|_| (f32(&mut rng2), f32(&mut rng2), f32(&mut rng2)))) - { - let out = unsafe { $intr(i1, i2, i3) }; - - let i1 = i1.to_bits(); - let i2 = i2.to_bits(); - let i3 = i3.to_bits(); - let out = out.to_bits(); - - write!(cases, "(({}, {}, {}), {})", i1, i2, i3, out).unwrap(); - cases.push(','); - } - - let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; - write!(f, " - #![deny(warnings)] - - extern crate libm; - - use std::panic; - - #[test] - fn {0}() {{ - const CASES: &[((u32, u32, u32), u32)] = &[ - {1} - ]; - - for case in CASES {{ - let ((i1, i2, i3), expected) = *case; - - if let Ok(outf) = panic::catch_unwind(|| {{ - libm::{0}( - f32::from_bits(i1), - f32::from_bits(i2), - f32::from_bits(i3), - ) - }}) {{ - let outi = outf.to_bits(); - - if !((outf.is_nan() && f32::from_bits(expected).is_nan()) - || libm::_eqf(outi, expected)) - {{ - panic!( - \"input: {{:?}}, output: {{}}, expected: {{}}\", - (i1, i2, i3), - outi, - expected, - ); - }} - }} else {{ - panic!( - \"input: {{:?}}, output: PANIC, expected: {{}}\", - (i1, i2, i3), - expected, - ); - }} - }} - }} -", - stringify!($intr), - cases)?; - )* - - Ok(()) - } - }; -} - -// fn(f32, i32) -> f32 -macro_rules! 
f32i32_f32 { - ($($intr:ident,)*) => { - fn f32i32_f32(rng: &mut XorShiftRng) -> Result<(), Box> { - extern "C" { - $(fn $intr(_: f32, _: i32) -> f32;)* - } - - let mut rng2 = rng.clone(); - $( - let mut cases = String::new(); - for i1 in EDGE_CASES32.iter().cloned().chain((0..NTESTS).map(|_| f32(&mut rng2))) { - let i2 = rng.gen_range(i16::MIN, i16::MAX); - let out = unsafe { $intr(i1, i2 as i32) }; - - let i1 = i1.to_bits(); - let out = out.to_bits(); - - write!(cases, "(({}, {}), {})", i1, i2, out).unwrap(); - cases.push(','); - } - - let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; - write!(f, " - #![deny(warnings)] - - extern crate libm; - - use std::panic; - - #[test] - fn {0}() {{ - const CASES: &[((u32, i16), u32)] = &[ - {1} - ]; - - for case in CASES {{ - let ((i1, i2), expected) = *case; - - if let Ok(outf) = panic::catch_unwind(|| {{ - libm::{0}(f32::from_bits(i1), i2 as i32) - }}) {{ - let outi = outf.to_bits(); - - if !((outf.is_nan() && f32::from_bits(expected).is_nan()) - || libm::_eqf(outi, expected)) - {{ - panic!( - \"input: {{:?}}, output: {{}}, expected: {{}}\", - (i1, i2), - outi, - expected, - ); - }} - }} else {{ - panic!( - \"input: {{:?}}, output: PANIC, expected: {{}}\", - (i1, i2), - expected, - ); - }} - }} - }} -", - stringify!($intr), - cases)?; - )* - - Ok(()) - } - }; -} - -// fn(f64) -> f64 -macro_rules! 
f64_f64 { - ($($intr:ident,)*) => { - fn f64_f64(rng: &mut XorShiftRng) -> Result<(), Box> { - // MUSL C implementation of the function to test - extern "C" { - $(fn $intr(_: f64) -> f64;)* - } - - $( - let mut cases = String::new(); - for inp in EDGE_CASES64.iter().cloned().chain((0..NTESTS).map(|_| f64(rng))) { - let out = unsafe { $intr(inp) }; - - let inp = inp.to_bits(); - let out = out.to_bits(); - - write!(cases, "({}, {})", inp, out).unwrap(); - cases.push(','); - } - - let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; - write!(f, " - #![deny(warnings)] - - extern crate libm; - - use std::panic; - - #[test] - fn {0}() {{ - const CASES: &[(u64, u64)] = &[ - {1} - ]; - - for case in CASES {{ - let (inp, expected) = *case; - - if let Ok(outf) = panic::catch_unwind(|| {{ - libm::{0}(f64::from_bits(inp)) - }}) {{ - let outi = outf.to_bits(); - - if !((outf.is_nan() && f64::from_bits(expected).is_nan()) - || libm::_eq(outi, expected)) - {{ - panic!( - \"input: {{}}, output: {{}}, expected: {{}}\", - inp, - outi, - expected, - ); - }} - }} else {{ - panic!( - \"input: {{}}, output: PANIC, expected: {{}}\", - inp, - expected, - ); - }} - }} - }} -", - stringify!($intr), - cases)?; - )* - - Ok(()) - } - } -} - -// fn(f64, f64) -> f64 -macro_rules! 
f64f64_f64 { - ($($intr:ident,)*) => { - fn f64f64_f64(rng: &mut XorShiftRng) -> Result<(), Box> { - extern "C" { - $(fn $intr(_: f64, _: f64) -> f64;)* - } - - let mut rng2 = rng.clone(); - let mut rng3 = rng.clone(); - $( - let mut cases = String::new(); - for (i1, i2) in iproduct!( - EDGE_CASES64.iter().cloned(), - EDGE_CASES64.iter().cloned() - ).chain(EDGE_CASES64.iter().map(|i1| (*i1, f64(rng)))) - .chain(EDGE_CASES64.iter().map(|i2| (f64(&mut rng2), *i2))) - .chain((0..NTESTS).map(|_| (f64(&mut rng3), f64(&mut rng3)))) - { - let out = unsafe { $intr(i1, i2) }; - - let i1 = i1.to_bits(); - let i2 = i2.to_bits(); - let out = out.to_bits(); - - write!(cases, "(({}, {}), {})", i1, i2, out).unwrap(); - cases.push(','); - } - - let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; - write!(f, " - #![deny(warnings)] - - extern crate libm; - - use std::panic; - - #[test] - fn {0}() {{ - const CASES: &[((u64, u64), u64)] = &[ - {1} - ]; - - for case in CASES {{ - let ((i1, i2), expected) = *case; - - if let Ok(outf) = panic::catch_unwind(|| {{ - libm::{0}(f64::from_bits(i1), f64::from_bits(i2)) - }}) {{ - let outi = outf.to_bits(); - - if !((outf.is_nan() && f64::from_bits(expected).is_nan()) || - libm::_eq(outi, expected)) {{ - panic!( - \"input: {{:?}}, output: {{}}, expected: {{}}\", - (i1, i2), - outi, - expected, - ); - }} - }} else {{ - panic!( - \"input: {{:?}}, output: PANIC, expected: {{}}\", - (i1, i2), - expected, - ); - }} - }} - }} -", - stringify!($intr), - cases)?; - )* - - Ok(()) - } - }; -} - -// fn(f64, f64, f64) -> f64 -macro_rules! 
f64f64f64_f64 { - ($($intr:ident,)*) => { - fn f64f64f64_f64(rng: &mut XorShiftRng) -> Result<(), Box> { - extern "C" { - $(fn $intr(_: f64, _: f64, _: f64) -> f64;)* - } - - let mut rng2 = rng.clone(); - $( - let mut cases = String::new(); - for (i1, i2, i3) in iproduct!( - EDGE_CASES64.iter().cloned(), - EDGE_CASES64.iter().cloned(), - EDGE_CASES64.iter().cloned() - ).chain(EDGE_CASES64.iter().map(|i1| (*i1, f64(rng), f64(rng)))) - .chain((0..NTESTS).map(|_| (f64(&mut rng2), f64(&mut rng2), f64(&mut rng2)))) - { - let out = unsafe { $intr(i1, i2, i3) }; - - let i1 = i1.to_bits(); - let i2 = i2.to_bits(); - let i3 = i3.to_bits(); - let out = out.to_bits(); - - write!(cases, "(({}, {}, {}), {})", i1, i2, i3, out).unwrap(); - cases.push(','); - } - - let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; - write!(f, " - #![deny(warnings)] - - extern crate libm; - - use std::panic; - - #[test] - fn {0}() {{ - const CASES: &[((u64, u64, u64), u64)] = &[ - {1} - ]; - - for case in CASES {{ - let ((i1, i2, i3), expected) = *case; - - if let Ok(outf) = panic::catch_unwind(|| {{ - libm::{0}( - f64::from_bits(i1), - f64::from_bits(i2), - f64::from_bits(i3), - ) - }}) {{ - let outi = outf.to_bits(); - - if !((outf.is_nan() && f64::from_bits(expected).is_nan()) - || libm::_eq(outi, expected)) - {{ - panic!( - \"input: {{:?}}, output: {{}}, expected: {{}}\", - (i1, i2, i3), - outi, - expected, - ); - }} - }} else {{ - panic!( - \"input: {{:?}}, output: PANIC, expected: {{}}\", - (i1, i2, i3), - expected, - ); - }} - }} - }} -", - stringify!($intr), - cases)?; - )* - - Ok(()) - } - }; -} - -// fn(f64, i32) -> f64 -macro_rules! 
f64i32_f64 { - ($($intr:ident,)*) => { - fn f64i32_f64(rng: &mut XorShiftRng) -> Result<(), Box> { - extern "C" { - $(fn $intr(_: f64, _: i32) -> f64;)* - } - - let mut rng2 = rng.clone(); - $( - let mut cases = String::new(); - for i1 in EDGE_CASES64.iter().cloned().chain((0..NTESTS).map(|_| f64(&mut rng2))) { - let i2 = rng.gen_range(i16::MIN, i16::MAX); - let out = unsafe { $intr(i1, i2 as i32) }; - - let i1 = i1.to_bits(); - let out = out.to_bits(); - - write!(cases, "(({}, {}), {})", i1, i2, out).unwrap(); - cases.push(','); - } - - let mut f = File::create(concat!("tests/", stringify!($intr), ".rs"))?; - write!(f, " - #![deny(warnings)] - - extern crate libm; - - use std::panic; - - #[test] - fn {0}() {{ - const CASES: &[((u64, i16), u64)] = &[ - {1} - ]; - - for case in CASES {{ - let ((i1, i2), expected) = *case; - - if let Ok(outf) = panic::catch_unwind(|| {{ - libm::{0}(f64::from_bits(i1), i2 as i32) - }}) {{ - let outi = outf.to_bits(); - - if !((outf.is_nan() && f64::from_bits(expected).is_nan()) || - libm::_eq(outi, expected)) {{ - panic!( - \"input: {{:?}}, output: {{}}, expected: {{}}\", - (i1, i2), - outi, - expected, - ); - }} - }} else {{ - panic!( - \"input: {{:?}}, output: PANIC, expected: {{}}\", - (i1, i2), - expected, - ); - }} - }} - }} -", - stringify!($intr), - cases)?; - )* - - Ok(()) - } - }; -} - -fn main() -> Result<(), Box> { - fs::remove_dir_all("tests").ok(); - fs::create_dir("tests")?; - - let mut rng = XorShiftRng::from_rng(&mut rand::thread_rng())?; - - f32_f32(&mut rng)?; - f32f32_f32(&mut rng)?; - f32f32f32_f32(&mut rng)?; - f32i32_f32(&mut rng)?; - f64_f64(&mut rng)?; - f64f64_f64(&mut rng)?; - f64f64f64_f64(&mut rng)?; - f64i32_f64(&mut rng)?; - - Ok(()) -} - -/* Functions to test */ - -// With signature `fn(f32) -> f32` -f32_f32! 
{ - acosf, - floorf, - truncf, - asinf, - atanf, - cbrtf, - cosf, - ceilf, - coshf, - exp2f, - expf, - expm1f, - log10f, - log1pf, - log2f, - logf, - roundf, - sinf, - sinhf, - tanf, - tanhf, - fabsf, - sqrtf, -} - -// With signature `fn(f32, f32) -> f32` -f32f32_f32! { - atan2f, - fdimf, - hypotf, - fmodf, - powf, -} - -// With signature `fn(f32, f32, f32) -> f32` -f32f32f32_f32! { - fmaf, -} - -// With signature `fn(f32, i32) -> f32` -f32i32_f32! { - scalbnf, -} - -// With signature `fn(f64) -> f64` -f64_f64! { - acos, - asin, - atan, - cbrt, - ceil, - cos, - cosh, - exp, - exp2, - expm1, - floor, - log, - log10, - log1p, - log2, - round, - sin, - sinh, - sqrt, - tan, - tanh, - trunc, - fabs, -} - -// With signature `fn(f64, f64) -> f64` -f64f64_f64! { - atan2, - fdim, - fmod, - hypot, - pow, -} - -// With signature `fn(f64, f64, f64) -> f64` -f64f64f64_f64! { - fma, -} - -// With signature `fn(f64, i32) -> f64` -f64i32_f64! { - scalbn, -} From c52c1158f9b0766523f4c057f76ca49e655718e9 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 27 Jul 2018 00:21:54 -0500 Subject: [PATCH 0119/1459] make qemu-arm executable --- libm/ci/install.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libm/ci/install.sh b/libm/ci/install.sh index 7f73c5fea..af26e2d4c 100644 --- a/libm/ci/install.sh +++ b/libm/ci/install.sh @@ -18,6 +18,8 @@ main() { mkdir -p ~/.local/bin curl -L https://github.com/japaric/qemu-bin/raw/master/14.04/qemu-arm-2.12.0 > ~/.local/bin/qemu-arm + chmod +x ~/.local/bin/qemu-arm + qemu-arm --version } main From e240550e49afb21a0baad7b47a2310da8d475fc4 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 27 Jul 2018 00:27:25 -0500 Subject: [PATCH 0120/1459] uncomment the other build jobs --- libm/.travis.yml | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/libm/.travis.yml b/libm/.travis.yml index b3beecb09..47f2b2f20 100644 --- a/libm/.travis.yml +++ b/libm/.travis.yml @@ -6,28 +6,28 @@ 
matrix: include: - env: TARGET=aarch64-unknown-linux-gnu rust: nightly - # - env: TARGET=armv7-unknown-linux-gnueabihf - # rust: nightly - # - env: TARGET=i686-unknown-linux-gnu - # rust: nightly - # - env: TARGET=mips-unknown-linux-gnu - # rust: nightly - # - env: TARGET=mips64-unknown-linux-gnuabi64 - # rust: nightly - # - env: TARGET=mips64el-unknown-linux-gnuabi64 - # rust: nightly - # - env: TARGET=mipsel-unknown-linux-gnu - # rust: nightly - # - env: TARGET=powerpc-unknown-linux-gnu - # rust: nightly - # - env: TARGET=powerpc64-unknown-linux-gnu - # rust: nightly - # - env: TARGET=powerpc64le-unknown-linux-gnu - # rust: nightly + - env: TARGET=armv7-unknown-linux-gnueabihf + rust: nightly + - env: TARGET=i686-unknown-linux-gnu + rust: nightly + - env: TARGET=mips-unknown-linux-gnu + rust: nightly + - env: TARGET=mips64-unknown-linux-gnuabi64 + rust: nightly + - env: TARGET=mips64el-unknown-linux-gnuabi64 + rust: nightly + - env: TARGET=mipsel-unknown-linux-gnu + rust: nightly + - env: TARGET=powerpc-unknown-linux-gnu + rust: nightly + - env: TARGET=powerpc64-unknown-linux-gnu + rust: nightly + - env: TARGET=powerpc64le-unknown-linux-gnu + rust: nightly - env: TARGET=x86_64-unknown-linux-gnu rust: nightly - # - env: TARGET=cargo-fmt - # rust: beta + - env: TARGET=cargo-fmt + rust: beta before_install: set -e From 7e67fc83ac9e42f28228b348f575d50e29508aaf Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 27 Jul 2018 00:38:56 -0500 Subject: [PATCH 0121/1459] tweak the order of the tests --- libm/ci/script.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/libm/ci/script.sh b/libm/ci/script.sh index 59e5a5f37..c3b6faa6c 100644 --- a/libm/ci/script.sh +++ b/libm/ci/script.sh @@ -6,13 +6,6 @@ main() { return fi - # test that the functions don't contain invocations of `panic!` - case $TARGET in - armv7-unknown-linux-gnueabihf) - cross build --release --target $TARGET --example no-panic - ;; - esac - # quick check cargo check @@ 
-24,6 +17,13 @@ main() { cargo run -p musl-generator --target x86_64-unknown-linux-musl cargo run -p newlib-generator + # test that the functions don't contain invocations of `panic!` + case $TARGET in + armv7-unknown-linux-gnueabihf) + cross build --release --target $TARGET --example no-panic + ;; + esac + # run unit tests cross test --lib --features checked --target $TARGET --release From 7626e9dd2774cad4152b8650b9340d6af3b384e9 Mon Sep 17 00:00:00 2001 From: Igor null Date: Sun, 26 Aug 2018 16:36:59 +0300 Subject: [PATCH 0122/1459] Ported several remaining math functions from musl Please note that these aren't tested yet. --- libm/src/math/acosh.rs | 22 +++ libm/src/math/acoshf.rs | 21 ++ libm/src/math/asinef.rs | 95 +++++++++ libm/src/math/asinh.rs | 35 ++++ libm/src/math/asinhf.rs | 34 ++++ libm/src/math/atanh.rs | 33 ++++ libm/src/math/atanhf.rs | 32 +++ libm/src/math/copysign.rs | 7 + libm/src/math/copysignf.rs | 7 + libm/src/math/erf.rs | 297 ++++++++++++++++++++++++++++ libm/src/math/erff.rs | 210 ++++++++++++++++++++ libm/src/math/exp10.rs | 24 +++ libm/src/math/exp10f.rs | 22 +++ libm/src/math/frexp.rs | 20 ++ libm/src/math/frexpf.rs | 21 ++ libm/src/math/ilogb.rs | 31 +++ libm/src/math/ilogbf.rs | 31 +++ libm/src/math/j0.rs | 392 +++++++++++++++++++++++++++++++++++++ libm/src/math/j0f.rs | 330 +++++++++++++++++++++++++++++++ libm/src/math/j1.rs | 387 ++++++++++++++++++++++++++++++++++++ libm/src/math/j1f.rs | 331 +++++++++++++++++++++++++++++++ libm/src/math/jn.rs | 338 ++++++++++++++++++++++++++++++++ libm/src/math/jnf.rs | 255 ++++++++++++++++++++++++ libm/src/math/lgamma.rs | 309 +++++++++++++++++++++++++++++ libm/src/math/lgammaf.rs | 244 +++++++++++++++++++++++ libm/src/math/mod.rs | 74 +++++++ libm/src/math/modf.rs | 33 ++++ libm/src/math/modff.rs | 32 +++ libm/src/math/remquo.rs | 98 ++++++++++ libm/src/math/remquof.rs | 97 +++++++++ libm/src/math/sincos.rs | 60 ++++++ libm/src/math/sincosf.rs | 122 ++++++++++++ libm/src/math/tgamma.rs | 179 
+++++++++++++++++ libm/src/math/tgammaf.rs | 5 + 34 files changed, 4228 insertions(+) create mode 100644 libm/src/math/acosh.rs create mode 100644 libm/src/math/acoshf.rs create mode 100644 libm/src/math/asinef.rs create mode 100644 libm/src/math/asinh.rs create mode 100644 libm/src/math/asinhf.rs create mode 100644 libm/src/math/atanh.rs create mode 100644 libm/src/math/atanhf.rs create mode 100644 libm/src/math/copysign.rs create mode 100644 libm/src/math/copysignf.rs create mode 100644 libm/src/math/erf.rs create mode 100644 libm/src/math/erff.rs create mode 100644 libm/src/math/exp10.rs create mode 100644 libm/src/math/exp10f.rs create mode 100644 libm/src/math/frexp.rs create mode 100644 libm/src/math/frexpf.rs create mode 100644 libm/src/math/ilogb.rs create mode 100644 libm/src/math/ilogbf.rs create mode 100644 libm/src/math/j0.rs create mode 100644 libm/src/math/j0f.rs create mode 100644 libm/src/math/j1.rs create mode 100644 libm/src/math/j1f.rs create mode 100644 libm/src/math/jn.rs create mode 100644 libm/src/math/jnf.rs create mode 100644 libm/src/math/lgamma.rs create mode 100644 libm/src/math/lgammaf.rs create mode 100644 libm/src/math/modf.rs create mode 100644 libm/src/math/modff.rs create mode 100644 libm/src/math/remquo.rs create mode 100644 libm/src/math/remquof.rs create mode 100644 libm/src/math/sincos.rs create mode 100644 libm/src/math/sincosf.rs create mode 100644 libm/src/math/tgamma.rs create mode 100644 libm/src/math/tgammaf.rs diff --git a/libm/src/math/acosh.rs b/libm/src/math/acosh.rs new file mode 100644 index 000000000..3494e3405 --- /dev/null +++ b/libm/src/math/acosh.rs @@ -0,0 +1,22 @@ +use super::{log, log1p, sqrt}; + +const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa39ef*/ + +/* acosh(x) = log(x + sqrt(x*x-1)) */ +pub fn acosh(x: f64) -> f64 { + let u = x.to_bits(); + let e = ((u >> 52) as usize) & 0x7ff; + + /* x < 1 domain error is handled in the called functions */ + + if e < 0x3ff + 1 { + /* |x| 
< 2, up to 2ulp error in [1,1.125] */ + return log1p(x-1.0+sqrt((x-1.0)*(x-1.0)+2.0*(x-1.0))); + } + if e < 0x3ff + 26 { + /* |x| < 0x1p26 */ + return log(2.0*x-1.0/(x+sqrt(x*x-1.0))); + } + /* |x| >= 0x1p26 or nan */ + return log(x) + LN2; +} diff --git a/libm/src/math/acoshf.rs b/libm/src/math/acoshf.rs new file mode 100644 index 000000000..1e298a9b3 --- /dev/null +++ b/libm/src/math/acoshf.rs @@ -0,0 +1,21 @@ +use super::{log1pf, logf, sqrtf}; + +const LN2: f32 = 0.693147180559945309417232121458176568; + +/* acosh(x) = log(x + sqrt(x*x-1)) */ +pub fn acoshf(x: f32) -> f32 { + let u = x.to_bits(); + let a = u & 0x7fffffff; + + if a < 0x3f800000+(1<<23) { + /* |x| < 2, invalid if x < 1 or nan */ + /* up to 2ulp error in [1,1.125] */ + return log1pf(x-1.0 + sqrtf((x-1.0)*(x-1.0)+2.0*(x-1.0))); + } + if a < 0x3f800000+(12<<23) { + /* |x| < 0x1p12 */ + return logf(2.0*x - 1.0/(x+sqrtf(x*x-1.0))); + } + /* x >= 0x1p12 */ + return logf(x) + LN2; +} diff --git a/libm/src/math/asinef.rs b/libm/src/math/asinef.rs new file mode 100644 index 000000000..d2cd82699 --- /dev/null +++ b/libm/src/math/asinef.rs @@ -0,0 +1,95 @@ +/* @(#)z_asinef.c 1.0 98/08/13 */ +/****************************************************************** + * The following routines are coded directly from the algorithms + * and coefficients given in "Software Manual for the Elementary + * Functions" by William J. Cody, Jr. and William Waite, Prentice + * Hall, 1980. + ******************************************************************/ +/****************************************************************** + * Arcsine + * + * Input: + * x - floating point value + * acosine - indicates acos calculation + * + * Output: + * Arcsine of x. + * + * Description: + * This routine calculates arcsine / arccosine. 
+ * + *****************************************************************/ + +use super::{fabsf, sqrtf}; + +const P: [f32; 2] = [ 0.933935835, -0.504400557 ]; +const Q: [f32; 2] = [ 0.560363004e+1, -0.554846723e+1 ]; +const A: [f32; 2] = [ 0.0, 0.785398163 ]; +const B: [f32; 2] = [ 1.570796326, 0.785398163 ]; +const Z_ROOTEPS_F: f32 = 1.7263349182589107e-4; + +pub fn asinef(x: f32, acosine: usize) -> f32 +{ + let flag: usize; + let i: usize; + let mut branch: bool = false; + let g: f32; + let mut res: f32 = 0.0; + let mut y: f32; + + /* Check for special values. */ + //i = numtestf (x); + if x.is_nan() || x.is_infinite() { + force_eval!(x); + return x; + } + + y = fabsf(x); + flag = acosine; + + if y > 0.5 { + i = 1 - flag; + + /* Check for range error. */ + if y > 1.0 { + return 0.0 / 0.0; + } + + g = (1.0 - y) / 2.0; + y = -2.0 * sqrtf(g); + branch = true; + } else { + i = flag; + if y < Z_ROOTEPS_F { + res = y; + g = 0.0; // pleasing the uninitialized variable + } else { + g = y * y; + } + } + + if y >= Z_ROOTEPS_F || branch { + /* Calculate the Taylor series. */ + let p = (P[1] * g + P[0]) * g; + let q = (g + Q[1]) * g + Q[0]; + let r = p / q; + + res = y + y * r; + } + + /* Calculate asine or acose. 
*/ + if flag == 0 { + res = (A[i] + res) + A[i]; + if x < 0.0 { + res = -res; + } + } else { + if x < 0.0 { + res = (B[i] + res) + B[i]; + } else { + res = (A[i] - res) + A[i]; + } + } + + return res; +} diff --git a/libm/src/math/asinh.rs b/libm/src/math/asinh.rs new file mode 100644 index 000000000..09e894551 --- /dev/null +++ b/libm/src/math/asinh.rs @@ -0,0 +1,35 @@ +use super::{log, log1p, sqrt}; + +const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa39ef*/ + +/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ +pub fn asinh(mut x: f64) -> f64 { + let mut u = x.to_bits(); + let e = ((u >> 52) as usize) & 0x7ff; + let sign = (u >> 63) != 0; + + /* |x| */ + u &= (!0) >> 1; + x = f64::from_bits(u); + + if e >= 0x3ff + 26 { + /* |x| >= 0x1p26 or inf or nan */ + x = log(x) + LN2; + } else if e >= 0x3ff + 1 { + /* |x| >= 2 */ + x = log(2.0*x + 1.0/(sqrt(x*x+1.0)+x)); + } else if e >= 0x3ff - 26 { + /* |x| >= 0x1p-26, up to 1.6ulp error in [0.125,0.5] */ + x = log1p(x + x*x/(sqrt(x*x+1.0)+1.0)); + } else { + /* |x| < 0x1p-26, raise inexact if x != 0 */ + let x1p120 = f64::from_bits(0x4770000000000000); + force_eval!(x + x1p120); + } + + if sign { + -x + } else { + x + } +} diff --git a/libm/src/math/asinhf.rs b/libm/src/math/asinhf.rs new file mode 100644 index 000000000..236916d83 --- /dev/null +++ b/libm/src/math/asinhf.rs @@ -0,0 +1,34 @@ +use super::{logf, log1pf, sqrtf}; + +const LN2: f32 = 0.693147180559945309417232121458176568; + +/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ +pub fn asinhf(mut x: f32) -> f32 { + let u = x.to_bits(); + let i = u & 0x7fffffff; + let sign = (u >> 31) != 0; + + /* |x| */ + x = f32::from_bits(i); + + if i >= 0x3f800000 + (12<<23) { + /* |x| >= 0x1p12 or inf or nan */ + x = logf(x) + LN2; + } else if i >= 0x3f800000 + (1<<23) { + /* |x| >= 2 */ + x = logf(2.0*x + 1.0/(sqrtf(x*x+1.0)+x)); + } else if i >= 0x3f800000 - (12<<23) { + /* |x| >= 0x1p-12, up to 1.6ulp error 
in [0.125,0.5] */ + x = log1pf(x + x*x/(sqrtf(x*x+1.0)+1.0)); + } else { + /* |x| < 0x1p-12, raise inexact if x!=0 */ + let x1p120 = f32::from_bits(0x7b800000); + force_eval!(x + x1p120); + } + + if sign { + -x + } else { + x + } +} diff --git a/libm/src/math/atanh.rs b/libm/src/math/atanh.rs new file mode 100644 index 000000000..ea444809c --- /dev/null +++ b/libm/src/math/atanh.rs @@ -0,0 +1,33 @@ +use super::{log1p}; + +/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ +pub fn atanh(mut x: f64) -> f64 { + let mut u = x.to_bits(); + let e = ((u >> 52) as usize) & 0x7ff; + let sign = (u >> 63) != 0; + + /* |x|: clear only the sign bit of the 64-bit pattern (a 32-bit mask would also wipe the exponent) */ + u &= (!0) >> 1; + x = f64::from_bits(u); + + if e < 0x3ff - 1 { + if e < 0x3ff - 32 { + /* handle underflow */ + if e == 0 { + force_eval!(x as f32); + } + } else { + /* |x| < 0.5, up to 1.7ulp error */ + x = 0.5*log1p(2.0*x + 2.0*x*x/(1.0-x)); + } + } else { + /* avoid overflow */ + x = 0.5*log1p(2.0*(x/(1.0-x))); + } + + if sign { + -x + } else { + x + } +} diff --git a/libm/src/math/atanhf.rs b/libm/src/math/atanhf.rs new file mode 100644 index 000000000..77d451bf2 --- /dev/null +++ b/libm/src/math/atanhf.rs @@ -0,0 +1,32 @@ +use super::{log1pf}; + +/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ +pub fn atanhf(mut x: f32) -> f32 { + let mut u = x.to_bits(); + let sign = (u >> 31) != 0; + + /* |x| */ + u &= 0x7fffffff; + x = f32::from_bits(u); + + if u < 0x3f800000 - (1<<23) { + if u < 0x3f800000 - (32<<23) { + /* handle underflow */ + if u < (1<<23) { + force_eval!((x*x) as f32); + } + } else { + /* |x| < 0.5, up to 1.7ulp error */ + x = 0.5*log1pf(2.0*x + 2.0*x*x/(1.0-x)); + } + } else { + /* avoid overflow */ + x = 0.5*log1pf(2.0*(x/(1.0-x))); + } + + if sign { + -x + } else { + x + } +} diff --git a/libm/src/math/copysign.rs b/libm/src/math/copysign.rs new file mode 100644 index 000000000..74b761e74 --- /dev/null +++ b/libm/src/math/copysign.rs @@ -0,0 +1,7 @@ +pub fn copysign(x: f64, y:
f64) -> f64 { + let mut ux = x.to_bits(); + let uy = y.to_bits(); + ux &= (!0) >> 1; + ux |= uy & (1<<63); + f64::from_bits(ux) +} diff --git a/libm/src/math/copysignf.rs b/libm/src/math/copysignf.rs new file mode 100644 index 000000000..a0a814bf6 --- /dev/null +++ b/libm/src/math/copysignf.rs @@ -0,0 +1,7 @@ +pub fn copysignf(x: f32, y: f32) -> f32 { + let mut ux = x.to_bits(); + let uy = y.to_bits(); + ux &= 0x7fffffff; + ux |= uy & 0x80000000; + f32::from_bits(ux) +} diff --git a/libm/src/math/erf.rs b/libm/src/math/erf.rs new file mode 100644 index 000000000..b3ad2ce05 --- /dev/null +++ b/libm/src/math/erf.rs @@ -0,0 +1,297 @@ +use super::{exp, fabs, get_high_word, with_set_low_word}; +/* origin: FreeBSD /usr/src/lib/msun/src/s_erf.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* double erf(double x) + * double erfc(double x) + * x + * 2 |\ + * erf(x) = --------- | exp(-t*t)dt + * sqrt(pi) \| + * 0 + * + * erfc(x) = 1-erf(x) + * Note that + * erf(-x) = -erf(x) + * erfc(-x) = 2 - erfc(x) + * + * Method: + * 1. For |x| in [0, 0.84375] + * erf(x) = x + x*R(x^2) + * erfc(x) = 1 - erf(x) if x in [-.84375,0.25] + * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375] + * where R = P/Q where P is an odd poly of degree 8 and + * Q is an odd poly of degree 10. + * -57.90 + * | R - (erf(x)-x)/x | <= 2 + * + * + * Remark. The formula is derived by noting + * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....) + * and that + * 2/sqrt(pi) = 1.128379167095512573896158903121545171688 + * is close to one. 
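The interval is chosen because the fix + * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is + * near 0.6174), and by some experiment, 0.84375 is chosen to + * guarantee the error is less than one ulp for erf. + * + * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and + * c = 0.84506291151 rounded to single (24 bits) + * erf(x) = sign(x) * (c + P1(s)/Q1(s)) + * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0 + * 1+(c+P1(s)/Q1(s)) if x < 0 + * |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06 + * Remark: here we use the taylor series expansion at x=1. + * erf(1+s) = erf(1) + s*Poly(s) + * = 0.845.. + P1(s)/Q1(s) + * That is, we use rational approximation to approximate + * erf(1+s) - (c = (single)0.84506291151) + * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25] + * where + * P1(s) = degree 6 poly in s + * Q1(s) = degree 6 poly in s + * + * 3. For x in [1.25,1/0.35(~2.857143)], + * erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1) + * erf(x) = 1 - erfc(x) + * where + * R1(z) = degree 7 poly in z, (z=1/x^2) + * S1(z) = degree 8 poly in z + * + * 4. For x in [1/0.35,28] + * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0 + * = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6 < x < 0 + * = 2.0 - tiny (if x <= -6) + * erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else + * erf(x) = sign(x)*(1.0 - tiny) + * where + * R2(z) = degree 6 poly in z, (z=1/x^2) + * S2(z) = degree 7 poly in z + * + * Note1: + * To compute exp(-x*x-0.5625+R/S), let s be a single + * precision number and s := x; then + * -x*x = -s*s + (s-x)*(s+x) + * exp(-x*x-0.5625+R/S) = + * exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S); + * Note2: + * Here 4 and 5 make use of the asymptotic series + * exp(-x*x) + * erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) ) + * x*sqrt(pi) + * We use rational approximation to approximate + * g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625 + * Here is the error bound for R1/S1 and R2/S2 + * |R1/S1 - f(x)| < 2**(-62.57) + * |R2/S2 - f(x)| < 2**(-61.52) + * + * 5. For inf > x >= 28 + * erf(x) = sign(x) *(1 - tiny) (raise inexact) + * erfc(x) = tiny*tiny (raise underflow) if x > 0 + * = 2 - tiny if x<0 + * + * 7.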
Special case: + * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1, + * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2, + * erfc/erf(NaN) is NaN + */ + +const ERX: f64 = 8.45062911510467529297e-01; /* 0x3FEB0AC1, 0x60000000 */ +/* + * Coefficients for approximation to erf on [0,0.84375] + */ +const EFX8: f64 = 1.02703333676410069053e+00; /* 0x3FF06EBA, 0x8214DB69 */ +const PP0: f64 = 1.28379167095512558561e-01; /* 0x3FC06EBA, 0x8214DB68 */ +const PP1: f64 = -3.25042107247001499370e-01; /* 0xBFD4CD7D, 0x691CB913 */ +const PP2: f64 = -2.84817495755985104766e-02; /* 0xBF9D2A51, 0xDBD7194F */ +const PP3: f64 = -5.77027029648944159157e-03; /* 0xBF77A291, 0x236668E4 */ +const PP4: f64 = -2.37630166566501626084e-05; /* 0xBEF8EAD6, 0x120016AC */ +const QQ1: f64 = 3.97917223959155352819e-01; /* 0x3FD97779, 0xCDDADC09 */ +const QQ2: f64 = 6.50222499887672944485e-02; /* 0x3FB0A54C, 0x5536CEBA */ +const QQ3: f64 = 5.08130628187576562776e-03; /* 0x3F74D022, 0xC4D36B0F */ +const QQ4: f64 = 1.32494738004321644526e-04; /* 0x3F215DC9, 0x221C1A10 */ +const QQ5: f64 = -3.96022827877536812320e-06; /* 0xBED09C43, 0x42A26120 */ +/* + * Coefficients for approximation to erf in [0.84375,1.25] + */ +const PA0: f64 = -2.36211856075265944077e-03; /* 0xBF6359B8, 0xBEF77538 */ +const PA1: f64 = 4.14856118683748331666e-01; /* 0x3FDA8D00, 0xAD92B34D */ +const PA2: f64 = -3.72207876035701323847e-01; /* 0xBFD7D240, 0xFBB8C3F1 */ +const PA3: f64 = 3.18346619901161753674e-01; /* 0x3FD45FCA, 0x805120E4 */ +const PA4: f64 = -1.10894694282396677476e-01; /* 0xBFBC6398, 0x3D3E28EC */ +const PA5: f64 = 3.54783043256182359371e-02; /* 0x3FA22A36, 0x599795EB */ +const PA6: f64 = -2.16637559486879084300e-03; /* 0xBF61BF38, 0x0A96073F */ +const QA1: f64 = 1.06420880400844228286e-01; /* 0x3FBB3E66, 0x18EEE323 */ +const QA2: f64 = 5.40397917702171048937e-01; /* 0x3FE14AF0, 0x92EB6F33 */ +const QA3: f64 = 7.18286544141962662868e-02; /* 0x3FB2635C, 0xD99FE9A7 */ +const QA4: f64 = 1.26171219808761642112e-01; /* 0x3FC02660, 
0xE763351F */ +const QA5: f64 = 1.36370839120290507362e-02; /* 0x3F8BEDC2, 0x6B51DD1C */ +const QA6: f64 = 1.19844998467991074170e-02; /* 0x3F888B54, 0x5735151D */ +/* + * Coefficients for approximation to erfc in [1.25,1/0.35] + */ +const RA0: f64 = -9.86494403484714822705e-03; /* 0xBF843412, 0x600D6435 */ +const RA1: f64 = -6.93858572707181764372e-01; /* 0xBFE63416, 0xE4BA7360 */ +const RA2: f64 = -1.05586262253232909814e+01; /* 0xC0251E04, 0x41B0E726 */ +const RA3: f64 = -6.23753324503260060396e+01; /* 0xC04F300A, 0xE4CBA38D */ +const RA4: f64 = -1.62396669462573470355e+02; /* 0xC0644CB1, 0x84282266 */ +const RA5: f64 = -1.84605092906711035994e+02; /* 0xC067135C, 0xEBCCABB2 */ +const RA6: f64 = -8.12874355063065934246e+01; /* 0xC0545265, 0x57E4D2F2 */ +const RA7: f64 = -9.81432934416914548592e+00; /* 0xC023A0EF, 0xC69AC25C */ +const SA1: f64 = 1.96512716674392571292e+01; /* 0x4033A6B9, 0xBD707687 */ +const SA2: f64 = 1.37657754143519042600e+02; /* 0x4061350C, 0x526AE721 */ +const SA3: f64 = 4.34565877475229228821e+02; /* 0x407B290D, 0xD58A1A71 */ +const SA4: f64 = 6.45387271733267880336e+02; /* 0x40842B19, 0x21EC2868 */ +const SA5: f64 = 4.29008140027567833386e+02; /* 0x407AD021, 0x57700314 */ +const SA6: f64 = 1.08635005541779435134e+02; /* 0x405B28A3, 0xEE48AE2C */ +const SA7: f64 = 6.57024977031928170135e+00; /* 0x401A47EF, 0x8E484A93 */ +const SA8: f64 = -6.04244152148580987438e-02; /* 0xBFAEEFF2, 0xEE749A62 */ +/* + * Coefficients for approximation to erfc in [1/.35,28] + */ +const RB0: f64 = -9.86494292470009928597e-03; /* 0xBF843412, 0x39E86F4A */ +const RB1: f64 = -7.99283237680523006574e-01; /* 0xBFE993BA, 0x70C285DE */ +const RB2: f64 = -1.77579549177547519889e+01; /* 0xC031C209, 0x555F995A */ +const RB3: f64 = -1.60636384855821916062e+02; /* 0xC064145D, 0x43C5ED98 */ +const RB4: f64 = -6.37566443368389627722e+02; /* 0xC083EC88, 0x1375F228 */ +const RB5: f64 = -1.02509513161107724954e+03; /* 0xC0900461, 0x6A2E5992 */ +const RB6: f64 = 
-4.83519191608651397019e+02; /* 0xC07E384E, 0x9BDC383F */ +const SB1: f64 = 3.03380607434824582924e+01; /* 0x403E568B, 0x261D5190 */ +const SB2: f64 = 3.25792512996573918826e+02; /* 0x40745CAE, 0x221B9F0A */ +const SB3: f64 = 1.53672958608443695994e+03; /* 0x409802EB, 0x189D5118 */ +const SB4: f64 = 3.19985821950859553908e+03; /* 0x40A8FFB7, 0x688C246A */ +const SB5: f64 = 2.55305040643316442583e+03; /* 0x40A3F219, 0xCEDF3BE6 */ +const SB6: f64 = 4.74528541206955367215e+02; /* 0x407DA874, 0xE79FE763 */ +const SB7: f64 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */ + +fn erfc1(x: f64) -> f64 { + let s: f64; + let p: f64; + let q: f64; + + s = fabs(x) - 1.0; + p = PA0+s*(PA1+s*(PA2+s*(PA3+s*(PA4+s*(PA5+s*PA6))))); + q = 1.0+s*(QA1+s*(QA2+s*(QA3+s*(QA4+s*(QA5+s*QA6))))); + + 1.0 - ERX - p/q +} + +fn erfc2(ix: u32, mut x: f64) -> f64 { + let s: f64; + let r: f64; + let big_s: f64; + let z: f64; + + if ix < 0x3ff40000 { /* |x| < 1.25 */ + return erfc1(x); + } + + x = fabs(x); + s = 1.0/(x*x); + if ix < 0x4006db6d { /* |x| < 1/.35 ~ 2.85714 */ + r = RA0+s*(RA1+s*(RA2+s*(RA3+s*(RA4+s*( + RA5+s*(RA6+s*RA7)))))); + big_s = 1.0+s*(SA1+s*(SA2+s*(SA3+s*(SA4+s*( + SA5+s*(SA6+s*(SA7+s*SA8))))))); + } else { /* |x| > 1/.35 */ + r = RB0+s*(RB1+s*(RB2+s*(RB3+s*(RB4+s*( + RB5+s*RB6))))); + big_s = 1.0+s*(SB1+s*(SB2+s*(SB3+s*(SB4+s*( + SB5+s*(SB6+s*SB7)))))); + } + z = with_set_low_word(x, 0); + + exp(-z*z-0.5625)*exp((z-x)*(z+x)+r/big_s)/x +} + +pub fn erf(x: f64) -> f64 { + let r: f64; + let s: f64; + let z: f64; + let y: f64; + let mut ix: u32; + let sign: usize; + + ix = get_high_word(x); + sign = (ix>>31) as usize; + ix &= 0x7fffffff; + if ix >= 0x7ff00000 { + /* erf(nan)=nan, erf(+-inf)=+-1 */ + return 1.0-2.0*(sign as f64) + 1.0/x; + } + if ix < 0x3feb0000 { /* |x| < 0.84375 */ + if ix < 0x3e300000 { /* |x| < 2**-28 */ + /* avoid underflow */ + return 0.125*(8.0*x + EFX8*x); + } + z = x*x; + r = PP0+z*(PP1+z*(PP2+z*(PP3+z*PP4))); + s = 
1.0+z*(QQ1+z*(QQ2+z*(QQ3+z*(QQ4+z*QQ5)))); + y = r/s; + return x + x*y; + } + if ix < 0x40180000 { /* 0.84375 <= |x| < 6 */ + y = 1.0 - erfc2(ix,x); + } else { + let x1p_1022 = f64::from_bits(0x0010000000000000); + y = 1.0 - x1p_1022; + } + + if sign != 0 { + -y + } else { + y + } +} + +pub fn erfc(x: f64) -> f64 { + let r: f64; + let s: f64; + let z: f64; + let y: f64; + let mut ix: u32; + let sign: usize; + + ix = get_high_word(x); + sign = (ix>>31) as usize; + ix &= 0x7fffffff; + if ix >= 0x7ff00000 { + /* erfc(nan)=nan, erfc(+-inf)=0,2 */ + return 2.0*(sign as f64) + 1.0/x; + } + if ix < 0x3feb0000 { /* |x| < 0.84375 */ + if ix < 0x3c700000 { /* |x| < 2**-56 */ + return 1.0 - x; + } + z = x*x; + r = PP0+z*(PP1+z*(PP2+z*(PP3+z*PP4))); + s = 1.0+z*(QQ1+z*(QQ2+z*(QQ3+z*(QQ4+z*QQ5)))); + y = r/s; + if sign != 0 || ix < 0x3fd00000 { /* x < 1/4 */ + return 1.0 - (x+x*y); + } + return 0.5 - (x - 0.5 + x*y); + } + if ix < 0x403c0000 { /* 0.84375 <= |x| < 28 */ + if sign != 0 { + return 2.0 - erfc2(ix,x); + } else { + return erfc2(ix,x); + } + } + + let x1p_1022 = f64::from_bits(0x0010000000000000); + if sign != 0 { + 2.0 - x1p_1022 + } else { + x1p_1022*x1p_1022 + } +} diff --git a/libm/src/math/erff.rs b/libm/src/math/erff.rs new file mode 100644 index 000000000..0aaa89767 --- /dev/null +++ b/libm/src/math/erff.rs @@ -0,0 +1,210 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_erff.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + +use super::{expf, fabsf}; + +const ERX: f32 = 8.4506291151e-01; /* 0x3f58560b */ +/* + * Coefficients for approximation to erf on [0,0.84375] + */ +const EFX8: f32 = 1.0270333290e+00; /* 0x3f8375d4 */ +const PP0: f32 = 1.2837916613e-01; /* 0x3e0375d4 */ +const PP1: f32 = -3.2504209876e-01; /* 0xbea66beb */ +const PP2: f32 = -2.8481749818e-02; /* 0xbce9528f */ +const PP3: f32 = -5.7702702470e-03; /* 0xbbbd1489 */ +const PP4: f32 = -2.3763017452e-05; /* 0xb7c756b1 */ +const QQ1: f32 = 3.9791721106e-01; /* 0x3ecbbbce */ +const QQ2: f32 = 6.5022252500e-02; /* 0x3d852a63 */ +const QQ3: f32 = 5.0813062117e-03; /* 0x3ba68116 */ +const QQ4: f32 = 1.3249473704e-04; /* 0x390aee49 */ +const QQ5: f32 = -3.9602282413e-06; /* 0xb684e21a */ +/* + * Coefficients for approximation to erf in [0.84375,1.25] + */ +const PA0: f32 = -2.3621185683e-03; /* 0xbb1acdc6 */ +const PA1: f32 = 4.1485610604e-01; /* 0x3ed46805 */ +const PA2: f32 = -3.7220788002e-01; /* 0xbebe9208 */ +const PA3: f32 = 3.1834661961e-01; /* 0x3ea2fe54 */ +const PA4: f32 = -1.1089469492e-01; /* 0xbde31cc2 */ +const PA5: f32 = 3.5478305072e-02; /* 0x3d1151b3 */ +const PA6: f32 = -2.1663755178e-03; /* 0xbb0df9c0 */ +const QA1: f32 = 1.0642088205e-01; /* 0x3dd9f331 */ +const QA2: f32 = 5.4039794207e-01; /* 0x3f0a5785 */ +const QA3: f32 = 7.1828655899e-02; /* 0x3d931ae7 */ +const QA4: f32 = 1.2617121637e-01; /* 0x3e013307 */ +const QA5: f32 = 1.3637083583e-02; /* 0x3c5f6e13 */ +const QA6: f32 = 1.1984500103e-02; /* 0x3c445aa3 */ +/* + * Coefficients for approximation to erfc in [1.25,1/0.35] + */ +const RA0: f32 = -9.8649440333e-03; /* 0xbc21a093 */ +const RA1: f32 = -6.9385856390e-01; /* 0xbf31a0b7 */ +const RA2: f32 = -1.0558626175e+01; /* 0xc128f022 */ +const RA3: f32 = -6.2375331879e+01; /* 0xc2798057 */ +const RA4: f32 = -1.6239666748e+02; /* 0xc322658c */ +const RA5: f32 = -1.8460508728e+02; /* 0xc3389ae7 */ +const RA6: f32 = -8.1287437439e+01; /* 
0xc2a2932b */ +const RA7: f32 = -9.8143291473e+00; /* 0xc11d077e */ +const SA1: f32 = 1.9651271820e+01; /* 0x419d35ce */ +const SA2: f32 = 1.3765776062e+02; /* 0x4309a863 */ +const SA3: f32 = 4.3456588745e+02; /* 0x43d9486f */ +const SA4: f32 = 6.4538726807e+02; /* 0x442158c9 */ +const SA5: f32 = 4.2900814819e+02; /* 0x43d6810b */ +const SA6: f32 = 1.0863500214e+02; /* 0x42d9451f */ +const SA7: f32 = 6.5702495575e+00; /* 0x40d23f7c */ +const SA8: f32 = -6.0424413532e-02; /* 0xbd777f97 */ +/* + * Coefficients for approximation to erfc in [1/.35,28] + */ +const RB0: f32 = -9.8649431020e-03; /* 0xbc21a092 */ +const RB1: f32 = -7.9928326607e-01; /* 0xbf4c9dd4 */ +const RB2: f32 = -1.7757955551e+01; /* 0xc18e104b */ +const RB3: f32 = -1.6063638306e+02; /* 0xc320a2ea */ +const RB4: f32 = -6.3756646729e+02; /* 0xc41f6441 */ +const RB5: f32 = -1.0250950928e+03; /* 0xc480230b */ +const RB6: f32 = -4.8351919556e+02; /* 0xc3f1c275 */ +const SB1: f32 = 3.0338060379e+01; /* 0x41f2b459 */ +const SB2: f32 = 3.2579251099e+02; /* 0x43a2e571 */ +const SB3: f32 = 1.5367296143e+03; /* 0x44c01759 */ +const SB4: f32 = 3.1998581543e+03; /* 0x4547fdbb */ +const SB5: f32 = 2.5530502930e+03; /* 0x451f90ce */ +const SB6: f32 = 4.7452853394e+02; /* 0x43ed43a7 */ +const SB7: f32 = -2.2440952301e+01; /* 0xc1b38712 */ + +fn erfc1(x: f32) -> f32 { + let s: f32; + let p: f32; + let q: f32; + + s = fabsf(x) - 1.0; + p = PA0+s*(PA1+s*(PA2+s*(PA3+s*(PA4+s*(PA5+s*PA6))))); + q = 1.0+s*(QA1+s*(QA2+s*(QA3+s*(QA4+s*(QA5+s*QA6))))); + return 1.0 - ERX - p/q; +} + +fn erfc2(mut ix: u32, mut x: f32) -> f32 { + let s: f32; + let r: f32; + let big_s: f32; + let z: f32; + + if ix < 0x3fa00000 { /* |x| < 1.25 */ + return erfc1(x); + } + + x = fabsf(x); + s = 1.0/(x*x); + if ix < 0x4036db6d { /* |x| < 1/0.35 */ + r = RA0+s*(RA1+s*(RA2+s*(RA3+s*(RA4+s*( + RA5+s*(RA6+s*RA7)))))); + big_s = 1.0+s*(SA1+s*(SA2+s*(SA3+s*(SA4+s*( + SA5+s*(SA6+s*(SA7+s*SA8))))))); + } else { /* |x| >= 1/0.35 */ + r = 
RB0+s*(RB1+s*(RB2+s*(RB3+s*(RB4+s*( + RB5+s*RB6))))); + big_s = 1.0+s*(SB1+s*(SB2+s*(SB3+s*(SB4+s*( + SB5+s*(SB6+s*SB7)))))); + } + ix = x.to_bits(); + z = f32::from_bits(ix&0xffffe000); + + expf(-z*z - 0.5625) * expf((z-x)*(z+x) + r/big_s)/x +} + +pub fn erff(x: f32) -> f32 +{ + let r: f32; + let s: f32; + let z: f32; + let y: f32; + let mut ix: u32; + let sign: usize; + + ix = x.to_bits(); + sign = (ix>>31) as usize; + ix &= 0x7fffffff; + if ix >= 0x7f800000 { + /* erf(nan)=nan, erf(+-inf)=+-1 */ + return 1.0-2.0*(sign as f32) + 1.0/x; + } + if ix < 0x3f580000 { /* |x| < 0.84375 */ + if ix < 0x31800000 { /* |x| < 2**-28 */ + /*avoid underflow */ + return 0.125*(8.0*x + EFX8*x); + } + z = x*x; + r = PP0+z*(PP1+z*(PP2+z*(PP3+z*PP4))); + s = 1.0+z*(QQ1+z*(QQ2+z*(QQ3+z*(QQ4+z*QQ5)))); + y = r/s; + return x + x*y; + } + if ix < 0x40c00000 { /* |x| < 6 */ + y = 1.0 - erfc2(ix,x); + } else { + let x1p_120 = f32::from_bits(0x03800000); + y = 1.0 - x1p_120; + } + + if sign != 0 { + -y + } else { + y + } +} + +pub fn erfcf(x: f32) -> f32 { + let r: f32; + let s: f32; + let z: f32; + let y: f32; + let mut ix: u32; + let sign: usize; + + ix = x.to_bits(); + sign = (ix>>31) as usize; + ix &= 0x7fffffff; + if ix >= 0x7f800000 { + /* erfc(nan)=nan, erfc(+-inf)=0,2 */ + return 2.0*(sign as f32) + 1.0/x; + } + + if ix < 0x3f580000 { /* |x| < 0.84375 */ + if ix < 0x23800000 { /* |x| < 2**-56 */ + return 1.0 - x; + } + z = x*x; + r = PP0+z*(PP1+z*(PP2+z*(PP3+z*PP4))); + s = 1.0+z*(QQ1+z*(QQ2+z*(QQ3+z*(QQ4+z*QQ5)))); + y = r/s; + if sign != 0 || ix < 0x3e800000 { /* x < 1/4 */ + return 1.0 - (x+x*y); + } + return 0.5 - (x - 0.5 + x*y); + } + if ix < 0x41e00000 { /* |x| < 28 */ + if sign != 0 { + return 2.0 - erfc2(ix, x); + } else { + return erfc2(ix, x); + } + } + + let x1p_120 = f32::from_bits(0x03800000); + if sign != 0 { + 2.0 - x1p_120 + } else { + x1p_120*x1p_120 + } +} diff --git a/libm/src/math/exp10.rs b/libm/src/math/exp10.rs new file mode 100644 index 000000000..d12fa0be3 
--- /dev/null +++ b/libm/src/math/exp10.rs @@ -0,0 +1,24 @@ +use super::{exp2, modf, pow}; + +const LN10: f64 = 3.32192809488736234787031942948939; +const P10: &[f64] = &[ + 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, + 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, + 1e10, 1e11, 1e12, 1e13, 1e14, 1e15 +]; + +pub fn exp10(x: f64) -> f64 +{ + let (mut y, n) = modf(x); + let u: u64 = n.to_bits(); + /* fabs(n) < 16 without raising invalid on nan */ + if (u>>52 & 0x7ff) < 0x3ff+4 { + if y == 0.0 { + return P10[((n as isize) + 15) as usize]; + } + y = exp2(LN10 * y); + return y * P10[((n as isize) + 15) as usize]; + } + return pow(10.0, x); +} diff --git a/libm/src/math/exp10f.rs b/libm/src/math/exp10f.rs new file mode 100644 index 000000000..8fb88a52c --- /dev/null +++ b/libm/src/math/exp10f.rs @@ -0,0 +1,22 @@ +use super::{exp2, exp2f, modff}; + +const LN10_F32: f32 = 3.32192809488736234787031942948939; +const LN10_F64: f64 = 3.32192809488736234787031942948939; +const P10: &[f32] = &[ + 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7 +]; + +pub fn exp10f(x: f32) -> f32 { + let (mut y, n) = modff(x); + let u = n.to_bits(); + /* fabsf(n) < 8 without raising invalid on nan */ + if (u>>23 & 0xff) < 0x7f+3 { + if y == 0.0 { + return P10[((n as isize) + 7) as usize] + } + y = exp2f(LN10_F32 * y); + return y * P10[((n as isize) + 7) as usize]; + } + return exp2(LN10_F64 * (x as f64)) as f32; +} diff --git a/libm/src/math/frexp.rs b/libm/src/math/frexp.rs new file mode 100644 index 000000000..45733a3aa --- /dev/null +++ b/libm/src/math/frexp.rs @@ -0,0 +1,20 @@ +pub fn frexp(x: f64) -> (f64, isize) { + let mut y = x.to_bits(); + let ee = ((y>>52) & 0x7ff) as isize; + + if ee == 0 { + if x != 0.0 { + let x1p64 = f64::from_bits(0x43f0000000000000); + let (x, e) = frexp(x*x1p64); + return (x, e - 64); + } + return (x, 0); + } else if ee == 0x7ff { + return (x, 0); + } + + let e = ee - 
0x3fe; + y &= 0x800fffffffffffff; + y |= 0x3fe0000000000000; + return (f64::from_bits(y), e); +} diff --git a/libm/src/math/frexpf.rs b/libm/src/math/frexpf.rs new file mode 100644 index 000000000..1c9dae0bb --- /dev/null +++ b/libm/src/math/frexpf.rs @@ -0,0 +1,21 @@ +pub fn frexpf(x: f32) -> (f32, isize) { + let mut y = x.to_bits(); + let ee: isize = ((y>>23) & 0xff) as isize; + + if ee == 0 { + if x != 0.0 { + let x1p64 = f32::from_bits(0x5f800000); + let (x, e) = frexpf(x*x1p64); + return (x, e - 64); + } else { + return (x, 0); + } + } else if ee == 0xff { + return (x, 0); + } + + let e = ee - 0x7e; + y &= 0x807fffff; + y |= 0x3f000000; + return (f32::from_bits(y), e); +} diff --git a/libm/src/math/ilogb.rs b/libm/src/math/ilogb.rs new file mode 100644 index 000000000..78fe030a1 --- /dev/null +++ b/libm/src/math/ilogb.rs @@ -0,0 +1,31 @@ +const FP_ILOGBNAN: isize = -1 - (((!0usize) >> 1) as isize); /* returned for NaN; equals isize::MIN. The shift is done unsigned: a signed !0 >> 1 is still -1, which made the old expression evaluate to 0 */ +const FP_ILOGB0: isize = FP_ILOGBNAN; /* returned for x == 0 */ + +pub fn ilogb(x: f64) -> isize { + let mut i: u64 = x.to_bits(); + let e = ((i>>52) & 0x7ff) as isize; + + if e == 0 { + i <<= 12; + if i == 0 { + force_eval!(0.0/0.0); + return FP_ILOGB0; + } + /* subnormal x */ + let mut e = -0x3ff; + while (i>>63) == 0 { + e -= 1; + i <<= 1; + } + return e; + } + if e == 0x7ff { + force_eval!(0.0/0.0); + if (i<<12) != 0 { + return FP_ILOGBNAN; + } else { + return isize::max_value(); + } + } + return e - 0x3ff; +} diff --git a/libm/src/math/ilogbf.rs b/libm/src/math/ilogbf.rs new file mode 100644 index 000000000..9ca1c3606 --- /dev/null +++ b/libm/src/math/ilogbf.rs @@ -0,0 +1,31 @@ +const FP_ILOGBNAN: isize = -1 - (((!0usize) >> 1) as isize); /* returned for NaN; equals isize::MIN. The shift is done unsigned: a signed !0 >> 1 is still -1, which made the old expression evaluate to 0 */ +const FP_ILOGB0: isize = FP_ILOGBNAN; /* returned for x == 0 */ + +pub fn ilogbf(x: f32) -> isize { + let mut i = x.to_bits(); + let e = ((i>>23) & 0xff) as isize; + + if e == 0 { + i <<= 9; + if i == 0 { + force_eval!(0.0/0.0); + return FP_ILOGB0; + } + /* subnormal x */ + let mut e = -0x7f; + while (i>>31) == 0 { + e -= 1; + i <<= 1; + } + return e; + } + if e == 0xff { + force_eval!(0.0/0.0); + if
(i<<9) != 0 { + return FP_ILOGBNAN; + } else { + return isize::max_value(); + } + } + return e - 0x7f; +} diff --git a/libm/src/math/j0.rs b/libm/src/math/j0.rs new file mode 100644 index 000000000..02625b086 --- /dev/null +++ b/libm/src/math/j0.rs @@ -0,0 +1,392 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_j0.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* j0(x), y0(x) + * Bessel function of the first and second kinds of order zero. + * Method -- j0(x): + * 1. For tiny x, we use j0(x) = 1 - x^2/4 + x^4/64 - ... + * 2. Reduce x to |x| since j0(x)=j0(-x), and + * for x in (0,2) + * j0(x) = 1-z/4+ z^2*R0/S0, where z = x*x; + * (precision: |j0-1+z/4-z^2R0/S0 |<2**-63.67 ) + * for x in (2,inf) + * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) + * where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) + * as follow: + * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) + * = 1/sqrt(2) * (cos(x) + sin(x)) + * sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * (To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one.) + * + * 3 Special cases + * j0(nan)= nan + * j0(0) = 1 + * j0(inf) = 0 + * + * Method -- y0(x): + * 1. For x<2. + * Since + * y0(x) = 2/pi*(j0(x)*(ln(x/2)+Euler) + x^2/4 - ...) + * therefore y0(x)-2/pi*j0(x)*ln(x) is an even function. + * We use the following function to approximate y0, + * y0(x) = U(z)/V(z) + (2/pi)*(j0(x)*ln(x)), z= x^2 + * where + * U(z) = u00 + u01*z + ... + u06*z^6 + * V(z) = 1 + v01*z + ... + v04*z^4 + * with absolute approximation error bounded by 2**-72. 
+ * Note: For tiny x, U/V = u0 and j0(x)~1, hence + * y0(tiny) = u0 + (2/pi)*ln(tiny), (choose tiny<2**-27) + * 2. For x>=2. + * y0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)+q0(x)*sin(x0)) + * where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) + * by the method mentioned above. + * 3. Special cases: y0(0)=-inf, y0(x<0)=NaN, y0(inf)=0. + */ + +use super::{cos, get_low_word, get_high_word, fabs, log, sin, sqrt}; +const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ +const TPI: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ + +/* common method when |x|>=2 */ +fn common(ix: u32, x: f64, y0: bool) -> f64 { + let s: f64; + let mut c: f64; + let mut ss: f64; + let mut cc: f64; + let z: f64; + + /* + * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x-pi/4)-q0(x)*sin(x-pi/4)) + * y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x-pi/4)+q0(x)*cos(x-pi/4)) + * + * sin(x-pi/4) = (sin(x) - cos(x))/sqrt(2) + * cos(x-pi/4) = (sin(x) + cos(x))/sqrt(2) + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + */ + s = sin(x); + c = cos(x); + if y0 { + c = -c; + } + cc = s+c; + /* avoid overflow in 2*x, big ulp error when x>=0x1p1023 */ + if ix < 0x7fe00000 { + ss = s-c; + z = -cos(2.0*x); + if s*c < 0.0 { + cc = z/ss; + } else { + ss = z/cc; + } + if ix < 0x48000000 { + if y0 { + ss = -ss; + } + cc = pzero(x)*cc-qzero(x)*ss; + } + } + return INVSQRTPI*cc/sqrt(x); +} + +/* R0/S0 on [0, 2.00] */ +const R02: f64 = 1.56249999999999947958e-02; /* 0x3F8FFFFF, 0xFFFFFFFD */ +const R03: f64 = -1.89979294238854721751e-04; /* 0xBF28E6A5, 0xB61AC6E9 */ +const R04: f64 = 1.82954049532700665670e-06; /* 0x3EBEB1D1, 0x0C503919 */ +const R05: f64 = -4.61832688532103189199e-09; /* 0xBE33D5E7, 0x73D63FCE */ +const S01: f64 = 1.56191029464890010492e-02; /* 0x3F8FFCE8, 0x82C8C2A4 */ +const S02: f64 = 1.16926784663337450260e-04; /* 0x3F1EA6D2, 0xDD57DBF4 */ +const S03: f64 = 5.13546550207318111446e-07; /* 0x3EA13B54, 0xCE84D5A9 */ +const S04: f64 = 1.16614003333790000205e-09; /* 0x3E1408BC, 
0xF4745D8F */ + +pub fn j0(mut x: f64) -> f64 +{ + let z: f64; + let r: f64; + let s: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + + /* j0(+-inf)=0, j0(nan)=nan */ + if ix >= 0x7ff00000 { + return 1.0/(x*x); + } + x = fabs(x); + + if ix >= 0x40000000 { /* |x| >= 2 */ + /* large ulp error near zeros: 2.4, 5.52, 8.6537,.. */ + return common(ix,x,false); + } + + /* 1 - x*x/4 + x*x*R(x^2)/S(x^2) */ + if ix >= 0x3f200000 { /* |x| >= 2**-13 */ + /* up to 4ulp error close to 2 */ + z = x*x; + r = z*(R02+z*(R03+z*(R04+z*R05))); + s = 1.0+z*(S01+z*(S02+z*(S03+z*S04))); + return (1.0+x/2.0)*(1.0-x/2.0) + z*(r/s); + } + + /* 1 - x*x/4 */ + /* prevent underflow */ + /* inexact should be raised when x!=0, this is not done correctly */ + if ix >= 0x38000000 { /* |x| >= 2**-127 */ + x = 0.25*x*x; + } + return 1.0 - x; +} + +const U00: f64 = -7.38042951086872317523e-02; /* 0xBFB2E4D6, 0x99CBD01F */ +const U01: f64 = 1.76666452509181115538e-01; /* 0x3FC69D01, 0x9DE9E3FC */ +const U02: f64 = -1.38185671945596898896e-02; /* 0xBF8C4CE8, 0xB16CFA97 */ +const U03: f64 = 3.47453432093683650238e-04; /* 0x3F36C54D, 0x20B29B6B */ +const U04: f64 = -3.81407053724364161125e-06; /* 0xBECFFEA7, 0x73D25CAD */ +const U05: f64 = 1.95590137035022920206e-08; /* 0x3E550057, 0x3B4EABD4 */ +const U06: f64 = -3.98205194132103398453e-11; /* 0xBDC5E43D, 0x693FB3C8 */ +const V01: f64 = 1.27304834834123699328e-02; /* 0x3F8A1270, 0x91C9C71A */ +const V02: f64 = 7.60068627350353253702e-05; /* 0x3F13ECBB, 0xF578C6C1 */ +const V03: f64 = 2.59150851840457805467e-07; /* 0x3E91642D, 0x7FF202FD */ +const V04: f64 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */ + +pub fn y0(x: f64) -> f64 +{ + let z: f64; + let u: f64; + let v: f64; + let ix: u32; + let lx: u32; + + ix = get_high_word(x); + lx = get_low_word(x); + + /* y0(nan)=nan, y0(<0)=nan, y0(0)=-inf, y0(inf)=0 */ + if ((ix<<1) | lx) == 0 { + return -1.0/0.0; + } + if (ix>>31) != 0 { + return 0.0/0.0; + } + if ix >= 
0x7ff00000 { + return 1.0/x; + } + + if ix >= 0x40000000 { /* x >= 2 */ + /* large ulp errors near zeros: 3.958, 7.086,.. */ + return common(ix,x,true); + } + + /* U(x^2)/V(x^2) + (2/pi)*j0(x)*log(x) */ + if ix >= 0x3e400000 { /* x >= 2**-27 */ + /* large ulp error near the first zero, x ~= 0.89 */ + z = x*x; + u = U00+z*(U01+z*(U02+z*(U03+z*(U04+z*(U05+z*U06))))); + v = 1.0+z*(V01+z*(V02+z*(V03+z*V04))); + return u/v + TPI*(j0(x)*log(x)); + } + return U00 + TPI*log(x); +} + +/* The asymptotic expansions of pzero is + * 1 - 9/128 s^2 + 11025/98304 s^4 - ..., where s = 1/x. + * For x >= 2, We approximate pzero by + * pzero(x) = 1 + (R/S) + * where R = pR0 + pR1*s^2 + pR2*s^4 + ... + pR5*s^10 + * S = 1 + pS0*s^2 + ... + pS4*s^10 + * and + * | pzero(x)-1-R/S | <= 2 ** ( -60.26) + */ +const PR8: [f64; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + -7.03124999999900357484e-02, /* 0xBFB1FFFF, 0xFFFFFD32 */ + -8.08167041275349795626e+00, /* 0xC02029D0, 0xB44FA779 */ + -2.57063105679704847262e+02, /* 0xC0701102, 0x7B19E863 */ + -2.48521641009428822144e+03, /* 0xC0A36A6E, 0xCD4DCAFC */ + -5.25304380490729545272e+03, /* 0xC0B4850B, 0x36CC643D */ +]; +const PS8: [f64; 5] = [ + 1.16534364619668181717e+02, /* 0x405D2233, 0x07A96751 */ + 3.83374475364121826715e+03, /* 0x40ADF37D, 0x50596938 */ + 4.05978572648472545552e+04, /* 0x40E3D2BB, 0x6EB6B05F */ + 1.16752972564375915681e+05, /* 0x40FC810F, 0x8F9FA9BD */ + 4.76277284146730962675e+04, /* 0x40E74177, 0x4F2C49DC */ +]; + +const PR5: [f64; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -1.14125464691894502584e-11, /* 0xBDA918B1, 0x47E495CC */ + -7.03124940873599280078e-02, /* 0xBFB1FFFF, 0xE69AFBC6 */ + -4.15961064470587782438e+00, /* 0xC010A370, 0xF90C6BBF */ + -6.76747652265167261021e+01, /* 0xC050EB2F, 0x5A7D1783 */ + -3.31231299649172967747e+02, /* 0xC074B3B3, 0x6742CC63 */ + -3.46433388365604912451e+02, /* 0xC075A6EF, 0x28A38BD7 */ +]; +const PS5: [f64; 5] = [ 
+ 6.07539382692300335975e+01, /* 0x404E6081, 0x0C98C5DE */ + 1.05125230595704579173e+03, /* 0x40906D02, 0x5C7E2864 */ + 5.97897094333855784498e+03, /* 0x40B75AF8, 0x8FBE1D60 */ + 9.62544514357774460223e+03, /* 0x40C2CCB8, 0xFA76FA38 */ + 2.40605815922939109441e+03, /* 0x40A2CC1D, 0xC70BE864 */ +]; + +const PR3: [f64; 6] = [/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + -2.54704601771951915620e-09, /* 0xBE25E103, 0x6FE1AA86 */ + -7.03119616381481654654e-02, /* 0xBFB1FFF6, 0xF7C0E24B */ + -2.40903221549529611423e+00, /* 0xC00345B2, 0xAEA48074 */ + -2.19659774734883086467e+01, /* 0xC035F74A, 0x4CB94E14 */ + -5.80791704701737572236e+01, /* 0xC04D0A22, 0x420A1A45 */ + -3.14479470594888503854e+01, /* 0xC03F72AC, 0xA892D80F */ +]; +const PS3: [f64; 5] = [ + 3.58560338055209726349e+01, /* 0x4041ED92, 0x84077DD3 */ + 3.61513983050303863820e+02, /* 0x40769839, 0x464A7C0E */ + 1.19360783792111533330e+03, /* 0x4092A66E, 0x6D1061D6 */ + 1.12799679856907414432e+03, /* 0x40919FFC, 0xB8C39B7E */ + 1.73580930813335754692e+02, /* 0x4065B296, 0xFC379081 */ +]; + +const PR2: [f64; 6] = [/* for x in [2.8570,2]=1/[0.3499,0.5] */ + -8.87534333032526411254e-08, /* 0xBE77D316, 0xE927026D */ + -7.03030995483624743247e-02, /* 0xBFB1FF62, 0x495E1E42 */ + -1.45073846780952986357e+00, /* 0xBFF73639, 0x8A24A843 */ + -7.63569613823527770791e+00, /* 0xC01E8AF3, 0xEDAFA7F3 */ + -1.11931668860356747786e+01, /* 0xC02662E6, 0xC5246303 */ + -3.23364579351335335033e+00, /* 0xC009DE81, 0xAF8FE70F */ +]; +const PS2: [f64; 5] = [ + 2.22202997532088808441e+01, /* 0x40363865, 0x908B5959 */ + 1.36206794218215208048e+02, /* 0x4061069E, 0x0EE8878F */ + 2.70470278658083486789e+02, /* 0x4070E786, 0x42EA079B */ + 1.53875394208320329881e+02, /* 0x40633C03, 0x3AB6FAFF */ + 1.46576176948256193810e+01, /* 0x402D50B3, 0x44391809 */ +]; + +fn pzero(x: f64) -> f64 +{ + let p: &[f64; 6]; + let q: &[f64; 5]; + let z: f64; + let r: f64; + let s: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + if 
ix >= 0x40200000 {p = &PR8; q = &PS8;} + else if ix >= 0x40122E8B {p = &PR5; q = &PS5;} + else if ix >= 0x4006DB6D {p = &PR3; q = &PS3;} + else /*ix >= 0x40000000*/{p = &PR2; q = &PS2;} + z = 1.0/(x*x); + r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); + s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); + return 1.0 + r/s; +} + + +/* For x >= 8, the asymptotic expansions of qzero is + * -1/8 s + 75/1024 s^3 - ..., where s = 1/x. + * We approximate pzero by + * qzero(x) = s*(-1.25 + (R/S)) + * where R = qR0 + qR1*s^2 + qR2*s^4 + ... + qR5*s^10 + * S = 1 + qS0*s^2 + ... + qS5*s^12 + * and + * | qzero(x)/s +1.25-R/S | <= 2 ** ( -61.22) + */ +const QR8: [f64; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + 7.32421874999935051953e-02, /* 0x3FB2BFFF, 0xFFFFFE2C */ + 1.17682064682252693899e+01, /* 0x40278952, 0x5BB334D6 */ + 5.57673380256401856059e+02, /* 0x40816D63, 0x15301825 */ + 8.85919720756468632317e+03, /* 0x40C14D99, 0x3E18F46D */ + 3.70146267776887834771e+04, /* 0x40E212D4, 0x0E901566 */ +]; +const QS8: [f64; 6] = [ + 1.63776026895689824414e+02, /* 0x406478D5, 0x365B39BC */ + 8.09834494656449805916e+03, /* 0x40BFA258, 0x4E6B0563 */ + 1.42538291419120476348e+05, /* 0x41016652, 0x54D38C3F */ + 8.03309257119514397345e+05, /* 0x412883DA, 0x83A52B43 */ + 8.40501579819060512818e+05, /* 0x4129A66B, 0x28DE0B3D */ + -3.43899293537866615225e+05, /* 0xC114FD6D, 0x2C9530C5 */ +]; + +const QR5: [f64; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.84085963594515531381e-11, /* 0x3DB43D8F, 0x29CC8CD9 */ + 7.32421766612684765896e-02, /* 0x3FB2BFFF, 0xD172B04C */ + 5.83563508962056953777e+00, /* 0x401757B0, 0xB9953DD3 */ + 1.35111577286449829671e+02, /* 0x4060E392, 0x0A8788E9 */ + 1.02724376596164097464e+03, /* 0x40900CF9, 0x9DC8C481 */ + 1.98997785864605384631e+03, /* 0x409F17E9, 0x53C6E3A6 */ +]; +const QS5: [f64; 6] = [ + 8.27766102236537761883e+01, /* 0x4054B1B3, 0xFB5E1543 */ + 2.07781416421392987104e+03, 
/* 0x40A03BA0, 0xDA21C0CE */ + 1.88472887785718085070e+04, /* 0x40D267D2, 0x7B591E6D */ + 5.67511122894947329769e+04, /* 0x40EBB5E3, 0x97E02372 */ + 3.59767538425114471465e+04, /* 0x40E19118, 0x1F7A54A0 */ + -5.35434275601944773371e+03, /* 0xC0B4EA57, 0xBEDBC609 */ +]; + +const QR3: [f64; 6] = [/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + 4.37741014089738620906e-09, /* 0x3E32CD03, 0x6ADECB82 */ + 7.32411180042911447163e-02, /* 0x3FB2BFEE, 0x0E8D0842 */ + 3.34423137516170720929e+00, /* 0x400AC0FC, 0x61149CF5 */ + 4.26218440745412650017e+01, /* 0x40454F98, 0x962DAEDD */ + 1.70808091340565596283e+02, /* 0x406559DB, 0xE25EFD1F */ + 1.66733948696651168575e+02, /* 0x4064D77C, 0x81FA21E0 */ +]; +const QS3: [f64; 6] = [ + 4.87588729724587182091e+01, /* 0x40486122, 0xBFE343A6 */ + 7.09689221056606015736e+02, /* 0x40862D83, 0x86544EB3 */ + 3.70414822620111362994e+03, /* 0x40ACF04B, 0xE44DFC63 */ + 6.46042516752568917582e+03, /* 0x40B93C6C, 0xD7C76A28 */ + 2.51633368920368957333e+03, /* 0x40A3A8AA, 0xD94FB1C0 */ + -1.49247451836156386662e+02, /* 0xC062A7EB, 0x201CF40F */ +]; + +const QR2: [f64; 6] = [/* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.50444444886983272379e-07, /* 0x3E84313B, 0x54F76BDB */ + 7.32234265963079278272e-02, /* 0x3FB2BEC5, 0x3E883E34 */ + 1.99819174093815998816e+00, /* 0x3FFFF897, 0xE727779C */ + 1.44956029347885735348e+01, /* 0x402CFDBF, 0xAAF96FE5 */ + 3.16662317504781540833e+01, /* 0x403FAA8E, 0x29FBDC4A */ + 1.62527075710929267416e+01, /* 0x403040B1, 0x71814BB4 */ +]; +const QS2: [f64; 6] = [ + 3.03655848355219184498e+01, /* 0x403E5D96, 0xF7C07AED */ + 2.69348118608049844624e+02, /* 0x4070D591, 0xE4D14B40 */ + 8.44783757595320139444e+02, /* 0x408A6645, 0x22B3BF22 */ + 8.82935845112488550512e+02, /* 0x408B977C, 0x9C5CC214 */ + 2.12666388511798828631e+02, /* 0x406A9553, 0x0E001365 */ + -5.31095493882666946917e+00, /* 0xC0153E6A, 0xF8B32931 */ +]; + +fn qzero(x: f64) -> f64 +{ + let p: &[f64; 6]; + let q: &[f64; 6]; + let s: f64; + let r: f64; + let 
z: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + if ix >= 0x40200000 {p = &QR8; q = &QS8;} + else if ix >= 0x40122E8B {p = &QR5; q = &QS5;} + else if ix >= 0x4006DB6D {p = &QR3; q = &QS3;} + else /*ix >= 0x40000000*/{p = &QR2; q = &QS2;} + z = 1.0/(x*x); + r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); + s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); + return (-0.125 + r/s)/x; +} diff --git a/libm/src/math/j0f.rs b/libm/src/math/j0f.rs new file mode 100644 index 000000000..e2faed0b2 --- /dev/null +++ b/libm/src/math/j0f.rs @@ -0,0 +1,330 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_j0f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + +use super::{cosf, fabsf, logf, sinf, sqrtf}; + +const INVSQRTPI: f32 = 5.6418961287e-01; /* 0x3f106ebb */ +const TPI: f32 = 6.3661974669e-01; /* 0x3f22f983 */ + +fn common(ix: u32, x: f32, y0: bool) -> f32 +{ + let z: f32; + let s: f32; + let mut c: f32; + let mut ss: f32; + let mut cc: f32; + /* + * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) + * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) + */ + s = sinf(x); + c = cosf(x); + if y0 { + c = -c; + } + cc = s+c; + if ix < 0x7f000000 { + ss = s-c; + z = -cosf(2.0*x); + if s*c < 0.0 { + cc = z/ss; + } else { + ss = z/cc; + } + if ix < 0x58800000 { + if y0 { + ss = -ss; + } + cc = pzerof(x)*cc-qzerof(x)*ss; + } + } + return INVSQRTPI*cc/sqrtf(x); +} + +/* R0/S0 on [0, 2.00] */ +const R02: f32 = 1.5625000000e-02; /* 0x3c800000 */ +const R03: f32 = -1.8997929874e-04; /* 0xb947352e */ +const R04: f32 = 1.8295404516e-06; /* 0x35f58e88 */ +const R05: f32 = -4.6183270541e-09; /* 0xb19eaf3c */ +const S01: f32 = 1.5619102865e-02; /* 0x3c7fe744 */ +const S02: f32 = 1.1692678527e-04; /* 0x38f53697 */ +const S03: f32 = 5.1354652442e-07; /* 0x3509daa6 */ +const S04: f32 = 1.1661400734e-09; /* 0x30a045e8 */ + +pub fn j0f(mut x: f32) -> f32 +{ + let z: f32; + let r: f32; + let s: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x7f800000 { + return 1.0/(x*x); + } + x = fabsf(x); + + if ix >= 0x40000000 { /* |x| >= 2 */ + /* large ulp error near zeros */ + return common(ix, x, false); + } + if ix >= 0x3a000000 { /* |x| >= 2**-11 */ + /* up to 4ulp error near 2 */ + z = x*x; + r = z*(R02+z*(R03+z*(R04+z*R05))); + s = 1.0+z*(S01+z*(S02+z*(S03+z*S04))); + return (1.0+x/2.0)*(1.0-x/2.0) + z*(r/s); + } + if ix >= 0x21800000 { /* |x| >= 2**-60 */ + x = 0.25*x*x; + } + return 1.0 - x; +} + +const U00: f32 = -7.3804296553e-02; /* 0xbd9726b5 */ +const U01: f32 = 1.7666645348e-01; /* 0x3e34e80d */ +const U02: f32 = 
-1.3818567619e-02; /* 0xbc626746 */ +const U03: f32 = 3.4745343146e-04; /* 0x39b62a69 */ +const U04: f32 = -3.8140706238e-06; /* 0xb67ff53c */ +const U05: f32 = 1.9559013964e-08; /* 0x32a802ba */ +const U06: f32 = -3.9820518410e-11; /* 0xae2f21eb */ +const V01: f32 = 1.2730483897e-02; /* 0x3c509385 */ +const V02: f32 = 7.6006865129e-05; /* 0x389f65e0 */ +const V03: f32 = 2.5915085189e-07; /* 0x348b216c */ +const V04: f32 = 4.4111031494e-10; /* 0x2ff280c2 */ + +pub fn y0f(x: f32) -> f32 +{ + let z: f32; + let u: f32; + let v: f32; + let ix: u32; + + ix = x.to_bits(); + if (ix & 0x7fffffff) == 0 { + return -1.0/0.0; + } + if (ix>>31) !=0 { + return 0.0/0.0; + } + if ix >= 0x7f800000 { + return 1.0/x; + } + if ix >= 0x40000000 { /* |x| >= 2.0 */ + /* large ulp error near zeros */ + return common(ix,x,true); + } + if ix >= 0x39000000 { /* x >= 2**-13 */ + /* large ulp error at x ~= 0.89 */ + z = x*x; + u = U00+z*(U01+z*(U02+z*(U03+z*(U04+z*(U05+z*U06))))); + v = 1.0+z*(V01+z*(V02+z*(V03+z*V04))); + return u/v + TPI*(j0f(x)*logf(x)); + } + return U00 + TPI*logf(x); +} + +/* The asymptotic expansions of pzero is + * 1 - 9/128 s^2 + 11025/98304 s^4 - ..., where s = 1/x. + * For x >= 2, We approximate pzero by + * pzero(x) = 1 + (R/S) + * where R = pR0 + pR1*s^2 + pR2*s^4 + ... + pR5*s^10 + * S = 1 + pS0*s^2 + ... 
+ pS4*s^10 + * and + * | pzero(x)-1-R/S | <= 2 ** ( -60.26) + */ +const PR8: [f32; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + -7.0312500000e-02, /* 0xbd900000 */ + -8.0816707611e+00, /* 0xc1014e86 */ + -2.5706311035e+02, /* 0xc3808814 */ + -2.4852163086e+03, /* 0xc51b5376 */ + -5.2530439453e+03, /* 0xc5a4285a */ +]; +const PS8: [f32; 5] = [ + 1.1653436279e+02, /* 0x42e91198 */ + 3.8337448730e+03, /* 0x456f9beb */ + 4.0597855469e+04, /* 0x471e95db */ + 1.1675296875e+05, /* 0x47e4087c */ + 4.7627726562e+04, /* 0x473a0bba */ +]; +const PR5: [f32; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -1.1412546255e-11, /* 0xad48c58a */ + -7.0312492549e-02, /* 0xbd8fffff */ + -4.1596107483e+00, /* 0xc0851b88 */ + -6.7674766541e+01, /* 0xc287597b */ + -3.3123129272e+02, /* 0xc3a59d9b */ + -3.4643338013e+02, /* 0xc3ad3779 */ +]; +const PS5: [f32; 5] = [ + 6.0753936768e+01, /* 0x42730408 */ + 1.0512523193e+03, /* 0x44836813 */ + 5.9789707031e+03, /* 0x45bad7c4 */ + 9.6254453125e+03, /* 0x461665c8 */ + 2.4060581055e+03, /* 0x451660ee */ +]; + +const PR3: [f32; 6] = [/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + -2.5470459075e-09, /* 0xb12f081b */ + -7.0311963558e-02, /* 0xbd8fffb8 */ + -2.4090321064e+00, /* 0xc01a2d95 */ + -2.1965976715e+01, /* 0xc1afba52 */ + -5.8079170227e+01, /* 0xc2685112 */ + -3.1447946548e+01, /* 0xc1fb9565 */ +]; +const PS3: [f32; 5] = [ + 3.5856033325e+01, /* 0x420f6c94 */ + 3.6151397705e+02, /* 0x43b4c1ca */ + 1.1936077881e+03, /* 0x44953373 */ + 1.1279968262e+03, /* 0x448cffe6 */ + 1.7358093262e+02, /* 0x432d94b8 */ +]; + +const PR2: [f32; 6] = [/* for x in [2.8570,2]=1/[0.3499,0.5] */ + -8.8753431271e-08, /* 0xb3be98b7 */ + -7.0303097367e-02, /* 0xbd8ffb12 */ + -1.4507384300e+00, /* 0xbfb9b1cc */ + -7.6356959343e+00, /* 0xc0f4579f */ + -1.1193166733e+01, /* 0xc1331736 */ + -3.2336456776e+00, /* 0xc04ef40d */ +]; +const PS2: [f32; 5] = [ + 2.2220300674e+01, /* 0x41b1c32d */ + 1.3620678711e+02, /* 
0x430834f0 */ + 2.7047027588e+02, /* 0x43873c32 */ + 1.5387539673e+02, /* 0x4319e01a */ + 1.4657617569e+01, /* 0x416a859a */ +]; + +fn pzerof(x: f32) -> f32 +{ + let p: &[f32; 6]; + let q: &[f32; 5]; + let z: f32; + let r: f32; + let s: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x41000000 {p = &PR8; q = &PS8;} + else if ix >= 0x409173eb {p = &PR5; q = &PS5;} + else if ix >= 0x4036d917 {p = &PR3; q = &PS3;} + else /*ix >= 0x40000000*/{p = &PR2; q = &PS2;} + z = 1.0/(x*x); + r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); + s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); + return 1.0 + r/s; +} + + +/* For x >= 8, the asymptotic expansions of qzero is + * -1/8 s + 75/1024 s^3 - ..., where s = 1/x. + * We approximate pzero by + * qzero(x) = s*(-1.25 + (R/S)) + * where R = qR0 + qR1*s^2 + qR2*s^4 + ... + qR5*s^10 + * S = 1 + qS0*s^2 + ... + qS5*s^12 + * and + * | qzero(x)/s +1.25-R/S | <= 2 ** ( -61.22) + */ +const QR8: [f32; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + 7.3242187500e-02, /* 0x3d960000 */ + 1.1768206596e+01, /* 0x413c4a93 */ + 5.5767340088e+02, /* 0x440b6b19 */ + 8.8591972656e+03, /* 0x460a6cca */ + 3.7014625000e+04, /* 0x471096a0 */ +]; +const QS8: [f32; 6] = [ + 1.6377603149e+02, /* 0x4323c6aa */ + 8.0983447266e+03, /* 0x45fd12c2 */ + 1.4253829688e+05, /* 0x480b3293 */ + 8.0330925000e+05, /* 0x49441ed4 */ + 8.4050156250e+05, /* 0x494d3359 */ + -3.4389928125e+05, /* 0xc8a7eb69 */ +]; + +const QR5: [f32; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.8408595828e-11, /* 0x2da1ec79 */ + 7.3242180049e-02, /* 0x3d95ffff */ + 5.8356351852e+00, /* 0x40babd86 */ + 1.3511157227e+02, /* 0x43071c90 */ + 1.0272437744e+03, /* 0x448067cd */ + 1.9899779053e+03, /* 0x44f8bf4b */ +]; +const QS5: [f32; 6] = [ + 8.2776611328e+01, /* 0x42a58da0 */ + 2.0778142090e+03, /* 0x4501dd07 */ + 1.8847289062e+04, /* 0x46933e94 */ + 5.6751113281e+04, /* 0x475daf1d */ + 3.5976753906e+04, /* 
0x470c88c1 */ + -5.3543427734e+03, /* 0xc5a752be */ +]; + +const QR3: [f32; 6] = [/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + 4.3774099900e-09, /* 0x3196681b */ + 7.3241114616e-02, /* 0x3d95ff70 */ + 3.3442313671e+00, /* 0x405607e3 */ + 4.2621845245e+01, /* 0x422a7cc5 */ + 1.7080809021e+02, /* 0x432acedf */ + 1.6673394775e+02, /* 0x4326bbe4 */ +]; +const QS3: [f32; 6] = [ + 4.8758872986e+01, /* 0x42430916 */ + 7.0968920898e+02, /* 0x44316c1c */ + 3.7041481934e+03, /* 0x4567825f */ + 6.4604252930e+03, /* 0x45c9e367 */ + 2.5163337402e+03, /* 0x451d4557 */ + -1.4924745178e+02, /* 0xc3153f59 */ +]; + +const QR2: [f32; 6] = [/* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.5044444979e-07, /* 0x342189db */ + 7.3223426938e-02, /* 0x3d95f62a */ + 1.9981917143e+00, /* 0x3fffc4bf */ + 1.4495602608e+01, /* 0x4167edfd */ + 3.1666231155e+01, /* 0x41fd5471 */ + 1.6252708435e+01, /* 0x4182058c */ +]; +const QS2: [f32; 6] = [ + 3.0365585327e+01, /* 0x41f2ecb8 */ + 2.6934811401e+02, /* 0x4386ac8f */ + 8.4478375244e+02, /* 0x44533229 */ + 8.8293585205e+02, /* 0x445cbbe5 */ + 2.1266638184e+02, /* 0x4354aa98 */ + -5.3109550476e+00, /* 0xc0a9f358 */ +]; + +fn qzerof(x: f32) -> f32 +{ + let p: &[f32; 6]; + let q: &[f32; 6]; + let s: f32; + let r: f32; + let z: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x41000000 {p = &QR8; q = &QS8;} + else if ix >= 0x409173eb {p = &QR5; q = &QS5;} + else if ix >= 0x4036d917 {p = &QR3; q = &QS3;} + else /*ix >= 0x40000000*/{p = &QR2; q = &QS2;} + z = 1.0/(x*x); + r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); + s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); + return (-0.125 + r/s)/x; +} diff --git a/libm/src/math/j1.rs b/libm/src/math/j1.rs new file mode 100644 index 000000000..92289a613 --- /dev/null +++ b/libm/src/math/j1.rs @@ -0,0 +1,387 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_j1.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun 
Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* j1(x), y1(x) + * Bessel function of the first and second kinds of order one. + * Method -- j1(x): + * 1. For tiny x, we use j1(x) = x/2 - x^3/16 + x^5/384 - ... + * 2. Reduce x to |x| since j1(x)=-j1(-x), and + * for x in (0,2) + * j1(x) = x/2 + x*z*R0/S0, where z = x*x; + * (precision: |j1/x - 1/2 - R0/S0 |<2**-61.51 ) + * for x in (2,inf) + * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x1)-q1(x)*sin(x1)) + * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) + * where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1) + * as follows: + * cos(x1) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * sin(x1) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (sin(x) + cos(x)) + * (To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one.) + * + * 3 Special cases + * j1(nan)= nan + * j1(0) = 0 + * j1(inf) = 0 + * + * Method -- y1(x): + * 1. screen out x<=0 cases: y1(0)=-inf, y1(x<0)=NaN + * 2. For x<2. + * Since + * y1(x) = 2/pi*(j1(x)*(ln(x/2)+Euler)-1/x-x/2+5/64*x^3-...) + * therefore y1(x)-2/pi*j1(x)*ln(x)-1/x is an odd function. + * We use the following function to approximate y1, + * y1(x) = x*U(z)/V(z) + (2/pi)*(j1(x)*ln(x)-1/x), z= x^2 + * where for x in [0,2] (abs err less than 2**-65.89) + * U(z) = U0[0] + U0[1]*z + ... + U0[4]*z^4 + * V(z) = 1 + V0[0]*z + ... + V0[4]*z^5 + * Note: For tiny x, 1/x dominates y1 and hence + * y1(tiny) = -2/pi/tiny, (choose tiny<2**-54) + * 3. For x>=2. + * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) + * where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1) + * by method mentioned above. 
+ */ + +use super::{cos, get_high_word, get_low_word, fabs, log, sin, sqrt}; + +const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ +const TPI: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ + +fn common(ix: u32, x: f64, y1: bool, sign: bool) -> f64 +{ + let z: f64; + let mut s: f64; + let c: f64; + let mut ss: f64; + let mut cc: f64; + + /* + * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x-3pi/4)-q1(x)*sin(x-3pi/4)) + * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x-3pi/4)+q1(x)*cos(x-3pi/4)) + * + * sin(x-3pi/4) = -(sin(x) + cos(x))/sqrt(2) + * cos(x-3pi/4) = (sin(x) - cos(x))/sqrt(2) + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + */ + s = sin(x); + if y1 { + s = -s; + } + c = cos(x); + cc = s-c; + if ix < 0x7fe00000 { + /* avoid overflow in 2*x */ + ss = -s-c; + z = cos(2.0*x); + if s*c > 0.0 { + cc = z/ss; + } else { + ss = z/cc; + } + if ix < 0x48000000 { + if y1 { + ss = -ss; + } + cc = pone(x)*cc-qone(x)*ss; + } + } + if sign { + cc = -cc; + } + return INVSQRTPI*cc/sqrt(x); +} + +/* R0/S0 on [0,2] */ +const R00: f64 = -6.25000000000000000000e-02; /* 0xBFB00000, 0x00000000 */ +const R01: f64 = 1.40705666955189706048e-03; /* 0x3F570D9F, 0x98472C61 */ +const R02: f64 = -1.59955631084035597520e-05; /* 0xBEF0C5C6, 0xBA169668 */ +const R03: f64 = 4.96727999609584448412e-08; /* 0x3E6AAAFA, 0x46CA0BD9 */ +const S01: f64 = 1.91537599538363460805e-02; /* 0x3F939D0B, 0x12637E53 */ +const S02: f64 = 1.85946785588630915560e-04; /* 0x3F285F56, 0xB9CDF664 */ +const S03: f64 = 1.17718464042623683263e-06; /* 0x3EB3BFF8, 0x333F8498 */ +const S04: f64 = 5.04636257076217042715e-09; /* 0x3E35AC88, 0xC97DFF2C */ +const S05: f64 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */ + +pub fn j1(x: f64) -> f64 +{ + let mut z: f64; + let r: f64; + let s: f64; + let mut ix: u32; + let sign: bool; + + ix = get_high_word(x); + sign = (ix>>31) != 0; + ix &= 0x7fffffff; + if ix >= 0x7ff00000 { + return 1.0/(x*x); + } + if ix >= 0x40000000 { /* |x| >= 2 
*/ + return common(ix, fabs(x), false, sign); + } + if ix >= 0x38000000 { /* |x| >= 2**-127 */ + z = x*x; + r = z*(R00+z*(R01+z*(R02+z*R03))); + s = 1.0+z*(S01+z*(S02+z*(S03+z*(S04+z*S05)))); + z = r/s; + } else { + /* avoid underflow, raise inexact if x!=0 */ + z = x; + } + return (0.5 + z)*x; +} + +const U0: [f64; 5] = [ + -1.96057090646238940668e-01, /* 0xBFC91866, 0x143CBC8A */ + 5.04438716639811282616e-02, /* 0x3FA9D3C7, 0x76292CD1 */ + -1.91256895875763547298e-03, /* 0xBF5F55E5, 0x4844F50F */ + 2.35252600561610495928e-05, /* 0x3EF8AB03, 0x8FA6B88E */ + -9.19099158039878874504e-08, /* 0xBE78AC00, 0x569105B8 */ +]; +const V0: [f64; 5] = [ + 1.99167318236649903973e-02, /* 0x3F94650D, 0x3F4DA9F0 */ + 2.02552581025135171496e-04, /* 0x3F2A8C89, 0x6C257764 */ + 1.35608801097516229404e-06, /* 0x3EB6C05A, 0x894E8CA6 */ + 6.22741452364621501295e-09, /* 0x3E3ABF1D, 0x5BA69A86 */ + 1.66559246207992079114e-11, /* 0x3DB25039, 0xDACA772A */ +]; + +pub fn y1(x: f64) -> f64 +{ + let z: f64; + let u: f64; + let v: f64; + let ix: u32; + let lx: u32; + + ix = get_high_word(x); + lx = get_low_word(x); + + /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */ + if (ix<<1 | lx) == 0 { + return -1.0/0.0; + } + if (ix>>31) != 0 { + return 0.0/0.0; + } + if ix >= 0x7ff00000 { + return 1.0/x; + } + + if ix >= 0x40000000 { /* x >= 2 */ + return common(ix, x, true, false); + } + if ix < 0x3c900000 { /* x < 2**-54 */ + return -TPI/x; + } + z = x*x; + u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4]))); + v = 1.0+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4])))); + return x*(u/v) + TPI*(j1(x)*log(x)-1.0/x); +} + +/* For x >= 8, the asymptotic expansions of pone is + * 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x. + * We approximate pone by + * pone(x) = 1 + (R/S) + * where R = pr0 + pr1*s^2 + pr2*s^4 + ... + pr5*s^10 + * S = 1 + ps0*s^2 + ... 
+ ps4*s^10 + * and + * | pone(x)-1-R/S | <= 2 ** ( -60.06) + */ + +const PR8: [f64; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + 1.17187499999988647970e-01, /* 0x3FBDFFFF, 0xFFFFFCCE */ + 1.32394806593073575129e+01, /* 0x402A7A9D, 0x357F7FCE */ + 4.12051854307378562225e+02, /* 0x4079C0D4, 0x652EA590 */ + 3.87474538913960532227e+03, /* 0x40AE457D, 0xA3A532CC */ + 7.91447954031891731574e+03, /* 0x40BEEA7A, 0xC32782DD */ +]; +const PS8: [f64; 5] = [ + 1.14207370375678408436e+02, /* 0x405C8D45, 0x8E656CAC */ + 3.65093083420853463394e+03, /* 0x40AC85DC, 0x964D274F */ + 3.69562060269033463555e+04, /* 0x40E20B86, 0x97C5BB7F */ + 9.76027935934950801311e+04, /* 0x40F7D42C, 0xB28F17BB */ + 3.08042720627888811578e+04, /* 0x40DE1511, 0x697A0B2D */ +]; + +const PR5: [f64; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.31990519556243522749e-11, /* 0x3DAD0667, 0xDAE1CA7D */ + 1.17187493190614097638e-01, /* 0x3FBDFFFF, 0xE2C10043 */ + 6.80275127868432871736e+00, /* 0x401B3604, 0x6E6315E3 */ + 1.08308182990189109773e+02, /* 0x405B13B9, 0x452602ED */ + 5.17636139533199752805e+02, /* 0x40802D16, 0xD052D649 */ + 5.28715201363337541807e+02, /* 0x408085B8, 0xBB7E0CB7 */ +]; +const PS5: [f64; 5] = [ + 5.92805987221131331921e+01, /* 0x404DA3EA, 0xA8AF633D */ + 9.91401418733614377743e+02, /* 0x408EFB36, 0x1B066701 */ + 5.35326695291487976647e+03, /* 0x40B4E944, 0x5706B6FB */ + 7.84469031749551231769e+03, /* 0x40BEA4B0, 0xB8A5BB15 */ + 1.50404688810361062679e+03, /* 0x40978030, 0x036F5E51 */ +]; + +const PR3: [f64; 6] = [ + 3.02503916137373618024e-09, /* 0x3E29FC21, 0xA7AD9EDD */ + 1.17186865567253592491e-01, /* 0x3FBDFFF5, 0x5B21D17B */ + 3.93297750033315640650e+00, /* 0x400F76BC, 0xE85EAD8A */ + 3.51194035591636932736e+01, /* 0x40418F48, 0x9DA6D129 */ + 9.10550110750781271918e+01, /* 0x4056C385, 0x4D2C1837 */ + 4.85590685197364919645e+01, /* 0x4048478F, 0x8EA83EE5 */ +]; +const PS3: [f64; 5] = [ + 
3.47913095001251519989e+01, /* 0x40416549, 0xA134069C */ + 3.36762458747825746741e+02, /* 0x40750C33, 0x07F1A75F */ + 1.04687139975775130551e+03, /* 0x40905B7C, 0x5037D523 */ + 8.90811346398256432622e+02, /* 0x408BD67D, 0xA32E31E9 */ + 1.03787932439639277504e+02, /* 0x4059F26D, 0x7C2EED53 */ +]; + +const PR2: [f64; 6] = [/* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.07710830106873743082e-07, /* 0x3E7CE9D4, 0xF65544F4 */ + 1.17176219462683348094e-01, /* 0x3FBDFF42, 0xBE760D83 */ + 2.36851496667608785174e+00, /* 0x4002F2B7, 0xF98FAEC0 */ + 1.22426109148261232917e+01, /* 0x40287C37, 0x7F71A964 */ + 1.76939711271687727390e+01, /* 0x4031B1A8, 0x177F8EE2 */ + 5.07352312588818499250e+00, /* 0x40144B49, 0xA574C1FE */ +]; +const PS2: [f64; 5] = [ + 2.14364859363821409488e+01, /* 0x40356FBD, 0x8AD5ECDC */ + 1.25290227168402751090e+02, /* 0x405F5293, 0x14F92CD5 */ + 2.32276469057162813669e+02, /* 0x406D08D8, 0xD5A2DBD9 */ + 1.17679373287147100768e+02, /* 0x405D6B7A, 0xDA1884A9 */ + 8.36463893371618283368e+00, /* 0x4020BAB1, 0xF44E5192 */ +]; + +fn pone(x: f64) -> f64 +{ + let p: &[f64; 6]; + let q: &[f64; 5]; + let z: f64; + let r: f64; + let s: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + if ix >= 0x40200000 {p = &PR8; q = &PS8;} + else if ix >= 0x40122E8B {p = &PR5; q = &PS5;} + else if ix >= 0x4006DB6D {p = &PR3; q = &PS3;} + else /*ix >= 0x40000000*/{p = &PR2; q = &PS2;} + z = 1.0/(x*x); + r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); + s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); + return 1.0+ r/s; +} + +/* For x >= 8, the asymptotic expansions of qone is + * 3/8 s - 105/1024 s^3 - ..., where s = 1/x. + * We approximate pone by + * qone(x) = s*(0.375 + (R/S)) + * where R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10 + * S = 1 + qs1*s^2 + ... 
+ qs6*s^12 + * and + * | qone(x)/s -0.375-R/S | <= 2 ** ( -61.13) + */ + +const QR8: [f64; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + -1.02539062499992714161e-01, /* 0xBFBA3FFF, 0xFFFFFDF3 */ + -1.62717534544589987888e+01, /* 0xC0304591, 0xA26779F7 */ + -7.59601722513950107896e+02, /* 0xC087BCD0, 0x53E4B576 */ + -1.18498066702429587167e+04, /* 0xC0C724E7, 0x40F87415 */ + -4.84385124285750353010e+04, /* 0xC0E7A6D0, 0x65D09C6A */ +]; +const QS8: [f64; 6] = [ + 1.61395369700722909556e+02, /* 0x40642CA6, 0xDE5BCDE5 */ + 7.82538599923348465381e+03, /* 0x40BE9162, 0xD0D88419 */ + 1.33875336287249578163e+05, /* 0x4100579A, 0xB0B75E98 */ + 7.19657723683240939863e+05, /* 0x4125F653, 0x72869C19 */ + 6.66601232617776375264e+05, /* 0x412457D2, 0x7719AD5C */ + -2.94490264303834643215e+05, /* 0xC111F969, 0x0EA5AA18 */ +]; + +const QR5: [f64; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -2.08979931141764104297e-11, /* 0xBDB6FA43, 0x1AA1A098 */ + -1.02539050241375426231e-01, /* 0xBFBA3FFF, 0xCB597FEF */ + -8.05644828123936029840e+00, /* 0xC0201CE6, 0xCA03AD4B */ + -1.83669607474888380239e+02, /* 0xC066F56D, 0x6CA7B9B0 */ + -1.37319376065508163265e+03, /* 0xC09574C6, 0x6931734F */ + -2.61244440453215656817e+03, /* 0xC0A468E3, 0x88FDA79D */ +]; +const QS5: [f64; 6] = [ + 8.12765501384335777857e+01, /* 0x405451B2, 0xFF5A11B2 */ + 1.99179873460485964642e+03, /* 0x409F1F31, 0xE77BF839 */ + 1.74684851924908907677e+04, /* 0x40D10F1F, 0x0D64CE29 */ + 4.98514270910352279316e+04, /* 0x40E8576D, 0xAABAD197 */ + 2.79480751638918118260e+04, /* 0x40DB4B04, 0xCF7C364B */ + -4.71918354795128470869e+03, /* 0xC0B26F2E, 0xFCFFA004 */ +]; + +const QR3: [f64; 6] = [ + -5.07831226461766561369e-09, /* 0xBE35CFA9, 0xD38FC84F */ + -1.02537829820837089745e-01, /* 0xBFBA3FEB, 0x51AEED54 */ + -4.61011581139473403113e+00, /* 0xC01270C2, 0x3302D9FF */ + -5.78472216562783643212e+01, /* 0xC04CEC71, 0xC25D16DA */ + -2.28244540737631695038e+02, 
/* 0xC06C87D3, 0x4718D55F */ + -2.19210128478909325622e+02, /* 0xC06B66B9, 0x5F5C1BF6 */ +]; +const QS3: [f64; 6] = [ + 4.76651550323729509273e+01, /* 0x4047D523, 0xCCD367E4 */ + 6.73865112676699709482e+02, /* 0x40850EEB, 0xC031EE3E */ + 3.38015286679526343505e+03, /* 0x40AA684E, 0x448E7C9A */ + 5.54772909720722782367e+03, /* 0x40B5ABBA, 0xA61D54A6 */ + 1.90311919338810798763e+03, /* 0x409DBC7A, 0x0DD4DF4B */ + -1.35201191444307340817e+02, /* 0xC060E670, 0x290A311F */ +]; + +const QR2: [f64; 6] = [/* for x in [2.8570,2]=1/[0.3499,0.5] */ + -1.78381727510958865572e-07, /* 0xBE87F126, 0x44C626D2 */ + -1.02517042607985553460e-01, /* 0xBFBA3E8E, 0x9148B010 */ + -2.75220568278187460720e+00, /* 0xC0060484, 0x69BB4EDA */ + -1.96636162643703720221e+01, /* 0xC033A9E2, 0xC168907F */ + -4.23253133372830490089e+01, /* 0xC04529A3, 0xDE104AAA */ + -2.13719211703704061733e+01, /* 0xC0355F36, 0x39CF6E52 */ +]; +const QS2: [f64; 6] = [ + 2.95333629060523854548e+01, /* 0x403D888A, 0x78AE64FF */ + 2.52981549982190529136e+02, /* 0x406F9F68, 0xDB821CBA */ + 7.57502834868645436472e+02, /* 0x4087AC05, 0xCE49A0F7 */ + 7.39393205320467245656e+02, /* 0x40871B25, 0x48D4C029 */ + 1.55949003336666123687e+02, /* 0x40637E5E, 0x3C3ED8D4 */ + -4.95949898822628210127e+00, /* 0xC013D686, 0xE71BE86B */ +]; + +fn qone(x: f64) -> f64 +{ + let p: &[f64; 6]; + let q: &[f64; 6]; + let s: f64; + let r: f64; + let z: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + if ix >= 0x40200000 {p = &QR8; q = &QS8;} + else if ix >= 0x40122E8B {p = &QR5; q = &QS5;} + else if ix >= 0x4006DB6D {p = &QR3; q = &QS3;} + else /*ix >= 0x40000000*/{p = &QR2; q = &QS2;} + z = 1.0/(x*x); + r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); + s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); + return (0.375 + r/s)/x; +} diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs new file mode 100644 index 000000000..7cf9c45b9 --- /dev/null +++ b/libm/src/math/j1f.rs @@ -0,0 +1,331 @@ +/* origin: 
FreeBSD /usr/src/lib/msun/src/e_j1f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{cosf, fabsf, logf, sinf, sqrtf}; + +const INVSQRTPI: f32 = 5.6418961287e-01; /* 0x3f106ebb */ +const TPI: f32 = 6.3661974669e-01; /* 0x3f22f983 */ + +fn common(ix: u32, x: f32, y1: bool, sign: bool) -> f32 +{ + let z: f64; + let mut s: f64; + let c: f64; + let mut ss: f64; + let mut cc: f64; + + s = sinf(x) as f64; + if y1 { + s = -s; + } + c = cosf(x) as f64; + cc = s-c; + if ix < 0x7f000000 { + ss = -s-c; + z = cosf(2.0*x) as f64; + if s*c > 0.0 { + cc = z/ss; + } else { + ss = z/cc; + } + if ix < 0x58800000 { + if y1 { + ss = -ss; + } + cc = (ponef(x) as f64)*cc-(qonef(x) as f64)*ss; + } + } + if sign { + cc = -cc; + } + return INVSQRTPI*(cc as f32)/sqrtf(x); +} + +/* R0/S0 on [0,2] */ +const R00: f32 = -6.2500000000e-02; /* 0xbd800000 */ +const R01: f32 = 1.4070566976e-03; /* 0x3ab86cfd */ +const R02: f32 = -1.5995563444e-05; /* 0xb7862e36 */ +const R03: f32 = 4.9672799207e-08; /* 0x335557d2 */ +const S01: f32 = 1.9153760746e-02; /* 0x3c9ce859 */ +const S02: f32 = 1.8594678841e-04; /* 0x3942fab6 */ +const S03: f32 = 1.1771846857e-06; /* 0x359dffc2 */ +const S04: f32 = 5.0463624390e-09; /* 0x31ad6446 */ +const S05: f32 = 1.2354227016e-11; /* 0x2d59567e */ + +pub fn j1f(x: f32) -> f32 +{ + let mut z: f32; + let r: f32; + let s: f32; + let mut ix: u32; + let sign: bool; + + ix = x.to_bits(); + sign = (ix>>31) != 0; + ix &= 0x7fffffff; + if ix >= 0x7f800000 { + return 1.0/(x*x); + } + if ix >= 0x40000000 { /* |x| >= 2 */ + return common(ix, 
fabsf(x), false, sign); + } + if ix >= 0x39000000 { /* |x| >= 2**-13 */ + z = x*x; + r = z*(R00+z*(R01+z*(R02+z*R03))); + s = 1.0+z*(S01+z*(S02+z*(S03+z*(S04+z*S05)))); + z = 0.5 + r/s; + } else { + z = 0.5; + } + return z*x; +} + +const U0: [f32; 5] = [ + -1.9605709612e-01, /* 0xbe48c331 */ + 5.0443872809e-02, /* 0x3d4e9e3c */ + -1.9125689287e-03, /* 0xbafaaf2a */ + 2.3525259166e-05, /* 0x37c5581c */ + -9.1909917899e-08, /* 0xb3c56003 */ +]; +const V0: [f32; 5] = [ + 1.9916731864e-02, /* 0x3ca3286a */ + 2.0255257550e-04, /* 0x3954644b */ + 1.3560879779e-06, /* 0x35b602d4 */ + 6.2274145840e-09, /* 0x31d5f8eb */ + 1.6655924903e-11, /* 0x2d9281cf */ +]; + +pub fn y1f(x: f32) -> f32 +{ + let z: f32; + let u: f32; + let v: f32; + let ix: u32; + + ix = x.to_bits(); + if (ix & 0x7fffffff) == 0 { + return -1.0/0.0; + } + if (ix>>31) != 0{ + return 0.0/0.0; + } + if ix >= 0x7f800000 { + return 1.0/x; + } + if ix >= 0x40000000 { /* |x| >= 2.0 */ + return common(ix,x,true,false); + } + if ix < 0x33000000 { /* x < 2**-25 */ + return -TPI/x; + } + z = x*x; + u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4]))); + v = 1.0+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4])))); + return x*(u/v) + TPI*(j1f(x)*logf(x)-1.0/x); +} + +/* For x >= 8, the asymptotic expansions of pone is + * 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x. + * We approximate pone by + * pone(x) = 1 + (R/S) + * where R = pr0 + pr1*s^2 + pr2*s^4 + ... + pr5*s^10 + * S = 1 + ps0*s^2 + ... 
+ ps4*s^10 + * and + * | pone(x)-1-R/S | <= 2 ** ( -60.06) + */ + +const PR8: [f32; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + 1.1718750000e-01, /* 0x3df00000 */ + 1.3239480972e+01, /* 0x4153d4ea */ + 4.1205184937e+02, /* 0x43ce06a3 */ + 3.8747453613e+03, /* 0x45722bed */ + 7.9144794922e+03, /* 0x45f753d6 */ +]; +const PS8: [f32; 5] = [ + 1.1420736694e+02, /* 0x42e46a2c */ + 3.6509309082e+03, /* 0x45642ee5 */ + 3.6956207031e+04, /* 0x47105c35 */ + 9.7602796875e+04, /* 0x47bea166 */ + 3.0804271484e+04, /* 0x46f0a88b */ +]; + +const PR5: [f32; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.3199052094e-11, /* 0x2d68333f */ + 1.1718749255e-01, /* 0x3defffff */ + 6.8027510643e+00, /* 0x40d9b023 */ + 1.0830818176e+02, /* 0x42d89dca */ + 5.1763616943e+02, /* 0x440168b7 */ + 5.2871520996e+02, /* 0x44042dc6 */ +]; +const PS5: [f32; 5] = [ + 5.9280597687e+01, /* 0x426d1f55 */ + 9.9140142822e+02, /* 0x4477d9b1 */ + 5.3532670898e+03, /* 0x45a74a23 */ + 7.8446904297e+03, /* 0x45f52586 */ + 1.5040468750e+03, /* 0x44bc0180 */ +]; + +const PR3: [f32; 6] = [ + 3.0250391081e-09, /* 0x314fe10d */ + 1.1718686670e-01, /* 0x3defffab */ + 3.9329774380e+00, /* 0x407bb5e7 */ + 3.5119403839e+01, /* 0x420c7a45 */ + 9.1055007935e+01, /* 0x42b61c2a */ + 4.8559066772e+01, /* 0x42423c7c */ +]; +const PS3: [f32; 5] = [ + 3.4791309357e+01, /* 0x420b2a4d */ + 3.3676245117e+02, /* 0x43a86198 */ + 1.0468714600e+03, /* 0x4482dbe3 */ + 8.9081134033e+02, /* 0x445eb3ed */ + 1.0378793335e+02, /* 0x42cf936c */ +]; + +const PR2: [f32; 6] = [/* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.0771083225e-07, /* 0x33e74ea8 */ + 1.1717621982e-01, /* 0x3deffa16 */ + 2.3685150146e+00, /* 0x401795c0 */ + 1.2242610931e+01, /* 0x4143e1bc */ + 1.7693971634e+01, /* 0x418d8d41 */ + 5.0735230446e+00, /* 0x40a25a4d */ +]; +const PS2: [f32; 5] = [ + 2.1436485291e+01, /* 0x41ab7dec */ + 1.2529022980e+02, /* 0x42fa9499 */ + 2.3227647400e+02, /* 0x436846c7 */ + 1.1767937469e+02, /* 
0x42eb5bd7 */ + 8.3646392822e+00, /* 0x4105d590 */ +]; + +fn ponef(x: f32) -> f32 +{ + let p: &[f32; 6]; + let q: &[f32; 5]; + let z: f32; + let r: f32; + let s: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x41000000 {p = &PR8; q = &PS8;} + else if ix >= 0x409173eb {p = &PR5; q = &PS5;} + else if ix >= 0x4036d917 {p = &PR3; q = &PS3;} + else /*ix >= 0x40000000*/{p = &PR2; q = &PS2;} + z = 1.0/(x*x); + r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); + s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); + return 1.0 + r/s; +} + +/* For x >= 8, the asymptotic expansions of qone is + * 3/8 s - 105/1024 s^3 - ..., where s = 1/x. + * We approximate pone by + * qone(x) = s*(0.375 + (R/S)) + * where R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10 + * S = 1 + qs1*s^2 + ... + qs6*s^12 + * and + * | qone(x)/s -0.375-R/S | <= 2 ** ( -61.13) + */ + +const QR8: [f32; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + -1.0253906250e-01, /* 0xbdd20000 */ + -1.6271753311e+01, /* 0xc1822c8d */ + -7.5960174561e+02, /* 0xc43de683 */ + -1.1849806641e+04, /* 0xc639273a */ + -4.8438511719e+04, /* 0xc73d3683 */ +]; +const QS8: [f32; 6] = [ + 1.6139537048e+02, /* 0x43216537 */ + 7.8253862305e+03, /* 0x45f48b17 */ + 1.3387534375e+05, /* 0x4802bcd6 */ + 7.1965775000e+05, /* 0x492fb29c */ + 6.6660125000e+05, /* 0x4922be94 */ + -2.9449025000e+05, /* 0xc88fcb48 */ +]; + +const QR5: [f32; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -2.0897993405e-11, /* 0xadb7d219 */ + -1.0253904760e-01, /* 0xbdd1fffe */ + -8.0564479828e+00, /* 0xc100e736 */ + -1.8366960144e+02, /* 0xc337ab6b */ + -1.3731937256e+03, /* 0xc4aba633 */ + -2.6124443359e+03, /* 0xc523471c */ +]; +const QS5: [f32; 6] = [ + 8.1276550293e+01, /* 0x42a28d98 */ + 1.9917987061e+03, /* 0x44f8f98f */ + 1.7468484375e+04, /* 0x468878f8 */ + 4.9851425781e+04, /* 0x4742bb6d */ + 2.7948074219e+04, /* 0x46da5826 */ + -4.7191835938e+03, /* 0xc5937978 */ +]; + +const QR3: [f32; 6] 
= [ + -5.0783124372e-09, /* 0xb1ae7d4f */ + -1.0253783315e-01, /* 0xbdd1ff5b */ + -4.6101160049e+00, /* 0xc0938612 */ + -5.7847221375e+01, /* 0xc267638e */ + -2.2824453735e+02, /* 0xc3643e9a */ + -2.1921012878e+02, /* 0xc35b35cb */ +]; +const QS3: [f32; 6] = [ + 4.7665153503e+01, /* 0x423ea91e */ + 6.7386511230e+02, /* 0x4428775e */ + 3.3801528320e+03, /* 0x45534272 */ + 5.5477290039e+03, /* 0x45ad5dd5 */ + 1.9031191406e+03, /* 0x44ede3d0 */ + -1.3520118713e+02, /* 0xc3073381 */ +]; + +const QR2: [f32; 6] = [ /* for x in [2.8570,2]=1/[0.3499,0.5] */ + -1.7838172539e-07, /* 0xb43f8932 */ + -1.0251704603e-01, /* 0xbdd1f475 */ + -2.7522056103e+00, /* 0xc0302423 */ + -1.9663616180e+01, /* 0xc19d4f16 */ + -4.2325313568e+01, /* 0xc2294d1f */ + -2.1371921539e+01, /* 0xc1aaf9b2 */ +]; +const QS2: [f32; 6] = [ + 2.9533363342e+01, /* 0x41ec4454 */ + 2.5298155212e+02, /* 0x437cfb47 */ + 7.5750280762e+02, /* 0x443d602e */ + 7.3939318848e+02, /* 0x4438d92a */ + 1.5594900513e+02, /* 0x431bf2f2 */ + -4.9594988823e+00, /* 0xc09eb437 */ +]; + +fn qonef(x: f32) -> f32 +{ + let p: &[f32; 6]; + let q: &[f32; 6]; + let s: f32; + let r: f32; + let z: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x41000000 {p = &QR8; q = &QS8;} + else if ix >= 0x409173eb {p = &QR5; q = &QS5;} + else if ix >= 0x4036d917 {p = &QR3; q = &QS3;} + else /*ix >= 0x40000000*/{p = &QR2; q = &QS2;} + z = 1.0/(x*x); + r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); + s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); + return (0.375 + r/s)/x; +} diff --git a/libm/src/math/jn.rs b/libm/src/math/jn.rs new file mode 100644 index 000000000..7f7c06fee --- /dev/null +++ b/libm/src/math/jn.rs @@ -0,0 +1,338 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_jn.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. 
+ * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * jn(n, x), yn(n, x) + * floating point Bessel's function of the 1st and 2nd kind + * of order n + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. + * Note 2. About jn(n,x), yn(n,x) + * For n=0, j0(x) is called, + * for n=1, j1(x) is called, + * for n<=x, forward recursion is used starting + * from values of j0(x) and j1(x). + * for n>x, a continued fraction approximation to + * j(n,x)/j(n-1,x) is evaluated and then backward + * recursion is used starting from a supposed value + * for j(n,x). The resulting value of j(0,x) is + * compared with the actual value to correct the + * supposed value of j(n,x). + * + * yn(n,x) is similar in all respects, except + * that forward recursion is used for all + * values of n>1. 
+ */ + +use super::{cos, fabs, get_high_word, get_low_word, log, j0, j1, sin, sqrt, y0, y1}; + +const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ + +pub fn jn(n: isize, mut x: f64) -> f64 +{ + let mut ix: u32; + let lx: u32; + let nm1: isize; + let mut i: isize; + let mut sign: bool; + let mut a: f64; + let mut b: f64; + let mut temp: f64; + + ix = get_high_word(x); + lx = get_low_word(x); + sign = (ix>>31) != 0; + ix &= 0x7fffffff; + + // -lx == !lx + 1 + if (ix | (lx|(!lx+1))>>31) > 0x7ff00000 { /* nan */ + return x; + } + + /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x) + * Thus, J(-n,x) = J(n,-x) + */ + /* nm1 = |n|-1 is used instead of |n| to handle n==INT_MIN */ + if n == 0 { + return j0(x); + } + if n < 0 { + nm1 = -(n+1); + x = -x; + sign = !sign; + } else { + nm1 = n-1; + } + if nm1 == 0 { + return j1(x); + } + + sign &= (n & 1) != 0; /* even n: 0, odd n: signbit(x) */ + x = fabs(x); + if (ix|lx) == 0 || ix == 0x7ff00000 { /* if x is 0 or inf */ + b = 0.0; + } else if (nm1 as f64) < x { + /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ + if ix >= 0x52d00000 { /* x > 2**302 */ + /* (x >> n**2) + * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Let s=sin(x), c=cos(x), + * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + * + * n sin(xn)*sqt2 cos(xn)*sqt2 + * ---------------------------------- + * 0 s-c c+s + * 1 -s-c -c+s + * 2 -s+c -c-s + * 3 s+c c-s + */ + temp = match nm1&3 { + 0 => -cos(x)+sin(x), + 1 => -cos(x)-sin(x), + 2 => cos(x)-sin(x), + 3 | _ => cos(x)+sin(x), + }; + b = INVSQRTPI*temp/sqrt(x); + } else { + a = j0(x); + b = j1(x); + i = 0; + while i < nm1 { + i += 1; + temp = b; + b = b*(2.0*(i as f64)/x) - a; /* avoid underflow */ + a = temp; + } + } + } else { + if ix < 0x3e100000 { /* x < 2**-29 */ + /* x is tiny, return the first Taylor expansion of J(n,x) + * J(n,x) = 1/n!*(x/2)^n - ... 
+ */ + if nm1 > 32 { /* underflow */ + b = 0.0; + } else { + temp = x*0.5; + b = temp; + a = 1.0; + i = 2; + while i <= nm1 + 1 { + a *= i as f64; /* a = n! */ + b *= temp; /* b = (x/2)^n */ + i += 1; + } + b = b/a; + } + } else { + /* use backward recurrence */ + /* x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h=2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... + * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k */ + let mut t: f64; + let mut q0: f64; + let mut q1: f64; + let mut w: f64; + let h: f64; + let mut z: f64; + let mut tmp: f64; + let nf: f64; + + let mut k: isize; + + nf = (nm1 as f64) + 1.0; + w = 2.0*nf/x; + h = 2.0/x; + z = w+h; + q0 = w; + q1 = w*z - 1.0; + k = 1; + while q1 < 1.0e9 { + k += 1; + z += h; + tmp = z*q1 - q0; + q0 = q1; + q1 = tmp; + } + t = 0.0; + i = k; + while i >= 0 { + t = 1.0/(2.0*((i as f64)+nf)/x - t); + i -= 1; + } + a = t; + b = 1.0; + /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * Hence, if n*(log(2n/x)) > ... 
+ * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = nf*log(fabs(w)); + if tmp < 7.09782712893383973096e+02 { + i = nm1; + while i > 0 { + temp = b; + b = b*(2.0*(i as f64))/x - a; + a = temp; + i -= 1; + } + } else { + i = nm1; + while i > 0 { + temp = b; + b = b*(2.0*(i as f64))/x - a; + a = temp; + /* scale b to avoid spurious overflow */ + let x1p500 = f64::from_bits(0x5f30000000000000); // 0x1p500 == 2^500 + if b > x1p500 { + a /= b; + t /= b; + b = 1.0; + } + i -= 1; + } + } + z = j0(x); + w = j1(x); + if fabs(z) >= fabs(w) { + b = t*z/b; + } else { + b = t*w/a; + } + } + } + + if sign { + -b + } else { + b + } +} + + +pub fn yn(n: isize, x: f64) -> f64 +{ + let mut ix: u32; + let lx: u32; + let mut ib: u32; + let nm1: isize; + let mut sign: bool; + let mut i: isize; + let mut a: f64; + let mut b: f64; + let mut temp: f64; + + ix = get_high_word(x); + lx = get_low_word(x); + sign = (ix>>31) != 0; + ix &= 0x7fffffff; + + // -lx == !lx + 1 + if (ix | (lx|(!lx+1))>>31) > 0x7ff00000 { /* nan */ + return x; + } + if sign && (ix|lx) != 0 { /* x < 0 */ + return 0.0/0.0; + } + if ix == 0x7ff00000 { + return 0.0; + } + + if n == 0 { + return y0(x); + } + if n < 0 { + nm1 = -(n+1); + sign = (n&1) != 0; + } else { + nm1 = n-1; + sign = false; + } + if nm1 == 0 { + if sign { + return -y1(x); + } else { + return y1(x); + } + } + + if ix >= 0x52d00000 { /* x > 2**302 */ + /* (x >> n**2) + * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Let s=sin(x), c=cos(x), + * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + * + * n sin(xn)*sqt2 cos(xn)*sqt2 + * ---------------------------------- + * 0 s-c c+s + * 1 -s-c -c+s + * 2 -s+c -c-s + * 3 s+c c-s + */ + temp = match nm1&3 { + 0 => -sin(x)-cos(x), + 1 => -sin(x)+cos(x), + 2 => sin(x)+cos(x), + 3 | _ => sin(x)-cos(x), + }; + b 
= INVSQRTPI*temp/sqrt(x); + } else { + a = y0(x); + b = y1(x); + /* quit if b is -inf */ + ib = get_high_word(b); + i = 0; + while i < nm1 && ib != 0xfff00000 { + i += 1; + temp = b; + b = (2.0*(i as f64)/x)*b - a; + ib = get_high_word(b); + a = temp; + } + } + + if sign { + -b + } else { + b + } +} diff --git a/libm/src/math/jnf.rs b/libm/src/math/jnf.rs new file mode 100644 index 000000000..4cd848a03 --- /dev/null +++ b/libm/src/math/jnf.rs @@ -0,0 +1,255 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_jnf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{fabsf, j0f, j1f, logf, y0f, y1f}; + +pub fn jnf(n: isize, mut x: f32) -> f32 +{ + let mut ix: u32; + let mut nm1: isize; + let mut sign: bool; + let mut i: isize; + let mut a: f32; + let mut b: f32; + let mut temp: f32; + + ix = x.to_bits(); + sign = (ix>>31) != 0; + ix &= 0x7fffffff; + if ix > 0x7f800000 { /* nan */ + return x; + } + + /* J(-n,x) = J(n,-x), use |n|-1 to avoid overflow in -n */ + if n == 0 { + return j0f(x); + } + if n < 0 { + nm1 = -(n+1); + x = -x; + sign = !sign; + } else { + nm1 = n-1; + } + if nm1 == 0 { + return j1f(x); + } + + sign &= (n&1) != 0; /* even n: 0, odd n: signbit(x) */ + x = fabsf(x); + if ix == 0 || ix == 0x7f800000 { /* if x is 0 or inf */ + b = 0.0; + } else if (nm1 as f32) < x { + /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ + a = j0f(x); + b = j1f(x); + i = 0; + while i < nm1 { + i += 1; + temp = b; + b = b*(2.0*(i as f32)/x) - a; + a = temp; + } + } else { + if ix < 0x35800000 { /* x < 2**-20 */ + /* x is tiny, return the first 
Taylor expansion of J(n,x) + * J(n,x) = 1/n!*(x/2)^n - ... + */ + if nm1 > 8 { /* underflow */ + nm1 = 8; + } + temp = 0.5 * x; + b = temp; + a = 1.0; + i = 2; + while i <= nm1 + 1 { + a *= i as f32; /* a = n! */ + b *= temp; /* b = (x/2)^n */ + i += 1; + } + b = b/a; + } else { + /* use backward recurrence */ + /* x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h=2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... + * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k */ + let mut t: f32; + let mut q0: f32; + let mut q1: f32; + let mut w: f32; + let h: f32; + let mut z: f32; + let mut tmp: f32; + let nf: f32; + let mut k: isize; + + nf = (nm1 as f32)+1.0; + w = 2.0*(nf as f32)/x; + h = 2.0/x; + z = w+h; + q0 = w; + q1 = w*z - 1.0; + k = 1; + while q1 < 1.0e4 { + k += 1; + z += h; + tmp = z*q1 - q0; + q0 = q1; + q1 = tmp; + } + t = 0.0; + i = k; + while i >= 0 { + t = 1.0/(2.0*((i as f32)+nf)/x-t); + i -= 1; + } + a = t; + b = 1.0; + /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * Hence, if n*(log(2n/x)) > ... 
+ * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = nf*logf(fabsf(w)); + if tmp < 88.721679688 { + i = nm1; + while i > 0 { + temp = b; + b = 2.0*(i as f32)*b/x - a; + a = temp; + i -= 1; + } + } else { + i = nm1; + while i > 0 { + temp = b; + b = 2.0*(i as f32)*b/x - a; + a = temp; + /* scale b to avoid spurious overflow */ + let x1p60 = f32::from_bits(0x5d800000); // 0x1p60 == 2^60 + if b > x1p60 { + a /= b; + t /= b; + b = 1.0; + } + i -= 1; + } + } + z = j0f(x); + w = j1f(x); + if fabsf(z) >= fabsf(w) { + b = t*z/b; + } else { + b = t*w/a; + } + } + } + + if sign { + -b + } else { + b + } +} + +pub fn ynf(n: isize, x: f32) -> f32 +{ + let mut ix: u32; + let mut ib: u32; + let nm1: isize; + let mut sign: bool; + let mut i: isize; + let mut a: f32; + let mut b: f32; + let mut temp: f32; + + ix = x.to_bits(); + sign = (ix>>31) != 0; + ix &= 0x7fffffff; + if ix > 0x7f800000 { /* nan */ + return x; + } + if sign && ix != 0 { /* x < 0 */ + return 0.0/0.0; + } + if ix == 0x7f800000 { + return 0.0; + } + + if n == 0 { + return y0f(x); + } + if n < 0 { + nm1 = -(n+1); + sign = (n&1) != 0; + } else { + nm1 = n-1; + sign = false; + } + if nm1 == 0 { + if sign { + return -y1f(x); + } else { + return y1f(x); + } + } + + a = y0f(x); + b = y1f(x); + /* quit if b is -inf */ + ib = b.to_bits(); + i = 0; + while i < nm1 && ib != 0xff800000 { + i += 1; + temp = b; + b = (2.0*(i as f32)/x)*b - a; + ib = b.to_bits(); + a = temp; + } + + if sign { + -b + } else { + b + } +} diff --git a/libm/src/math/lgamma.rs b/libm/src/math/lgamma.rs new file mode 100644 index 000000000..35b252652 --- /dev/null +++ b/libm/src/math/lgamma.rs @@ -0,0 +1,309 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_lgamma_r.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. 
All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + */ +/* lgamma_r(x, signgamp) + * Reentrant version of the logarithm of the Gamma function + * with user provide pointer for the sign of Gamma(x). + * + * Method: + * 1. Argument Reduction for 0 < x <= 8 + * Since gamma(1+s)=s*gamma(s), for x in [0,8], we may + * reduce x to a number in [1.5,2.5] by + * lgamma(1+s) = log(s) + lgamma(s) + * for example, + * lgamma(7.3) = log(6.3) + lgamma(6.3) + * = log(6.3*5.3) + lgamma(5.3) + * = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3) + * 2. Polynomial approximation of lgamma around its + * minimun ymin=1.461632144968362245 to maintain monotonicity. + * On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use + * Let z = x-ymin; + * lgamma(x) = -1.214862905358496078218 + z^2*poly(z) + * where + * poly(z) is a 14 degree polynomial. + * 2. Rational approximation in the primary interval [2,3] + * We use the following approximation: + * s = x-2.0; + * lgamma(x) = 0.5*s + s*P(s)/Q(s) + * with accuracy + * |P/Q - (lgamma(x)-0.5s)| < 2**-61.71 + * Our algorithms are based on the following observation + * + * zeta(2)-1 2 zeta(3)-1 3 + * lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ... + * 2 3 + * + * where Euler = 0.5771... is the Euler constant, which is very + * close to 0.5. + * + * 3. For x>=8, we have + * lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+.... + * (better formula: + * lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...) + * Let z = 1/x, then we approximation + * f(z) = lgamma(x) - (x-0.5)(log(x)-1) + * by + * 3 5 11 + * w = w0 + w1*z + w2*z + w3*z + ... + w6*z + * where + * |w - f(z)| < 2**-58.74 + * + * 4. 
For negative x, since (G is gamma function) + * -x*G(-x)*G(x) = PI/sin(PI*x), + * we have + * G(x) = PI/(sin(PI*x)*(-x)*G(-x)) + * since G(-x) is positive, sign(G(x)) = sign(sin(PI*x)) for x<0 + * Hence, for x<0, signgam = sign(sin(PI*x)) and + * lgamma(x) = log(|Gamma(x)|) + * = log(PI/(|x*sin(PI*x)|)) - lgamma(-x); + * Note: one should avoid compute PI*(-x) directly in the + * computation of sin(PI*(-x)). + * + * 5. Special Cases + * lgamma(2+s) ~ s*(1-Euler) for tiny s + * lgamma(1) = lgamma(2) = 0 + * lgamma(x) ~ -log(|x|) for tiny x + * lgamma(0) = lgamma(neg.integer) = inf and raise divide-by-zero + * lgamma(inf) = inf + * lgamma(-inf) = inf (bug for bug compatible with C99!?) + * + */ + +use super::{floor, k_cos, k_sin, log}; + +const PI: f64 = 3.14159265358979311600e+00; /* 0x400921FB, 0x54442D18 */ +const A0: f64 = 7.72156649015328655494e-02; /* 0x3FB3C467, 0xE37DB0C8 */ +const A1: f64 = 3.22467033424113591611e-01; /* 0x3FD4A34C, 0xC4A60FAD */ +const A2: f64 = 6.73523010531292681824e-02; /* 0x3FB13E00, 0x1A5562A7 */ +const A3: f64 = 2.05808084325167332806e-02; /* 0x3F951322, 0xAC92547B */ +const A4: f64 = 7.38555086081402883957e-03; /* 0x3F7E404F, 0xB68FEFE8 */ +const A5: f64 = 2.89051383673415629091e-03; /* 0x3F67ADD8, 0xCCB7926B */ +const A6: f64 = 1.19270763183362067845e-03; /* 0x3F538A94, 0x116F3F5D */ +const A7: f64 = 5.10069792153511336608e-04; /* 0x3F40B6C6, 0x89B99C00 */ +const A8: f64 = 2.20862790713908385557e-04; /* 0x3F2CF2EC, 0xED10E54D */ +const A9: f64 = 1.08011567247583939954e-04; /* 0x3F1C5088, 0x987DFB07 */ +const A10: f64 = 2.52144565451257326939e-05; /* 0x3EFA7074, 0x428CFA52 */ +const A11: f64 = 4.48640949618915160150e-05; /* 0x3F07858E, 0x90A45837 */ +const TC: f64 = 1.46163214496836224576e+00; /* 0x3FF762D8, 0x6356BE3F */ +const TF: f64 = -1.21486290535849611461e-01; /* 0xBFBF19B9, 0xBCC38A42 */ +/* tt = -(tail of TF) */ +const TT: f64 = -3.63867699703950536541e-18; /* 0xBC50C7CA, 0xA48A971F */ +const T0: f64 = 
4.83836122723810047042e-01; /* 0x3FDEF72B, 0xC8EE38A2 */ +const T1: f64 = -1.47587722994593911752e-01; /* 0xBFC2E427, 0x8DC6C509 */ +const T2: f64 = 6.46249402391333854778e-02; /* 0x3FB08B42, 0x94D5419B */ +const T3: f64 = -3.27885410759859649565e-02; /* 0xBFA0C9A8, 0xDF35B713 */ +const T4: f64 = 1.79706750811820387126e-02; /* 0x3F9266E7, 0x970AF9EC */ +const T5: f64 = -1.03142241298341437450e-02; /* 0xBF851F9F, 0xBA91EC6A */ +const T6: f64 = 6.10053870246291332635e-03; /* 0x3F78FCE0, 0xE370E344 */ +const T7: f64 = -3.68452016781138256760e-03; /* 0xBF6E2EFF, 0xB3E914D7 */ +const T8: f64 = 2.25964780900612472250e-03; /* 0x3F6282D3, 0x2E15C915 */ +const T9: f64 = -1.40346469989232843813e-03; /* 0xBF56FE8E, 0xBF2D1AF1 */ +const T10: f64 = 8.81081882437654011382e-04; /* 0x3F4CDF0C, 0xEF61A8E9 */ +const T11: f64 = -5.38595305356740546715e-04; /* 0xBF41A610, 0x9C73E0EC */ +const T12: f64 = 3.15632070903625950361e-04; /* 0x3F34AF6D, 0x6C0EBBF7 */ +const T13: f64 = -3.12754168375120860518e-04; /* 0xBF347F24, 0xECC38C38 */ +const T14: f64 = 3.35529192635519073543e-04; /* 0x3F35FD3E, 0xE8C2D3F4 */ +const U0: f64 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */ +const U1: f64 = 6.32827064025093366517e-01; /* 0x3FE4401E, 0x8B005DFF */ +const U2: f64 = 1.45492250137234768737e+00; /* 0x3FF7475C, 0xD119BD6F */ +const U3: f64 = 9.77717527963372745603e-01; /* 0x3FEF4976, 0x44EA8450 */ +const U4: f64 = 2.28963728064692451092e-01; /* 0x3FCD4EAE, 0xF6010924 */ +const U5: f64 = 1.33810918536787660377e-02; /* 0x3F8B678B, 0xBF2BAB09 */ +const V1: f64 = 2.45597793713041134822e+00; /* 0x4003A5D7, 0xC2BD619C */ +const V2: f64 = 2.12848976379893395361e+00; /* 0x40010725, 0xA42B18F5 */ +const V3: f64 = 7.69285150456672783825e-01; /* 0x3FE89DFB, 0xE45050AF */ +const V4: f64 = 1.04222645593369134254e-01; /* 0x3FBAAE55, 0xD6537C88 */ +const V5: f64 = 3.21709242282423911810e-03; /* 0x3F6A5ABB, 0x57D0CF61 */ +const S0: f64 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */ 
+const S1: f64 = 2.14982415960608852501e-01; /* 0x3FCB848B, 0x36E20878 */ +const S2: f64 = 3.25778796408930981787e-01; /* 0x3FD4D98F, 0x4F139F59 */ +const S3: f64 = 1.46350472652464452805e-01; /* 0x3FC2BB9C, 0xBEE5F2F7 */ +const S4: f64 = 2.66422703033638609560e-02; /* 0x3F9B481C, 0x7E939961 */ +const S5: f64 = 1.84028451407337715652e-03; /* 0x3F5E26B6, 0x7368F239 */ +const S6: f64 = 3.19475326584100867617e-05; /* 0x3F00BFEC, 0xDD17E945 */ +const R1: f64 = 1.39200533467621045958e+00; /* 0x3FF645A7, 0x62C4AB74 */ +const R2: f64 = 7.21935547567138069525e-01; /* 0x3FE71A18, 0x93D3DCDC */ +const R3: f64 = 1.71933865632803078993e-01; /* 0x3FC601ED, 0xCCFBDF27 */ +const R4: f64 = 1.86459191715652901344e-02; /* 0x3F9317EA, 0x742ED475 */ +const R5: f64 = 7.77942496381893596434e-04; /* 0x3F497DDA, 0xCA41A95B */ +const R6: f64 = 7.32668430744625636189e-06; /* 0x3EDEBAF7, 0xA5B38140 */ +const W0: f64 = 4.18938533204672725052e-01; /* 0x3FDACFE3, 0x90C97D69 */ +const W1: f64 = 8.33333333333329678849e-02; /* 0x3FB55555, 0x5555553B */ +const W2: f64 = -2.77777777728775536470e-03; /* 0xBF66C16C, 0x16B02E5C */ +const W3: f64 = 7.93650558643019558500e-04; /* 0x3F4A019F, 0x98CF38B6 */ +const W4: f64 = -5.95187557450339963135e-04; /* 0xBF4380CB, 0x8C0FE741 */ +const W5: f64 = 8.36339918996282139126e-04; /* 0x3F4B67BA, 0x4CDAD5D1 */ +const W6: f64 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ + +/* sin(PI*x) assuming x > 2^-100, if sin(PI*x)==0 the sign is arbitrary */ +fn sin_pi(mut x: f64) -> f64 +{ + let mut n: isize; + + /* spurious inexact if odd int */ + x = 2.0*(x*0.5 - floor(x*0.5)); /* x mod 2.0 */ + + n = (x*4.0) as isize; + n = (n+1)/2; + x -= (n as f64)*0.5; + x *= PI; + + match n { + 1 => k_cos(x, 0.0), + 2 => k_sin(-x, 0.0, 0), + 3 => -k_cos(x, 0.0), + 0|_ => k_sin(x, 0.0, 0), + } +} + +pub fn lgamma(x: f64) -> f64 { + lgamma_r(x).0 +} + +pub fn lgamma_r(mut x: f64) -> (f64, isize) +{ + let u: u64 = x.to_bits(); + let mut t: f64; + let y: f64; + let mut z: 
f64; + let nadj: f64; + let p: f64; + let p1: f64; + let p2: f64; + let p3: f64; + let q: f64; + let mut r: f64; + let w: f64; + let ix: u32; + let sign: bool; + let i: isize; + let mut signgam: isize; + + /* purge off +-inf, NaN, +-0, tiny and negative arguments */ + signgam = 1; + sign = (u>>63) != 0; + ix = ((u>>32) as u32) & 0x7fffffff; + if ix >= 0x7ff00000 { + return (x*x, signgam); + } + if ix < (0x3ff-70)<<20 { /* |x|<2**-70, return -log(|x|) */ + if sign { + x = -x; + signgam = -1; + } + return (-log(x), signgam); + } + if sign { + x = -x; + t = sin_pi(x); + if t == 0.0 { /* -integer */ + return (1.0/(x-x), signgam); + } + if t > 0.0 { + signgam = -1; + } else { + t = -t; + } + nadj = log(PI/(t*x)); + } else { + nadj = 0.0; + } + + /* purge off 1 and 2 */ + if (ix == 0x3ff00000 || ix == 0x40000000) && (u & 0xffffffff) == 0 { + r = 0.0; + } + /* for x < 2.0 */ + else if ix < 0x40000000 { + if ix <= 0x3feccccc { /* lgamma(x) = lgamma(x+1)-log(x) */ + r = -log(x); + if ix >= 0x3FE76944 { + y = 1.0 - x; + i = 0; + } else if ix >= 0x3FCDA661 { + y = x - (TC-1.0); + i = 1; + } else { + y = x; + i = 2; + } + } else { + r = 0.0; + if ix >= 0x3FFBB4C3 { /* [1.7316,2] */ + y = 2.0 - x; + i = 0; + } else if ix >= 0x3FF3B4C4 { /* [1.23,1.73] */ + y = x - TC; + i = 1; + } else { + y = x - 1.0; + i = 2; + } + } + match i { + 0 => { + z = y*y; + p1 = A0+z*(A2+z*(A4+z*(A6+z*(A8+z*A10)))); + p2 = z*(A1+z*(A3+z*(A5+z*(A7+z*(A9+z*A11))))); + p = y*p1+p2; + r += p-0.5*y; + } + 1 => { + z = y*y; + w = z*y; + p1 = T0+w*(T3+w*(T6+w*(T9 +w*T12))); /* parallel comp */ + p2 = T1+w*(T4+w*(T7+w*(T10+w*T13))); + p3 = T2+w*(T5+w*(T8+w*(T11+w*T14))); + p = z*p1-(TT-w*(p2+y*p3)); + r += TF + p; + } + 2 => { + p1 = y*(U0+y*(U1+y*(U2+y*(U3+y*(U4+y*U5))))); + p2 = 1.0+y*(V1+y*(V2+y*(V3+y*(V4+y*V5)))); + r += -0.5*y + p1/p2; + } + #[cfg(feature = "checked")] + _ => unreachable!(), + #[cfg(not(feature = "checked"))] + _ => {} + } + } else if ix < 0x40200000 { /* x < 8.0 */ + i = x as isize; + 
y = x - (i as f64); + p = y*(S0+y*(S1+y*(S2+y*(S3+y*(S4+y*(S5+y*S6)))))); + q = 1.0+y*(R1+y*(R2+y*(R3+y*(R4+y*(R5+y*R6))))); + r = 0.5*y+p/q; + z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ + // TODO: In C, this was implemented using switch jumps with fallthrough. + // Does this implementation have performance problems? + if i >= 7 { z *= y + 6.0; } + if i >= 6 { z *= y + 5.0; } + if i >= 5 { z *= y + 4.0; } + if i >= 4 { z *= y + 3.0; } + if i >= 3 { + z *= y + 2.0; + r += log(z); + } + } else if ix < 0x43900000 { /* 8.0 <= x < 2**58 */ + t = log(x); + z = 1.0/x; + y = z*z; + w = W0+z*(W1+y*(W2+y*(W3+y*(W4+y*(W5+y*W6))))); + r = (x-0.5)*(t-1.0)+w; + } else { /* 2**58 <= x <= inf */ + r = x*(log(x)-1.0); + } + if sign { + r = nadj - r; + } + return (r, signgam); +} diff --git a/libm/src/math/lgammaf.rs b/libm/src/math/lgammaf.rs new file mode 100644 index 000000000..60effa316 --- /dev/null +++ b/libm/src/math/lgammaf.rs @@ -0,0 +1,244 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_lgammaf_r.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + +use super::{floorf, k_cosf, k_sinf, logf}; + +const PI: f32 = 3.1415927410e+00; /* 0x40490fdb */ +const A0: f32 = 7.7215664089e-02; /* 0x3d9e233f */ +const A1: f32 = 3.2246702909e-01; /* 0x3ea51a66 */ +const A2: f32 = 6.7352302372e-02; /* 0x3d89f001 */ +const A3: f32 = 2.0580807701e-02; /* 0x3ca89915 */ +const A4: f32 = 7.3855509982e-03; /* 0x3bf2027e */ +const A5: f32 = 2.8905137442e-03; /* 0x3b3d6ec6 */ +const A6: f32 = 1.1927076848e-03; /* 0x3a9c54a1 */ +const A7: f32 = 5.1006977446e-04; /* 0x3a05b634 */ +const A8: f32 = 2.2086278477e-04; /* 0x39679767 */ +const A9: f32 = 1.0801156895e-04; /* 0x38e28445 */ +const A10: f32 = 2.5214456400e-05; /* 0x37d383a2 */ +const A11: f32 = 4.4864096708e-05; /* 0x383c2c75 */ +const TC: f32 = 1.4616321325e+00; /* 0x3fbb16c3 */ +const TF: f32 = -1.2148628384e-01; /* 0xbdf8cdcd */ +/* TT = -(tail of TF) */ +const TT: f32 = 6.6971006518e-09; /* 0x31e61c52 */ +const T0: f32 = 4.8383611441e-01; /* 0x3ef7b95e */ +const T1: f32 = -1.4758771658e-01; /* 0xbe17213c */ +const T2: f32 = 6.4624942839e-02; /* 0x3d845a15 */ +const T3: f32 = -3.2788541168e-02; /* 0xbd064d47 */ +const T4: f32 = 1.7970675603e-02; /* 0x3c93373d */ +const T5: f32 = -1.0314224288e-02; /* 0xbc28fcfe */ +const T6: f32 = 6.1005386524e-03; /* 0x3bc7e707 */ +const T7: f32 = -3.6845202558e-03; /* 0xbb7177fe */ +const T8: f32 = 2.2596477065e-03; /* 0x3b141699 */ +const T9: f32 = -1.4034647029e-03; /* 0xbab7f476 */ +const T10: f32 = 8.8108185446e-04; /* 0x3a66f867 */ +const T11: f32 = -5.3859531181e-04; /* 0xba0d3085 */ +const T12: f32 = 3.1563205994e-04; /* 0x39a57b6b */ +const T13: f32 = -3.1275415677e-04; /* 0xb9a3f927 */ +const T14: f32 = 3.3552918467e-04; /* 0x39afe9f7 */ +const U0: f32 = -7.7215664089e-02; /* 0xbd9e233f */ +const U1: f32 = 6.3282704353e-01; /* 0x3f2200f4 */ +const U2: f32 = 1.4549225569e+00; /* 0x3fba3ae7 */ +const U3: f32 = 9.7771751881e-01; /* 0x3f7a4bb2 */ +const U4: f32 = 
2.2896373272e-01; /* 0x3e6a7578 */ +const U5: f32 = 1.3381091878e-02; /* 0x3c5b3c5e */ +const V1: f32 = 2.4559779167e+00; /* 0x401d2ebe */ +const V2: f32 = 2.1284897327e+00; /* 0x4008392d */ +const V3: f32 = 7.6928514242e-01; /* 0x3f44efdf */ +const V4: f32 = 1.0422264785e-01; /* 0x3dd572af */ +const V5: f32 = 3.2170924824e-03; /* 0x3b52d5db */ +const S0: f32 = -7.7215664089e-02; /* 0xbd9e233f */ +const S1: f32 = 2.1498242021e-01; /* 0x3e5c245a */ +const S2: f32 = 3.2577878237e-01; /* 0x3ea6cc7a */ +const S3: f32 = 1.4635047317e-01; /* 0x3e15dce6 */ +const S4: f32 = 2.6642270386e-02; /* 0x3cda40e4 */ +const S5: f32 = 1.8402845599e-03; /* 0x3af135b4 */ +const S6: f32 = 3.1947532989e-05; /* 0x3805ff67 */ +const R1: f32 = 1.3920053244e+00; /* 0x3fb22d3b */ +const R2: f32 = 7.2193557024e-01; /* 0x3f38d0c5 */ +const R3: f32 = 1.7193385959e-01; /* 0x3e300f6e */ +const R4: f32 = 1.8645919859e-02; /* 0x3c98bf54 */ +const R5: f32 = 7.7794247773e-04; /* 0x3a4beed6 */ +const R6: f32 = 7.3266842264e-06; /* 0x36f5d7bd */ +const W0: f32 = 4.1893854737e-01; /* 0x3ed67f1d */ +const W1: f32 = 8.3333335817e-02; /* 0x3daaaaab */ +const W2: f32 = -2.7777778450e-03; /* 0xbb360b61 */ +const W3: f32 = 7.9365057172e-04; /* 0x3a500cfd */ +const W4: f32 = -5.9518753551e-04; /* 0xba1c065c */ +const W5: f32 = 8.3633989561e-04; /* 0x3a5b3dd2 */ +const W6: f32 = -1.6309292987e-03; /* 0xbad5c4e8 */ + +/* sin(PI*x) assuming x > 2^-100, if sin(PI*x)==0 the sign is arbitrary */ +fn sin_pi(mut x: f32) -> f32 +{ + let mut y: f64; + let mut n: isize; + + /* spurious inexact if odd int */ + x = 2.0*(x*0.5 - floorf(x*0.5)); /* x mod 2.0 */ + + n = (x*4.0) as isize; + n = (n+1)/2; + y = (x as f64) - (n as f64)*0.5; + y *= 3.14159265358979323846; + match n { + 1 => k_cosf(y), + 2 => k_sinf(-y), + 3 => -k_cosf(y), + 0|_ => k_sinf(y), + } +} + +pub fn lgammaf(x: f32) -> f32 { + lgammaf_r(x).0 +} + +pub fn lgammaf_r(mut x: f32) -> (f32, isize) +{ + let u = x.to_bits(); + let mut t: f32; + let y: f32; + let 
mut z: f32; + let nadj: f32; + let p: f32; + let p1: f32; + let p2: f32; + let p3: f32; + let q: f32; + let mut r: f32; + let w: f32; + let ix: u32; + let i: isize; + let sign: bool; + let mut signgam: isize; + + /* purge off +-inf, NaN, +-0, tiny and negative arguments */ + signgam = 1; + sign = (u>>31) != 0; + ix = u & 0x7fffffff; + if ix >= 0x7f800000 { + return (x*x, signgam); + } + if ix < 0x35000000 { /* |x| < 2**-21, return -log(|x|) */ + if sign { + signgam = -1; + x = -x; + } + return (-logf(x), signgam); + } + if sign { + x = -x; + t = sin_pi(x); + if t == 0.0 { /* -integer */ + return (1.0/(x-x), signgam); + } + if t > 0.0 { + signgam = -1; + } else { + t = -t; + } + nadj = logf(PI/(t*x)); + } else { + nadj = 0.0; + } + + /* purge off 1 and 2 */ + if ix == 0x3f800000 || ix == 0x40000000 { + r = 0.0; + } + /* for x < 2.0 */ + else if ix < 0x40000000 { + if ix <= 0x3f666666 { /* lgamma(x) = lgamma(x+1)-log(x) */ + r = -logf(x); + if ix >= 0x3f3b4a20 { + y = 1.0 - x; + i = 0; + } else if ix >= 0x3e6d3308 { + y = x - (TC-1.0); + i = 1; + } else { + y = x; + i = 2; + } + } else { + r = 0.0; + if ix >= 0x3fdda618 { /* [1.7316,2] */ + y = 2.0 - x; + i = 0; + } else if ix >= 0x3F9da620 { /* [1.23,1.73] */ + y = x - TC; + i = 1; + } else { + y = x - 1.0; + i = 2; + } + } + match i { + 0 => { + z = y*y; + p1 = A0+z*(A2+z*(A4+z*(A6+z*(A8+z*A10)))); + p2 = z*(A1+z*(A3+z*(A5+z*(A7+z*(A9+z*A11))))); + p = y*p1+p2; + r += p - 0.5*y; + } + 1 => { + z = y*y; + w = z*y; + p1 = T0+w*(T3+w*(T6+w*(T9 +w*T12))); /* parallel comp */ + p2 = T1+w*(T4+w*(T7+w*(T10+w*T13))); + p3 = T2+w*(T5+w*(T8+w*(T11+w*T14))); + p = z*p1-(TT-w*(p2+y*p3)); + r += TF + p; + } + 2 => { + p1 = y*(U0+y*(U1+y*(U2+y*(U3+y*(U4+y*U5))))); + p2 = 1.0+y*(V1+y*(V2+y*(V3+y*(V4+y*V5)))); + r += -0.5*y + p1/p2; + } + #[cfg(feature = "checked")] + _ => unreachable!(), + #[cfg(not(feature = "checked"))] + _ => {} + } + } else if ix < 0x41000000 { /* x < 8.0 */ + i = x as isize; + y = x - (i as f32); + p = 
y*(S0+y*(S1+y*(S2+y*(S3+y*(S4+y*(S5+y*S6)))))); + q = 1.0+y*(R1+y*(R2+y*(R3+y*(R4+y*(R5+y*R6))))); + r = 0.5*y+p/q; + z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ + // TODO: In C, this was implemented using switch jumps with fallthrough. + // Does this implementation have performance problems? + if i >= 7 { z *= y + 6.0; } + if i >= 6 { z *= y + 5.0; } + if i >= 5 { z *= y + 4.0; } + if i >= 4 { z *= y + 3.0; } + if i >= 3 { + z *= y + 2.0; + r += logf(z); + } + } else if ix < 0x5c800000 { /* 8.0 <= x < 2**58 */ + t = logf(x); + z = 1.0/x; + y = z*z; + w = W0+z*(W1+y*(W2+y*(W3+y*(W4+y*(W5+y*W6))))); + r = (x-0.5)*(t-1.0)+w; + } else { /* 2**58 <= x <= inf */ + r = x*(logf(x)-1.0); + } + if sign { + r = nadj - r; + } + return (r, signgam); +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index da34fb4ce..53842b38e 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -61,21 +61,33 @@ macro_rules! i { // Public modules mod acos; mod acosf; +mod acosh; +mod acoshf; mod asin; mod asinf; +mod asinh; +mod asinhf; mod atan; mod atan2; mod atan2f; mod atanf; +mod atanh; +mod atanhf; mod cbrt; mod cbrtf; mod ceil; mod ceilf; +mod copysign; +mod copysignf; mod cos; mod cosf; mod cosh; mod coshf; +mod erf; +mod erff; mod exp; +mod exp10; +mod exp10f; mod exp2; mod exp2f; mod expf; @@ -91,8 +103,20 @@ mod fma; mod fmaf; mod fmod; mod fmodf; +mod frexp; +mod frexpf; mod hypot; mod hypotf; +mod ilogb; +mod ilogbf; +mod j0; +mod j0f; +mod j1; +mod j1f; +mod jn; +mod jnf; +mod lgamma; +mod lgammaf; mod log; mod log10; mod log10f; @@ -101,13 +125,19 @@ mod log1pf; mod log2; mod log2f; mod logf; +mod modf; +mod modff; mod pow; mod powf; +mod remquo; +mod remquof; mod round; mod roundf; mod scalbn; mod scalbnf; mod sin; +mod sincos; +mod sincosf; mod sinf; mod sinh; mod sinhf; @@ -117,27 +147,43 @@ mod tan; mod tanf; mod tanh; mod tanhf; +mod tgamma; +mod tgammaf; mod trunc; mod truncf; // Use separated imports instead of {}-grouped imports for easier merging. 
pub use self::acos::acos; pub use self::acosf::acosf; +pub use self::acosh::acosh; +pub use self::acoshf::acoshf; pub use self::asin::asin; pub use self::asinf::asinf; +pub use self::asinh::asinh; +pub use self::asinhf::asinhf; pub use self::atan::atan; pub use self::atan2::atan2; pub use self::atan2f::atan2f; pub use self::atanf::atanf; +pub use self::atanh::atanh; +pub use self::atanhf::atanhf; pub use self::cbrt::cbrt; pub use self::cbrtf::cbrtf; pub use self::ceil::ceil; pub use self::ceilf::ceilf; +pub use self::copysign::copysign; +pub use self::copysignf::copysignf; pub use self::cos::cos; pub use self::cosf::cosf; pub use self::cosh::cosh; pub use self::coshf::coshf; +pub use self::erf::erf; +pub use self::erf::erfc; +pub use self::erff::erff; +pub use self::erff::erfcf; pub use self::exp::exp; +pub use self::exp10::exp10; +pub use self::exp10f::exp10f; pub use self::exp2::exp2; pub use self::exp2f::exp2f; pub use self::expf::expf; @@ -153,8 +199,28 @@ pub use self::fma::fma; pub use self::fmaf::fmaf; pub use self::fmod::fmod; pub use self::fmodf::fmodf; +pub use self::frexp::frexp; +pub use self::frexpf::frexpf; pub use self::hypot::hypot; pub use self::hypotf::hypotf; +pub use self::ilogb::ilogb; +pub use self::ilogbf::ilogbf; +pub use self::j0::j0; +pub use self::j0::y0; +pub use self::j0f::j0f; +pub use self::j0f::y0f; +pub use self::j1::j1; +pub use self::j1::y1; +pub use self::j1f::j1f; +pub use self::j1f::y1f; +pub use self::jn::jn; +pub use self::jn::yn; +pub use self::jnf::jnf; +pub use self::jnf::ynf; +pub use self::lgamma::lgamma; +pub use self::lgamma::lgamma_r; +pub use self::lgammaf::lgammaf; +pub use self::lgammaf::lgammaf_r; pub use self::log::log; pub use self::log10::log10; pub use self::log10f::log10f; @@ -163,13 +229,19 @@ pub use self::log1pf::log1pf; pub use self::log2::log2; pub use self::log2f::log2f; pub use self::logf::logf; +pub use self::modf::modf; +pub use self::modff::modff; pub use self::pow::pow; pub use self::powf::powf; 
// ---- libm/src/math/modf.rs ----

/// Splits `x` into its fractional and integral parts.
///
/// Returns `(fractional, integral)`. When one of the two parts is zero it is
/// produced by masking `x` down to its sign bit, so it carries the sign of `x`.
/// For a NaN input both tuple elements are `x` itself.
pub fn modf(x: f64) -> (f64, f64) {
    let rv2: f64;
    let mut u = x.to_bits();
    let mask: u64;
    // Unbiased binary exponent of `x`.
    let e = (((u >> 52) & 0x7ff) as isize) - 0x3ff;

    /* no fractional part */
    if e >= 52 {
        rv2 = x;
        if e == 0x400 && (u << 12) != 0 {
            /* nan */
            return (x, rv2);
        }
        u &= 1 << 63;
        return (f64::from_bits(u), rv2);
    }

    /* no integral part*/
    if e < 0 {
        u &= 1 << 63;
        rv2 = f64::from_bits(u);
        return (x, rv2);
    }

    // Mantissa bits that lie below the binary point for this exponent.
    mask = ((!0) >> 12) >> e;
    if (u & mask) == 0 {
        // Already an integer: the fractional part is a signed zero.
        rv2 = x;
        u &= 1 << 63;
        return (f64::from_bits(u), rv2);
    }
    u &= !mask;
    rv2 = f64::from_bits(u);
    (x - rv2, rv2)
}

// ---- libm/src/math/modff.rs ----

/// Single-precision counterpart of [`modf`]: returns `(fractional, integral)`.
pub fn modff(x: f32) -> (f32, f32) {
    let rv2: f32;
    let mut u: u32 = x.to_bits();
    let mask: u32;
    // Unbiased binary exponent of `x`.
    let e = (((u >> 23) & 0xff) as isize) - 0x7f;

    /* no fractional part */
    if e >= 23 {
        rv2 = x;
        if e == 0x80 && (u << 9) != 0 {
            /* nan */
            return (x, rv2);
        }
        u &= 0x80000000;
        return (f32::from_bits(u), rv2);
    }
    /* no integral part */
    if e < 0 {
        u &= 0x80000000;
        rv2 = f32::from_bits(u);
        return (x, rv2);
    }

    // Mantissa bits that lie below the binary point for this exponent.
    mask = 0x007fffff >> e;
    if (u & mask) == 0 {
        rv2 = x;
        u &= 0x80000000;
        return (f32::from_bits(u), rv2);
    }
    u &= !mask;
    rv2 = f32::from_bits(u);
    (x - rv2, rv2)
}

// ---- libm/src/math/remquo.rs ----

/// Computes the remainder of `x / y` together with the low bits of the quotient.
///
/// Returns `(remainder, quo)`. The quotient is rounded to nearest (ties to
/// even), so the remainder may be negative for positive inputs. `quo` holds the
/// low 31 bits of the quotient magnitude, negated when `x` and `y` have
/// different signs.
///
/// If `y` is zero or NaN, or `x` is infinite or NaN, the result is
/// `((x*y)/(x*y), 0)`. If `x` is zero the result is `(x, 0)`.
pub fn remquo(mut x: f64, mut y: f64) -> (f64, isize) {
    let ux: u64 = x.to_bits();
    let mut uy: u64 = y.to_bits();
    let mut ex = ((ux >> 52) & 0x7ff) as isize;
    let mut ey = ((uy >> 52) & 0x7ff) as isize;
    let sx = (ux >> 63) != 0;
    let sy = (uy >> 63) != 0;
    let mut q: u32 = 0;
    let mut i: u64;
    let mut uxi: u64 = ux;

    if (uy << 1) == 0 || y.is_nan() || ex == 0x7ff {
        return ((x * y) / (x * y), 0);
    }
    if (ux << 1) == 0 {
        return (x, 0);
    }

    /* normalize x and y */
    if ex == 0 {
        i = uxi << 12;
        while (i >> 63) == 0 {
            ex -= 1;
            i <<= 1;
        }
        uxi <<= -ex + 1;
    } else {
        uxi &= (!0) >> 12;
        uxi |= 1 << 52;
    }
    if ey == 0 {
        i = uy << 12;
        while (i >> 63) == 0 {
            ey -= 1;
            i <<= 1;
        }
        uy <<= -ey + 1;
    } else {
        uy &= (!0) >> 12;
        uy |= 1 << 52;
    }

    // When ex + 1 == ey the long division is skipped and only the final
    // rounding decision below is needed.
    if ex + 1 != ey {
        if ex < ey {
            return (x, 0);
        }
        /* x mod y */
        while ex > ey {
            // The difference is interpreted as a two's-complement value whose
            // top bit signals "uxi < uy", so the subtraction must wrap instead
            // of tripping the overflow check.
            i = uxi.wrapping_sub(uy);
            if (i >> 63) == 0 {
                uxi = i;
                q = q.wrapping_add(1);
            }
            uxi <<= 1;
            q <<= 1;
            ex -= 1;
        }
        i = uxi.wrapping_sub(uy);
        if (i >> 63) == 0 {
            uxi = i;
            q = q.wrapping_add(1);
        }
        if uxi == 0 {
            ex = -60;
        } else {
            while (uxi >> 52) == 0 {
                uxi <<= 1;
                ex -= 1;
            }
        }
    }

    /* scale result and decide between |x| and |x|-|y| */
    if ex > 0 {
        uxi -= 1 << 52;
        uxi |= (ex as u64) << 52;
    } else {
        uxi >>= -ex + 1;
    }
    x = f64::from_bits(uxi);
    if sy {
        y = -y;
    }
    if ex == ey || (ex + 1 == ey && (2.0 * x > y || (2.0 * x == y && (q % 2) != 0))) {
        x -= y;
        // Only the low 31 bits are reported, so wrapping here is harmless.
        q = q.wrapping_add(1);
    }
    q &= 0x7fffffff;
    let quo = if sx ^ sy { -(q as isize) } else { q as isize };
    if sx {
        (-x, quo)
    } else {
        (x, quo)
    }
}
// ---- libm/src/math/remquof.rs ----

/// Computes the remainder of `x / y` together with the low bits of the quotient
/// (single precision).
///
/// Returns `(remainder, quo)`. The quotient is rounded to nearest (ties to
/// even), so the remainder may be negative for positive inputs. `quo` holds the
/// low 31 bits of the quotient magnitude, negated when `x` and `y` have
/// different signs.
///
/// If `y` is zero or NaN, or `x` is infinite or NaN, the result is
/// `((x*y)/(x*y), 0)`. If `x` is zero the result is `(x, 0)`.
pub fn remquof(mut x: f32, mut y: f32) -> (f32, isize) {
    let ux: u32 = x.to_bits();
    let mut uy: u32 = y.to_bits();
    let mut ex = ((ux >> 23) & 0xff) as isize;
    let mut ey = ((uy >> 23) & 0xff) as isize;
    let sx = (ux >> 31) != 0;
    let sy = (uy >> 31) != 0;
    let mut q: u32 = 0;
    let mut i: u32;
    let mut uxi: u32 = ux;

    if (uy << 1) == 0 || y.is_nan() || ex == 0xff {
        return ((x * y) / (x * y), 0);
    }
    if (ux << 1) == 0 {
        return (x, 0);
    }

    /* normalize x and y */
    if ex == 0 {
        i = uxi << 9;
        while (i >> 31) == 0 {
            ex -= 1;
            i <<= 1;
        }
        uxi <<= -ex + 1;
    } else {
        uxi &= (!0) >> 9;
        uxi |= 1 << 23;
    }
    if ey == 0 {
        i = uy << 9;
        while (i >> 31) == 0 {
            ey -= 1;
            i <<= 1;
        }
        uy <<= -ey + 1;
    } else {
        uy &= (!0) >> 9;
        uy |= 1 << 23;
    }

    // When ex + 1 == ey the long division is skipped and only the final
    // rounding decision below is needed.
    if ex + 1 != ey {
        if ex < ey {
            return (x, 0);
        }
        /* x mod y */
        while ex > ey {
            // The top bit of the wrapped difference signals "uxi < uy"; a plain
            // `-` would panic in overflow-checked builds.
            i = uxi.wrapping_sub(uy);
            if (i >> 31) == 0 {
                uxi = i;
                q = q.wrapping_add(1);
            }
            uxi <<= 1;
            q <<= 1;
            ex -= 1;
        }
        i = uxi.wrapping_sub(uy);
        if (i >> 31) == 0 {
            uxi = i;
            q = q.wrapping_add(1);
        }
        if uxi == 0 {
            ex = -30;
        } else {
            while (uxi >> 23) == 0 {
                uxi <<= 1;
                ex -= 1;
            }
        }
    }

    /* scale result and decide between |x| and |x|-|y| */
    if ex > 0 {
        uxi -= 1 << 23;
        uxi |= (ex as u32) << 23;
    } else {
        uxi >>= -ex + 1;
    }
    x = f32::from_bits(uxi);
    if sy {
        y = -y;
    }
    if ex == ey || (ex + 1 == ey && (2.0 * x > y || (2.0 * x == y && (q % 2) != 0))) {
        x -= y;
        // Only the low 31 bits are reported, so wrapping here is harmless.
        q = q.wrapping_add(1);
    }
    q &= 0x7fffffff;
    let quo = if sx ^ sy { -(q as isize) } else { q as isize };
    if sx {
        (-x, quo)
    } else {
        (x, quo)
    }
}
+ * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{get_high_word, k_cos, k_sin, rem_pio2}; + +pub fn sincos(x: f64) -> (f64, f64) +{ + let s: f64; + let c: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + + /* |x| ~< pi/4 */ + if ix <= 0x3fe921fb { + /* if |x| < 2**-27 * sqrt(2) */ + if ix < 0x3e46a09e { + /* raise inexact if x!=0 and underflow if subnormal */ + let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120 == 2^120 + if ix < 0x00100000 { + force_eval!(x/x1p120); + } else { + force_eval!(x+x1p120); + } + return (x, 1.0); + } + return (k_sin(x, 0.0, 0), k_cos(x, 0.0)); + } + + /* sincos(Inf or NaN) is NaN */ + if ix >= 0x7ff00000 { + let rv = x - x; + return (rv, rv); + } + + /* argument reduction needed */ + let (n, y0, y1) = rem_pio2(x); + s = k_sin(y0, y1, 1); + c = k_cos(y0, y1); + match n&3 { + 0 => (s, c), + 1 => (c, -s), + 2 => (-s, -c), + 3 => (-c, s), + #[cfg(feature = "checked")] + _ => unreachable!(), + #[cfg(not(feature = "checked"))] + _ => (0.0, 1.0), + } +} diff --git a/libm/src/math/sincosf.rs b/libm/src/math/sincosf.rs new file mode 100644 index 000000000..911421d63 --- /dev/null +++ b/libm/src/math/sincosf.rs @@ -0,0 +1,122 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + +use super::{k_cosf, k_sinf, rem_pio2f}; + +/* Small multiples of pi/2 rounded to double precision. */ +const PI_2: f32 = 0.5 * 3.1415926535897931160E+00; +const S1PIO2: f32 = 1.0*PI_2; /* 0x3FF921FB, 0x54442D18 */ +const S2PIO2: f32 = 2.0*PI_2; /* 0x400921FB, 0x54442D18 */ +const S3PIO2: f32 = 3.0*PI_2; /* 0x4012D97C, 0x7F3321D2 */ +const S4PIO2: f32 = 4.0*PI_2; /* 0x401921FB, 0x54442D18 */ + +pub fn sincosf(x: f32) -> (f32, f32) +{ + let s: f32; + let c: f32; + let mut ix: u32; + let sign: bool; + + ix = x.to_bits(); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + /* |x| ~<= pi/4 */ + if ix <= 0x3f490fda { + /* |x| < 2**-12 */ + if ix < 0x39800000 { + /* raise inexact if x!=0 and underflow if subnormal */ + + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120 == 2^120 + if ix < 0x00100000 { + force_eval!(x/x1p120); + } else { + force_eval!(x+x1p120); + } + return (x, 1.0); + } + return (k_sinf(x as f64), k_cosf(x as f64)); + } + + /* |x| ~<= 5*pi/4 */ + if ix <= 0x407b53d1 { + if ix <= 0x4016cbe3 { /* |x| ~<= 3pi/4 */ + if sign { + s = -k_cosf((x + S1PIO2) as f64); + c = k_sinf((x + S1PIO2) as f64); + } else { + s = k_cosf((S1PIO2 - x) as f64); + c = k_sinf((S1PIO2 - x) as f64); + } + } + /* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */ + else { + if sign { + s = k_sinf((x + S2PIO2) as f64); + c = k_cosf((x + S2PIO2) as f64); + } else { + s = k_sinf((x - S2PIO2) as f64); + c = k_cosf((x - S2PIO2) as f64); + } + } + + return (s, c); + } + + /* |x| ~<= 9*pi/4 */ + if ix <= 0x40e231d5 { + if ix <= 0x40afeddf { /* |x| ~<= 7*pi/4 */ + if sign { + s = k_cosf((x + S3PIO2) as f64); + c = -k_sinf((x + S3PIO2) as f64); + } else { + s = -k_cosf((x - S3PIO2) as f64); + c = k_sinf((x - S3PIO2) as f64); + } + } else { + if sign { + s = k_cosf((x + S4PIO2) as f64); + c = k_sinf((x + S4PIO2) as f64); + } else { + s = k_cosf((x - S4PIO2) as f64); + c = k_sinf((x - S4PIO2) as f64); + } + } + + return (s, 
c); + } + + /* sin(Inf or NaN) is NaN */ + if ix >= 0x7f800000 { + let rv = x - x; + return (rv, rv); + } + + /* general argument reduction needed */ + let (n, y) = rem_pio2f(x); + s = k_sinf(y); + c = k_cosf(y); + match n&3 { + 0 => (s, c), + 1 => (c, -s), + 2 => (-s, -c), + 3 => (-c, s), + #[cfg(feature = "checked")] + _ => unreachable!(), + #[cfg(not(feature = "checked"))] + _ => (0.0, 1.0), + } +} diff --git a/libm/src/math/tgamma.rs b/libm/src/math/tgamma.rs new file mode 100644 index 000000000..598f46f1c --- /dev/null +++ b/libm/src/math/tgamma.rs @@ -0,0 +1,179 @@ +/* +"A Precision Approximation of the Gamma Function" - Cornelius Lanczos (1964) +"Lanczos Implementation of the Gamma Function" - Paul Godfrey (2001) +"An Analysis of the Lanczos Gamma Approximation" - Glendon Ralph Pugh (2004) + +approximation method: + + (x - 0.5) S(x) +Gamma(x) = (x + g - 0.5) * ---------------- + exp(x + g - 0.5) + +with + a1 a2 a3 aN +S(x) ~= [ a0 + ----- + ----- + ----- + ... + ----- ] + x + 1 x + 2 x + 3 x + N + +with a0, a1, a2, a3,.. aN constants which depend on g. 
+ +for x < 0 the following reflection formula is used: + +Gamma(x)*Gamma(-x) = -pi/(x sin(pi x)) + +most ideas and constants are from boost and python +*/ +extern crate core; +use super::{exp, floor, k_cos, k_sin, pow}; + +const PI: f64 = 3.141592653589793238462643383279502884; + +/* sin(pi x) with x > 0x1p-100, if sin(pi*x)==0 the sign is arbitrary */ +fn sinpi(mut x: f64) -> f64 +{ + let mut n: isize; + + /* argument reduction: x = |x| mod 2 */ + /* spurious inexact when x is odd int */ + x = x * 0.5; + x = 2.0 * (x - floor(x)); + + /* reduce x into [-.25,.25] */ + n = (4.0 * x) as isize; + n = (n+1)/2; + x -= (n as f64) * 0.5; + + x *= PI; + match n { + 1 => k_cos(x, 0.0), + 2 => k_sin(-x, 0.0, 0), + 3 => -k_cos(x, 0.0), + 0|_ => k_sin(x, 0.0, 0), + } +} + +const N: usize = 12; +//static const double g = 6.024680040776729583740234375; +const GMHALF: f64 = 5.524680040776729583740234375; +const SNUM: [f64; N+1] = [ + 23531376880.410759688572007674451636754734846804940, + 42919803642.649098768957899047001988850926355848959, + 35711959237.355668049440185451547166705960488635843, + 17921034426.037209699919755754458931112671403265390, + 6039542586.3520280050642916443072979210699388420708, + 1439720407.3117216736632230727949123939715485786772, + 248874557.86205415651146038641322942321632125127801, + 31426415.585400194380614231628318205362874684987640, + 2876370.6289353724412254090516208496135991145378768, + 186056.26539522349504029498971604569928220784236328, + 8071.6720023658162106380029022722506138218516325024, + 210.82427775157934587250973392071336271166969580291, + 2.5066282746310002701649081771338373386264310793408, +]; +const SDEN: [f64; N+1] = [ + 0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, + 45995730.0, 13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0, +]; +/* n! 
for small integer n */ +const FACT: [f64; 23] = [ + 1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0, 3628800.0, + 39916800.0, 479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, + 20922789888000.0, 355687428096000.0, 6402373705728000.0, 121645100408832000.0, + 2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0, +]; + +/* S(x) rational function for positive x */ +fn s(x: f64) -> f64 +{ + let mut num: f64 = 0.0; + let mut den: f64 = 0.0; + + /* to avoid overflow handle large x differently */ + if x < 8.0 { + for i in (0..=N).rev() { + num = num * x + SNUM[i]; + den = den * x + SDEN[i]; + } + } else { + for i in 0..=N { + num = num / x + SNUM[i]; + den = den / x + SDEN[i]; + } + } + return num/den; +} + +pub fn tgamma(mut x: f64) -> f64 +{ + let u: u64 = x.to_bits(); + let absx: f64; + let mut y: f64; + let mut dy: f64; + let mut z: f64; + let mut r: f64; + let ix: u32 = ((u >> 32) as u32) & 0x7fffffff; + let sign: bool = (u>>64) != 0; + + /* special cases */ + if ix >= 0x7ff00000 { + /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */ + return x + core::f64::INFINITY; + } + if ix < ((0x3ff-54)<<20) { + /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */ + return 1.0/x; + } + + /* integer arguments */ + /* raise inexact when non-integer */ + if x == floor(x) { + if sign { + return 0.0/0.0; + } + if x <= FACT.len() as f64 { + return FACT[(x as usize) - 1]; + } + } + + /* x >= 172: tgamma(x)=inf with overflow */ + /* x =< -184: tgamma(x)=+-0 with underflow */ + if ix >= 0x40670000 { /* |x| >= 184 */ + if sign { + let x1p_126 = f64::from_bits(0x3810000000000000); // 0x1p-126 == 2^-126 + force_eval!((x1p_126/x) as f32); + if floor(x) * 0.5 == floor(x * 0.5) { + return 0.0; + } else { + return -0.0; + } + } + let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 == 2^1023 + x *= x1p1023; + return x; + } + + absx = if sign { -x } else { x }; + + /* handle the error of x + g - 0.5 */ + y = absx + 
GMHALF; + if absx > GMHALF { + dy = y - absx; + dy -= GMHALF; + } else { + dy = y - GMHALF; + dy -= absx; + } + + z = absx - 0.5; + r = s(absx) * exp(-y); + if x < 0.0 { + /* reflection formula for negative x */ + /* sinpi(absx) is not 0, integers are already handled */ + r = -PI / (sinpi(absx) * absx * r); + dy = -dy; + z = -z; + } + r += dy * (GMHALF+0.5) * r / y; + z = pow(y, 0.5*z); + y = r * z * z; + return y; +} diff --git a/libm/src/math/tgammaf.rs b/libm/src/math/tgammaf.rs new file mode 100644 index 000000000..b9c799ce7 --- /dev/null +++ b/libm/src/math/tgammaf.rs @@ -0,0 +1,5 @@ +use super::{tgamma}; + +pub fn tgammaf(x: f32) -> f32 { + tgamma(x as f64) as f32 +} From 50ea70d7d066171a7fbf7f3905a33f84135456ea Mon Sep 17 00:00:00 2001 From: Paolo Teti Date: Sat, 1 Sep 2018 20:12:41 +0200 Subject: [PATCH 0123/1459] Rename panic_implementation -> panic_handler panic_implementation has been deprecated/renamed. New name is panic_handler --- Cargo.toml | 2 +- crates/{panic-implementation => panic-handler}/Cargo.toml | 2 +- crates/{panic-implementation => panic-handler}/src/lib.rs | 4 ++-- examples/intrinsics.rs | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) rename crates/{panic-implementation => panic-handler}/Cargo.toml (76%) rename crates/{panic-implementation => panic-handler}/src/lib.rs (68%) diff --git a/Cargo.toml b/Cargo.toml index 311266269..7eae2dc1f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ test = false cc = { optional = true, version = "1.0" } [dev-dependencies] -panic-implementation = { path = 'crates/panic-implementation' } +panic-handler = { path = 'crates/panic-handler' } [features] default = ["compiler-builtins"] diff --git a/crates/panic-implementation/Cargo.toml b/crates/panic-handler/Cargo.toml similarity index 76% rename from crates/panic-implementation/Cargo.toml rename to crates/panic-handler/Cargo.toml index a076cbc3c..1dea613d1 100644 --- a/crates/panic-implementation/Cargo.toml +++ 
b/crates/panic-handler/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "panic-implementation" +name = "panic-handler" version = "0.1.0" authors = ["Alex Crichton "] diff --git a/crates/panic-implementation/src/lib.rs b/crates/panic-handler/src/lib.rs similarity index 68% rename from crates/panic-implementation/src/lib.rs rename to crates/panic-handler/src/lib.rs index 1bb23970f..e97061087 100644 --- a/crates/panic-implementation/src/lib.rs +++ b/crates/panic-handler/src/lib.rs @@ -1,11 +1,11 @@ // Hack of a crate until rust-lang/rust#51647 is fixed -#![feature(no_core, panic_implementation)] +#![feature(no_core, panic_handler)] #![no_core] extern crate core; -#[panic_implementation] +#[panic_handler] fn panic(_: &core::panic::PanicInfo) -> ! { loop {} } diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 7f502bba3..a5be57394 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -11,11 +11,11 @@ #![feature(lang_items)] #![feature(start)] #![feature(allocator_api)] -#![feature(panic_implementation)] +#![feature(panic_handler)] #![cfg_attr(windows, feature(panic_unwind))] #![no_std] -extern crate panic_implementation; +extern crate panic_handler; #[cfg(not(thumb))] #[link(name = "c")] From 0703bfa72524e01e414477657ca9b64794c5c1c3 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 4 Sep 2018 10:19:55 -0700 Subject: [PATCH 0124/1459] Fix some `use_c_shim_if` directives This was an accidental regression introduced in #252 by removing compilation of C files without adjusting the `#[use_c_shim_if]` directives. This restores the compilation of the assembly files and updates the `#[use_c_shim_if]` directives. 
--- build.rs | 2 ++ src/float/conv.rs | 19 ++++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/build.rs b/build.rs index fba409112..cdcbfe24c 100644 --- a/build.rs +++ b/build.rs @@ -267,8 +267,10 @@ mod c { if target_arch == "x86_64" { sources.extend( &[ + "x86_64/floatdisf.c", "x86_64/floatdixf.c", "x86_64/floatundidf.S", + "x86_64/floatundisf.S", "x86_64/floatundixf.S", ], ); diff --git a/src/float/conv.rs b/src/float/conv.rs index 53844c17b..3171e4509 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -80,7 +80,10 @@ intrinsics! { int_to_float!(i, i32, f64) } - #[use_c_shim_if(all(target_arch = "x86", not(target_env = "msvc")))] + #[use_c_shim_if(any( + all(target_arch = "x86", not(target_env = "msvc")), + all(target_arch = "x86_64", not(windows)), + ))] #[arm_aeabi_alias = __aeabi_l2f] pub extern "C" fn __floatdisf(i: i64) -> f32 { // On x86_64 LLVM will use native instructions for this conversion, we @@ -124,17 +127,19 @@ intrinsics! { int_to_float!(i, u32, f64) } - #[use_c_shim_if(all(not(target_env = "msvc"), - any(target_arch = "x86", - all(not(windows), target_arch = "x86_64"))))] + #[use_c_shim_if(any( + all(target_arch = "x86", not(target_env = "msvc")), + all(target_arch = "x86_64", not(windows)), + ))] #[arm_aeabi_alias = __aeabi_ul2f] pub extern "C" fn __floatundisf(i: u64) -> f32 { int_to_float!(i, u64, f32) } - #[use_c_shim_if(all(not(target_env = "msvc"), - any(target_arch = "x86", - all(not(windows), target_arch = "x86_64"))))] + #[use_c_shim_if(any( + all(target_arch = "x86", not(target_env = "msvc")), + all(target_arch = "x86_64", not(windows)), + ))] #[arm_aeabi_alias = __aeabi_ul2d] pub extern "C" fn __floatundidf(i: u64) -> f64 { int_to_float!(i, u64, f64) From 78f544f57d33f662699c606326465ab56e6478a1 Mon Sep 17 00:00:00 2001 From: Jordan Rhee Date: Thu, 5 Jul 2018 11:20:34 -0700 Subject: [PATCH 0125/1459] Support windows/arm target --- build.rs | 4 ++-- src/arm.rs | 2 +- src/int/sdiv.rs | 5 +++-- 
src/int/udiv.rs | 2 ++ 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/build.rs b/build.rs index fba409112..111a38b5e 100644 --- a/build.rs +++ b/build.rs @@ -297,7 +297,7 @@ mod c { } } - if target_arch == "arm" && target_os != "ios" { + if target_arch == "arm" && target_os != "ios" && target_env != "msvc" { sources.extend( &[ "arm/aeabi_div0.c", @@ -348,7 +348,7 @@ mod c { } } - if llvm_target[0] == "armv7" { + if llvm_target[0] == "armv7" && target_env != "msvc" { sources.extend( &[ "arm/sync_fetch_and_add_4.S", diff --git a/src/arm.rs b/src/arm.rs index dbd6f87ca..9e43aec7d 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -4,7 +4,7 @@ use core::intrinsics; // calling convention which can't be implemented using a normal Rust function. // NOTE The only difference between the iOS and non-iOS versions of those functions is that the iOS // versions use 3 leading underscores in the names of called functions instead of 2. -#[cfg(not(target_os = "ios"))] +#[cfg(not(any(target_os = "ios", target_env = "msvc")))] #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe fn __aeabi_uidivmod() { diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index 2de73b0ea..89bb51a47 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -73,7 +73,8 @@ intrinsics! { } #[use_c_shim_if(all(target_arch = "arm", - not(target_os = "ios")), + not(target_os = "ios"), + not(target_env = "msvc")), not(thumbv6m))] pub extern "C" fn __modsi3(a: i32, b: i32) -> i32 { a.mod_(b) @@ -89,7 +90,7 @@ intrinsics! { a.mod_(b) } - #[use_c_shim_if(all(target_arch = "arm", + #[use_c_shim_if(all(target_arch = "arm", not(target_env = "msvc"), not(target_os = "ios"), not(thumbv6m)))] pub extern "C" fn __divmodsi4(a: i32, b: i32, rem: &mut i32) -> i32 { a.divmod(b, rem, |a, b| __divsi3(a, b)) diff --git a/src/int/udiv.rs b/src/int/udiv.rs index 4382460e7..a2572227f 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -211,6 +211,7 @@ intrinsics! 
{ #[use_c_shim_if(all(target_arch = "arm", not(target_os = "ios"), + not(target_env = "msvc"), not(thumbv6m)))] /// Returns `n % d` pub extern "C" fn __umodsi3(n: u32, d: u32) -> u32 { @@ -220,6 +221,7 @@ intrinsics! { #[use_c_shim_if(all(target_arch = "arm", not(target_os = "ios"), + not(target_env = "msvc"), not(thumbv6m)))] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 { From f3bd252ffc59775d056aba2975c85a78292dde84 Mon Sep 17 00:00:00 2001 From: Jordan Rhee Date: Thu, 6 Sep 2018 09:24:52 -0700 Subject: [PATCH 0126/1459] Try undoing unnecessary change --- build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.rs b/build.rs index 111a38b5e..45060a4e6 100644 --- a/build.rs +++ b/build.rs @@ -348,7 +348,7 @@ mod c { } } - if llvm_target[0] == "armv7" && target_env != "msvc" { + if llvm_target[0] == "armv7" /* XXX && target_env != "msvc" */ { sources.extend( &[ "arm/sync_fetch_and_add_4.S", From 0a87c71839239ff9065646ff88ec9ec03fa6049f Mon Sep 17 00:00:00 2001 From: Jordan Rhee Date: Fri, 7 Sep 2018 08:27:38 -0700 Subject: [PATCH 0127/1459] Remove unnecessary check --- build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.rs b/build.rs index 45060a4e6..87b5e0490 100644 --- a/build.rs +++ b/build.rs @@ -348,7 +348,7 @@ mod c { } } - if llvm_target[0] == "armv7" /* XXX && target_env != "msvc" */ { + if llvm_target[0] == "armv7" { sources.extend( &[ "arm/sync_fetch_and_add_4.S", From 59353afa1069634384a9ca242c8e2544ff4f7d7e Mon Sep 17 00:00:00 2001 From: Paolo Teti Date: Sat, 8 Sep 2018 17:41:26 +0200 Subject: [PATCH 0128/1459] Remove superfluous cc flags Latest `cc-rs` already manage all arm/thumb flags, so We can safely remove this piece of code. 
--- build.rs | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/build.rs b/build.rs index fba409112..48d11db7c 100644 --- a/build.rs +++ b/build.rs @@ -141,29 +141,6 @@ mod c { cfg.define("VISIBILITY_HIDDEN", None); } - // NOTE Most of the ARM intrinsics are written in assembly. Tell gcc which arch we are going - // to target to make sure that the assembly implementations really work for the target. If - // the implementation is not valid for the arch, then gcc will error when compiling it. - if llvm_target[0].starts_with("thumb") { - cfg.flag("-mthumb"); - } - - if target_arch_arm && llvm_target.last() == Some(&"eabihf") { - cfg.flag("-mfloat-abi=hard"); - } - - if llvm_target[0] == "thumbv6m" { - cfg.flag("-march=armv6-m"); - } - - if llvm_target[0] == "thumbv7m" { - cfg.flag("-march=armv7-m"); - } - - if llvm_target[0] == "thumbv7em" { - cfg.flag("-march=armv7e-m"); - } - let mut sources = Sources::new(); sources.extend( &[ From 8128d996b1008d77438a2994e88020fc14a31334 Mon Sep 17 00:00:00 2001 From: Paolo Teti Date: Sat, 8 Sep 2018 19:04:09 +0200 Subject: [PATCH 0129/1459] Remove unused variable target_arch_arm Previous commit makes this variable unused --- build.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/build.rs b/build.rs index 48d11db7c..1ace622be 100644 --- a/build.rs +++ b/build.rs @@ -110,9 +110,6 @@ mod c { let target_env = env::var("CARGO_CFG_TARGET_ENV").unwrap(); let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap(); let target_vendor = env::var("CARGO_CFG_TARGET_VENDOR").unwrap(); - let target_arch_arm = - target_arch.contains("arm") || - target_arch.contains("thumb"); let cfg = &mut cc::Build::new(); cfg.warnings(false); From 8bfd4374b6f74b25fe27c5bebae64229fe15c3f8 Mon Sep 17 00:00:00 2001 From: Paolo Teti Date: Thu, 13 Sep 2018 10:10:52 +0200 Subject: [PATCH 0130/1459] `panic_handler` is now stable Fixes the following warning: "warning: the feature `panic_handler` has been stable since 1.30.0 and no longer 
requires an attribute to enable" --- crates/panic-handler/src/lib.rs | 2 +- examples/intrinsics.rs | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/panic-handler/src/lib.rs b/crates/panic-handler/src/lib.rs index e97061087..a75999a4b 100644 --- a/crates/panic-handler/src/lib.rs +++ b/crates/panic-handler/src/lib.rs @@ -1,6 +1,6 @@ // Hack of a crate until rust-lang/rust#51647 is fixed -#![feature(no_core, panic_handler)] +#![feature(no_core)] #![no_core] extern crate core; diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index a5be57394..1c498176c 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -11,7 +11,6 @@ #![feature(lang_items)] #![feature(start)] #![feature(allocator_api)] -#![feature(panic_handler)] #![cfg_attr(windows, feature(panic_unwind))] #![no_std] From fa36133caf2ffbe4365327da18567b363c41e1c5 Mon Sep 17 00:00:00 2001 From: Paolo Teti Date: Thu, 13 Sep 2018 10:31:36 +0200 Subject: [PATCH 0131/1459] Apple-darwin: set DEBUG_LTO_BUILD_DOESNT_WORK Temporary workaround for the well known "undefined references problem for debug-assertions+lto" (#79) --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7b7ed7e6b..9b4c28419 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ matrix: - env: TARGET=arm-unknown-linux-gnueabihf - env: TARGET=armv7-unknown-linux-gnueabihf - env: TARGET=i586-unknown-linux-gnu - - env: TARGET=i686-apple-darwin + - env: TARGET=i686-apple-darwin DEBUG_LTO_BUILD_DOESNT_WORK=1 os: osx - env: TARGET=i686-unknown-linux-gnu - env: TARGET=mips-unknown-linux-gnu @@ -28,7 +28,7 @@ matrix: - env: TARGET=wasm32-unknown-unknown install: rustup target add $TARGET script: cargo build --target $TARGET - - env: TARGET=x86_64-apple-darwin + - env: TARGET=x86_64-apple-darwin DEBUG_LTO_BUILD_DOESNT_WORK=1 os: osx - env: TARGET=x86_64-unknown-linux-gnu allow_failures: From baab4fd89cdd945e46fed31166e5dcad7224ed87 Mon Sep 17 
00:00:00 2001 From: Paolo Teti Date: Mon, 17 Sep 2018 19:37:18 +0200 Subject: [PATCH 0132/1459] Conversion from a wider to a narrower IEEE-754 floating-point type Adds generic conversion from a wider to a narrower IEEE-754 floating-point type. Implement `__truncdfsf2` and `__truncdfsf2vfp` and associated test-cases. --- README.md | 4 +- build.rs | 1 - src/float/mod.rs | 1 + src/float/truncate.rs | 116 ++++++++++++++++++++++++++++++++++++++++++ testcrate/build.rs | 18 +++++++ 5 files changed, 137 insertions(+), 3 deletions(-) create mode 100644 src/float/truncate.rs diff --git a/README.md b/README.md index cae885279..fceaa631d 100644 --- a/README.md +++ b/README.md @@ -129,7 +129,7 @@ features = ["c"] - [x] arm/softfloat-alias.list - [x] arm/subdf3vfp.S - [x] arm/subsf3vfp.S -- [ ] arm/truncdfsf2vfp.S +- [x] arm/truncdfsf2vfp.S - [ ] arm/udivmodsi4.S (generic version is done) - [ ] arm/udivsi3.S (generic version is done) - [ ] arm/umodsi3.S (generic version is done) @@ -186,7 +186,7 @@ features = ["c"] - [x] subdf3.c - [x] subsf3.c - [ ] truncdfhf2.c -- [ ] truncdfsf2.c +- [x] truncdfsf2.c - [ ] truncsfhf2.c - [x] udivdi3.c - [x] udivmoddi4.c diff --git a/build.rs b/build.rs index 6f2cc76a9..917dd963d 100644 --- a/build.rs +++ b/build.rs @@ -174,7 +174,6 @@ mod c { "subvdi3.c", "subvsi3.c", "truncdfhf2.c", - "truncdfsf2.c", "truncsfhf2.c", "ucmpdi2.c", ], diff --git a/src/float/mod.rs b/src/float/mod.rs index 3bb13abbc..2b8ddb941 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -11,6 +11,7 @@ pub mod sub; pub mod mul; pub mod div; pub mod extend; +pub mod truncate; /// Trait for some basic operations on floats pub trait Float: diff --git a/src/float/truncate.rs b/src/float/truncate.rs new file mode 100644 index 000000000..99d4807da --- /dev/null +++ b/src/float/truncate.rs @@ -0,0 +1,116 @@ +use float::Float; +use int::{CastInto, Int}; + +/// Generic conversion from a wider to a narrower IEEE-754 floating-point type +fn truncate(a: F) -> R +where + F::Int: 
CastInto, + u64: CastInto, + F::Int: CastInto, + u32: CastInto, + u32: CastInto, + R::Int: CastInto, + F::Int: CastInto, +{ + let src_one = F::Int::ONE; + let src_bits = F::BITS; + let src_sign_bits = F::SIGNIFICAND_BITS; + let src_exp_bias = F::EXPONENT_BIAS; + let src_min_normal = F::IMPLICIT_BIT; + let src_infinity = F::EXPONENT_MASK; + let src_sign_mask = F::SIGN_MASK as F::Int; + let src_abs_mask = src_sign_mask - src_one; + let src_qnan = F::SIGNIFICAND_MASK; + let src_nan_code = src_qnan - src_one; + + let dst_bits = R::BITS; + let dst_sign_bits = R::SIGNIFICAND_BITS; + let dst_inf_exp = R::EXPONENT_MAX; + let dst_exp_bias = R::EXPONENT_BIAS; + + let dst_zero = R::Int::ZERO; + let dst_one = R::Int::ONE; + let dst_qnan = R::SIGNIFICAND_MASK; + let dst_nan_code = dst_qnan - dst_one; + + let round_mask = (src_one << src_sign_bits - dst_sign_bits) - src_one; + let half = src_one << src_sign_bits - dst_sign_bits - 1; + let underflow_exp = src_exp_bias + 1 - dst_exp_bias; + let overflow_exp = src_exp_bias + dst_inf_exp - dst_exp_bias; + let underflow: F::Int = underflow_exp.cast(); // << src_sign_bits; + let overflow: F::Int = overflow_exp.cast(); //<< src_sign_bits; + + let a_abs = a.repr() & src_abs_mask; + let sign = a.repr() & src_sign_mask; + let mut abs_result: R::Int; + + let src_underflow = underflow << src_sign_bits; + let src_overflow = overflow << src_sign_bits; + + if a_abs.wrapping_sub(src_underflow) < a_abs.wrapping_sub(src_overflow) { + // The exponent of a is within the range of normal numbers + let bias_delta: R::Int = (src_exp_bias - dst_exp_bias).cast(); + abs_result = a_abs.cast(); + abs_result = abs_result >> src_sign_bits - dst_sign_bits; + abs_result = abs_result - bias_delta.wrapping_shl(dst_sign_bits); + let round_bits: F::Int = a_abs & round_mask; + abs_result += if round_bits > half { + dst_one + } else { + abs_result & dst_one + }; + } else if a_abs > src_infinity { + // a is NaN. 
+ // Conjure the result by beginning with infinity, setting the qNaN + // bit and inserting the (truncated) trailing NaN field + let nan_result: R::Int = (a_abs & src_nan_code).cast(); + abs_result = dst_inf_exp.cast(); + abs_result = abs_result.wrapping_shl(dst_sign_bits); + abs_result |= dst_qnan; + abs_result |= (nan_result >> (src_sign_bits - dst_sign_bits)) & dst_nan_code; + } else if a_abs >= src_overflow { + // a overflows to infinity. + abs_result = dst_inf_exp.cast(); + abs_result = abs_result.wrapping_shl(dst_sign_bits); + } else { + // a underflows on conversion to the destination type or is an exact + // zero. The result may be a denormal or zero. Extract the exponent + // to get the shift amount for the denormalization. + let a_exp = a_abs >> src_sign_bits; + let mut shift: u32 = a_exp.cast(); + shift = src_exp_bias - dst_exp_bias - shift + 1; + + let significand = (a.repr() & src_sign_mask) | src_min_normal; + if shift > src_sign_bits { + abs_result = dst_zero; + } else { + let sticky = significand << src_bits - shift; + let mut denormalized_significand: R::Int = significand.cast(); + let sticky_shift: u32 = sticky.cast(); + denormalized_significand = denormalized_significand >> (shift | sticky_shift); + abs_result = denormalized_significand >> src_sign_bits - dst_sign_bits; + let round_bits = denormalized_significand & round_mask.cast(); + if round_bits > half.cast() { + abs_result += dst_one; // Round to nearest + } else if round_bits == half.cast() { + abs_result += abs_result & dst_one; // Ties to even + } + } + } + // Finally apply the sign bit + let s = sign >> src_bits - dst_bits; + R::from_repr(abs_result | s.cast()) +} + +intrinsics! 
{ + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_d2f] + pub extern "C" fn __truncdfsf2(a: f64) -> f32 { + truncate(a) + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __truncdfsf2vfp(a: f64) -> f32 { + a as f32 + } +} diff --git a/testcrate/build.rs b/testcrate/build.rs index d862e0d0f..f02a67cc3 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -348,6 +348,24 @@ fn main() { "builtins::float::extend::__extendsfdf2vfp(a)"); } + // float/truncate.rs + gen(|a: MyF64| { + if a.0.is_nan() { + return None; + } + Some(a.0 as f32) + }, + "builtins::float::truncate::__truncdfsf2(a)"); + if target_arch_arm { + gen(|a: LargeF64| { + if a.0.is_nan() { + return None; + } + Some(a.0 as f32) + }, + "builtins::float::truncate::__truncdfsf2vfp(a)"); + } + // float/conv.rs gen(|a: MyF64| i64(a.0).ok(), "builtins::float::conv::__fixdfdi(a)"); From ea7ce750bada96f1519b69ee9f40824b8dfb74f9 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 11 Oct 2018 14:43:19 -0700 Subject: [PATCH 0133/1459] Optimize intrinsics on wasm32 Profiling a recent demo I was playing with on `wasm32-unknown-unknown` pointed me to the surprising result that 15% of the execution time was in the `sqrt` intrinsic (there's a lot of math here). Upon investigation I remembered that wasm (unconditionally) has a native `f32.sqrt` instruction! I was then subsequently confused that a simple `f.sqrt()` actually codegens to use `f32.sqrt` in Rust, but I later realized that the implementations of intrinsics in this library often use other intrinsics to implement them. That means that the real intrinsic here, `acos`, internally called `sqrt` at some point but wasn't using the optimized implementation! To help fix this situation this PR is intended on providing the infrastructure for optimized implementations (via code generation) to be used for each intrinsic. 
I've gone through the various math instructions that wasm has available and updated each of the intrinsic implementations in this crate to optionally use the LLVM intrinsic versions, which are known to unconditionally compile down to a single instruction (unlike on an arbitrary platform, where we don't know what it will compile down to!). To do this I created a new macro to wrap the invocation of LLVM intrinsics. Invoking LLVM intrinsics is turned off by default (through a new and on-by-default feature, `stable`). When the `stable` feature is disabled, however, the wasm target specifically will enable usage of the LLVM intrinsics. I've additionally added a CI builder which should verify that these continue to build on Travis. After this I intend to update the submodule in the `compiler-builtins` repository so we can pull in the optimized implementation there, and `compiler-builtins` naturally won't set `feature = "stable"` when compiling so all the intrinsics should get compiled in by default. After a further update of the `libcompiler_builtins` submodule in rust-lang/rust we should be good to go! 
--- libm/.travis.yml | 7 +++++++ libm/Cargo.toml | 4 ++++ libm/src/lib.rs | 4 ++++ libm/src/math/ceil.rs | 8 ++++++++ libm/src/math/ceilf.rs | 8 ++++++++ libm/src/math/fabs.rs | 8 ++++++++ libm/src/math/fabsf.rs | 8 ++++++++ libm/src/math/floor.rs | 8 ++++++++ libm/src/math/floorf.rs | 8 ++++++++ libm/src/math/mod.rs | 11 +++++++++++ libm/src/math/sqrt.rs | 12 ++++++++++++ libm/src/math/sqrtf.rs | 12 ++++++++++++ libm/src/math/trunc.rs | 8 ++++++++ libm/src/math/truncf.rs | 8 ++++++++ 14 files changed, 114 insertions(+) diff --git a/libm/.travis.yml b/libm/.travis.yml index 47f2b2f20..758316178 100644 --- a/libm/.travis.yml +++ b/libm/.travis.yml @@ -29,6 +29,13 @@ matrix: - env: TARGET=cargo-fmt rust: beta + - env: TARGET=wasm32-unknown-unknown + rust: nightly + install: rustup target add $TARGET + script: + - cargo build --target $TARGET + - cargo build --no-default-features --target $TARGET + before_install: set -e install: diff --git a/libm/Cargo.toml b/libm/Cargo.toml index cedf8d267..f7a528334 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -24,3 +24,7 @@ members = [ [dev-dependencies] shared = { path = "shared" } + +[features] +default = ['stable'] +stable = [] diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 627c6443e..6be458728 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -11,6 +11,10 @@ #![deny(warnings)] #![no_std] +#![cfg_attr( + all(target_arch = "wasm32", not(feature = "stable")), + feature(core_intrinsics) +)] mod math; diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index 4db2ca840..5dbfa6a2c 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -4,6 +4,14 @@ const TOINT: f64 = 1. / f64::EPSILON; #[inline] pub fn ceil(x: f64) -> f64 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f64.ceil` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! 
{ + #[cfg(target_arch = "wasm32")] { + return unsafe { ::core::intrinsics::ceilf64(x) } + } + } let u: u64 = x.to_bits(); let e: i64 = (u >> 52 & 0x7ff) as i64; let y: f64; diff --git a/libm/src/math/ceilf.rs b/libm/src/math/ceilf.rs index 16bffb300..c8cd4b5aa 100644 --- a/libm/src/math/ceilf.rs +++ b/libm/src/math/ceilf.rs @@ -2,6 +2,14 @@ use core::f32; #[inline] pub fn ceilf(x: f32) -> f32 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f32.ceil` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return unsafe { ::core::intrinsics::ceilf32(x) } + } + } let mut ui = x.to_bits(); let e = (((ui >> 23) & 0xff) - 0x7f) as i32; diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs index 9e081f3f9..7c804653c 100644 --- a/libm/src/math/fabs.rs +++ b/libm/src/math/fabs.rs @@ -2,5 +2,13 @@ use core::u64; #[inline] pub fn fabs(x: f64) -> f64 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f64.abs` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return unsafe { ::core::intrinsics::fabsf64(x) } + } + } f64::from_bits(x.to_bits() & (u64::MAX / 2)) } diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs index 4cc941116..884c20f6c 100644 --- a/libm/src/math/fabsf.rs +++ b/libm/src/math/fabsf.rs @@ -1,4 +1,12 @@ #[inline] pub fn fabsf(x: f32) -> f32 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f32.abs` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! 
{ + #[cfg(target_arch = "wasm32")] { + return unsafe { ::core::intrinsics::fabsf32(x) } + } + } f32::from_bits(x.to_bits() & 0x7fffffff) } diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index 997865d39..b14a48d55 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -4,6 +4,14 @@ const TOINT: f64 = 1. / f64::EPSILON; #[inline] pub fn floor(x: f64) -> f64 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f64.floor` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return unsafe { ::core::intrinsics::floorf64(x) } + } + } let ui = x.to_bits(); let e = ((ui >> 52) & 0x7ff) as i32; diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs index 9c263b518..71b5953df 100644 --- a/libm/src/math/floorf.rs +++ b/libm/src/math/floorf.rs @@ -2,6 +2,14 @@ use core::f32; #[inline] pub fn floorf(x: f32) -> f32 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f32.floor` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return unsafe { ::core::intrinsics::floorf32(x) } + } + } let mut ui = x.to_bits(); let e = (((ui >> 23) & 0xff) - 0x7f) as i32; diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index da34fb4ce..e51b1511d 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -58,6 +58,17 @@ macro_rules! i { }; } +macro_rules! 
llvm_intrinsically_optimized { + (#[cfg($($clause:tt)*)] $e:expr) => { + #[cfg(all(not(feature = "stable"), $($clause)*))] + { + if true { // thwart the dead code lint + $e + } + } + }; +} + // Public modules mod acos; mod acosf; diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index cbadb49bb..b2387a26e 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -82,6 +82,18 @@ const TINY: f64 = 1.0e-300; #[inline] pub fn sqrt(x: f64) -> f64 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f64.sqrt` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return if x < 0.0 { + f64::NAN + } else { + unsafe { ::core::intrinsics::sqrtf64(x) } + } + } + } let mut z: f64; let sign: u32 = 0x80000000; let mut ix0: i32; diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index 49984689e..33cafbcbd 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -17,6 +17,18 @@ const TINY: f32 = 1.0e-30; #[inline] pub fn sqrtf(x: f32) -> f32 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f32.sqrt` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return if x < 0.0 { + ::core::f32::NAN + } else { + unsafe { ::core::intrinsics::sqrtf32(x) } + } + } + } let mut z: f32; let sign: i32 = 0x80000000u32 as i32; let mut ix: i32; diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs index 6bea67cbc..8eecfcf53 100644 --- a/libm/src/math/trunc.rs +++ b/libm/src/math/trunc.rs @@ -2,6 +2,14 @@ use core::f64; #[inline] pub fn trunc(x: f64) -> f64 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f64.trunc` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! 
{ + #[cfg(target_arch = "wasm32")] { + return unsafe { ::core::intrinsics::truncf64(x) } + } + } let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 let mut i: u64 = x.to_bits(); diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs index 9d42620d9..0d74fea9c 100644 --- a/libm/src/math/truncf.rs +++ b/libm/src/math/truncf.rs @@ -2,6 +2,14 @@ use core::f32; #[inline] pub fn truncf(x: f32) -> f32 { + // On wasm32 we know that LLVM's intrinsic will compile to an optimized + // `f32.trunc` native instruction, so we can leverage this for both code size + // and speed. + llvm_intrinsically_optimized! { + #[cfg(target_arch = "wasm32")] { + return unsafe { ::core::intrinsics::truncf32(x) } + } + } let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 let mut i: u32 = x.to_bits(); From a867a9cec36d00586529e917b0eae302985c56e9 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Wed, 24 Oct 2018 00:18:30 +0200 Subject: [PATCH 0134/1459] merge [features] tables --- libm/Cargo.toml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index f7a528334..f28024d04 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -12,6 +12,8 @@ version = "0.1.2" [features] # only used to run our test suite checked = [] +default = ['stable'] +stable = [] [workspace] members = [ @@ -24,7 +26,3 @@ members = [ [dev-dependencies] shared = { path = "shared" } - -[features] -default = ['stable'] -stable = [] From f7fbdc33dfc79584375985b15a183e32e33ce72d Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 24 Oct 2018 01:01:14 -0700 Subject: [PATCH 0135/1459] Update libm submodule --- .gitmodules | 2 +- libm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 7162b2c4d..50ed51e93 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,4 +3,4 @@ url = https://github.com/rust-lang/compiler-rt [submodule "libm"] path = libm - url = https://github.com/japaric/libm + url 
= https://github.com/rust-lang-nursery/libm diff --git a/libm b/libm index 96e36ea26..3559e7037 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 96e36ea2620f9fbbaa46a01694a2fa3ef6c2fb7e +Subproject commit 3559e703795d33e84a91da2a35f2f3baac47e872 From 939cbca6e9d829265d6cf006d3532142a4061cd3 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 30 Oct 2018 10:51:11 -0700 Subject: [PATCH 0136/1459] Revert "Conversion from a wider to a narrower IEEE-754 floating-point type" This reverts commit baab4fd89cdd945e46fed31166e5dcad7224ed87. --- README.md | 4 +- build.rs | 1 + src/float/mod.rs | 1 - src/float/truncate.rs | 116 ------------------------------------------ testcrate/build.rs | 18 ------- 5 files changed, 3 insertions(+), 137 deletions(-) delete mode 100644 src/float/truncate.rs diff --git a/README.md b/README.md index fceaa631d..cae885279 100644 --- a/README.md +++ b/README.md @@ -129,7 +129,7 @@ features = ["c"] - [x] arm/softfloat-alias.list - [x] arm/subdf3vfp.S - [x] arm/subsf3vfp.S -- [x] arm/truncdfsf2vfp.S +- [ ] arm/truncdfsf2vfp.S - [ ] arm/udivmodsi4.S (generic version is done) - [ ] arm/udivsi3.S (generic version is done) - [ ] arm/umodsi3.S (generic version is done) @@ -186,7 +186,7 @@ features = ["c"] - [x] subdf3.c - [x] subsf3.c - [ ] truncdfhf2.c -- [x] truncdfsf2.c +- [ ] truncdfsf2.c - [ ] truncsfhf2.c - [x] udivdi3.c - [x] udivmoddi4.c diff --git a/build.rs b/build.rs index 917dd963d..6f2cc76a9 100644 --- a/build.rs +++ b/build.rs @@ -174,6 +174,7 @@ mod c { "subvdi3.c", "subvsi3.c", "truncdfhf2.c", + "truncdfsf2.c", "truncsfhf2.c", "ucmpdi2.c", ], diff --git a/src/float/mod.rs b/src/float/mod.rs index 2b8ddb941..3bb13abbc 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -11,7 +11,6 @@ pub mod sub; pub mod mul; pub mod div; pub mod extend; -pub mod truncate; /// Trait for some basic operations on floats pub trait Float: diff --git a/src/float/truncate.rs b/src/float/truncate.rs deleted file mode 100644 index 
99d4807da..000000000 --- a/src/float/truncate.rs +++ /dev/null @@ -1,116 +0,0 @@ -use float::Float; -use int::{CastInto, Int}; - -/// Generic conversion from a wider to a narrower IEEE-754 floating-point type -fn truncate(a: F) -> R -where - F::Int: CastInto, - u64: CastInto, - F::Int: CastInto, - u32: CastInto, - u32: CastInto, - R::Int: CastInto, - F::Int: CastInto, -{ - let src_one = F::Int::ONE; - let src_bits = F::BITS; - let src_sign_bits = F::SIGNIFICAND_BITS; - let src_exp_bias = F::EXPONENT_BIAS; - let src_min_normal = F::IMPLICIT_BIT; - let src_infinity = F::EXPONENT_MASK; - let src_sign_mask = F::SIGN_MASK as F::Int; - let src_abs_mask = src_sign_mask - src_one; - let src_qnan = F::SIGNIFICAND_MASK; - let src_nan_code = src_qnan - src_one; - - let dst_bits = R::BITS; - let dst_sign_bits = R::SIGNIFICAND_BITS; - let dst_inf_exp = R::EXPONENT_MAX; - let dst_exp_bias = R::EXPONENT_BIAS; - - let dst_zero = R::Int::ZERO; - let dst_one = R::Int::ONE; - let dst_qnan = R::SIGNIFICAND_MASK; - let dst_nan_code = dst_qnan - dst_one; - - let round_mask = (src_one << src_sign_bits - dst_sign_bits) - src_one; - let half = src_one << src_sign_bits - dst_sign_bits - 1; - let underflow_exp = src_exp_bias + 1 - dst_exp_bias; - let overflow_exp = src_exp_bias + dst_inf_exp - dst_exp_bias; - let underflow: F::Int = underflow_exp.cast(); // << src_sign_bits; - let overflow: F::Int = overflow_exp.cast(); //<< src_sign_bits; - - let a_abs = a.repr() & src_abs_mask; - let sign = a.repr() & src_sign_mask; - let mut abs_result: R::Int; - - let src_underflow = underflow << src_sign_bits; - let src_overflow = overflow << src_sign_bits; - - if a_abs.wrapping_sub(src_underflow) < a_abs.wrapping_sub(src_overflow) { - // The exponent of a is within the range of normal numbers - let bias_delta: R::Int = (src_exp_bias - dst_exp_bias).cast(); - abs_result = a_abs.cast(); - abs_result = abs_result >> src_sign_bits - dst_sign_bits; - abs_result = abs_result - 
bias_delta.wrapping_shl(dst_sign_bits); - let round_bits: F::Int = a_abs & round_mask; - abs_result += if round_bits > half { - dst_one - } else { - abs_result & dst_one - }; - } else if a_abs > src_infinity { - // a is NaN. - // Conjure the result by beginning with infinity, setting the qNaN - // bit and inserting the (truncated) trailing NaN field - let nan_result: R::Int = (a_abs & src_nan_code).cast(); - abs_result = dst_inf_exp.cast(); - abs_result = abs_result.wrapping_shl(dst_sign_bits); - abs_result |= dst_qnan; - abs_result |= (nan_result >> (src_sign_bits - dst_sign_bits)) & dst_nan_code; - } else if a_abs >= src_overflow { - // a overflows to infinity. - abs_result = dst_inf_exp.cast(); - abs_result = abs_result.wrapping_shl(dst_sign_bits); - } else { - // a underflows on conversion to the destination type or is an exact - // zero. The result may be a denormal or zero. Extract the exponent - // to get the shift amount for the denormalization. - let a_exp = a_abs >> src_sign_bits; - let mut shift: u32 = a_exp.cast(); - shift = src_exp_bias - dst_exp_bias - shift + 1; - - let significand = (a.repr() & src_sign_mask) | src_min_normal; - if shift > src_sign_bits { - abs_result = dst_zero; - } else { - let sticky = significand << src_bits - shift; - let mut denormalized_significand: R::Int = significand.cast(); - let sticky_shift: u32 = sticky.cast(); - denormalized_significand = denormalized_significand >> (shift | sticky_shift); - abs_result = denormalized_significand >> src_sign_bits - dst_sign_bits; - let round_bits = denormalized_significand & round_mask.cast(); - if round_bits > half.cast() { - abs_result += dst_one; // Round to nearest - } else if round_bits == half.cast() { - abs_result += abs_result & dst_one; // Ties to even - } - } - } - // Finally apply the sign bit - let s = sign >> src_bits - dst_bits; - R::from_repr(abs_result | s.cast()) -} - -intrinsics! 
{ - #[aapcs_on_arm] - #[arm_aeabi_alias = __aeabi_d2f] - pub extern "C" fn __truncdfsf2(a: f64) -> f32 { - truncate(a) - } - - #[cfg(target_arch = "arm")] - pub extern "C" fn __truncdfsf2vfp(a: f64) -> f32 { - a as f32 - } -} diff --git a/testcrate/build.rs b/testcrate/build.rs index f02a67cc3..d862e0d0f 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -348,24 +348,6 @@ fn main() { "builtins::float::extend::__extendsfdf2vfp(a)"); } - // float/truncate.rs - gen(|a: MyF64| { - if a.0.is_nan() { - return None; - } - Some(a.0 as f32) - }, - "builtins::float::truncate::__truncdfsf2(a)"); - if target_arch_arm { - gen(|a: LargeF64| { - if a.0.is_nan() { - return None; - } - Some(a.0 as f32) - }, - "builtins::float::truncate::__truncdfsf2vfp(a)"); - } - // float/conv.rs gen(|a: MyF64| i64(a.0).ok(), "builtins::float::conv::__fixdfdi(a)"); From fe74674f6e4be76d47b66f67d529ebf4186f4eb1 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 9 Nov 2018 12:52:21 -0800 Subject: [PATCH 0137/1459] Update for next LLVM 8 version --- compiler-rt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt b/compiler-rt index 7e387f0f9..84c0bd015 160000 --- a/compiler-rt +++ b/compiler-rt @@ -1 +1 @@ -Subproject commit 7e387f0f90b493ae72930c787c381a80055a7ec9 +Subproject commit 84c0bd0158c3ff86052be1b07a3ddc3c4f5ba52a From 14685d238bd0f4dfccd08db5230a13ea11973fef Mon Sep 17 00:00:00 2001 From: Jethro Beekman Date: Wed, 21 Nov 2018 14:07:03 +0530 Subject: [PATCH 0138/1459] Add f32 versions of WebAssembly math functions --- src/math.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/math.rs b/src/math.rs index 6eddc3d78..9db7a0aa3 100644 --- a/src/math.rs +++ b/src/math.rs @@ -51,6 +51,19 @@ no_mangle! 
{ fn fmodf(x: f32, y: f32) -> f32; fn fma(x: f64, y: f64, z: f64) -> f64; fn fmaf(x: f32, y: f32, z: f32) -> f32; + fn acosf(n: f32) -> f32; + fn asinf(n: f32) -> f32; + fn atan2f(a: f32, b: f32) -> f32; + fn atanf(n: f32) -> f32; + fn cbrtf(n: f32) -> f32; + fn coshf(n: f32) -> f32; + fn expm1f(n: f32) -> f32; + fn fdimf(a: f32, b: f32) -> f32; + fn hypotf(x: f32, y: f32) -> f32; + fn log1pf(n: f32) -> f32; + fn sinhf(n: f32) -> f32; + fn tanf(n: f32) -> f32; + fn tanhf(n: f32) -> f32; } // only for the thumb*-none-eabi* targets From e43c838450371008db60d552a586876855ba3d0d Mon Sep 17 00:00:00 2001 From: Jethro Beekman Date: Wed, 21 Nov 2018 14:07:57 +0530 Subject: [PATCH 0139/1459] Add SGX target --- build.rs | 4 ++-- src/lib.rs | 3 ++- src/math.rs | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/build.rs b/build.rs index 6f2cc76a9..f5ada024f 100644 --- a/build.rs +++ b/build.rs @@ -17,9 +17,9 @@ fn main() { return; } - // Forcibly enable memory intrinsics on wasm32 as we don't have a libc to + // Forcibly enable memory intrinsics on wasm32 & SGX as we don't have a libc to // provide them. - if target.contains("wasm32") { + if target.contains("wasm32") || target.contains("sgx") { println!("cargo:rustc-cfg=feature=\"mem\""); } diff --git a/src/lib.rs b/src/lib.rs index 9f1dd1512..9bb38f39a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -48,7 +48,8 @@ pub mod int; pub mod float; #[cfg(any(all(target_arch = "wasm32", target_os = "unknown"), - all(target_arch = "arm", target_os = "none")))] + all(target_arch = "arm", target_os = "none"), + target_env = "sgx"))] pub mod math; pub mod mem; diff --git a/src/math.rs b/src/math.rs index 9db7a0aa3..c37243504 100644 --- a/src/math.rs +++ b/src/math.rs @@ -15,7 +15,7 @@ macro_rules! no_mangle { } // only for the wasm32-unknown-unknown target -#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +#[cfg(any(all(target_arch = "wasm32", target_os = "unknown"), target_env = "sgx"))] no_mangle! 
{ fn acos(x: f64) -> f64; fn asin(x: f64) -> f64; From 2e8f0b125bb8a00f1d1c128e1ed116ed74c6adfa Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 19 Nov 2018 22:02:13 -0800 Subject: [PATCH 0140/1459] Prepare for publication to crates.io This commit prepares to publish the compiler-builtins crate to crates.io in order for the standard library to directly depend on it from crates.io in rust-lang/rust#56092 --- Cargo.toml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 7eae2dc1f..0bbd22e46 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,11 @@ version = "0.1.0" [lib] test = false +[dependencies] +# For more information on this dependency see rust-lang/rust's +# `src/tools/rustc-std-workspace` folder +core = { version = "1.0.0", optional = true, package = 'rustc-std-workspace-core' } + [build-dependencies] cc = { optional = true, version = "1.0" } @@ -32,6 +37,9 @@ mangled-names = [] # Don't generate lang items for i128 intrisnics and such no-lang-items = [] +# Only used in the compiler's build system +rustc-dep-of-std = ['c', 'compiler-builtins', 'core'] + [[example]] name = "intrinsics" required-features = ["c", "compiler-builtins"] From d74c506aab3efa3ca7a82c23f8a2ce7fe62351a0 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 21 Nov 2018 10:19:31 -0800 Subject: [PATCH 0141/1459] Use panic=abort for tests Try to fix issues with undefined `rust_eh_personality` symbols --- Cargo.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 0bbd22e46..a7f34c007 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,3 +46,9 @@ required-features = ["c", "compiler-builtins"] [workspace] members = ["testcrate"] + +[profile.release] +panic = 'abort' + +[profile.dev] +panic = 'abort' From 2da5d40e849ae30c0693bc543fd3fa3b2ccee99e Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 21 Nov 2018 10:48:58 -0800 Subject: [PATCH 0142/1459] Use `nm` on OSX Looks like it may be fixed now? 
--- ci/run.sh | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/ci/run.sh b/ci/run.sh index 8c85038f9..b77752288 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -64,18 +64,7 @@ case $1 in ;; esac -case "$TRAVIS_OS_NAME" in - osx) - # NOTE OSx's nm doesn't accept the `--defined-only` or provide an equivalent. - # Use GNU nm instead - NM=gnm - brew update - brew install binutils - ;; - *) - NM=nm - ;; -esac +NM=nm if [ -d /target ]; then path=/target/${1}/debug/deps/libcompiler_builtins-*.rlib From 02bcce860bbf247e8c0890e20540eb9915220e3a Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 21 Nov 2018 11:07:07 -0800 Subject: [PATCH 0143/1459] Remove unknown feature --- examples/intrinsics.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 1c498176c..c1a781415 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -11,7 +11,6 @@ #![feature(lang_items)] #![feature(start)] #![feature(allocator_api)] -#![cfg_attr(windows, feature(panic_unwind))] #![no_std] extern crate panic_handler; From 1fa8d083c26cacad08be13decbf61222850fbb93 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 21 Nov 2018 11:09:24 -0800 Subject: [PATCH 0144/1459] Add some crate metadata --- Cargo.toml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index a7f34c007..97cad699c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,15 @@ authors = ["Jorge Aparicio "] name = "compiler_builtins" version = "0.1.0" +license = "MIT/Apache-2.0" +readme = "README.md" +repository = "https://github.com/rust-lang-nursery/compiler-builtins" +homepage = "https://github.com/rust-lang-nursery/compiler-builtins" +documentation = "https://docs.rs/compiler_builtins" +description = """ +Compiler intrinsics used by the Rust compiler. Also available for other targets +if necessary! 
+""" [lib] test = false From 003f1faf4c647eb9ff1e78d74500f3fb48aaa401 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 21 Nov 2018 11:56:04 -0800 Subject: [PATCH 0145/1459] Try to fix Windows build issue --- examples/intrinsics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index c1a781415..c52b4f0d7 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -15,7 +15,7 @@ extern crate panic_handler; -#[cfg(not(thumb))] +#[cfg(all(not(thumb), not(windows)))] #[link(name = "c")] extern {} From b99d14905136dcd4456e272e404bcb66f7d5b65c Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 21 Nov 2018 12:55:06 -0800 Subject: [PATCH 0146/1459] Add metadata for where compiler-rt is located Compiler crates will need to use this! --- Cargo.toml | 1 + build.rs | 3 +++ 2 files changed, 4 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 97cad699c..5b9469b3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ description = """ Compiler intrinsics used by the Rust compiler. Also available for other targets if necessary! 
""" +links = 'compiler-rt' [lib] test = false diff --git a/build.rs b/build.rs index f5ada024f..752bba134 100644 --- a/build.rs +++ b/build.rs @@ -4,6 +4,9 @@ fn main() { println!("cargo:rerun-if-changed=build.rs"); let target = env::var("TARGET").unwrap(); + let cwd = env::current_dir().unwrap(); + + println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display()); // Emscripten's runtime includes all the builtins if target.contains("emscripten") { From 352f93e2a9e3b770405168ea6690b68567e76e5e Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 21 Nov 2018 12:57:14 -0800 Subject: [PATCH 0147/1459] Add a whitelist of included files in packaging --- Cargo.toml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 5b9469b3d..13d0b18e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,16 @@ description = """ Compiler intrinsics used by the Rust compiler. Also available for other targets if necessary! """ +include = [ + '/Cargo.toml', + '/build.rs', + '/src/*', + '/examples/*', + '/LICENSE.txt', + '/README.md', + '/compiler-rt/*', + '/libm/src/math/*', +] links = 'compiler-rt' [lib] From 8b3d127506d3fb3e19b1e00d799200a790890f25 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 21 Nov 2018 12:57:54 -0800 Subject: [PATCH 0148/1459] Bump to 0.1.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 13d0b18e3..cf996ef2f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.0" +version = "0.1.1" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From 10f4f35f9670bb29715a8c1ec01284852d47ed35 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 26 Nov 2018 12:09:10 -0800 Subject: [PATCH 0149/1459] Bump to 0.1.2 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml 
b/Cargo.toml index cf996ef2f..07afd3046 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.1" +version = "0.1.2" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From 3080360beb7b7aaa658fe08569a84dd9d5c451e5 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Thu, 20 Dec 2018 17:47:57 -0700 Subject: [PATCH 0150/1459] start of __clzsi2 --- src/arm.rs | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/arm.rs b/src/arm.rs index 9e43aec7d..111e6974f 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -233,3 +233,61 @@ pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) { pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { __aeabi_memset4(dest, n, 0); } + +#[no_mangle] +#[cfg(any(target_pointer_width = "16", target_pointer_width = "32", target_pointer_width = "64"))] +pub extern "C" fn __clzsi2(mut x: usize) -> usize { + // TODO: const this? 
Requires const if + let mut y: usize; + let mut n: usize = { + #[cfg(target_pointer_width = "64")] + { + 64 + } + #[cfg(target_pointer_width = "32")] + { + 32 + } + #[cfg(target_pointer_width = "16")] + { + 16 + } + }; + #[cfg(target_pointer_width = "64")] + { + y = x >> 32; + if y != 0 { + n -= 32; + x = y; + } + } + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + { + y = x >> 16; + if y != 0 { + n -= 16; + x = y; + } + } + y = x >> 8; + if y != 0 { + n -= 8; + x = y; + } + y = x >> 4; + if y != 0 { + n -= 4; + x = y; + } + y = x >> 2; + if y != 0 { + n -= 2; + x = y; + } + y = x >> 1; + if y != 0 { + n - 2 + } else { + n - x + } +} From a74490a75b7dd857ac2ab095bbc49b1ad9f1aaec Mon Sep 17 00:00:00 2001 From: Lokathor Date: Thu, 20 Dec 2018 17:53:10 -0700 Subject: [PATCH 0151/1459] modify to fit into standard rustfmt output --- src/arm.rs | 92 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 48 insertions(+), 44 deletions(-) diff --git a/src/arm.rs b/src/arm.rs index 111e6974f..009b1ff69 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -235,59 +235,63 @@ pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { } #[no_mangle] -#[cfg(any(target_pointer_width = "16", target_pointer_width = "32", target_pointer_width = "64"))] +#[cfg(any( + target_pointer_width = "16", + target_pointer_width = "32", + target_pointer_width = "64" +))] pub extern "C" fn __clzsi2(mut x: usize) -> usize { - // TODO: const this? Requires const if - let mut y: usize; - let mut n: usize = { + // TODO: const this? 
Requires const if + let mut y: usize; + let mut n: usize = { + #[cfg(target_pointer_width = "64")] + { + 64 + } + #[cfg(target_pointer_width = "32")] + { + 32 + } + #[cfg(target_pointer_width = "16")] + { + 16 + } + }; #[cfg(target_pointer_width = "64")] { - 64 + y = x >> 32; + if y != 0 { + n -= 32; + x = y; + } } - #[cfg(target_pointer_width = "32")] + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] { - 32 + y = x >> 16; + if y != 0 { + n -= 16; + x = y; + } } - #[cfg(target_pointer_width = "16")] - { - 16 + y = x >> 8; + if y != 0 { + n -= 8; + x = y; + } + y = x >> 4; + if y != 0 { + n -= 4; + x = y; } - }; - #[cfg(target_pointer_width = "64")] - { - y = x >> 32; + y = x >> 2; if y != 0 { - n -= 32; - x = y; + n -= 2; + x = y; } - } - #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] - { - y = x >> 16; + y = x >> 1; if y != 0 { - n -= 16; - x = y; + n - 2 + } else { + n - x } - } - y = x >> 8; - if y != 0 { - n -= 8; - x = y; - } - y = x >> 4; - if y != 0 { - n -= 4; - x = y; - } - y = x >> 2; - if y != 0 { - n -= 2; - x = y; - } - y = x >> 1; - if y != 0 { - n - 2 - } else { - n - x - } } From 4e3fc640dfb0f859d7d85a7ddec6714d8b0c05b8 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Fri, 21 Dec 2018 18:02:12 -0700 Subject: [PATCH 0152/1459] Move clzi2 into the int module --- src/arm.rs | 62 -------------------------------------------------- src/int/mod.rs | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 62 deletions(-) diff --git a/src/arm.rs b/src/arm.rs index 009b1ff69..9e43aec7d 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -233,65 +233,3 @@ pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) { pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { __aeabi_memset4(dest, n, 0); } - -#[no_mangle] -#[cfg(any( - target_pointer_width = "16", - target_pointer_width = "32", - target_pointer_width = "64" -))] -pub extern "C" fn __clzsi2(mut x: usize) -> 
usize { - // TODO: const this? Requires const if - let mut y: usize; - let mut n: usize = { - #[cfg(target_pointer_width = "64")] - { - 64 - } - #[cfg(target_pointer_width = "32")] - { - 32 - } - #[cfg(target_pointer_width = "16")] - { - 16 - } - }; - #[cfg(target_pointer_width = "64")] - { - y = x >> 32; - if y != 0 { - n -= 32; - x = y; - } - } - #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] - { - y = x >> 16; - if y != 0 { - n -= 16; - x = y; - } - } - y = x >> 8; - if y != 0 { - n -= 8; - x = y; - } - y = x >> 4; - if y != 0 { - n -= 4; - x = y; - } - y = x >> 2; - if y != 0 { - n -= 2; - x = y; - } - y = x >> 1; - if y != 0 { - n - 2 - } else { - n - x - } -} diff --git a/src/int/mod.rs b/src/int/mod.rs index b645b2145..11fd49bbc 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -300,3 +300,65 @@ macro_rules! impl_wide_int { impl_wide_int!(u32, u64, 32); impl_wide_int!(u64, u128, 64); + +#[no_mangle] +#[cfg(any( + target_pointer_width = "16", + target_pointer_width = "32", + target_pointer_width = "64" +))] +pub extern "C" fn __clzsi2(mut x: usize) -> usize { + // TODO: const this? 
Would require const-if + let mut y: usize; + let mut n: usize = { + #[cfg(target_pointer_width = "64")] + { + 64 + } + #[cfg(target_pointer_width = "32")] + { + 32 + } + #[cfg(target_pointer_width = "16")] + { + 16 + } + }; + #[cfg(target_pointer_width = "64")] + { + y = x >> 32; + if y != 0 { + n -= 32; + x = y; + } + } + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + { + y = x >> 16; + if y != 0 { + n -= 16; + x = y; + } + } + y = x >> 8; + if y != 0 { + n -= 8; + x = y; + } + y = x >> 4; + if y != 0 { + n -= 4; + x = y; + } + y = x >> 2; + if y != 0 { + n -= 2; + x = y; + } + y = x >> 1; + if y != 0 { + n - 2 + } else { + n - x + } +} From e8c09a84f837ad15c06561bda55d5b6c60928ada Mon Sep 17 00:00:00 2001 From: Lokathor Date: Fri, 21 Dec 2018 18:10:45 -0700 Subject: [PATCH 0153/1459] trying a test case for clzsi2 --- testcrate/build.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/testcrate/build.rs b/testcrate/build.rs index d862e0d0f..08c14fda5 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -775,6 +775,12 @@ fn main() { (builtins::int::udiv::__udivmodti4(a, b, Some(&mut r)), r) }"); } + + // count leading zeros + gen(|(a): (usize)| { + Some(a.leading_zeros()) + }, + "builtins::int::__clzsi2(a)"); } macro_rules! gen_float { From c548dc5b344f89227ffaadc2657f90a7544b0200 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Fri, 21 Dec 2018 18:20:59 -0700 Subject: [PATCH 0154/1459] We can't accept usize directly, i guess accept `MyU64`? --- testcrate/build.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/testcrate/build.rs b/testcrate/build.rs index 08c14fda5..0fa2f5631 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -777,10 +777,12 @@ fn main() { } // count leading zeros - gen(|(a): (usize)| { - Some(a.leading_zeros()) + gen(|a: MyU128| { + Some((a as usize).leading_zeros()) }, - "builtins::int::__clzsi2(a)"); + "{ + builtins::int::__clzsi2(a as usize) + }"); } macro_rules! 
gen_float { From 1e8904e4242f685dae583655e2109d32b7b0e325 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Fri, 21 Dec 2018 18:21:47 -0700 Subject: [PATCH 0155/1459] 64, not 128! --- testcrate/build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testcrate/build.rs b/testcrate/build.rs index 0fa2f5631..292d36a77 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -777,7 +777,7 @@ fn main() { } // count leading zeros - gen(|a: MyU128| { + gen(|a: MyU64| { Some((a as usize).leading_zeros()) }, "{ From 2afedce8cb4169dca7b5b37a22d455b4975bd19c Mon Sep 17 00:00:00 2001 From: Lokathor Date: Fri, 21 Dec 2018 18:36:26 -0700 Subject: [PATCH 0156/1459] forgot the little `.0` part --- testcrate/build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testcrate/build.rs b/testcrate/build.rs index 292d36a77..1a5105407 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -778,7 +778,7 @@ fn main() { // count leading zeros gen(|a: MyU64| { - Some((a as usize).leading_zeros()) + Some((a.0 as usize).leading_zeros()) }, "{ builtins::int::__clzsi2(a as usize) From d27966f52c7bf271caa0c2fd792be5d60ac5e956 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Fri, 21 Dec 2018 18:47:03 -0700 Subject: [PATCH 0157/1459] Rust and LLVM don't spec the same output types --- testcrate/build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testcrate/build.rs b/testcrate/build.rs index 1a5105407..4f10adcf0 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -778,7 +778,7 @@ fn main() { // count leading zeros gen(|a: MyU64| { - Some((a.0 as usize).leading_zeros()) + Some((a.0 as usize).leading_zeros() as usize) }, "{ builtins::int::__clzsi2(a as usize) From 828c5c6d5fc50291c4511ca2561ee540840bbafc Mon Sep 17 00:00:00 2001 From: Lokathor Date: Fri, 21 Dec 2018 18:55:01 -0700 Subject: [PATCH 0158/1459] Okay we'll process outputs as all u32 --- testcrate/build.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/testcrate/build.rs b/testcrate/build.rs index 4f10adcf0..c18daca24 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -778,10 +778,10 @@ fn main() { // count leading zeros gen(|a: MyU64| { - Some((a.0 as usize).leading_zeros() as usize) + Some((a.0 as usize).leading_zeros()) }, "{ - builtins::int::__clzsi2(a as usize) + builtins::int::__clzsi2(a as usize) as u32 }"); } From 09a2d437f518aa87b8772e6bb4c18dbfe42b0b30 Mon Sep 17 00:00:00 2001 From: akashfortanix Date: Thu, 27 Dec 2018 19:13:50 +0530 Subject: [PATCH 0159/1459] expose ceil, floor and trunc on sgx-target --- src/math.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/math.rs b/src/math.rs index c37243504..e2b15c6a7 100644 --- a/src/math.rs +++ b/src/math.rs @@ -66,6 +66,16 @@ no_mangle! { fn tanhf(n: f32) -> f32; } +#[cfg(target_env = "sgx")] +no_mangle! { + fn ceil(x: f64) -> f64; + fn ceilf(x: f32) -> f32; + fn floor(x: f64) -> f64; + fn floorf(x: f32) -> f32; + fn trunc(x: f64) -> f64; + fn truncf(x: f32) -> f32; +} + // only for the thumb*-none-eabi* targets #[cfg(all(target_arch = "arm", target_os = "none"))] no_mangle! { From 35d9cc37bb64867870dad6dc569c9014cea4a246 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 2 Jan 2019 10:21:41 -0800 Subject: [PATCH 0160/1459] Bump to 0.1.3 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 07afd3046..32cc2681d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.2" +version = "0.1.3" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From 2e8b85439f194105b86315b96c1e4ff4ae2465e7 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Wed, 2 Jan 2019 18:50:11 -0700 Subject: [PATCH 0161/1459] Move the test to be a standard test. 
--- testcrate/build.rs | 8 -------- testcrate/tests/count_leading_zeros.rs | 25 +++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) create mode 100644 testcrate/tests/count_leading_zeros.rs diff --git a/testcrate/build.rs b/testcrate/build.rs index c18daca24..d862e0d0f 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -775,14 +775,6 @@ fn main() { (builtins::int::udiv::__udivmodti4(a, b, Some(&mut r)), r) }"); } - - // count leading zeros - gen(|a: MyU64| { - Some((a.0 as usize).leading_zeros()) - }, - "{ - builtins::int::__clzsi2(a as usize) as u32 - }"); } macro_rules! gen_float { diff --git a/testcrate/tests/count_leading_zeros.rs b/testcrate/tests/count_leading_zeros.rs new file mode 100644 index 000000000..559650174 --- /dev/null +++ b/testcrate/tests/count_leading_zeros.rs @@ -0,0 +1,25 @@ +#![feature(compiler_builtins_lib)] + +extern crate compiler_builtins; + +use compiler_builtins::int::__clzsi2; + +#[test] +fn __clzsi2_test() { + let mut i: usize = core::usize::MAX; + // Check all values above 0 + while i > 0 { + assert_eq!(__clzsi2(i) as u32, i.leading_zeros()); + i >>= 1; + } + // check 0 also + i = 0; + assert_eq!(__clzsi2(i) as u32, i.leading_zeros()); + // double check for bit patterns that aren't just solid 1s + i = 1; + for _ in 0..63 { + assert_eq!(__clzsi2(i) as u32, i.leading_zeros()); + i <<= 2; + i += 1; + } +} From a9a108548c93774640b5819c3bbb1fce5d95dfd2 Mon Sep 17 00:00:00 2001 From: Yu Ding Date: Thu, 3 Jan 2019 12:53:08 -0800 Subject: [PATCH 0162/1459] Fix SGX target_env collision Signed-off-by: Yu Ding --- build.rs | 2 +- src/lib.rs | 3 ++- src/math.rs | 5 +++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/build.rs b/build.rs index 752bba134..78a4ce2fb 100644 --- a/build.rs +++ b/build.rs @@ -22,7 +22,7 @@ fn main() { // Forcibly enable memory intrinsics on wasm32 & SGX as we don't have a libc to // provide them. 
- if target.contains("wasm32") || target.contains("sgx") { + if target.contains("wasm32") || (target.contains("sgx") && target.contains("fortanix")) { println!("cargo:rustc-cfg=feature=\"mem\""); } diff --git a/src/lib.rs b/src/lib.rs index 9bb38f39a..0acb00698 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,6 +16,7 @@ #![feature(abi_unadjusted)] #![feature(linkage)] #![feature(lang_items)] +#![feature(cfg_target_vendor)] #![allow(unused_features)] #![no_builtins] #![cfg_attr(feature = "compiler-builtins", feature(staged_api))] @@ -49,7 +50,7 @@ pub mod float; #[cfg(any(all(target_arch = "wasm32", target_os = "unknown"), all(target_arch = "arm", target_os = "none"), - target_env = "sgx"))] + all(target_vendor = "fortanix", target_env = "sgx")))] pub mod math; pub mod mem; diff --git a/src/math.rs b/src/math.rs index e2b15c6a7..18def0e86 100644 --- a/src/math.rs +++ b/src/math.rs @@ -15,7 +15,8 @@ macro_rules! no_mangle { } // only for the wasm32-unknown-unknown target -#[cfg(any(all(target_arch = "wasm32", target_os = "unknown"), target_env = "sgx"))] +#[cfg(any(all(target_arch = "wasm32", target_os = "unknown"), + all(target_vendor = "fortanix", target_env = "sgx")))] no_mangle! { fn acos(x: f64) -> f64; fn asin(x: f64) -> f64; @@ -66,7 +67,7 @@ no_mangle! { fn tanhf(n: f32) -> f32; } -#[cfg(target_env = "sgx")] +#[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] no_mangle! 
{ fn ceil(x: f64) -> f64; fn ceilf(x: f32) -> f32; From 0c5dffb5ba791251ae6d2a92d4491d444a5c45b2 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 3 Jan 2019 13:55:02 -0800 Subject: [PATCH 0163/1459] Bump to 0.1.4 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 32cc2681d..7dce4b07b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.3" +version = "0.1.4" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From 9a68e74ebdea558f500be073685d06d5bd6e620f Mon Sep 17 00:00:00 2001 From: Lokathor Date: Fri, 4 Jan 2019 19:17:02 -0700 Subject: [PATCH 0164/1459] Attempt to use `intrinsics!` --- src/int/mod.rs | 97 ++++++++++++++++++++++++++------------------------ 1 file changed, 50 insertions(+), 47 deletions(-) diff --git a/src/int/mod.rs b/src/int/mod.rs index 11fd49bbc..52a4227a0 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -301,64 +301,67 @@ macro_rules! impl_wide_int { impl_wide_int!(u32, u64, 32); impl_wide_int!(u64, u128, 64); -#[no_mangle] -#[cfg(any( - target_pointer_width = "16", - target_pointer_width = "32", - target_pointer_width = "64" -))] -pub extern "C" fn __clzsi2(mut x: usize) -> usize { - // TODO: const this? Would require const-if - let mut y: usize; - let mut n: usize = { +intrinsics! { + #[cfg(any( + target_pointer_width = "16", + target_pointer_width = "32", + target_pointer_width = "64" + ))] + pub extern "C" fn __clzsi2(x: usize) -> usize { + // TODO: const this? 
Would require const-if + // Note(Lokathor): the `intrinsics!` macro can't process mut inputs + let mut x = x; + let mut y: usize; + let mut n: usize = { + #[cfg(target_pointer_width = "64")] + { + 64 + } + #[cfg(target_pointer_width = "32")] + { + 32 + } + #[cfg(target_pointer_width = "16")] + { + 16 + } + }; #[cfg(target_pointer_width = "64")] { - 64 + y = x >> 32; + if y != 0 { + n -= 32; + x = y; + } } - #[cfg(target_pointer_width = "32")] + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] { - 32 + y = x >> 16; + if y != 0 { + n -= 16; + x = y; + } } - #[cfg(target_pointer_width = "16")] - { - 16 + y = x >> 8; + if y != 0 { + n -= 8; + x = y; } - }; - #[cfg(target_pointer_width = "64")] - { - y = x >> 32; + y = x >> 4; if y != 0 { - n -= 32; + n -= 4; x = y; } - } - #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] - { - y = x >> 16; + y = x >> 2; if y != 0 { - n -= 16; + n -= 2; x = y; } - } - y = x >> 8; - if y != 0 { - n -= 8; - x = y; - } - y = x >> 4; - if y != 0 { - n -= 4; - x = y; - } - y = x >> 2; - if y != 0 { - n -= 2; - x = y; - } - y = x >> 1; - if y != 0 { - n - 2 - } else { - n - x + y = x >> 1; + if y != 0 { + n - 2 + } else { + n - x + } } } From 827f9a8a01a449331f2d6d741e3a6f221e4f322c Mon Sep 17 00:00:00 2001 From: Denys Zariaiev Date: Sun, 6 Jan 2019 16:28:46 +0100 Subject: [PATCH 0165/1459] Don't build compiler-rt for NVPTX --- build.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.rs b/build.rs index 752bba134..b09473453 100644 --- a/build.rs +++ b/build.rs @@ -36,8 +36,8 @@ fn main() { // build anything and we rely on the upstream implementation of compiler-rt // functions if !cfg!(feature = "mangled-names") && cfg!(feature = "c") { - // no C compiler for wasm - if !target.contains("wasm32") { + // Don't use C compiler for bitcode-only wasm and nvptx + if !target.contains("wasm32") && !target.contains("nvptx") { #[cfg(feature = "c")] c::compile(&llvm_target); 
println!("cargo:rustc-cfg=use_c"); From af2bc8ead66669bff02f3f544a124c12e7b6e44f Mon Sep 17 00:00:00 2001 From: Lokathor Date: Mon, 7 Jan 2019 11:52:30 -0700 Subject: [PATCH 0166/1459] Version Bump for count leading zeros --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 07afd3046..32cc2681d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.2" +version = "0.1.3" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From fc3368b558d1100291689f229b878aff52ac1d3c Mon Sep 17 00:00:00 2001 From: Lokathor Date: Mon, 7 Jan 2019 12:08:18 -0700 Subject: [PATCH 0167/1459] Perform the correct version bump --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7dce4b07b..de2996b21 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.4" +version = "0.1.5" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From b570ccfd2d700ba61c8347423fa3c99c01a332e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cyryl=20P=C5=82otnicki?= Date: Sun, 20 Jan 2019 09:33:55 +0000 Subject: [PATCH 0168/1459] Fix compilation on new nightly. Compilation on rustc 1.33.0-nightly (c76f3c374 2019-01-18) failed with ``` error: the feature `cfg_target_vendor` has been stable since 1.33.0 and no longer requires an attribute to enable --> src/lib.rs:19:12 | 19 | #![feature(cfg_target_vendor)] | ^^^^^^^^^^^^^^^^^ | ``` Removed the attribute to make it compile. 
--- src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 0acb00698..fddfa67aa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,7 +16,6 @@ #![feature(abi_unadjusted)] #![feature(linkage)] #![feature(lang_items)] -#![feature(cfg_target_vendor)] #![allow(unused_features)] #![no_builtins] #![cfg_attr(feature = "compiler-builtins", feature(staged_api))] From 275b37d66cd6b5bc28f4cf97339de629d3f86e9c Mon Sep 17 00:00:00 2001 From: MikaelUrankar Date: Sat, 2 Feb 2019 19:52:41 +0100 Subject: [PATCH 0169/1459] FreeBSD arm needs clear_cache.c --- build.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/build.rs b/build.rs index 057f5d9d8..9a7c05eb0 100644 --- a/build.rs +++ b/build.rs @@ -311,6 +311,10 @@ mod c { ], ); + if target_os == "freebsd" { + sources.extend(&["clear_cache.c"]); + } + // First of all aeabi_cdcmp and aeabi_cfcmp are never called by LLVM. // Second are little-endian only, so build fail on big-endian targets. // Temporally workaround: exclude these files for big-endian targets. From 803d16ee847652d126261467b9f3488c6c48de45 Mon Sep 17 00:00:00 2001 From: Jordan Rhee Date: Wed, 27 Feb 2019 11:39:49 -0800 Subject: [PATCH 0170/1459] Fix undefined symbol errors on windows/arm Fix undefined symbol linker errors when building rust for windows/arm by excluding unneeded symbols. 
The errors are: = note: lib.def : error LNK2001: unresolved external symbol __aeabi_memclr4 lib.def : error LNK2001: unresolved external symbol __aeabi_memclr8 lib.def : error LNK2001: unresolved external symbol __aeabi_memmove4 lib.def : error LNK2001: unresolved external symbol __aeabi_memmove8 --- src/arm.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/arm.rs b/src/arm.rs index 9e43aec7d..9bfffb74f 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -164,14 +164,14 @@ pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: u ::mem::memmove(dest, src, n); } -#[cfg(not(target_os = "ios"))] +#[cfg(not(any(target_os = "ios", target_env = "msvc")))] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(thumb, linkage = "weak")] pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) { __aeabi_memmove(dest, src, n); } -#[cfg(not(target_os = "ios"))] +#[cfg(not(any(target_os = "ios", target_env = "msvc")))] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(thumb, linkage = "weak")] pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) { @@ -220,14 +220,14 @@ pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) { __aeabi_memset(dest, n, 0); } -#[cfg(not(target_os = "ios"))] +#[cfg(not(any(target_os = "ios", target_env = "msvc")))] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(thumb, linkage = "weak")] pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) { __aeabi_memset4(dest, n, 0); } -#[cfg(not(target_os = "ios"))] +#[cfg(not(any(target_os = "ios", target_env = "msvc")))] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(thumb, linkage = "weak")] pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { From 18cd30009c689df30286c1239806b98d9dcabff0 Mon Sep 17 00:00:00 2001 From: Jordan Rhee Date: Wed, 27 Feb 2019 11:45:14 -0800 Subject: [PATCH 
0171/1459] Bump version to 0.1.6 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index de2996b21..44dabada1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.5" +version = "0.1.6" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From fafaacecd19533729d8912cf672ddd8350ffef8b Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 27 Feb 2019 12:39:34 -0800 Subject: [PATCH 0172/1459] Bump to 0.1.7 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 44dabada1..62900bff6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.6" +version = "0.1.7" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From c8b8087c9536e3779eee7e2ff39198e13177143f Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 13 Mar 2019 08:19:30 -0700 Subject: [PATCH 0173/1459] Don't compile memory intrinsics on wasi --- build.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/build.rs b/build.rs index 9a7c05eb0..5d555d608 100644 --- a/build.rs +++ b/build.rs @@ -22,7 +22,8 @@ fn main() { // Forcibly enable memory intrinsics on wasm32 & SGX as we don't have a libc to // provide them. - if target.contains("wasm32") || (target.contains("sgx") && target.contains("fortanix")) { + if (target.contains("wasm32") && !target.contains("wasi")) || + (target.contains("sgx") && target.contains("fortanix")) { println!("cargo:rustc-cfg=feature=\"mem\""); } @@ -314,7 +315,7 @@ mod c { if target_os == "freebsd" { sources.extend(&["clear_cache.c"]); } - + // First of all aeabi_cdcmp and aeabi_cfcmp are never called by LLVM. 
// Second are little-endian only, so build fail on big-endian targets. // Temporally workaround: exclude these files for big-endian targets. From c924aed0b9ac3a8f0d6342d16c91ec2f44a90732 Mon Sep 17 00:00:00 2001 From: Hugues de Valon Date: Thu, 7 Mar 2019 19:24:15 +0000 Subject: [PATCH 0174/1459] Fix Armv8-M Baseline compilation Armv8-M Baseline, ie thumbv8m.base-none-eabi, is a superset of the Armv6-M architecture profile. As it shares almost the same instruction set, this commit copies the configuration for thumbv6m-none-eabi to enable it. --- build.rs | 11 ++++++----- src/int/sdiv.rs | 6 +++--- src/int/udiv.rs | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/build.rs b/build.rs index 9a7c05eb0..6cef4be39 100644 --- a/build.rs +++ b/build.rs @@ -49,10 +49,11 @@ fn main() { println!("cargo:rustc-cfg=thumb") } - // compiler-rt `cfg`s away some intrinsics for thumbv6m because that target doesn't have full - // THUMBv2 support. We have to cfg our code accordingly. - if llvm_target[0] == "thumbv6m" { - println!("cargo:rustc-cfg=thumbv6m") + // compiler-rt `cfg`s away some intrinsics for thumbv6m and thumbv8m.base because + // these targets do not have full Thumb-2 support but only original Thumb-1. + // We have to cfg our code accordingly. + if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" { + println!("cargo:rustc-cfg=thumb_1") } // Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures. @@ -407,7 +408,7 @@ mod c { } // Remove the assembly implementations that won't compile for the target - if llvm_target[0] == "thumbv6m" { + if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" { sources.remove( &[ "clzdi2", diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index 89bb51a47..a2e8aa96f 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -74,8 +74,8 @@ intrinsics! 
{ #[use_c_shim_if(all(target_arch = "arm", not(target_os = "ios"), - not(target_env = "msvc")), - not(thumbv6m))] + not(target_env = "msvc"), + not(thumb_1)))] pub extern "C" fn __modsi3(a: i32, b: i32) -> i32 { a.mod_(b) } @@ -91,7 +91,7 @@ intrinsics! { } #[use_c_shim_if(all(target_arch = "arm", not(target_env = "msvc"), - not(target_os = "ios"), not(thumbv6m)))] + not(target_os = "ios"), not(thumb_1)))] pub extern "C" fn __divmodsi4(a: i32, b: i32, rem: &mut i32) -> i32 { a.divmod(b, rem, |a, b| __divsi3(a, b)) } diff --git a/src/int/udiv.rs b/src/int/udiv.rs index a2572227f..d873559bd 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -212,7 +212,7 @@ intrinsics! { #[use_c_shim_if(all(target_arch = "arm", not(target_os = "ios"), not(target_env = "msvc"), - not(thumbv6m)))] + not(thumb_1)))] /// Returns `n % d` pub extern "C" fn __umodsi3(n: u32, d: u32) -> u32 { let q = __udivsi3(n, d); @@ -222,7 +222,7 @@ intrinsics! { #[use_c_shim_if(all(target_arch = "arm", not(target_os = "ios"), not(target_env = "msvc"), - not(thumbv6m)))] + not(thumb_1)))] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 { let q = __udivsi3(n, d); From 85101f2a475f647916f63264400763411806bf7e Mon Sep 17 00:00:00 2001 From: Hugues de Valon Date: Thu, 7 Mar 2019 19:30:39 +0000 Subject: [PATCH 0175/1459] Fix compilation for thumbv8m.main-none-eabihf Some files were not assembling for the Armv8-M Mainline architecture profile with FPU extension. Reason being the same as for Armv7-M: the conversion intrinsics including double precision floating point variables do not work with single precision FPUs. Also removes from exclusion files that are assembling without errors for Armv7-M and Armv8-M Mainline. 
--- build.rs | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/build.rs b/build.rs index 6cef4be39..ccd908caa 100644 --- a/build.rs +++ b/build.rs @@ -360,24 +360,36 @@ mod c { } if llvm_target.last().unwrap().ends_with("eabihf") { - if !llvm_target[0].starts_with("thumbv7em") { + if !llvm_target[0].starts_with("thumbv7em") && + !llvm_target[0].starts_with("thumbv8m.main") { + // The FPU option chosen for these architectures in cc-rs, ie: + // -mfpu=fpv4-sp-d16 for thumbv7em + // -mfpu=fpv5-sp-d16 for thumbv8m.main + // do not support double precision floating points conversions so the files + // that include such instructions are not included for these targets. sources.extend( &[ "arm/fixdfsivfp.S", - "arm/fixsfsivfp.S", "arm/fixunsdfsivfp.S", - "arm/fixunssfsivfp.S", "arm/floatsidfvfp.S", - "arm/floatsisfvfp.S", "arm/floatunssidfvfp.S", - "arm/floatunssisfvfp.S", - "arm/restore_vfp_d8_d15_regs.S", - "arm/save_vfp_d8_d15_regs.S", ], ); } - sources.extend(&["arm/negdf2vfp.S", "arm/negsf2vfp.S"]); + sources.extend( + &[ + "arm/fixsfsivfp.S", + "arm/fixunssfsivfp.S", + "arm/floatsisfvfp.S", + "arm/floatunssisfvfp.S", + "arm/floatunssisfvfp.S", + "arm/restore_vfp_d8_d15_regs.S", + "arm/save_vfp_d8_d15_regs.S", + "arm/negdf2vfp.S", + "arm/negsf2vfp.S", + ] + ); } From 5d683bafc36a567c5b320b99b12bfc2cac015d85 Mon Sep 17 00:00:00 2001 From: Hugues de Valon Date: Thu, 7 Mar 2019 19:34:53 +0000 Subject: [PATCH 0176/1459] Remove thumbv6m configuration of intrinsic example It seems that the intrinsics that were generated for the functions in example/intrinsics.rs where different implementations were given for thumb6m-none-eabi target, have now been implemented in Rust so configuration is not needed anymore. 
--- examples/intrinsics.rs | 63 ------------------------------------------ 1 file changed, 63 deletions(-) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index c52b4f0d7..89c2c23db 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -19,9 +19,6 @@ extern crate panic_handler; #[link(name = "c")] extern {} -// NOTE cfg(not(thumbv6m)) means that the operation is not supported on ARMv6-M at all. Not even -// compiler-rt provides a C/assembly implementation. - // Every function in this module maps will be lowered to an intrinsic by LLVM, if the platform // doesn't have native support for the operation used in the function. ARM has a naming convention // convention for its intrinsics that's different from other architectures; that's why some function @@ -39,70 +36,40 @@ mod intrinsics { } // fixdfdi - #[cfg(not(thumbv6m))] pub fn aeabi_d2l(x: f64) -> i64 { x as i64 } - #[cfg(thumbv6m)] - pub fn aeabi_d2l(_: f64) -> i64 { - 0 - } - // fixunsdfsi pub fn aeabi_d2uiz(x: f64) -> u32 { x as u32 } // fixunsdfdi - #[cfg(not(thumbv6m))] pub fn aeabi_d2ulz(x: f64) -> u64 { x as u64 } - #[cfg(thumbv6m)] - pub fn aeabi_d2ulz(_: f64) -> u64 { - 0 - } - // adddf3 pub fn aeabi_dadd(a: f64, b: f64) -> f64 { a + b } // eqdf2 - #[cfg(not(thumbv6m))] pub fn aeabi_dcmpeq(a: f64, b: f64) -> bool { a == b } - #[cfg(thumbv6m)] - pub fn aeabi_dcmpeq(_: f64, _: f64) -> bool { - true - } - // gtdf2 - #[cfg(not(thumbv6m))] pub fn aeabi_dcmpgt(a: f64, b: f64) -> bool { a > b } - #[cfg(thumbv6m)] - pub fn aeabi_dcmpgt(_: f64, _: f64) -> bool { - true - } - // ltdf2 - #[cfg(not(thumbv6m))] pub fn aeabi_dcmplt(a: f64, b: f64) -> bool { a < b } - #[cfg(thumbv6m)] - pub fn aeabi_dcmplt(_: f64, _: f64) -> bool { - true - } - // divdf3 pub fn aeabi_ddiv(a: f64, b: f64) -> f64 { a / b @@ -129,70 +96,40 @@ mod intrinsics { } // fixsfdi - #[cfg(not(thumbv6m))] pub fn aeabi_f2lz(x: f32) -> i64 { x as i64 } - #[cfg(thumbv6m)] - pub fn aeabi_f2lz(_: f32) -> i64 { - 0 - } - // 
fixunssfsi pub fn aeabi_f2uiz(x: f32) -> u32 { x as u32 } // fixunssfdi - #[cfg(not(thumbv6m))] pub fn aeabi_f2ulz(x: f32) -> u64 { x as u64 } - #[cfg(thumbv6m)] - pub fn aeabi_f2ulz(_: f32) -> u64 { - 0 - } - // addsf3 pub fn aeabi_fadd(a: f32, b: f32) -> f32 { a + b } // eqsf2 - #[cfg(not(thumbv6m))] pub fn aeabi_fcmpeq(a: f32, b: f32) -> bool { a == b } - #[cfg(thumbv6m)] - pub fn aeabi_fcmpeq(_: f32, _: f32) -> bool { - true - } - // gtsf2 - #[cfg(not(thumbv6m))] pub fn aeabi_fcmpgt(a: f32, b: f32) -> bool { a > b } - #[cfg(thumbv6m)] - pub fn aeabi_fcmpgt(_: f32, _: f32) -> bool { - true - } - // ltsf2 - #[cfg(not(thumbv6m))] pub fn aeabi_fcmplt(a: f32, b: f32) -> bool { a < b } - #[cfg(thumbv6m)] - pub fn aeabi_fcmplt(_: f32, _: f32) -> bool { - true - } - // divsf3 pub fn aeabi_fdiv(a: f32, b: f32) -> f32 { a / b From a4420e66adef9277675bcea3396ee27afb1611eb Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 21 Mar 2019 07:52:59 -0700 Subject: [PATCH 0177/1459] Bump to 0.1.8 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 62900bff6..d30af4051 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.7" +version = "0.1.8" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From e95ef7a5ef3df5a8376b7bda7e906f3703f1abc1 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 2 Apr 2019 07:46:06 -0700 Subject: [PATCH 0178/1459] Don't compile math symbols on wasm32-unknown-wasi These are already provided by the C sysroot, so no need for us to duplicate them! --- src/math.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/math.rs b/src/math.rs index 18def0e86..dfdd99cdf 100644 --- a/src/math.rs +++ b/src/math.rs @@ -14,9 +14,14 @@ macro_rules! 
no_mangle { } } -// only for the wasm32-unknown-unknown target -#[cfg(any(all(target_arch = "wasm32", target_os = "unknown"), - all(target_vendor = "fortanix", target_env = "sgx")))] +#[cfg(any( + all( + target_arch = "wasm32", + target_os = "unknown", + not(target_env = "wasi") + ), + all(target_vendor = "fortanix", target_env = "sgx") +))] no_mangle! { fn acos(x: f64) -> f64; fn asin(x: f64) -> f64; From ef099c7994b0b125f4e55c74c3a2e5017e658587 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 2 Apr 2019 07:57:02 -0700 Subject: [PATCH 0179/1459] Add sample azure pipelines configuration --- azure-pipelines.yml | 33 +++++++++++++++++++++++++++++++++ ci/azure-install-rust.yml | 25 +++++++++++++++++++++++++ ci/azure-steps.yml | 10 ++++++++++ 3 files changed, 68 insertions(+) create mode 100644 azure-pipelines.yml create mode 100644 ci/azure-install-rust.yml create mode 100644 ci/azure-steps.yml diff --git a/azure-pipelines.yml b/azure-pipelines.yml new file mode 100644 index 000000000..f7a25ae00 --- /dev/null +++ b/azure-pipelines.yml @@ -0,0 +1,33 @@ +trigger: + - master + +jobs: + - job: Linux + pool: + vmImage: ubuntu-16.04 + steps: + - template: ci/azure-steps.yml + strategy: + matrix: + aarch64-unknown-linux-gnu: + TARGET: aarch64-unknown-linux-gnu + + - job: macOS + pool: + vmImage: macos-10.13 + steps: + - template: ci/azure-steps.yml + strategy: + matrix: + x86_64-apple-darwin: + TARGET: x86_64-apple-darwin + + - job: Windows + pool: + vmImage: 'vs2017-win2016' + steps: + - template: ci/azure-steps.yml + strategy: + matrix: + i686-pc-windows-msvc: + TARGET: i686-pc-windows-msvc diff --git a/ci/azure-install-rust.yml b/ci/azure-install-rust.yml new file mode 100644 index 000000000..25e48f991 --- /dev/null +++ b/ci/azure-install-rust.yml @@ -0,0 +1,25 @@ +parameters: + toolchain: 'nightly' + +steps: + - bash: | + curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain $TOOLCHAIN + echo "##vso[task.setvariable 
variable=PATH;]$PATH:$HOME/.cargo/bin" + displayName: Install rust + condition: ne( variables['Agent.OS'], 'Windows_NT' ) + env: + TOOLCHAIN: ${{ parameters.toolchain }} + + - script: | + curl -sSf -o rustup-init.exe https://win.rustup.rs + rustup-init.exe -y --default-toolchain %TOOLCHAIN% + echo "##vso[task.setvariable variable=PATH;]%PATH%;%USERPROFILE%\.cargo\bin" + displayName: Install rust + condition: eq( variables['Agent.OS'], 'Windows_NT' ) + env: + TOOLCHAIN: ${{ parameters.toolchain }} + + - script: | + rustc -Vv + cargo -V + displayName: Query rust and cargo versions diff --git a/ci/azure-steps.yml b/ci/azure-steps.yml new file mode 100644 index 000000000..a6acba480 --- /dev/null +++ b/ci/azure-steps.yml @@ -0,0 +1,10 @@ +steps: + - template: azure-install-rust.yml + + - bash: ./ci/run.sh $TARGET + condition: ne( variables['Agent.OS'], 'Linux' ) + displayName: Run test script + + - bash: ./ci/run-docker.sh $TARGET + condition: eq( variables['Agent.OS'], 'Linux' ) + displayName: Run docker test script From 19a2b4fe07927c6d0708ed08f5f2f20d014d19e2 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 2 Apr 2019 07:58:12 -0700 Subject: [PATCH 0180/1459] Configure Azure Pipelines --- .travis.yml | 63 --------------------------------------- README.md | 5 ++-- appveyor.yml | 45 ---------------------------- azure-pipelines.yml | 3 +- ci/azure-install-rust.yml | 5 ++-- ci/azure-steps.yml | 2 ++ 6 files changed, 9 insertions(+), 114 deletions(-) delete mode 100644 .travis.yml delete mode 100644 appveyor.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 9b4c28419..000000000 --- a/.travis.yml +++ /dev/null @@ -1,63 +0,0 @@ -dist: trusty -language: rust -rust: nightly -services: docker -sudo: required - -matrix: - include: - - env: TARGET=aarch64-unknown-linux-gnu - - env: TARGET=arm-unknown-linux-gnueabi - - env: TARGET=arm-unknown-linux-gnueabihf - - env: TARGET=armv7-unknown-linux-gnueabihf - - env: TARGET=i586-unknown-linux-gnu - - 
env: TARGET=i686-apple-darwin DEBUG_LTO_BUILD_DOESNT_WORK=1 - os: osx - - env: TARGET=i686-unknown-linux-gnu - - env: TARGET=mips-unknown-linux-gnu - - env: TARGET=mips64-unknown-linux-gnuabi64 - - env: TARGET=mips64el-unknown-linux-gnuabi64 - - env: TARGET=mipsel-unknown-linux-gnu - - env: TARGET=powerpc-unknown-linux-gnu - - env: TARGET=powerpc64-unknown-linux-gnu - - env: TARGET=powerpc64le-unknown-linux-gnu - - env: TARGET=thumbv6m-linux-eabi - - env: TARGET=thumbv7em-linux-eabi - - env: TARGET=thumbv7em-linux-eabihf - - env: TARGET=thumbv7m-linux-eabi - - env: TARGET=wasm32-unknown-unknown - install: rustup target add $TARGET - script: cargo build --target $TARGET - - env: TARGET=x86_64-apple-darwin DEBUG_LTO_BUILD_DOESNT_WORK=1 - os: osx - - env: TARGET=x86_64-unknown-linux-gnu - allow_failures: - - env: TARGET=thumbv6m-linux-eabi - - env: TARGET=thumbv7em-linux-eabi - - env: TARGET=thumbv7em-linux-eabihf - - env: TARGET=thumbv7m-linux-eabi - -install: - - case $TARGET in - x86_64-apple-darwin | x86_64-unknown-linux-gnu) ;; - thumbv*eabi*) rustup component add rust-src ;; - *) rustup target add $TARGET;; - esac - -script: - # work around rust-lang/cargo#3340 - - test "$TRAVIS_OS_NAME" = "osx" || - export SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt - - cargo generate-lockfile - - if [[ $TRAVIS_OS_NAME = "linux" ]]; then - sudo apt-get remove -y qemu-user-static && - sudo apt-get install -y qemu-user-static && - sh ci/run-docker.sh $TARGET; - else - sh ci/run.sh $TARGET; - fi - -notifications: - email: - on_success: never - webhooks: https://buildbot.rust-lang.org/homu/travis diff --git a/README.md b/README.md index cae885279..b290cbff8 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,8 @@ # `compiler-builtins` -[![Build status](https://ci.appveyor.com/api/projects/status/eusnjps5ui3d305p?svg=true)](https://ci.appveyor.com/project/rust-lang-libs/compiler-builtins) -[![Build 
Status](https://travis-ci.org/rust-lang-nursery/compiler-builtins.svg?branch=master)](https://travis-ci.org/rust-lang-nursery/compiler-builtins) +[![Build Status](https://dev.azure.com/rust-lang/compiler-builtins/_apis/build/status/compiler-builtins-CI?branchName=master)](https://dev.azure.com/rust-lang/compiler-builtins/_build/latest?definitionId=2&branchName=master) -> [WIP] Porting `compiler-rt` intrinsics to Rust +> Porting `compiler-rt` intrinsics to Rust See [rust-lang/rust#35437][0]. diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index bb78ad36a..000000000 --- a/appveyor.yml +++ /dev/null @@ -1,45 +0,0 @@ -environment: - # It's... a little unclear why the memcpy symbols clash on linux but not on - # other platforms. Would be great to not differ on this though! - INTRINSICS_FAILS_WITH_MEM_FEATURE: 1 - - matrix: - - TARGET: i686-pc-windows-msvc - - TARGET: x86_64-pc-windows-msvc - - # Ensure MinGW works, but we need to download the 32-bit MinGW compiler from a - # custom location. - # - # Note that the MinGW builds have tons of references to - # `rust_eh_unwind_resume` in the debug LTO builds that aren't optimized out, - # so we skip that test for now. Would be great to not skip it! 
- - TARGET: i686-pc-windows-gnu - MINGW_URL: https://s3-us-west-1.amazonaws.com/rust-lang-ci2/rust-ci-mirror - MINGW_ARCHIVE: i686-6.3.0-release-win32-dwarf-rt_v5-rev1.7z - MINGW_DIR: mingw32 - DEBUG_LTO_BUILD_DOESNT_WORK: 1 - - TARGET: x86_64-pc-windows-gnu - DEBUG_LTO_BUILD_DOESNT_WORK: 1 - -install: - - git submodule update --init - - appveyor-retry appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe - - rustup-init.exe --default-host x86_64-pc-windows-msvc --default-toolchain nightly -y - - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin - - if NOT "%TARGET%" == "x86_64-pc-windows-msvc" rustup target add %TARGET% - - # Use the system msys - - set PATH=C:\msys64\mingw64\bin;C:\msys64\usr\bin;%PATH% - - # download a custom compiler otherwise - - if defined MINGW_URL appveyor DownloadFile %MINGW_URL%/%MINGW_ARCHIVE% - - if defined MINGW_URL 7z x -y %MINGW_ARCHIVE% > nul - - if defined MINGW_URL set PATH=C:\Python27;%CD%\%MINGW_DIR%\bin;C:\msys64\usr\bin;%PATH% - - - rustc -Vv - - cargo -V - -build: false - -test_script: - - sh ci/run.sh %TARGET% diff --git a/azure-pipelines.yml b/azure-pipelines.yml index f7a25ae00..b4b7ef81c 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -1,6 +1,7 @@ trigger: - master - +pr: + - master jobs: - job: Linux pool: diff --git a/ci/azure-install-rust.yml b/ci/azure-install-rust.yml index 25e48f991..d74946e3d 100644 --- a/ci/azure-install-rust.yml +++ b/ci/azure-install-rust.yml @@ -3,8 +3,9 @@ parameters: steps: - bash: | + set -e curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain $TOOLCHAIN - echo "##vso[task.setvariable variable=PATH;]$PATH:$HOME/.cargo/bin" + echo "##vso[task.prependpath]$HOME/.cargo/bin" displayName: Install rust condition: ne( variables['Agent.OS'], 'Windows_NT' ) env: @@ -13,7 +14,7 @@ steps: - script: | curl -sSf -o rustup-init.exe https://win.rustup.rs rustup-init.exe -y --default-toolchain %TOOLCHAIN% - echo "##vso[task.setvariable 
variable=PATH;]%PATH%;%USERPROFILE%\.cargo\bin" + echo ##vso[task.prependpath]%USERPROFILE%\.cargo\bin displayName: Install rust condition: eq( variables['Agent.OS'], 'Windows_NT' ) env: diff --git a/ci/azure-steps.yml b/ci/azure-steps.yml index a6acba480..c769941d4 100644 --- a/ci/azure-steps.yml +++ b/ci/azure-steps.yml @@ -1,4 +1,6 @@ steps: + - checkout: self + submodules: true - template: azure-install-rust.yml - bash: ./ci/run.sh $TARGET From 8e2f43e4006f7571f7eea67a91869b412bda942b Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 2 Apr 2019 08:27:38 -0700 Subject: [PATCH 0181/1459] Expand Azure Pipelines configuration --- azure-pipelines.yml | 51 ++++++++++++++++++++++++++++++++++++++++----- ci/azure-steps.yml | 4 ++++ ci/run-docker.sh | 0 3 files changed, 50 insertions(+), 5 deletions(-) mode change 100644 => 100755 ci/run-docker.sh diff --git a/azure-pipelines.yml b/azure-pipelines.yml index b4b7ef81c..c32262b1c 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -1,7 +1,6 @@ trigger: - master -pr: - - master + jobs: - job: Linux pool: @@ -10,8 +9,42 @@ jobs: - template: ci/azure-steps.yml strategy: matrix: - aarch64-unknown-linux-gnu: + aarch64: TARGET: aarch64-unknown-linux-gnu + arm: + TARGET: arm-unknown-linux-gnueabi + armhf: + TARGET: arm-unknown-linux-gnueabihf + i586: + TARGET: i586-unknown-linux-gnu + i686: + TARGET: i686-unknown-linux-gnu + mips: + TARGET: mips-unknown-linux-gnu + mips64: + TARGET: mips64-unknown-linux-gnuabi64 + mips64el: + TARGET: mips64el-unknown-linux-gnuabi64 + mipsel: + TARGET: mipsel-unknown-linux-gnu + powerpc: + TARGET: powerpc-unknown-linux-gnu + powerpc64: + TARGET: powerpc64-unknown-linux-gnu + powerpc64le: + TARGET: powerpc64le-unknown-linux-gnu + thumbv6m: + TARGET: thumbv6m-linux-eabi + thumbv7em: + TARGET: thumbv7em-linux-eabi + thumbv7emhf: + TARGET: thumbv7em-linux-eabihf + thumbv7m: + TARGET: thumbv7m-linux-eabi + wasm32: + TARGET: wasm32-unknown-unknown + x86_64: + TARGET: 
x86_64-unknown-linux-gnu - job: macOS pool: @@ -20,8 +53,10 @@ jobs: - template: ci/azure-steps.yml strategy: matrix: - x86_64-apple-darwin: + x86_64: TARGET: x86_64-apple-darwin + i686: + TARGET: i686-apple-darwin - job: Windows pool: @@ -30,5 +65,11 @@ jobs: - template: ci/azure-steps.yml strategy: matrix: - i686-pc-windows-msvc: + i686-msvc: TARGET: i686-pc-windows-msvc + x86_64-msvc: + TARGET: x86_64-pc-windows-msvc + i686-gnu: + TARGET: i686-pc-windows-gnu + x86_64-gnu: + TARGET: x86_64-pc-windows-gnu diff --git a/ci/azure-steps.yml b/ci/azure-steps.yml index c769941d4..251b2d584 100644 --- a/ci/azure-steps.yml +++ b/ci/azure-steps.yml @@ -1,8 +1,12 @@ steps: - checkout: self submodules: true + - template: azure-install-rust.yml + - bash: rustup target add $TARGET + displayName: Install compilation target + - bash: ./ci/run.sh $TARGET condition: ne( variables['Agent.OS'], 'Linux' ) displayName: Run test script diff --git a/ci/run-docker.sh b/ci/run-docker.sh old mode 100644 new mode 100755 From 87154a3e7d1c243cc7b23d364560cdc0058db4c5 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 2 Apr 2019 08:39:19 -0700 Subject: [PATCH 0182/1459] Upgrade all docker containers to 18.04 --- ci/docker/aarch64-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/arm-unknown-linux-gnueabi/Dockerfile | 2 +- ci/docker/arm-unknown-linux-gnueabihf/Dockerfile | 2 +- ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile | 2 +- ci/docker/i586-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/i686-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/mips-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile | 2 +- ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile | 2 +- ci/docker/mipsel-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/powerpc-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/powerpc64-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/thumbv6m-linux-eabi/Dockerfile | 2 +- 
ci/docker/thumbv7em-linux-eabi/Dockerfile | 2 +- ci/docker/thumbv7em-linux-eabihf/Dockerfile | 2 +- ci/docker/thumbv7m-linux-eabi/Dockerfile | 2 +- ci/docker/x86_64-unknown-linux-gnu/Dockerfile | 2 +- 18 files changed, 18 insertions(+), 18 deletions(-) diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index 3f6a63fcb..9e2559f4a 100644 --- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ diff --git a/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/ci/docker/arm-unknown-linux-gnueabi/Dockerfile index 1c31b00b4..afab874bc 100644 --- a/ci/docker/arm-unknown-linux-gnueabi/Dockerfile +++ b/ci/docker/arm-unknown-linux-gnueabi/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ diff --git a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile index c305b1ba5..3ed3602b0 100644 --- a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile +++ b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ diff --git a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile index 76f367f14..6617af155 100644 --- a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +++ b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ diff --git a/ci/docker/i586-unknown-linux-gnu/Dockerfile b/ci/docker/i586-unknown-linux-gnu/Dockerfile 
index fb3230609..5783e28e1 100644 --- a/ci/docker/i586-unknown-linux-gnu/Dockerfile +++ b/ci/docker/i586-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc-multilib libc6-dev ca-certificates diff --git a/ci/docker/i686-unknown-linux-gnu/Dockerfile b/ci/docker/i686-unknown-linux-gnu/Dockerfile index fb3230609..5783e28e1 100644 --- a/ci/docker/i686-unknown-linux-gnu/Dockerfile +++ b/ci/docker/i686-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc-multilib libc6-dev ca-certificates diff --git a/ci/docker/mips-unknown-linux-gnu/Dockerfile b/ci/docker/mips-unknown-linux-gnu/Dockerfile index 71a9e8032..f47e8f522 100644 --- a/ci/docker/mips-unknown-linux-gnu/Dockerfile +++ b/ci/docker/mips-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile index 22239e46d..8fa77c7bd 100644 --- a/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile +++ b/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates \ diff --git a/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile index 77f1fd58c..c6611d9ac 100644 --- a/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile +++ b/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates \ diff --git a/ci/docker/mipsel-unknown-linux-gnu/Dockerfile b/ci/docker/mipsel-unknown-linux-gnu/Dockerfile index 
98257e768..0bc695624 100644 --- a/ci/docker/mipsel-unknown-linux-gnu/Dockerfile +++ b/ci/docker/mipsel-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc-unknown-linux-gnu/Dockerfile index b6bee385e..2d39fef61 100644 --- a/ci/docker/powerpc-unknown-linux-gnu/Dockerfile +++ b/ci/docker/powerpc-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile index 26dc1dc27..653cd3511 100644 --- a/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile index 3b280c0bd..63ea9af9d 100644 --- a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile +++ b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/thumbv6m-linux-eabi/Dockerfile b/ci/docker/thumbv6m-linux-eabi/Dockerfile index ecf90087e..789bdf4e4 100644 --- a/ci/docker/thumbv6m-linux-eabi/Dockerfile +++ b/ci/docker/thumbv6m-linux-eabi/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates curl gcc gcc-arm-none-eabi libc6-dev libcurl4-openssl-dev libssh2-1 libnewlib-dev qemu-user-static diff --git a/ci/docker/thumbv7em-linux-eabi/Dockerfile b/ci/docker/thumbv7em-linux-eabi/Dockerfile index 029a55d29..c6ce273c8 100644 --- 
a/ci/docker/thumbv7em-linux-eabi/Dockerfile +++ b/ci/docker/thumbv7em-linux-eabi/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates curl gcc gcc-arm-none-eabi libc6-dev libcurl4-openssl-dev libssh2-1 libnewlib-dev qemu-user-static diff --git a/ci/docker/thumbv7em-linux-eabihf/Dockerfile b/ci/docker/thumbv7em-linux-eabihf/Dockerfile index 5bf0c76e5..c7518aaca 100644 --- a/ci/docker/thumbv7em-linux-eabihf/Dockerfile +++ b/ci/docker/thumbv7em-linux-eabihf/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates curl gcc gcc-arm-none-eabi libc6-dev libcurl4-openssl-dev libssh2-1 libnewlib-dev qemu-user-static diff --git a/ci/docker/thumbv7m-linux-eabi/Dockerfile b/ci/docker/thumbv7m-linux-eabi/Dockerfile index 1ffac1f33..c90710941 100644 --- a/ci/docker/thumbv7m-linux-eabi/Dockerfile +++ b/ci/docker/thumbv7m-linux-eabi/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates curl gcc gcc-arm-none-eabi libc6-dev libcurl4-openssl-dev libssh2-1 libnewlib-dev qemu-user-static diff --git a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index efc7b26c6..98000f4eb 100644 --- a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates From aa5ff95d7ec543198e0dc260eeb5715564a72053 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 2 Apr 2019 09:35:47 -0700 Subject: [PATCH 0183/1459] Don't pass `-it` to docker --- ci/run-docker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run-docker.sh b/ci/run-docker.sh index 5608cedc4..6b3066e53 
100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -23,7 +23,7 @@ run() { -v `pwd`:/checkout:ro \ -v `rustc --print sysroot`:/rust:ro \ -w /checkout \ - -it $target \ + $target \ sh -c "HOME=/tmp PATH=\$PATH:/rust/bin ci/run.sh $target" } From 89f2b5a8bde4c60c8b1e72ad7a18cad3c2577bc0 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 2 Apr 2019 09:53:05 -0700 Subject: [PATCH 0184/1459] Generate a lock file before using Docker Can't do it in the readonly filesystem inside! --- ci/azure-steps.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure-steps.yml b/ci/azure-steps.yml index 251b2d584..19972a7c0 100644 --- a/ci/azure-steps.yml +++ b/ci/azure-steps.yml @@ -11,6 +11,6 @@ steps: condition: ne( variables['Agent.OS'], 'Linux' ) displayName: Run test script - - bash: ./ci/run-docker.sh $TARGET + - bash: cargo generate-lockfile && ./ci/run-docker.sh $TARGET condition: eq( variables['Agent.OS'], 'Linux' ) displayName: Run docker test script From c880d1ca06632eb088d2004008a96c27c16c89c1 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 2 Apr 2019 11:44:28 -0700 Subject: [PATCH 0185/1459] Try to handle thumb targets and xargo --- azure-pipelines.yml | 61 +++++++++++++++++++++++------------------- ci/azure-steps.yml | 14 ++++++++-- ci/run-docker.sh | 1 + ci/run.sh | 65 ++++++++++++++++++++------------------------- 4 files changed, 75 insertions(+), 66 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c32262b1c..ca02a5153 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -11,38 +11,43 @@ jobs: matrix: aarch64: TARGET: aarch64-unknown-linux-gnu - arm: - TARGET: arm-unknown-linux-gnueabi - armhf: - TARGET: arm-unknown-linux-gnueabihf - i586: - TARGET: i586-unknown-linux-gnu - i686: - TARGET: i686-unknown-linux-gnu - mips: - TARGET: mips-unknown-linux-gnu - mips64: - TARGET: mips64-unknown-linux-gnuabi64 - mips64el: - TARGET: mips64el-unknown-linux-gnuabi64 - mipsel: - TARGET: 
mipsel-unknown-linux-gnu - powerpc: - TARGET: powerpc-unknown-linux-gnu - powerpc64: - TARGET: powerpc64-unknown-linux-gnu - powerpc64le: - TARGET: powerpc64le-unknown-linux-gnu + # arm: + # TARGET: arm-unknown-linux-gnueabi + # armhf: + # TARGET: arm-unknown-linux-gnueabihf + # i586: + # TARGET: i586-unknown-linux-gnu + # i686: + # TARGET: i686-unknown-linux-gnu + # mips: + # TARGET: mips-unknown-linux-gnu + # mips64: + # TARGET: mips64-unknown-linux-gnuabi64 + # mips64el: + # TARGET: mips64el-unknown-linux-gnuabi64 + # mipsel: + # TARGET: mipsel-unknown-linux-gnu + # powerpc: + # TARGET: powerpc-unknown-linux-gnu + # powerpc64: + # TARGET: powerpc64-unknown-linux-gnu + # powerpc64le: + # TARGET: powerpc64le-unknown-linux-gnu thumbv6m: TARGET: thumbv6m-linux-eabi + XARGO: 1 thumbv7em: TARGET: thumbv7em-linux-eabi + XARGO: 1 thumbv7emhf: TARGET: thumbv7em-linux-eabihf + XARGO: 1 thumbv7m: TARGET: thumbv7m-linux-eabi + XARGO: 1 wasm32: TARGET: wasm32-unknown-unknown + ONLY_BUILD: 1 x86_64: TARGET: x86_64-unknown-linux-gnu @@ -55,8 +60,8 @@ jobs: matrix: x86_64: TARGET: x86_64-apple-darwin - i686: - TARGET: i686-apple-darwin + # i686: + # TARGET: i686-apple-darwin - job: Windows pool: @@ -67,9 +72,9 @@ jobs: matrix: i686-msvc: TARGET: i686-pc-windows-msvc - x86_64-msvc: - TARGET: x86_64-pc-windows-msvc - i686-gnu: - TARGET: i686-pc-windows-gnu + # x86_64-msvc: + # TARGET: x86_64-pc-windows-msvc + # i686-gnu: + # TARGET: i686-pc-windows-gnu x86_64-gnu: TARGET: x86_64-pc-windows-gnu diff --git a/ci/azure-steps.yml b/ci/azure-steps.yml index 19972a7c0..c8a954024 100644 --- a/ci/azure-steps.yml +++ b/ci/azure-steps.yml @@ -4,13 +4,23 @@ steps: - template: azure-install-rust.yml + - script: rustup component add rust-src + displayName: Install Rust sources + condition: eq( variables['XARGO'], '1' ) + - bash: rustup target add $TARGET - displayName: Install compilation target + displayName: Install Rust target + condition: ne( variables['XARGO'], '1' ) - bash: ./ci/run.sh 
$TARGET condition: ne( variables['Agent.OS'], 'Linux' ) displayName: Run test script - - bash: cargo generate-lockfile && ./ci/run-docker.sh $TARGET + - bash: | + if [ "$ONLY_BUILD" = "1" ]; then + cargo build --target $TARGET + else + cargo generate-lockfile && ./ci/run-docker.sh $TARGET + fi condition: eq( variables['Agent.OS'], 'Linux' ) displayName: Run docker test script diff --git a/ci/run-docker.sh b/ci/run-docker.sh index 6b3066e53..5c2e065cc 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -18,6 +18,7 @@ run() { --user $(id -u):$(id -g) \ -e CARGO_HOME=/cargo \ -e CARGO_TARGET_DIR=/target \ + -e XARGO \ -v $HOME/.cargo:/cargo \ -v `pwd`/target:/target \ -v `pwd`:/checkout:ro \ diff --git a/ci/run.sh b/ci/run.sh index b77752288..589553adb 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -3,14 +3,10 @@ set -ex # FIXME(japarix/xargo#186) this shouldn't be necessary export RUST_TARGET_PATH=`pwd` -case $1 in - thumb*) - cargo=xargo - ;; - *) - cargo=cargo - ;; -esac +cargo=cargo +if [ "$XARGO" = "1" ]; then + cargo=xargo +fi INTRINSICS_FEATURES="c" @@ -22,34 +18,31 @@ if [ -z "$INTRINSICS_FAILS_WITH_MEM_FEATURE" ]; then fi # Test our implementation -case $1 in - thumb*) - run="xargo test --manifest-path testcrate/Cargo.toml --target $1" - for t in $(ls testcrate/tests); do - t=${t%.rs} - - RUSTFLAGS="-C debug-assertions=no -C lto" \ - CARGO_INCREMENTAL=0 \ - $run --test $t --no-default-features --features 'mem c' --no-run - qemu-arm-static target/${1}/debug/$t-* - done - - for t in $(ls testcrate/tests); do - t=${t%.rs} - RUSTFLAGS="-C lto" \ - CARGO_INCREMENTAL=0 \ - $run --test $t --no-default-features --features 'mem c' --no-run --release - qemu-arm-static target/${1}/release/$t-* - done - ;; - *) - run="cargo test --manifest-path testcrate/Cargo.toml --target $1" - $run - $run --release - $run --features c - $run --features c --release - ;; -esac +if [ "$XARGO" = "1" ]; then + run="xargo test --manifest-path testcrate/Cargo.toml --target $1" + for t in $(ls 
testcrate/tests); do + t=${t%.rs} + + RUSTFLAGS="-C debug-assertions=no -C lto" \ + CARGO_INCREMENTAL=0 \ + $run --test $t --no-default-features --features 'mem c' --no-run + qemu-arm-static target/${1}/debug/$t-* + done + + for t in $(ls testcrate/tests); do + t=${t%.rs} + RUSTFLAGS="-C lto" \ + CARGO_INCREMENTAL=0 \ + $run --test $t --no-default-features --features 'mem c' --no-run --release + qemu-arm-static target/${1}/release/$t-* + done +else + run="cargo test --manifest-path testcrate/Cargo.toml --target $1" + $run + $run --release + $run --features c + $run --features c --release +fi PREFIX=$(echo $1 | sed -e 's/unknown-//')- case $1 in From ca03cf138a22cb10c8f7e6c1d86754708875d10a Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 2 Apr 2019 12:12:19 -0700 Subject: [PATCH 0186/1459] Try to fix Windows --- azure-pipelines.yml | 130 +++++++++++++++++++------------------- ci/azure-install-rust.yml | 2 +- 2 files changed, 66 insertions(+), 66 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ca02a5153..36732f132 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -2,67 +2,67 @@ trigger: - master jobs: - - job: Linux - pool: - vmImage: ubuntu-16.04 - steps: - - template: ci/azure-steps.yml - strategy: - matrix: - aarch64: - TARGET: aarch64-unknown-linux-gnu - # arm: - # TARGET: arm-unknown-linux-gnueabi - # armhf: - # TARGET: arm-unknown-linux-gnueabihf - # i586: - # TARGET: i586-unknown-linux-gnu - # i686: - # TARGET: i686-unknown-linux-gnu - # mips: - # TARGET: mips-unknown-linux-gnu - # mips64: - # TARGET: mips64-unknown-linux-gnuabi64 - # mips64el: - # TARGET: mips64el-unknown-linux-gnuabi64 - # mipsel: - # TARGET: mipsel-unknown-linux-gnu - # powerpc: - # TARGET: powerpc-unknown-linux-gnu - # powerpc64: - # TARGET: powerpc64-unknown-linux-gnu - # powerpc64le: - # TARGET: powerpc64le-unknown-linux-gnu - thumbv6m: - TARGET: thumbv6m-linux-eabi - XARGO: 1 - thumbv7em: - TARGET: thumbv7em-linux-eabi - XARGO: 1 - 
thumbv7emhf: - TARGET: thumbv7em-linux-eabihf - XARGO: 1 - thumbv7m: - TARGET: thumbv7m-linux-eabi - XARGO: 1 - wasm32: - TARGET: wasm32-unknown-unknown - ONLY_BUILD: 1 - x86_64: - TARGET: x86_64-unknown-linux-gnu - - - job: macOS - pool: - vmImage: macos-10.13 - steps: - - template: ci/azure-steps.yml - strategy: - matrix: - x86_64: - TARGET: x86_64-apple-darwin - # i686: - # TARGET: i686-apple-darwin - + # - job: Linux + # pool: + # vmImage: ubuntu-16.04 + # steps: + # - template: ci/azure-steps.yml + # strategy: + # matrix: + # aarch64: + # TARGET: aarch64-unknown-linux-gnu + # arm: + # TARGET: arm-unknown-linux-gnueabi + # armhf: + # TARGET: arm-unknown-linux-gnueabihf + # i586: + # TARGET: i586-unknown-linux-gnu + # i686: + # TARGET: i686-unknown-linux-gnu + # mips: + # TARGET: mips-unknown-linux-gnu + # mips64: + # TARGET: mips64-unknown-linux-gnuabi64 + # mips64el: + # TARGET: mips64el-unknown-linux-gnuabi64 + # mipsel: + # TARGET: mipsel-unknown-linux-gnu + # powerpc: + # TARGET: powerpc-unknown-linux-gnu + # powerpc64: + # TARGET: powerpc64-unknown-linux-gnu + # powerpc64le: + # TARGET: powerpc64le-unknown-linux-gnu + # thumbv6m: + # TARGET: thumbv6m-linux-eabi + # XARGO: 1 + # thumbv7em: + # TARGET: thumbv7em-linux-eabi + # XARGO: 1 + # thumbv7emhf: + # TARGET: thumbv7em-linux-eabihf + # XARGO: 1 + # thumbv7m: + # TARGET: thumbv7m-linux-eabi + # XARGO: 1 + # wasm32: + # TARGET: wasm32-unknown-unknown + # ONLY_BUILD: 1 + # x86_64: + # TARGET: x86_64-unknown-linux-gnu + # + # - job: macOS + # pool: + # vmImage: macos-10.13 + # steps: + # - template: ci/azure-steps.yml + # strategy: + # matrix: + # x86_64: + # TARGET: x86_64-apple-darwin + # i686: + # TARGET: i686-apple-darwin + # - job: Windows pool: vmImage: 'vs2017-win2016' @@ -72,9 +72,9 @@ jobs: matrix: i686-msvc: TARGET: i686-pc-windows-msvc - # x86_64-msvc: - # TARGET: x86_64-pc-windows-msvc - # i686-gnu: - # TARGET: i686-pc-windows-gnu + x86_64-msvc: + TARGET: x86_64-pc-windows-msvc + i686-gnu: + 
TARGET: i686-pc-windows-gnu x86_64-gnu: TARGET: x86_64-pc-windows-gnu diff --git a/ci/azure-install-rust.yml b/ci/azure-install-rust.yml index d74946e3d..f44f8c59d 100644 --- a/ci/azure-install-rust.yml +++ b/ci/azure-install-rust.yml @@ -13,7 +13,7 @@ steps: - script: | curl -sSf -o rustup-init.exe https://win.rustup.rs - rustup-init.exe -y --default-toolchain %TOOLCHAIN% + rustup-init.exe -y --default-toolchain %TOOLCHAIN%-%TARGET% echo ##vso[task.prependpath]%USERPROFILE%\.cargo\bin displayName: Install rust condition: eq( variables['Agent.OS'], 'Windows_NT' ) From 17b8ea36203ae5a6a7e83108d7a46c2f65798387 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 2 Apr 2019 12:16:59 -0700 Subject: [PATCH 0187/1459] Re-enable all targets --- azure-pipelines.yml | 122 ++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 36732f132..8df6e64c5 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -2,67 +2,67 @@ trigger: - master jobs: - # - job: Linux - # pool: - # vmImage: ubuntu-16.04 - # steps: - # - template: ci/azure-steps.yml - # strategy: - # matrix: - # aarch64: - # TARGET: aarch64-unknown-linux-gnu - # arm: - # TARGET: arm-unknown-linux-gnueabi - # armhf: - # TARGET: arm-unknown-linux-gnueabihf - # i586: - # TARGET: i586-unknown-linux-gnu - # i686: - # TARGET: i686-unknown-linux-gnu - # mips: - # TARGET: mips-unknown-linux-gnu - # mips64: - # TARGET: mips64-unknown-linux-gnuabi64 - # mips64el: - # TARGET: mips64el-unknown-linux-gnuabi64 - # mipsel: - # TARGET: mipsel-unknown-linux-gnu - # powerpc: - # TARGET: powerpc-unknown-linux-gnu - # powerpc64: - # TARGET: powerpc64-unknown-linux-gnu - # powerpc64le: - # TARGET: powerpc64le-unknown-linux-gnu - # thumbv6m: - # TARGET: thumbv6m-linux-eabi - # XARGO: 1 - # thumbv7em: - # TARGET: thumbv7em-linux-eabi - # XARGO: 1 - # thumbv7emhf: - # TARGET: thumbv7em-linux-eabihf - # XARGO: 1 - # thumbv7m: - # 
TARGET: thumbv7m-linux-eabi - # XARGO: 1 - # wasm32: - # TARGET: wasm32-unknown-unknown - # ONLY_BUILD: 1 - # x86_64: - # TARGET: x86_64-unknown-linux-gnu - # - # - job: macOS - # pool: - # vmImage: macos-10.13 - # steps: - # - template: ci/azure-steps.yml - # strategy: - # matrix: - # x86_64: - # TARGET: x86_64-apple-darwin - # i686: - # TARGET: i686-apple-darwin - # + - job: Linux + pool: + vmImage: ubuntu-16.04 + steps: + - template: ci/azure-steps.yml + strategy: + matrix: + aarch64: + TARGET: aarch64-unknown-linux-gnu + arm: + TARGET: arm-unknown-linux-gnueabi + armhf: + TARGET: arm-unknown-linux-gnueabihf + i586: + TARGET: i586-unknown-linux-gnu + i686: + TARGET: i686-unknown-linux-gnu + mips: + TARGET: mips-unknown-linux-gnu + mips64: + TARGET: mips64-unknown-linux-gnuabi64 + mips64el: + TARGET: mips64el-unknown-linux-gnuabi64 + mipsel: + TARGET: mipsel-unknown-linux-gnu + powerpc: + TARGET: powerpc-unknown-linux-gnu + powerpc64: + TARGET: powerpc64-unknown-linux-gnu + powerpc64le: + TARGET: powerpc64le-unknown-linux-gnu + # thumbv6m: + # TARGET: thumbv6m-linux-eabi + # XARGO: 1 + # thumbv7em: + # TARGET: thumbv7em-linux-eabi + # XARGO: 1 + # thumbv7emhf: + # TARGET: thumbv7em-linux-eabihf + # XARGO: 1 + # thumbv7m: + # TARGET: thumbv7m-linux-eabi + # XARGO: 1 + wasm32: + TARGET: wasm32-unknown-unknown + ONLY_BUILD: 1 + x86_64: + TARGET: x86_64-unknown-linux-gnu + + - job: macOS + pool: + vmImage: macos-10.13 + steps: + - template: ci/azure-steps.yml + strategy: + matrix: + x86_64: + TARGET: x86_64-apple-darwin + i686: + TARGET: i686-apple-darwin + - job: Windows pool: vmImage: 'vs2017-win2016' From 4c60176278c2fb320ba4e7483888b3c53cb86e5b Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 2 Apr 2019 12:51:36 -0700 Subject: [PATCH 0188/1459] Attempt to fix MinGW targets --- azure-pipelines.yml | 4 ++++ examples/intrinsics.rs | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 8df6e64c5..da3a1bb1d 
100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -76,5 +76,9 @@ jobs: TARGET: x86_64-pc-windows-msvc i686-gnu: TARGET: i686-pc-windows-gnu + INTRINSICS_FAILS_WITH_MEM_FEATURE: 1 + DEBUG_LTO_BUILD_DOESNT_WORK: 1 x86_64-gnu: TARGET: x86_64-pc-windows-gnu + INTRINSICS_FAILS_WITH_MEM_FEATURE: 1 + DEBUG_LTO_BUILD_DOESNT_WORK: 1 diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 89c2c23db..7766687c8 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -383,3 +383,11 @@ pub fn _Unwind_Resume() {} #[lang = "eh_personality"] #[no_mangle] pub extern "C" fn eh_personality() {} + +#[cfg(all(windows, target_env = "gnu"))] +mod mingw_unwidning { + #[no_mangle] + pub fn rust_eh_personality() {} + #[no_mangle] + pub fn rust_eh_unwind_resume() {} +} From 1275ec66546cd974e834c3b31d7d89913f074eb4 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 2 Apr 2019 13:02:22 -0700 Subject: [PATCH 0189/1459] More fixes for i686-mingw --- examples/intrinsics.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 7766687c8..ccd701569 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -390,4 +390,8 @@ mod mingw_unwidning { pub fn rust_eh_personality() {} #[no_mangle] pub fn rust_eh_unwind_resume() {} + #[no_mangle] + pub fn rust_eh_register_frames() {} + #[no_mangle] + pub fn rust_eh_unregister_frames() {} } From cc2d7cb3bc42e620346d04230cd321245c9fff05 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 2 Apr 2019 13:05:56 -0700 Subject: [PATCH 0190/1459] Bump to 0.1.9 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index d30af4051..7c0cf0d74 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.8" +version = "0.1.9" license = "MIT/Apache-2.0" readme = "README.md" repository = 
"https://github.com/rust-lang-nursery/compiler-builtins" From 6e8085722d2cfd69353a61c7377745a379c26677 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 2 Apr 2019 15:41:20 -0500 Subject: [PATCH 0191/1459] Update azure pipelines badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b290cbff8..f0724bffe 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # `compiler-builtins` -[![Build Status](https://dev.azure.com/rust-lang/compiler-builtins/_apis/build/status/compiler-builtins-CI?branchName=master)](https://dev.azure.com/rust-lang/compiler-builtins/_build/latest?definitionId=2&branchName=master) +[![Build Status](https://dev.azure.com/rust-lang/compiler-builtins/_apis/build/status/rust-lang-nursery.compiler-builtins?branchName=master)](https://dev.azure.com/rust-lang/compiler-builtins/_build/latest?definitionId=6&branchName=master) > Porting `compiler-rt` intrinsics to Rust From 98c5738ed5fbbea0c31abc89e0d0abd369e1a8f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Mon, 8 Apr 2019 16:30:33 +0200 Subject: [PATCH 0192/1459] Update submodule for VS 2019 support --- compiler-rt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt b/compiler-rt index 84c0bd015..03fc28f92 160000 --- a/compiler-rt +++ b/compiler-rt @@ -1 +1 @@ -Subproject commit 84c0bd0158c3ff86052be1b07a3ddc3c4f5ba52a +Subproject commit 03fc28f9273eeab16f1005f982dfde5900bddb29 From afca23bbb95a9a24425484291fda249891439164 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 8 Apr 2019 07:52:05 -0700 Subject: [PATCH 0193/1459] Bump to 0.1.10 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7c0cf0d74..1dfe6c73f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.9" +version = "0.1.10" license = "MIT/Apache-2.0" readme = "README.md" 
repository = "https://github.com/rust-lang-nursery/compiler-builtins" From c8dd6524e8b415cd0098a09096d5a70ad7b7827e Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 8 Apr 2019 07:55:09 -0700 Subject: [PATCH 0194/1459] Add instructions for publishing --- PUBLISHING.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 PUBLISHING.md diff --git a/PUBLISHING.md b/PUBLISHING.md new file mode 100644 index 000000000..ad100dee0 --- /dev/null +++ b/PUBLISHING.md @@ -0,0 +1,17 @@ +# Publishing to crates.io + +Publishing `compiler-builtins` to crates.io takes a few steps unfortunately. +It's not great, but it works for now. PRs to improve this process would be +greatly appreciated! + +1. Make sure you've got a clean working tree and it's updated with the latest + changes on `master` +2. Edit `Cargo.toml` to bump the version number +3. Commit this change +4. Run `git tag` to create a tag for this version +5. Delete the `libm/Cargo.toml` file +6. Comment out the `[dev-dependencies]` section of `Cargo.toml` +7. Run `cargo +nightly publish --allow-dirty` +8. Push the tag +9. Push the commit +10. Undo changes to `Cargo.toml` and the `libm` submodule From aec945a708e385ff337fc0922917b57c78d16755 Mon Sep 17 00:00:00 2001 From: Goirad Date: Mon, 29 Apr 2019 14:42:14 -0700 Subject: [PATCH 0195/1459] Added missing fdim signature --- src/math.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/math.rs b/src/math.rs index dfdd99cdf..b61955539 100644 --- a/src/math.rs +++ b/src/math.rs @@ -64,6 +64,7 @@ no_mangle! 
{ fn cbrtf(n: f32) -> f32; fn coshf(n: f32) -> f32; fn expm1f(n: f32) -> f32; + fn fdim(a: f64, b: f64) -> f64; fn fdimf(a: f32, b: f32) -> f32; fn hypotf(x: f32, y: f32) -> f32; fn log1pf(n: f32) -> f32; From 045de6e5166ebaf8b74ad9d97b6bb09d5d28c982 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 30 Apr 2019 10:08:23 -0700 Subject: [PATCH 0196/1459] Bump to 0.1.11 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1dfe6c73f..fdca20143 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.10" +version = "0.1.11" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From 56b7a5bc0726da3e0a19356eba99f65bc3bf2662 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 09:15:01 -0500 Subject: [PATCH 0197/1459] Set up CI with Azure Pipelines [skip ci] --- libm/azure-pipelines.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 libm/azure-pipelines.yml diff --git a/libm/azure-pipelines.yml b/libm/azure-pipelines.yml new file mode 100644 index 000000000..c23ced43c --- /dev/null +++ b/libm/azure-pipelines.yml @@ -0,0 +1,19 @@ +# Starter pipeline +# Start with a minimal pipeline that you can customize to build and deploy your code. +# Add steps that build, run tests, deploy, and more: +# https://aka.ms/yaml + +trigger: +- master + +pool: + vmImage: 'Ubuntu-16.04' + +steps: +- script: echo Hello, world! + displayName: 'Run a one-line script' + +- script: | + echo Add other tasks to build, test, and deploy your project. 
+ echo See https://aka.ms/yaml + displayName: 'Run a multi-line script' From 82c594fe2b56cb2ce75abf49ad50028ade7f672e Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 08:12:00 -0700 Subject: [PATCH 0198/1459] Move crates to `crates` folder --- libm/Cargo.toml | 10 +- libm/azure-pipelines.yml | 156 ++++++++++++++++-- libm/ci/azure-install-rust.yml | 23 +++ libm/ci/azure-test-all.yml | 41 +++++ .../compiler-builtins-smoke-test}/Cargo.toml | 0 .../compiler-builtins-smoke-test}/src/lib.rs | 2 +- libm/{ => crates}/input-generator/Cargo.toml | 0 libm/{ => crates}/input-generator/src/main.rs | 32 ++-- libm/{ => crates}/musl-generator/Cargo.toml | 0 .../{ => crates}/musl-generator/src/macros.rs | 16 +- libm/{ => crates}/musl-generator/src/main.rs | 0 libm/{ => crates}/newlib-generator/Cargo.toml | 0 .../newlib-generator/src/macros.rs | 0 .../{ => crates}/newlib-generator/src/main.rs | 0 libm/{ => crates}/shared/Cargo.toml | 0 libm/{ => crates}/shared/src/lib.rs | 82 +++++---- libm/math/.cargo/config | 11 -- libm/math/Cargo.toml | 8 - libm/math/Cross.toml | 2 - libm/src/math/acosf.rs | 14 ++ 20 files changed, 288 insertions(+), 109 deletions(-) create mode 100644 libm/ci/azure-install-rust.yml create mode 100644 libm/ci/azure-test-all.yml rename libm/{cb => crates/compiler-builtins-smoke-test}/Cargo.toml (100%) rename libm/{cb => crates/compiler-builtins-smoke-test}/src/lib.rs (82%) rename libm/{ => crates}/input-generator/Cargo.toml (100%) rename libm/{ => crates}/input-generator/src/main.rs (82%) rename libm/{ => crates}/musl-generator/Cargo.toml (100%) rename libm/{ => crates}/musl-generator/src/macros.rs (90%) rename libm/{ => crates}/musl-generator/src/main.rs (100%) rename libm/{ => crates}/newlib-generator/Cargo.toml (100%) rename libm/{ => crates}/newlib-generator/src/macros.rs (100%) rename libm/{ => crates}/newlib-generator/src/main.rs (100%) rename libm/{ => crates}/shared/Cargo.toml (100%) rename libm/{ => crates}/shared/src/lib.rs (86%) delete 
mode 100644 libm/math/.cargo/config delete mode 100644 libm/math/Cargo.toml delete mode 100644 libm/math/Cross.toml diff --git a/libm/Cargo.toml b/libm/Cargo.toml index f28024d04..b4f07eeee 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -17,11 +17,11 @@ stable = [] [workspace] members = [ - "cb", - "input-generator", - "musl-generator", - "newlib-generator", - "shared", + "crates/compiler-builtins-smoke-test", + "crates/input-generator", + "crates/musl-generator", + "crates/newlib-generator", + "crates/shared", ] [dev-dependencies] diff --git a/libm/azure-pipelines.yml b/libm/azure-pipelines.yml index c23ced43c..36271ec1b 100644 --- a/libm/azure-pipelines.yml +++ b/libm/azure-pipelines.yml @@ -1,19 +1,143 @@ -# Starter pipeline -# Start with a minimal pipeline that you can customize to build and deploy your code. -# Add steps that build, run tests, deploy, and more: -# https://aka.ms/yaml - trigger: -- master - -pool: - vmImage: 'Ubuntu-16.04' + - master -steps: -- script: echo Hello, world! 
- displayName: 'Run a one-line script' +jobs: + - job: Docker + pool: + vmImage: ubuntu-16.04 + steps: + - template: ci/azure-install-rust.yml + env: + TOOLCHAIN: nightly + - bash: rustup target add $TARGET + displayName: "Install rust cross target" + - bash: | + set -e + mkdir cross + curl -L https://github.com/rust-embedded/cross/releases/download/v0.1.14/cross-v0.1.14-x86_64-unknown-linux-musl.tar.gz | tar xzf - -C $HOME/.cargo/bin + displayName: "Install cross" + - bash: cross test --lib --features checked --target $TARGET --release + displayName: "Run lib tests" + - bash: cross test --tests --features checked --target $TARGET --release + displayName: "Run integration tests" + strategy: + matrix: + aarch64: + TARGET: aarch64-unknown-linux-gnu + armhv: + TARGET: arm-unknown-linux-gnueabihf + armv7: + TARGET: armv7-unknown-linux-gnueabihf + i586: + TARGET: i586-unknown-linux-gnu + i686: + TARGET: i686-unknown-linux-gnu + mips: + TARGET: mips-unknown-linux-gnu + mips64: + TARGET: mips64-unknown-linux-gnuabi64 + mips64el: + TARGET: mips64el-unknown-linux-gnuabi64 + powerpc: + TARGET: powerpc-unknown-linux-gnu + powerpc64: + TARGET: powerpc64-unknown-linux-gnu + powerpc64le: + TARGET: powerpc64le-unknown-linux-gnu + x86_64: + TARGET: x86_64-unknown-linux-gnu -- script: | - echo Add other tasks to build, test, and deploy your project. 
- echo See https://aka.ms/yaml - displayName: 'Run a multi-line script' + # - job: Linux + # pool: + # vmImage: ubuntu-16.04 + # steps: + # - template: ci/azure-test-all.yml + # strategy: + # matrix: + # stable: + # TOOLCHAIN: stable + # beta: + # TOOLCHAIN: beta + # nightly: + # TOOLCHAIN: nightly + # + # - job: macOS + # pool: + # vmImage: macos-10.13 + # steps: + # - template: ci/azure-test-all.yml + # strategy: + # matrix: + # x86_64: + # TARGET: x86_64-apple-darwin + # + # - job: iOS + # pool: + # vmImage: macos-10.13 + # steps: + # - checkout: self + # submodules: true + # - template: ci/azure-install-rust.yml + # - script: rustup target add $TARGET + # displayName: "Install rust cross target" + # - bash: | + # set -e + # export SDK_PATH=`xcrun --show-sdk-path --sdk $SDK` + # export RUSTFLAGS="-C link-arg=-isysroot -C link-arg=$SDK_PATH" + # cargo test --no-run --target $TARGET + # displayName: "Build for iOS" + # strategy: + # matrix: + # aarch64: + # TARGET: aarch64-apple-ios + # SDK: iphoneos + # armv7: + # TARGET: armv7-apple-ios + # SDK: iphoneos + # armv7s: + # TARGET: armv7s-apple-ios + # SDK: iphoneos + # i386: + # TARGET: i386-apple-ios + # SDK: iphonesimulator + # x86_64: + # TARGET: x86_64-apple-ios + # SDK: iphonesimulator + # + # - job: wasm + # pool: + # vmImage: ubuntu-16.04 + # steps: + # - checkout: self + # submodules: true + # - template: ci/azure-install-rust.yml + # - script: rustup target add wasm32-unknown-unknown + # displayName: "Install rust cross target" + # - script: cargo build --target wasm32-unknown-unknown + # displayName: "Build for wasm" + # + # - job: Windows + # pool: + # vmImage: vs2017-win2016 + # steps: + # - template: ci/azure-test-all.yml + # strategy: + # matrix: + # x86_64-msvc: + # TARGET: x86_64-pc-windows-msvc + # i686-msvc: + # TARGET: i686-pc-windows-msvc + # x86_64-gnu: + # TARGET: x86_64-pc-windows-gnu + # i686-gnu: + # TARGET: i686-pc-windows-gnu + # + # - job: Windows_arm64 + # pool: + # vmImage: 
windows-2019 + # steps: + # - template: ci/azure-install-rust.yml + # - script: rustup target add aarch64-pc-windows-msvc + # displayName: "Install rust cross target" + # - script: cargo test --no-run --target aarch64-pc-windows-msvc + # displayName: "Build for arm64" diff --git a/libm/ci/azure-install-rust.yml b/libm/ci/azure-install-rust.yml new file mode 100644 index 000000000..fa7eae459 --- /dev/null +++ b/libm/ci/azure-install-rust.yml @@ -0,0 +1,23 @@ +steps: + - bash: | + set -e + toolchain=$TOOLCHAIN + if [ "$toolchain" = "" ]; then + toolchain=stable + fi + curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain $toolchain + echo "##vso[task.prependpath]$HOME/.cargo/bin" + displayName: Install rust (unix) + condition: ne( variables['Agent.OS'], 'Windows_NT' ) + + - script: | + curl -sSf -o rustup-init.exe https://win.rustup.rs + rustup-init.exe -y --default-toolchain stable-%TARGET% + echo ##vso[task.prependpath]%USERPROFILE%\.cargo\bin + displayName: Install rust (windows) + condition: eq( variables['Agent.OS'], 'Windows_NT' ) + + - script: | + rustc -Vv + cargo -V + displayName: Query rust and cargo versions diff --git a/libm/ci/azure-test-all.yml b/libm/ci/azure-test-all.yml new file mode 100644 index 000000000..b2b7124d3 --- /dev/null +++ b/libm/ci/azure-test-all.yml @@ -0,0 +1,41 @@ +steps: + - checkout: self + submodules: true + - template: azure-install-rust.yml + + - bash: cargo build --manifest-path backtrace-sys/Cargo.toml + displayName: "Build backtrace-sys" + - bash: cargo build + displayName: "Build backtrace" + - bash: cargo test + displayName: "Test backtrace" + - bash: cargo test --no-default-features + displayName: "Test backtrace (-default)" + - bash: cargo test --no-default-features --features 'std' + displayName: "Test backtrace (-default + std)" + - bash: cargo test --no-default-features --features 'libunwind std' + displayName: "Test backtrace (-default + libunwind)" + - bash: cargo test --no-default-features --features 
'libunwind dladdr std' + displayName: "Test backtrace (-default + libunwind + dladdr)" + - bash: cargo test --no-default-features --features 'libunwind libbacktrace std' + displayName: "Test backtrace (-default + libunwind + libbacktrace)" + - bash: cargo test --no-default-features --features 'unix-backtrace std' + displayName: "Test backtrace (-default + unix-backtrace)" + - bash: cargo test --no-default-features --features 'unix-backtrace dladdr std' + displayName: "Test backtrace (-default + unix-backtrace + dladdr)" + - bash: cargo test --no-default-features --features 'unix-backtrace libbacktrace std' + displayName: "Test backtrace (-default + unix-backtrace + libbacktrace)" + - bash: cargo test --no-default-features --features 'serialize-serde std' + displayName: "Test backtrace (-default + serialize-serde + std)" + - bash: cargo test --no-default-features --features 'serialize-rustc std' + displayName: "Test backtrace (-default + serialize-rustc + std)" + - bash: cargo test --no-default-features --features 'serialize-rustc serialize-serde std' + displayName: "Test backtrace (-default + serialize-rustc + serialize-serde + std)" + - bash: cargo test --no-default-features --features 'cpp_demangle std' + displayName: "Test backtrace (-default + cpp_demangle + std)" + - bash: cargo test --no-default-features --features 'gimli-symbolize std' + displayName: "Test backtrace (-default + gimli-symbolize + std)" + - bash: cargo test --no-default-features --features 'dbghelp std' + displayName: "Test backtrace (-default + dbghelp + std)" + - bash: cd ./cpp_smoke_test && cargo test + displayName: "Test cpp_smoke_test" diff --git a/libm/cb/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml similarity index 100% rename from libm/cb/Cargo.toml rename to libm/crates/compiler-builtins-smoke-test/Cargo.toml diff --git a/libm/cb/src/lib.rs b/libm/crates/compiler-builtins-smoke-test/src/lib.rs similarity index 82% rename from libm/cb/src/lib.rs rename to 
libm/crates/compiler-builtins-smoke-test/src/lib.rs index 439ba7dc4..7fad301b9 100644 --- a/libm/cb/src/lib.rs +++ b/libm/crates/compiler-builtins-smoke-test/src/lib.rs @@ -5,5 +5,5 @@ #![allow(dead_code)] #![no_std] -#[path = "../../src/math/mod.rs"] +#[path = "../../../src/math/mod.rs"] mod libm; diff --git a/libm/input-generator/Cargo.toml b/libm/crates/input-generator/Cargo.toml similarity index 100% rename from libm/input-generator/Cargo.toml rename to libm/crates/input-generator/Cargo.toml diff --git a/libm/input-generator/src/main.rs b/libm/crates/input-generator/src/main.rs similarity index 82% rename from libm/input-generator/src/main.rs rename to libm/crates/input-generator/src/main.rs index b4a6ad142..0746ea477 100644 --- a/libm/input-generator/src/main.rs +++ b/libm/crates/input-generator/src/main.rs @@ -43,7 +43,7 @@ fn f32(rng: &mut XorShiftRng) -> Result<(), Box> { let mut f = File::create("bin/input/f32")?; for i in set { - f.write_all(&i.to_bytes())?; + f.write_all(&i.to_le_bytes())?; } Ok(()) @@ -61,8 +61,8 @@ fn f32f32(rng: &mut XorShiftRng) -> Result<(), Box> { } i += 1; - f.write_all(&x0.to_bits().to_bytes())?; - f.write_all(&x1.to_bits().to_bytes())?; + f.write_all(&x0.to_bits().to_le_bytes())?; + f.write_all(&x1.to_bits().to_le_bytes())?; } Ok(()) @@ -80,8 +80,8 @@ fn f32i16(rng: &mut XorShiftRng) -> Result<(), Box> { } i += 1; - f.write_all(&x0.to_bits().to_bytes())?; - f.write_all(&x1.to_bytes())?; + f.write_all(&x0.to_bits().to_le_bytes())?; + f.write_all(&x1.to_le_bytes())?; } Ok(()) @@ -100,9 +100,9 @@ fn f32f32f32(rng: &mut XorShiftRng) -> Result<(), Box> { } i += 1; - f.write_all(&x0.to_bits().to_bytes())?; - f.write_all(&x1.to_bits().to_bytes())?; - f.write_all(&x2.to_bits().to_bytes())?; + f.write_all(&x0.to_bits().to_le_bytes())?; + f.write_all(&x1.to_bits().to_le_bytes())?; + f.write_all(&x2.to_bits().to_le_bytes())?; } Ok(()) @@ -123,7 +123,7 @@ fn f64(rng: &mut XorShiftRng) -> Result<(), Box> { let mut f = 
File::create("bin/input/f64")?; for i in set { - f.write_all(&i.to_bytes())?; + f.write_all(&i.to_le_bytes())?; } Ok(()) @@ -141,8 +141,8 @@ fn f64f64(rng: &mut XorShiftRng) -> Result<(), Box> { } i += 1; - f.write_all(&x0.to_bits().to_bytes())?; - f.write_all(&x1.to_bits().to_bytes())?; + f.write_all(&x0.to_bits().to_le_bytes())?; + f.write_all(&x1.to_bits().to_le_bytes())?; } Ok(()) @@ -161,9 +161,9 @@ fn f64f64f64(rng: &mut XorShiftRng) -> Result<(), Box> { } i += 1; - f.write_all(&x0.to_bits().to_bytes())?; - f.write_all(&x1.to_bits().to_bytes())?; - f.write_all(&x2.to_bits().to_bytes())?; + f.write_all(&x0.to_bits().to_le_bytes())?; + f.write_all(&x1.to_bits().to_le_bytes())?; + f.write_all(&x2.to_bits().to_le_bytes())?; } Ok(()) @@ -181,8 +181,8 @@ fn f64i16(rng: &mut XorShiftRng) -> Result<(), Box> { } i += 1; - f.write_all(&x0.to_bits().to_bytes())?; - f.write_all(&x1.to_bytes())?; + f.write_all(&x0.to_bits().to_le_bytes())?; + f.write_all(&x1.to_le_bytes())?; } Ok(()) diff --git a/libm/musl-generator/Cargo.toml b/libm/crates/musl-generator/Cargo.toml similarity index 100% rename from libm/musl-generator/Cargo.toml rename to libm/crates/musl-generator/Cargo.toml diff --git a/libm/musl-generator/src/macros.rs b/libm/crates/musl-generator/src/macros.rs similarity index 90% rename from libm/musl-generator/src/macros.rs rename to libm/crates/musl-generator/src/macros.rs index 16ba99d64..e47c0ab6f 100644 --- a/libm/musl-generator/src/macros.rs +++ b/libm/crates/musl-generator/src/macros.rs @@ -16,7 +16,7 @@ macro_rules! f32 { $fun(*x) }; - $fun.write_all(&y.to_bits().to_bytes())?; + $fun.write_all(&y.to_bits().to_le_bytes())?; )+ } }}; @@ -40,7 +40,7 @@ macro_rules! f32f32 { $fun(*x0, *x1) }; - $fun.write_all(&y.to_bits().to_bytes())?; + $fun.write_all(&y.to_bits().to_le_bytes())?; )+ } }}; @@ -64,7 +64,7 @@ macro_rules! 
f32f32f32 { $fun(*x0, *x1, *x2) }; - $fun.write_all(&y.to_bits().to_bytes())?; + $fun.write_all(&y.to_bits().to_le_bytes())?; )+ } }}; @@ -88,7 +88,7 @@ macro_rules! f32i32 { $fun(*x0, *x1 as i32) }; - $fun.write_all(&y.to_bits().to_bytes())?; + $fun.write_all(&y.to_bits().to_le_bytes())?; )+ } }}; @@ -112,7 +112,7 @@ macro_rules! f64 { $fun(*x) }; - $fun.write_all(&y.to_bits().to_bytes())?; + $fun.write_all(&y.to_bits().to_le_bytes())?; )+ } }}; @@ -136,7 +136,7 @@ macro_rules! f64f64 { $fun(*x0, *x1) }; - $fun.write_all(&y.to_bits().to_bytes())?; + $fun.write_all(&y.to_bits().to_le_bytes())?; )+ } }}; @@ -160,7 +160,7 @@ macro_rules! f64f64f64 { $fun(*x0, *x1, *x2) }; - $fun.write_all(&y.to_bits().to_bytes())?; + $fun.write_all(&y.to_bits().to_le_bytes())?; )+ } }}; @@ -184,7 +184,7 @@ macro_rules! f64i32 { $fun(*x0, *x1 as i32) }; - $fun.write_all(&y.to_bits().to_bytes())?; + $fun.write_all(&y.to_bits().to_le_bytes())?; )+ } }}; diff --git a/libm/musl-generator/src/main.rs b/libm/crates/musl-generator/src/main.rs similarity index 100% rename from libm/musl-generator/src/main.rs rename to libm/crates/musl-generator/src/main.rs diff --git a/libm/newlib-generator/Cargo.toml b/libm/crates/newlib-generator/Cargo.toml similarity index 100% rename from libm/newlib-generator/Cargo.toml rename to libm/crates/newlib-generator/Cargo.toml diff --git a/libm/newlib-generator/src/macros.rs b/libm/crates/newlib-generator/src/macros.rs similarity index 100% rename from libm/newlib-generator/src/macros.rs rename to libm/crates/newlib-generator/src/macros.rs diff --git a/libm/newlib-generator/src/main.rs b/libm/crates/newlib-generator/src/main.rs similarity index 100% rename from libm/newlib-generator/src/main.rs rename to libm/crates/newlib-generator/src/main.rs diff --git a/libm/shared/Cargo.toml b/libm/crates/shared/Cargo.toml similarity index 100% rename from libm/shared/Cargo.toml rename to libm/crates/shared/Cargo.toml diff --git a/libm/shared/src/lib.rs 
b/libm/crates/shared/src/lib.rs similarity index 86% rename from libm/shared/src/lib.rs rename to libm/crates/shared/src/lib.rs index 84676f94f..17c20a332 100644 --- a/libm/shared/src/lib.rs +++ b/libm/crates/shared/src/lib.rs @@ -1,5 +1,3 @@ -#![feature(exact_chunks)] - #[macro_use] extern crate lazy_static; @@ -8,11 +6,11 @@ lazy_static! { let bytes = include_bytes!("../../bin/input/f32"); bytes - .exact_chunks(4) + .chunks_exact(4) .map(|chunk| { let mut buf = [0; 4]; buf.copy_from_slice(chunk); - f32::from_bits(u32::from_le(u32::from_bytes(buf))) + f32::from_bits(u32::from_le(u32::from_le_bytes(buf))) }) .collect() }; @@ -20,7 +18,7 @@ lazy_static! { let bytes = include_bytes!("../../bin/input/f32f32"); bytes - .exact_chunks(8) + .chunks_exact(8) .map(|chunk| { let mut x0 = [0; 4]; let mut x1 = [0; 4]; @@ -28,8 +26,8 @@ lazy_static! { x1.copy_from_slice(&chunk[4..]); ( - f32::from_bits(u32::from_le(u32::from_bytes(x0))), - f32::from_bits(u32::from_le(u32::from_bytes(x1))), + f32::from_bits(u32::from_le(u32::from_le_bytes(x0))), + f32::from_bits(u32::from_le(u32::from_le_bytes(x1))), ) }) .collect() @@ -38,7 +36,7 @@ lazy_static! { let bytes = include_bytes!("../../bin/input/f32f32f32"); bytes - .exact_chunks(12) + .chunks_exact(12) .map(|chunk| { let mut x0 = [0; 4]; let mut x1 = [0; 4]; @@ -48,9 +46,9 @@ lazy_static! { x2.copy_from_slice(&chunk[8..]); ( - f32::from_bits(u32::from_le(u32::from_bytes(x0))), - f32::from_bits(u32::from_le(u32::from_bytes(x1))), - f32::from_bits(u32::from_le(u32::from_bytes(x2))), + f32::from_bits(u32::from_le(u32::from_le_bytes(x0))), + f32::from_bits(u32::from_le(u32::from_le_bytes(x1))), + f32::from_bits(u32::from_le(u32::from_le_bytes(x2))), ) }) .collect() @@ -59,7 +57,7 @@ lazy_static! { let bytes = include_bytes!("../../bin/input/f32i16"); bytes - .exact_chunks(6) + .chunks_exact(6) .map(|chunk| { let mut x0 = [0; 4]; let mut x1 = [0; 2]; @@ -67,8 +65,8 @@ lazy_static! 
{ x1.copy_from_slice(&chunk[4..]); ( - f32::from_bits(u32::from_le(u32::from_bytes(x0))), - i16::from_le(i16::from_bytes(x1)) as i32, + f32::from_bits(u32::from_le(u32::from_le_bytes(x0))), + i16::from_le(i16::from_le_bytes(x1)) as i32, ) }) .collect() @@ -77,11 +75,11 @@ lazy_static! { let bytes = include_bytes!("../../bin/input/f64"); bytes - .exact_chunks(8) + .chunks_exact(8) .map(|chunk| { let mut buf = [0; 8]; buf.copy_from_slice(chunk); - f64::from_bits(u64::from_le(u64::from_bytes(buf))) + f64::from_bits(u64::from_le(u64::from_le_bytes(buf))) }) .collect() }; @@ -89,7 +87,7 @@ lazy_static! { let bytes = include_bytes!("../../bin/input/f64f64"); bytes - .exact_chunks(16) + .chunks_exact(16) .map(|chunk| { let mut x0 = [0; 8]; let mut x1 = [0; 8]; @@ -97,8 +95,8 @@ lazy_static! { x1.copy_from_slice(&chunk[8..]); ( - f64::from_bits(u64::from_le(u64::from_bytes(x0))), - f64::from_bits(u64::from_le(u64::from_bytes(x1))), + f64::from_bits(u64::from_le(u64::from_le_bytes(x0))), + f64::from_bits(u64::from_le(u64::from_le_bytes(x1))), ) }) .collect() @@ -107,7 +105,7 @@ lazy_static! { let bytes = include_bytes!("../../bin/input/f64f64f64"); bytes - .exact_chunks(24) + .chunks_exact(24) .map(|chunk| { let mut x0 = [0; 8]; let mut x1 = [0; 8]; @@ -117,9 +115,9 @@ lazy_static! { x2.copy_from_slice(&chunk[16..]); ( - f64::from_bits(u64::from_le(u64::from_bytes(x0))), - f64::from_bits(u64::from_le(u64::from_bytes(x1))), - f64::from_bits(u64::from_le(u64::from_bytes(x2))), + f64::from_bits(u64::from_le(u64::from_le_bytes(x0))), + f64::from_bits(u64::from_le(u64::from_le_bytes(x1))), + f64::from_bits(u64::from_le(u64::from_le_bytes(x2))), ) }) .collect() @@ -128,7 +126,7 @@ lazy_static! { let bytes = include_bytes!("../../bin/input/f64i16"); bytes - .exact_chunks(10) + .chunks_exact(10) .map(|chunk| { let mut x0 = [0; 8]; let mut x1 = [0; 2]; @@ -136,8 +134,8 @@ lazy_static! 
{ x1.copy_from_slice(&chunk[8..]); ( - f64::from_bits(u64::from_le(u64::from_bytes(x0))), - i16::from_le(i16::from_bytes(x1)) as i32, + f64::from_bits(u64::from_le(u64::from_le_bytes(x0))), + i16::from_le(i16::from_le_bytes(x1)) as i32, ) }) .collect() @@ -151,11 +149,11 @@ macro_rules! f32 { #[test] fn $fun() { let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) - .exact_chunks(4) + .chunks_exact(4) .map(|chunk| { let mut buf = [0; 4]; buf.copy_from_slice(chunk); - f32::from_bits(u32::from_le(u32::from_bytes(buf))) + f32::from_bits(u32::from_le(u32::from_le_bytes(buf))) }) .collect::>(); @@ -190,11 +188,11 @@ macro_rules! f32f32 { #[test] fn $fun() { let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) - .exact_chunks(4) + .chunks_exact(4) .map(|chunk| { let mut buf = [0; 4]; buf.copy_from_slice(chunk); - f32::from_bits(u32::from_le(u32::from_bytes(buf))) + f32::from_bits(u32::from_le(u32::from_le_bytes(buf))) }) .collect::>(); @@ -231,11 +229,11 @@ macro_rules! f32f32f32 { #[test] fn $fun() { let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) - .exact_chunks(4) + .chunks_exact(4) .map(|chunk| { let mut buf = [0; 4]; buf.copy_from_slice(chunk); - f32::from_bits(u32::from_le(u32::from_bytes(buf))) + f32::from_bits(u32::from_le(u32::from_le_bytes(buf))) }) .collect::>(); @@ -273,11 +271,11 @@ macro_rules! f32i32 { #[test] fn $fun() { let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) - .exact_chunks(4) + .chunks_exact(4) .map(|chunk| { let mut buf = [0; 4]; buf.copy_from_slice(chunk); - f32::from_bits(u32::from_le(u32::from_bytes(buf))) + f32::from_bits(u32::from_le(u32::from_le_bytes(buf))) }) .collect::>(); @@ -314,11 +312,11 @@ macro_rules! 
f64 { #[test] fn $fun() { let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) - .exact_chunks(8) + .chunks_exact(8) .map(|chunk| { let mut buf = [0; 8]; buf.copy_from_slice(chunk); - f64::from_bits(u64::from_le(u64::from_bytes(buf))) + f64::from_bits(u64::from_le(u64::from_le_bytes(buf))) }) .collect::>(); @@ -353,11 +351,11 @@ macro_rules! f64f64 { #[test] fn $fun() { let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) - .exact_chunks(8) + .chunks_exact(8) .map(|chunk| { let mut buf = [0; 8]; buf.copy_from_slice(chunk); - f64::from_bits(u64::from_le(u64::from_bytes(buf))) + f64::from_bits(u64::from_le(u64::from_le_bytes(buf))) }) .collect::>(); @@ -394,11 +392,11 @@ macro_rules! f64f64f64 { #[test] fn $fun() { let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) - .exact_chunks(8) + .chunks_exact(8) .map(|chunk| { let mut buf = [0; 8]; buf.copy_from_slice(chunk); - f64::from_bits(u64::from_le(u64::from_bytes(buf))) + f64::from_bits(u64::from_le(u64::from_le_bytes(buf))) }) .collect::>(); @@ -436,11 +434,11 @@ macro_rules! 
f64i32 { #[test] fn $fun() { let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) - .exact_chunks(8) + .chunks_exact(8) .map(|chunk| { let mut buf = [0; 8]; buf.copy_from_slice(chunk); - f64::from_bits(u64::from_le(u64::from_bytes(buf))) + f64::from_bits(u64::from_le(u64::from_le_bytes(buf))) }) .collect::>(); diff --git a/libm/math/.cargo/config b/libm/math/.cargo/config deleted file mode 100644 index be79c453a..000000000 --- a/libm/math/.cargo/config +++ /dev/null @@ -1,11 +0,0 @@ -[target.thumbv7em-none-eabi] -rustflags = [ - "-C", "link-arg=-Wl,-Tlink.x", - "-C", "link-arg=-nostartfiles", - "-C", "link-arg=-mthumb", - "-C", "link-arg=-march=armv7e-m", - "-C", "link-arg=-mfloat-abi=soft", -] - -[build] -target = "thumbv7em-none-eabi" \ No newline at end of file diff --git a/libm/math/Cargo.toml b/libm/math/Cargo.toml deleted file mode 100644 index 5bca038a9..000000000 --- a/libm/math/Cargo.toml +++ /dev/null @@ -1,8 +0,0 @@ -[package] -name = "math" -version = "0.0.0" - -[dependencies] -qemu-arm-rt = { git = "https://github.com/japaric/qemu-arm-rt" } - -[workspace] \ No newline at end of file diff --git a/libm/math/Cross.toml b/libm/math/Cross.toml deleted file mode 100644 index 471770b52..000000000 --- a/libm/math/Cross.toml +++ /dev/null @@ -1,2 +0,0 @@ -[target.thumbv7em-none-eabi] -xargo = false \ No newline at end of file diff --git a/libm/src/math/acosf.rs b/libm/src/math/acosf.rs index 469601cab..b12ed531a 100644 --- a/libm/src/math/acosf.rs +++ b/libm/src/math/acosf.rs @@ -73,3 +73,17 @@ pub fn acosf(x: f32) -> f32 { w = r(z) * s + c; 2. 
* (df + w) } + +#[cfg(test)] +mod tests { + #[test] + fn acosf() { + extern { + fn acosf(x: f32) -> f32; + } + unsafe { + crate::_eqf(super::acosf(1.0), acosf(1.0)).unwrap(); + } + } + // shared::f32!("musl", acosf); +} From 64f05859f92750767c61e0ce0167ab1c92f514ab Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 08:12:23 -0700 Subject: [PATCH 0199/1459] Remove newlib generator It's broken and we can try to add it back later if necessary --- libm/Cargo.toml | 1 - libm/crates/newlib-generator/Cargo.toml | 7 - libm/crates/newlib-generator/src/macros.rs | 245 --------------------- libm/crates/newlib-generator/src/main.rs | 32 --- 4 files changed, 285 deletions(-) delete mode 100644 libm/crates/newlib-generator/Cargo.toml delete mode 100644 libm/crates/newlib-generator/src/macros.rs delete mode 100644 libm/crates/newlib-generator/src/main.rs diff --git a/libm/Cargo.toml b/libm/Cargo.toml index b4f07eeee..c2ac902d9 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -20,7 +20,6 @@ members = [ "crates/compiler-builtins-smoke-test", "crates/input-generator", "crates/musl-generator", - "crates/newlib-generator", "crates/shared", ] diff --git a/libm/crates/newlib-generator/Cargo.toml b/libm/crates/newlib-generator/Cargo.toml deleted file mode 100644 index 5766cb4b7..000000000 --- a/libm/crates/newlib-generator/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] -name = "newlib-generator" -version = "0.1.0" -authors = ["Jorge Aparicio "] - -[dependencies] -shared = { path = "../shared" } diff --git a/libm/crates/newlib-generator/src/macros.rs b/libm/crates/newlib-generator/src/macros.rs deleted file mode 100644 index 84315a777..000000000 --- a/libm/crates/newlib-generator/src/macros.rs +++ /dev/null @@ -1,245 +0,0 @@ -macro_rules! 
f32 { - ($($fun:ident,)+) => { - $( - let fun = stringify!($fun); - - fs::create_dir_all("math/src")?; - - let main = format!(" -#![no_main] -#![no_std] - -#[macro_use] -extern crate qemu_arm_rt as rt; - -use core::u32; - -use rt::{{io, process}}; - -entry!(main); - -fn main() {{ - run().unwrap_or_else(|e| {{ - eprintln!(\"error: {{}}\", e); - process::exit(1); - }}) -}} - -fn run() -> Result<(), usize> {{ - #[link(name = \"m\")] - extern \"C\" {{ - fn {0}(_: f32) -> f32; - }} - - let mut buf = [0; 4]; - while let Ok(()) = io::Stdin.read_exact(&mut buf) {{ - let x = f32::from_bits(u32::from_bytes(buf)); - let y = unsafe {{ {0}(x) }}; - - io::Stdout.write_all(&y.to_bits().to_bytes())?; - }} - - Ok(()) -}} - -#[no_mangle] -pub fn __errno() -> *mut i32 {{ - static mut ERRNO: i32 = 0; - unsafe {{ &mut ERRNO }} -}} -", fun); - - File::create("math/src/main.rs")?.write_all(main.as_bytes())?; - - assert!( - Command::new("cross") - .args(&["build", "--target", "thumbv7em-none-eabi", "--release"]) - .current_dir("math") - .status()? - .success() - ); - - let mut qemu = Command::new("qemu-arm") - .arg("math/target/thumbv7em-none-eabi/release/math") - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .spawn()?; - - qemu.stdin.as_mut().take().unwrap().write_all(F32)?; - - let output = qemu.wait_with_output()?; - - File::create(concat!("bin/output/newlib.", stringify!($fun)))? - .write_all(&output.stdout)?; - )+ - } -} - -macro_rules! 
f32f32 { - ($($fun:ident,)+) => { - $( - let fun = stringify!($fun); - - fs::create_dir_all("math/src")?; - - let main = format!(" -#![no_main] -#![no_std] - -#[macro_use] -extern crate qemu_arm_rt as rt; - -use core::u32; - -use rt::{{io, process}}; - -entry!(main); - -fn main() {{ - run().unwrap_or_else(|e| {{ - eprintln!(\"error: {{}}\", e); - process::exit(1); - }}) -}} - -fn run() -> Result<(), usize> {{ - #[link(name = \"m\")] - extern \"C\" {{ - fn {0}(_: f32, _: f32) -> f32; - }} - - let mut chunk = [0; 8]; - while let Ok(()) = io::Stdin.read_exact(&mut chunk) {{ - let mut buf = [0; 4]; - buf.copy_from_slice(&chunk[..4]); - let x0 = f32::from_bits(u32::from_bytes(buf)); - - buf.copy_from_slice(&chunk[4..]); - let x1 = f32::from_bits(u32::from_bytes(buf)); - - let y = unsafe {{ {0}(x0, x1) }}; - - io::Stdout.write_all(&y.to_bits().to_bytes())?; - }} - - Ok(()) -}} - -#[no_mangle] -pub fn __errno() -> *mut i32 {{ - static mut ERRNO: i32 = 0; - unsafe {{ &mut ERRNO }} -}} -", fun); - - File::create("math/src/main.rs")?.write_all(main.as_bytes())?; - - assert!( - Command::new("cross") - .args(&["build", "--target", "thumbv7em-none-eabi", "--release"]) - .current_dir("math") - .status()? - .success() - ); - - let mut qemu = Command::new("qemu-arm") - .arg("math/target/thumbv7em-none-eabi/release/math") - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .spawn()?; - - qemu.stdin.as_mut().take().unwrap().write_all(F32)?; - - let output = qemu.wait_with_output()?; - - File::create(concat!("bin/output/newlib.", stringify!($fun)))? - .write_all(&output.stdout)?; - )+ - } -} - -macro_rules! 
f32f32f32 { - ($($fun:ident,)+) => { - $( - let fun = stringify!($fun); - - fs::create_dir_all("math/src")?; - - let main = format!(" -#![no_main] -#![no_std] - -#[macro_use] -extern crate qemu_arm_rt as rt; - -use core::u32; - -use rt::{{io, process}}; - -entry!(main); - -fn main() {{ - run().unwrap_or_else(|e| {{ - eprintln!(\"error: {{}}\", e); - process::exit(1); - }}) -}} - -fn run() -> Result<(), usize> {{ - #[link(name = \"m\")] - extern \"C\" {{ - fn {0}(_: f32, _: f32, _: f32) -> f32; - }} - - let mut chunk = [0; 12]; - while let Ok(()) = io::Stdin.read_exact(&mut chunk) {{ - let mut buf = [0; 4]; - buf.copy_from_slice(&chunk[..4]); - let x0 = f32::from_bits(u32::from_bytes(buf)); - - buf.copy_from_slice(&chunk[4..8]); - let x1 = f32::from_bits(u32::from_bytes(buf)); - - buf.copy_from_slice(&chunk[8..]); - let x2 = f32::from_bits(u32::from_bytes(buf)); - - let y = unsafe {{ {0}(x0, x1, x2) }}; - - io::Stdout.write_all(&y.to_bits().to_bytes())?; - }} - - Ok(()) -}} - -#[no_mangle] -pub fn __errno() -> *mut i32 {{ - static mut ERRNO: i32 = 0; - unsafe {{ &mut ERRNO }} -}} -", fun); - - File::create("math/src/main.rs")?.write_all(main.as_bytes())?; - - assert!( - Command::new("cross") - .args(&["build", "--target", "thumbv7em-none-eabi", "--release"]) - .current_dir("math") - .status()? - .success() - ); - - let mut qemu = Command::new("qemu-arm") - .arg("math/target/thumbv7em-none-eabi/release/math") - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .spawn()?; - - qemu.stdin.as_mut().take().unwrap().write_all(F32)?; - - let output = qemu.wait_with_output()?; - - File::create(concat!("bin/output/newlib.", stringify!($fun)))? 
- .write_all(&output.stdout)?; - )+ - } -} diff --git a/libm/crates/newlib-generator/src/main.rs b/libm/crates/newlib-generator/src/main.rs deleted file mode 100644 index 52a97cabb..000000000 --- a/libm/crates/newlib-generator/src/main.rs +++ /dev/null @@ -1,32 +0,0 @@ -extern crate shared; - -use std::error::Error; -use std::fs::{self, File}; -use std::io::Write; -use std::process::{Command, Stdio}; - -#[macro_use] -mod macros; - -fn main() -> Result<(), Box> { - const F32: &[u8] = include_bytes!("../../bin/input/f32"); - - f32! { - asinf, - cbrtf, - cosf, - exp2f, - sinf, - tanf, - } - - f32f32! { - hypotf, - } - - f32f32f32! { - fmaf, - } - - Ok(()) -} From d626dd0f694ca81c6d3891f38786685cff675abe Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 10:48:55 -0700 Subject: [PATCH 0200/1459] Rework how testing is done Use a build script to generate musl reference outputs and then ensure that everything gets hooked up to actually run reference tests. --- libm/Cargo.toml | 9 +- libm/build.rs | 344 +++++++++++++++++ libm/crates/input-generator/Cargo.toml | 7 - libm/crates/input-generator/src/main.rs | 189 --------- libm/crates/musl-generator/Cargo.toml | 9 - libm/crates/musl-generator/src/macros.rs | 191 --------- libm/crates/musl-generator/src/main.rs | 97 ----- libm/crates/shared/Cargo.toml | 7 - libm/crates/shared/src/lib.rs | 469 ----------------------- libm/src/lib.rs | 3 + 10 files changed, 351 insertions(+), 974 deletions(-) create mode 100644 libm/build.rs delete mode 100644 libm/crates/input-generator/Cargo.toml delete mode 100644 libm/crates/input-generator/src/main.rs delete mode 100644 libm/crates/musl-generator/Cargo.toml delete mode 100644 libm/crates/musl-generator/src/macros.rs delete mode 100644 libm/crates/musl-generator/src/main.rs delete mode 100644 libm/crates/shared/Cargo.toml delete mode 100644 libm/crates/shared/src/lib.rs diff --git a/libm/Cargo.toml b/libm/Cargo.toml index c2ac902d9..7d9890e3e 100644 --- a/libm/Cargo.toml +++ 
b/libm/Cargo.toml @@ -8,20 +8,19 @@ license = "MIT OR Apache-2.0" name = "libm" repository = "https://github.com/japaric/libm" version = "0.1.2" +edition = "2018" [features] # only used to run our test suite checked = [] default = ['stable'] stable = [] +musl-reference-tests = ['rand'] [workspace] members = [ "crates/compiler-builtins-smoke-test", - "crates/input-generator", - "crates/musl-generator", - "crates/shared", ] -[dev-dependencies] -shared = { path = "shared" } +[build-dependencies] +rand = { version = "0.6.5", optional = true } diff --git a/libm/build.rs b/libm/build.rs new file mode 100644 index 000000000..31b1bbea9 --- /dev/null +++ b/libm/build.rs @@ -0,0 +1,344 @@ +fn main() { + println!("cargo:rerun-if-changed=build.rs"); + + #[cfg(feature = "musl-reference-tests")] + musl_reference_tests::generate(); +} + +#[cfg(feature = "musl-reference-tests")] +mod musl_reference_tests { + use rand::seq::SliceRandom; + use rand::Rng; + use std::fs; + use std::process::Command; + + // Number of tests to generate for each function + const NTESTS: usize = 500; + + // These files are all internal functions or otherwise miscellaneous, not + // defining a function we want to test. 
+ const IGNORED_FILES: &[&str] = &[ + "expo2.rs", + "fenv.rs", + "k_cos.rs", + "k_cosf.rs", + "k_expo2.rs", + "k_expo2f.rs", + "k_sin.rs", + "k_sinf.rs", + "k_tan.rs", + "k_tanf.rs", + "mod.rs", + "rem_pio2.rs", + "rem_pio2_large.rs", + "rem_pio2f.rs", + ]; + + struct Function { + name: String, + args: Vec, + ret: Ty, + tests: Vec, + } + + enum Ty { + F32, + F64, + I32, + Bool, + } + + struct Test { + inputs: Vec, + output: i64, + } + + pub fn generate() { + let files = fs::read_dir("src/math") + .unwrap() + .map(|f| f.unwrap().path()) + .collect::>(); + + let mut math = Vec::new(); + for file in files { + if IGNORED_FILES.iter().any(|f| file.ends_with(f)) { + continue; + } + + println!("generating musl reference tests in {:?}", file); + + let contents = fs::read_to_string(file).unwrap(); + let mut functions = contents.lines().filter(|f| f.starts_with("pub fn")); + let function_to_test = functions.next().unwrap(); + if functions.next().is_some() { + panic!("more than one function in"); + } + + math.push(parse(function_to_test)); + } + + // Generate a bunch of random inputs for each function. This will + // attempt to generate a good set of uniform test cases for exercising + // all the various functionality. + generate_random_tests(&mut math, &mut rand::thread_rng()); + + // After we have all our inputs, use the x86_64-unknown-linux-musl + // target to generate the expected output. + generate_test_outputs(&mut math); + + // ... and now that we have both inputs and expected outputs, do a bunch + // of codegen to create the unit tests which we'll actually execute. 
+ generate_unit_tests(&math); + } + + /// A "poor man's" parser for the signature of a function + fn parse(s: &str) -> Function { + let s = eat(s, "pub fn "); + let pos = s.find('(').unwrap(); + let name = &s[..pos]; + let s = &s[pos + 1..]; + let end = s.find(')').unwrap(); + let args = s[..end] + .split(',') + .map(|arg| { + let colon = arg.find(':').unwrap(); + parse_ty(arg[colon + 1..].trim()) + }) + .collect::>(); + let tail = &s[end + 1..]; + let tail = eat(tail, " -> "); + let ret = parse_ty(tail.trim().split(' ').next().unwrap()); + + return Function { + name: name.to_string(), + args, + ret, + tests: Vec::new(), + }; + + fn parse_ty(s: &str) -> Ty { + match s { + "f32" => Ty::F32, + "f64" => Ty::F64, + "i32" => Ty::I32, + "bool" => Ty::Bool, + other => panic!("unknown type `{}`", other), + } + } + + fn eat<'a>(s: &'a str, prefix: &str) -> &'a str { + if s.starts_with(prefix) { + &s[prefix.len()..] + } else { + panic!("{:?} didn't start with {:?}", s, prefix) + } + } + } + + fn generate_random_tests(functions: &mut [Function], rng: &mut R) { + for function in functions { + for _ in 0..NTESTS { + function.tests.push(generate_test(&function.args, rng)); + } + } + + fn generate_test(args: &[Ty], rng: &mut R) -> Test { + let inputs = args.iter().map(|ty| ty.gen_i64(rng)).collect(); + // zero output for now since we'll generate it later + Test { inputs, output: 0 } + } + } + + impl Ty { + fn gen_i64(&self, r: &mut R) -> i64 { + match self { + Ty::F32 => r.gen::().to_bits().into(), + Ty::F64 => r.gen::().to_bits() as i64, + Ty::I32 => { + if r.gen_range(0, 10) < 1 { + let i = *[i32::max_value(), 0, i32::min_value()].choose(r).unwrap(); + i.into() + } else { + r.gen::().into() + } + } + Ty::Bool => r.gen::() as i64, + } + } + + fn libc_ty(&self) -> &'static str { + match self { + Ty::F32 => "f32", + Ty::F64 => "f64", + Ty::I32 => "i32", + Ty::Bool => "i32", + } + } + } + + fn generate_test_outputs(functions: &mut [Function]) { + let mut src = String::new(); + let 
dst = std::env::var("OUT_DIR").unwrap(); + + // Generate a program which will run all tests with all inputs in + // `functions`. This program will write all outputs to stdout (in a + // binary format). + src.push_str("use std::io::Write;"); + src.push_str("fn main() {"); + src.push_str("let mut result = Vec::new();"); + for function in functions.iter_mut() { + src.push_str("unsafe {"); + src.push_str("extern { fn "); + src.push_str(&function.name); + src.push_str("("); + for (i, arg) in function.args.iter().enumerate() { + src.push_str(&format!("arg{}: {},", i, arg.libc_ty())); + } + src.push_str(") -> "); + src.push_str(function.ret.libc_ty()); + src.push_str("; }"); + + src.push_str(&format!("static TESTS: &[[i64; {}]]", function.args.len())); + src.push_str(" = &["); + for test in function.tests.iter() { + src.push_str("["); + for val in test.inputs.iter() { + src.push_str(&val.to_string()); + src.push_str(","); + } + src.push_str("],"); + } + src.push_str("];"); + + src.push_str("for test in TESTS {"); + src.push_str("let output = "); + src.push_str(&function.name); + src.push_str("("); + for (i, arg) in function.args.iter().enumerate() { + src.push_str(&match arg { + Ty::F32 => format!("f32::from_bits(test[{}] as u32)", i), + Ty::F64 => format!("f64::from_bits(test[{}] as u64)", i), + Ty::I32 => format!("test[{}] as i32", i), + Ty::Bool => format!("test[{}] as i32", i), + }); + src.push_str(","); + } + src.push_str(");"); + src.push_str("let output = "); + src.push_str(match function.ret { + Ty::F32 => "output.to_bits() as i64", + Ty::F64 => "output.to_bits() as i64", + Ty::I32 => "output as i64", + Ty::Bool => "output as i64", + }); + src.push_str(";"); + src.push_str("result.extend_from_slice(&output.to_le_bytes());"); + + src.push_str("}"); + + src.push_str("}"); + } + + src.push_str("std::io::stdout().write_all(&result).unwrap();"); + + src.push_str("}"); + + let path = format!("{}/gen.rs", dst); + fs::write(&path, src).unwrap(); + + // Make it somewhat 
pretty if something goes wrong + drop(Command::new("rustfmt").arg(&path).status()); + + // Compile and execute this tests for the musl target, assuming we're an + // x86_64 host effectively. + let status = Command::new("rustc") + .current_dir(&dst) + .arg(&path) + .arg("--target=x86_64-unknown-linux-musl") + .status() + .unwrap(); + assert!(status.success()); + let output = Command::new("./gen") + .current_dir(&dst) + .output() + .unwrap(); + assert!(output.status.success()); + assert!(output.stderr.is_empty()); + + // Map all the output bytes back to an `i64` and then shove it all into + // the expected results. + let mut results = + output.stdout.chunks_exact(8) + .map(|buf| { + let mut exact = [0; 8]; + exact.copy_from_slice(buf); + i64::from_le_bytes(exact) + }); + + for test in functions.iter_mut().flat_map(|f| f.tests.iter_mut()) { + test.output = results.next().unwrap(); + } + assert!(results.next().is_none()); + } + + /// Codegens a file which has a ton of `#[test]` annotations for all the + /// tests that we generated above. 
+ fn generate_unit_tests(functions: &[Function]) { + let mut src = String::new(); + let dst = std::env::var("OUT_DIR").unwrap(); + + for function in functions { + src.push_str("#[test]"); + src.push_str("fn "); + src.push_str(&function.name); + src.push_str("_matches_musl() {"); + src.push_str(&format!("static TESTS: &[([i64; {}], i64)]", function.args.len())); + src.push_str(" = &["); + for test in function.tests.iter() { + src.push_str("(["); + for val in test.inputs.iter() { + src.push_str(&val.to_string()); + src.push_str(","); + } + src.push_str("],"); + src.push_str(&test.output.to_string()); + src.push_str("),"); + } + src.push_str("];"); + + src.push_str("for (test, expected) in TESTS {"); + src.push_str("let output = "); + src.push_str(&function.name); + src.push_str("("); + for (i, arg) in function.args.iter().enumerate() { + src.push_str(&match arg { + Ty::F32 => format!("f32::from_bits(test[{}] as u32)", i), + Ty::F64 => format!("f64::from_bits(test[{}] as u64)", i), + Ty::I32 => format!("test[{}] as i32", i), + Ty::Bool => format!("test[{}] as i32", i), + }); + src.push_str(","); + } + src.push_str(");"); + src.push_str(match function.ret { + Ty::F32 => "if _eqf(output, f32::from_bits(*expected as u32)).is_ok() { continue }", + Ty::F64 => "if _eq(output, f64::from_bits(*expected as u64)).is_ok() { continue }", + Ty::I32 => "if output as i64 == expected { continue }", + Ty::Bool => unreachable!(), + }); + + src.push_str(r#" + panic!("INPUT: {:?} EXPECTED: {:?} ACTUAL {:?}", test, expected, output); + "#); + src.push_str("}"); + + src.push_str("}"); + } + + let path = format!("{}/tests.rs", dst); + fs::write(&path, src).unwrap(); + + // Try to make it somewhat pretty + drop(Command::new("rustfmt").arg(&path).status()); + } +} diff --git a/libm/crates/input-generator/Cargo.toml b/libm/crates/input-generator/Cargo.toml deleted file mode 100644 index fef2558a8..000000000 --- a/libm/crates/input-generator/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] 
-name = "input-generator" -version = "0.1.0" -authors = ["Jorge Aparicio "] - -[dependencies] -rand = "0.5.4" diff --git a/libm/crates/input-generator/src/main.rs b/libm/crates/input-generator/src/main.rs deleted file mode 100644 index 0746ea477..000000000 --- a/libm/crates/input-generator/src/main.rs +++ /dev/null @@ -1,189 +0,0 @@ -extern crate rand; - -use std::collections::BTreeSet; -use std::error::Error; -use std::fs::{self, File}; -use std::io::Write; - -use rand::{RngCore, SeedableRng, XorShiftRng}; - -const NTESTS: usize = 10_000; - -fn main() -> Result<(), Box> { - let mut rng = XorShiftRng::from_rng(&mut rand::thread_rng())?; - - fs::remove_dir_all("bin").ok(); - fs::create_dir_all("bin/input")?; - fs::create_dir_all("bin/output")?; - - f32(&mut rng)?; - f32f32(&mut rng)?; - f32f32f32(&mut rng)?; - f32i16(&mut rng)?; - f64(&mut rng)?; - f64f64(&mut rng)?; - f64f64f64(&mut rng)?; - f64i16(&mut rng)?; - - Ok(()) -} - -fn f32(rng: &mut XorShiftRng) -> Result<(), Box> { - let mut set = BTreeSet::new(); - - while set.len() < NTESTS { - let f = f32::from_bits(rng.next_u32()); - - if f.is_nan() { - continue; - } - - set.insert(f.to_bits()); - } - - let mut f = File::create("bin/input/f32")?; - for i in set { - f.write_all(&i.to_le_bytes())?; - } - - Ok(()) -} - -fn f32f32(rng: &mut XorShiftRng) -> Result<(), Box> { - let mut f = File::create("bin/input/f32f32")?; - let mut i = 0; - while i < NTESTS { - let x0 = f32::from_bits(rng.next_u32()); - let x1 = f32::from_bits(rng.next_u32()); - - if x0.is_nan() || x1.is_nan() { - continue; - } - - i += 1; - f.write_all(&x0.to_bits().to_le_bytes())?; - f.write_all(&x1.to_bits().to_le_bytes())?; - } - - Ok(()) -} - -fn f32i16(rng: &mut XorShiftRng) -> Result<(), Box> { - let mut f = File::create("bin/input/f32i16")?; - let mut i = 0; - while i < NTESTS { - let x0 = f32::from_bits(rng.next_u32()); - let x1 = rng.next_u32() as i16; - - if x0.is_nan() { - continue; - } - - i += 1; - 
f.write_all(&x0.to_bits().to_le_bytes())?; - f.write_all(&x1.to_le_bytes())?; - } - - Ok(()) -} - -fn f32f32f32(rng: &mut XorShiftRng) -> Result<(), Box> { - let mut f = File::create("bin/input/f32f32f32")?; - let mut i = 0; - while i < NTESTS { - let x0 = f32::from_bits(rng.next_u32()); - let x1 = f32::from_bits(rng.next_u32()); - let x2 = f32::from_bits(rng.next_u32()); - - if x0.is_nan() || x1.is_nan() || x2.is_nan() { - continue; - } - - i += 1; - f.write_all(&x0.to_bits().to_le_bytes())?; - f.write_all(&x1.to_bits().to_le_bytes())?; - f.write_all(&x2.to_bits().to_le_bytes())?; - } - - Ok(()) -} - -fn f64(rng: &mut XorShiftRng) -> Result<(), Box> { - let mut set = BTreeSet::new(); - - while set.len() < NTESTS { - let f = f64::from_bits(rng.next_u64()); - - if f.is_nan() { - continue; - } - - set.insert(f.to_bits()); - } - - let mut f = File::create("bin/input/f64")?; - for i in set { - f.write_all(&i.to_le_bytes())?; - } - - Ok(()) -} - -fn f64f64(rng: &mut XorShiftRng) -> Result<(), Box> { - let mut f = File::create("bin/input/f64f64")?; - let mut i = 0; - while i < NTESTS { - let x0 = f64::from_bits(rng.next_u64()); - let x1 = f64::from_bits(rng.next_u64()); - - if x0.is_nan() || x1.is_nan() { - continue; - } - - i += 1; - f.write_all(&x0.to_bits().to_le_bytes())?; - f.write_all(&x1.to_bits().to_le_bytes())?; - } - - Ok(()) -} - -fn f64f64f64(rng: &mut XorShiftRng) -> Result<(), Box> { - let mut f = File::create("bin/input/f64f64f64")?; - let mut i = 0; - while i < NTESTS { - let x0 = f64::from_bits(rng.next_u64()); - let x1 = f64::from_bits(rng.next_u64()); - let x2 = f64::from_bits(rng.next_u64()); - - if x0.is_nan() || x1.is_nan() || x2.is_nan() { - continue; - } - - i += 1; - f.write_all(&x0.to_bits().to_le_bytes())?; - f.write_all(&x1.to_bits().to_le_bytes())?; - f.write_all(&x2.to_bits().to_le_bytes())?; - } - - Ok(()) -} - -fn f64i16(rng: &mut XorShiftRng) -> Result<(), Box> { - let mut f = File::create("bin/input/f64i16")?; - let mut i = 0; - while i 
< NTESTS { - let x0 = f64::from_bits(rng.next_u64()); - let x1 = rng.next_u32() as i16; - - if x0.is_nan() { - continue; - } - - i += 1; - f.write_all(&x0.to_bits().to_le_bytes())?; - f.write_all(&x1.to_le_bytes())?; - } - - Ok(()) -} diff --git a/libm/crates/musl-generator/Cargo.toml b/libm/crates/musl-generator/Cargo.toml deleted file mode 100644 index 0564f3536..000000000 --- a/libm/crates/musl-generator/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] -name = "musl-generator" -version = "0.1.0" -authors = ["Jorge Aparicio "] - -[dependencies] -lazy_static = "1.0.2" -shared = { path = "../shared" } -libm = { path = ".." } diff --git a/libm/crates/musl-generator/src/macros.rs b/libm/crates/musl-generator/src/macros.rs deleted file mode 100644 index e47c0ab6f..000000000 --- a/libm/crates/musl-generator/src/macros.rs +++ /dev/null @@ -1,191 +0,0 @@ -macro_rules! f32 { - ($($fun:ident,)+) => {{ - $( - // check type signature - let _: fn(f32) -> f32 = libm::$fun; - let mut $fun = File::create(concat!("bin/output/musl.", stringify!($fun)))?; - )+ - - for x in shared::F32.iter() { - $( - let y = unsafe { - extern "C" { - fn $fun(_: f32) -> f32; - } - - $fun(*x) - }; - - $fun.write_all(&y.to_bits().to_le_bytes())?; - )+ - } - }}; -} - -macro_rules! f32f32 { - ($($fun:ident,)+) => {{ - $( - // check type signature - let _: fn(f32, f32) -> f32 = libm::$fun; - let mut $fun = File::create(concat!("bin/output/musl.", stringify!($fun)))?; - )+ - - for (x0, x1) in shared::F32F32.iter() { - $( - let y = unsafe { - extern "C" { - fn $fun(_: f32, _: f32) -> f32; - } - - $fun(*x0, *x1) - }; - - $fun.write_all(&y.to_bits().to_le_bytes())?; - )+ - } - }}; -} - -macro_rules! 
f32f32f32 { - ($($fun:ident,)+) => {{ - $( - // check type signature - let _: fn(f32, f32, f32) -> f32 = libm::$fun; - let mut $fun = File::create(concat!("bin/output/musl.", stringify!($fun)))?; - )+ - - for (x0, x1, x2) in shared::F32F32F32.iter() { - $( - let y = unsafe { - extern "C" { - fn $fun(_: f32, _: f32, _: f32) -> f32; - } - - $fun(*x0, *x1, *x2) - }; - - $fun.write_all(&y.to_bits().to_le_bytes())?; - )+ - } - }}; -} - -macro_rules! f32i32 { - ($($fun:ident,)+) => {{ - $( - // check type signature - let _: fn(f32, i32) -> f32 = libm::$fun; - let mut $fun = File::create(concat!("bin/output/musl.", stringify!($fun)))?; - )+ - - for (x0, x1) in shared::F32I32.iter() { - $( - let y = unsafe { - extern "C" { - fn $fun(_: f32, _: i32) -> f32; - } - - $fun(*x0, *x1 as i32) - }; - - $fun.write_all(&y.to_bits().to_le_bytes())?; - )+ - } - }}; -} - -macro_rules! f64 { - ($($fun:ident,)+) => {{ - $( - // check type signature - let _: fn(f64) -> f64 = libm::$fun; - let mut $fun = File::create(concat!("bin/output/musl.", stringify!($fun)))?; - )+ - - for x in shared::F64.iter() { - $( - let y = unsafe { - extern "C" { - fn $fun(_: f64) -> f64; - } - - $fun(*x) - }; - - $fun.write_all(&y.to_bits().to_le_bytes())?; - )+ - } - }}; -} - -macro_rules! f64f64 { - ($($fun:ident,)+) => {{ - $( - // check type signature - let _: fn(f64, f64) -> f64 = libm::$fun; - let mut $fun = File::create(concat!("bin/output/musl.", stringify!($fun)))?; - )+ - - for (x0, x1) in shared::F64F64.iter() { - $( - let y = unsafe { - extern "C" { - fn $fun(_: f64, _: f64) -> f64; - } - - $fun(*x0, *x1) - }; - - $fun.write_all(&y.to_bits().to_le_bytes())?; - )+ - } - }}; -} - -macro_rules! 
f64f64f64 { - ($($fun:ident,)+) => {{ - $( - // check type signature - let _: fn(f64, f64, f64) -> f64 = libm::$fun; - let mut $fun = File::create(concat!("bin/output/musl.", stringify!($fun)))?; - )+ - - for (x0, x1, x2) in shared::F64F64F64.iter() { - $( - let y = unsafe { - extern "C" { - fn $fun(_: f64, _: f64, _: f64) -> f64; - } - - $fun(*x0, *x1, *x2) - }; - - $fun.write_all(&y.to_bits().to_le_bytes())?; - )+ - } - }}; -} - -macro_rules! f64i32 { - ($($fun:ident,)+) => {{ - $( - // check type signature - let _: fn(f64, i32) -> f64 = libm::$fun; - let mut $fun = File::create(concat!("bin/output/musl.", stringify!($fun)))?; - )+ - - for (x0, x1) in shared::F64I32.iter() { - $( - let y = unsafe { - extern "C" { - fn $fun(_: f64, _: i32) -> f64; - } - - $fun(*x0, *x1 as i32) - }; - - $fun.write_all(&y.to_bits().to_le_bytes())?; - )+ - } - }}; -} diff --git a/libm/crates/musl-generator/src/main.rs b/libm/crates/musl-generator/src/main.rs deleted file mode 100644 index 6e57e856d..000000000 --- a/libm/crates/musl-generator/src/main.rs +++ /dev/null @@ -1,97 +0,0 @@ -extern crate libm; -extern crate shared; - -use std::error::Error; -use std::fs::File; -use std::io::Write; - -#[macro_use] -mod macros; - -fn main() -> Result<(), Box> { - f32! { - acosf, - asinf, - atanf, - cbrtf, - ceilf, - cosf, - coshf, - exp2f, - expf, - expm1f, - fabsf, - floorf, - log10f, - log1pf, - log2f, - logf, - roundf, - sinf, - sinhf, - sqrtf, - tanf, - tanhf, - truncf, - } - - f32f32! { - atan2f, - fdimf, - fmodf, - hypotf, - powf, - } - - f32i32! { - scalbnf, - } - - f32f32f32! { - fmaf, - } - - f64! { - acos, - asin, - atan, - cbrt, - ceil, - cos, - cosh, - exp, - exp2, - expm1, - fabs, - floor, - log, - log10, - log1p, - log2, - round, - sin, - sinh, - sqrt, - tan, - tanh, - trunc, - } - - f64f64! { - atan2, - fdim, - fmod, - hypot, - pow, - } - - f64i32! { - scalbn, - } - - f64f64f64! 
{ - fma, - } - - Ok(()) -} diff --git a/libm/crates/shared/Cargo.toml b/libm/crates/shared/Cargo.toml deleted file mode 100644 index d77823781..000000000 --- a/libm/crates/shared/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] -name = "shared" -version = "0.1.0" -authors = ["Jorge Aparicio "] - -[dependencies] -lazy_static = "1.0.2" diff --git a/libm/crates/shared/src/lib.rs b/libm/crates/shared/src/lib.rs deleted file mode 100644 index 17c20a332..000000000 --- a/libm/crates/shared/src/lib.rs +++ /dev/null @@ -1,469 +0,0 @@ -#[macro_use] -extern crate lazy_static; - -lazy_static! { - pub static ref F32: Vec = { - let bytes = include_bytes!("../../bin/input/f32"); - - bytes - .chunks_exact(4) - .map(|chunk| { - let mut buf = [0; 4]; - buf.copy_from_slice(chunk); - f32::from_bits(u32::from_le(u32::from_le_bytes(buf))) - }) - .collect() - }; - pub static ref F32F32: Vec<(f32, f32)> = { - let bytes = include_bytes!("../../bin/input/f32f32"); - - bytes - .chunks_exact(8) - .map(|chunk| { - let mut x0 = [0; 4]; - let mut x1 = [0; 4]; - x0.copy_from_slice(&chunk[..4]); - x1.copy_from_slice(&chunk[4..]); - - ( - f32::from_bits(u32::from_le(u32::from_le_bytes(x0))), - f32::from_bits(u32::from_le(u32::from_le_bytes(x1))), - ) - }) - .collect() - }; - pub static ref F32F32F32: Vec<(f32, f32, f32)> = { - let bytes = include_bytes!("../../bin/input/f32f32f32"); - - bytes - .chunks_exact(12) - .map(|chunk| { - let mut x0 = [0; 4]; - let mut x1 = [0; 4]; - let mut x2 = [0; 4]; - x0.copy_from_slice(&chunk[..4]); - x1.copy_from_slice(&chunk[4..8]); - x2.copy_from_slice(&chunk[8..]); - - ( - f32::from_bits(u32::from_le(u32::from_le_bytes(x0))), - f32::from_bits(u32::from_le(u32::from_le_bytes(x1))), - f32::from_bits(u32::from_le(u32::from_le_bytes(x2))), - ) - }) - .collect() - }; - pub static ref F32I32: Vec<(f32, i32)> = { - let bytes = include_bytes!("../../bin/input/f32i16"); - - bytes - .chunks_exact(6) - .map(|chunk| { - let mut x0 = [0; 4]; - let mut x1 = [0; 2]; - 
x0.copy_from_slice(&chunk[..4]); - x1.copy_from_slice(&chunk[4..]); - - ( - f32::from_bits(u32::from_le(u32::from_le_bytes(x0))), - i16::from_le(i16::from_le_bytes(x1)) as i32, - ) - }) - .collect() - }; - pub static ref F64: Vec = { - let bytes = include_bytes!("../../bin/input/f64"); - - bytes - .chunks_exact(8) - .map(|chunk| { - let mut buf = [0; 8]; - buf.copy_from_slice(chunk); - f64::from_bits(u64::from_le(u64::from_le_bytes(buf))) - }) - .collect() - }; - pub static ref F64F64: Vec<(f64, f64)> = { - let bytes = include_bytes!("../../bin/input/f64f64"); - - bytes - .chunks_exact(16) - .map(|chunk| { - let mut x0 = [0; 8]; - let mut x1 = [0; 8]; - x0.copy_from_slice(&chunk[..8]); - x1.copy_from_slice(&chunk[8..]); - - ( - f64::from_bits(u64::from_le(u64::from_le_bytes(x0))), - f64::from_bits(u64::from_le(u64::from_le_bytes(x1))), - ) - }) - .collect() - }; - pub static ref F64F64F64: Vec<(f64, f64, f64)> = { - let bytes = include_bytes!("../../bin/input/f64f64f64"); - - bytes - .chunks_exact(24) - .map(|chunk| { - let mut x0 = [0; 8]; - let mut x1 = [0; 8]; - let mut x2 = [0; 8]; - x0.copy_from_slice(&chunk[..8]); - x1.copy_from_slice(&chunk[8..16]); - x2.copy_from_slice(&chunk[16..]); - - ( - f64::from_bits(u64::from_le(u64::from_le_bytes(x0))), - f64::from_bits(u64::from_le(u64::from_le_bytes(x1))), - f64::from_bits(u64::from_le(u64::from_le_bytes(x2))), - ) - }) - .collect() - }; - pub static ref F64I32: Vec<(f64, i32)> = { - let bytes = include_bytes!("../../bin/input/f64i16"); - - bytes - .chunks_exact(10) - .map(|chunk| { - let mut x0 = [0; 8]; - let mut x1 = [0; 2]; - x0.copy_from_slice(&chunk[..8]); - x1.copy_from_slice(&chunk[8..]); - - ( - f64::from_bits(u64::from_le(u64::from_le_bytes(x0))), - i16::from_le(i16::from_le_bytes(x1)) as i32, - ) - }) - .collect() - }; -} - -#[macro_export] -macro_rules! 
f32 { - ($lib:expr, $($fun:ident),+) => { - $( - #[test] - fn $fun() { - let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) - .chunks_exact(4) - .map(|chunk| { - let mut buf = [0; 4]; - buf.copy_from_slice(chunk); - f32::from_bits(u32::from_le(u32::from_le_bytes(buf))) - }) - .collect::>(); - - for (input, expected) in $crate::F32.iter().zip(&expected) { - if let Ok(output) = panic::catch_unwind(|| libm::$fun(*input)) { - if let Err(error) = libm::_eqf(output, *expected) { - panic!( - "INPUT: {:#x}, OUTPUT: {:#x}, EXPECTED: {:#x}, ERROR: {}", - input.to_bits(), - output.to_bits(), - expected.to_bits(), - error - ); - } - } else { - panic!( - "INPUT: {:#x}, OUTPUT: PANIC!, EXPECTED: {:#x}", - input.to_bits(), - expected.to_bits() - ); - } - } - } - )+ - } -} - -#[macro_export] -macro_rules! f32f32 { - ($lib:expr, $($fun:ident),+) => { - $( - #[test] - fn $fun() { - let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) - .chunks_exact(4) - .map(|chunk| { - let mut buf = [0; 4]; - buf.copy_from_slice(chunk); - f32::from_bits(u32::from_le(u32::from_le_bytes(buf))) - }) - .collect::>(); - - for ((i0, i1), expected) in $crate::F32F32.iter().zip(&expected) { - if let Ok(output) = panic::catch_unwind(|| libm::$fun(*i0, *i1)) { - if let Err(error) = libm::_eqf(output, *expected) { - panic!( - "INPUT: ({:#x}, {:#x}), OUTPUT: {:#x}, EXPECTED: {:#x}, ERROR: {}", - i0.to_bits(), - i1.to_bits(), - output.to_bits(), - expected.to_bits(), - error - ); - } - } else { - panic!( - "INPUT: ({:#x}, {:#x}), OUTPUT: PANIC!, EXPECTED: {:#x}", - i0.to_bits(), - i1.to_bits(), - expected.to_bits() - ); - } - } - } - )+ - } -} - -#[macro_export] -macro_rules! 
f32f32f32 { - ($lib:expr, $($fun:ident),+) => { - $( - #[test] - fn $fun() { - let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) - .chunks_exact(4) - .map(|chunk| { - let mut buf = [0; 4]; - buf.copy_from_slice(chunk); - f32::from_bits(u32::from_le(u32::from_le_bytes(buf))) - }) - .collect::>(); - - for ((i0, i1, i2), expected) in $crate::F32F32F32.iter().zip(&expected) { - if let Ok(output) = panic::catch_unwind(|| libm::$fun(*i0, *i1, *i2)) { - if let Err(error) = libm::_eqf(output, *expected) { - panic!( - "INPUT: ({:#x}, {:#x}, {:#x}), OUTPUT: {:#x}, EXPECTED: {:#x}, ERROR: {}", - i0.to_bits(), - i1.to_bits(), - i2.to_bits(), - output.to_bits(), - expected.to_bits(), - error - ); - } - } else { - panic!( - "INPUT: ({:#x}, {:#x}), OUTPUT: PANIC!, EXPECTED: {:#x}", - i0.to_bits(), - i1.to_bits(), - expected.to_bits() - ); - } - } - } - )+ - } -} - -#[macro_export] -macro_rules! f32i32 { - ($lib:expr, $($fun:ident),+) => { - $( - #[test] - fn $fun() { - let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) - .chunks_exact(4) - .map(|chunk| { - let mut buf = [0; 4]; - buf.copy_from_slice(chunk); - f32::from_bits(u32::from_le(u32::from_le_bytes(buf))) - }) - .collect::>(); - - for ((i0, i1), expected) in $crate::F32I32.iter().zip(&expected) { - if let Ok(output) = panic::catch_unwind(|| libm::$fun(*i0, *i1)) { - if let Err(error) = libm::_eqf(output, *expected) { - panic!( - "INPUT: ({:#x}, {:#x}), OUTPUT: {:#x}, EXPECTED: {:#x}, ERROR: {}", - i0.to_bits(), - i1, - output.to_bits(), - expected.to_bits(), - error - ); - } - } else { - panic!( - "INPUT: ({:#x}, {:#x}), OUTPUT: PANIC!, EXPECTED: {:#x}", - i0.to_bits(), - i1, - expected.to_bits() - ); - } - } - } - )+ - } -} - -#[macro_export] -macro_rules! 
f64 { - ($lib:expr, $($fun:ident),+) => { - $( - #[test] - fn $fun() { - let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) - .chunks_exact(8) - .map(|chunk| { - let mut buf = [0; 8]; - buf.copy_from_slice(chunk); - f64::from_bits(u64::from_le(u64::from_le_bytes(buf))) - }) - .collect::>(); - - for (input, expected) in shared::F64.iter().zip(&expected) { - if let Ok(output) = panic::catch_unwind(|| libm::$fun(*input)) { - if let Err(error) = libm::_eq(output, *expected) { - panic!( - "INPUT: {:#x}, OUTPUT: {:#x}, EXPECTED: {:#x}, ERROR: {}", - input.to_bits(), - output.to_bits(), - expected.to_bits(), - error - ); - } - } else { - panic!( - "INPUT: {:#x}, OUTPUT: PANIC!, EXPECTED: {:#x}", - input.to_bits(), - expected.to_bits() - ); - } - } - } - )+ - } -} - -#[macro_export] -macro_rules! f64f64 { - ($lib:expr, $($fun:ident),+) => { - $( - #[test] - fn $fun() { - let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) - .chunks_exact(8) - .map(|chunk| { - let mut buf = [0; 8]; - buf.copy_from_slice(chunk); - f64::from_bits(u64::from_le(u64::from_le_bytes(buf))) - }) - .collect::>(); - - for ((i0, i1), expected) in shared::F64F64.iter().zip(&expected) { - if let Ok(output) = panic::catch_unwind(|| libm::$fun(*i0, *i1)) { - if let Err(error) = libm::_eq(output, *expected) { - panic!( - "INPUT: ({:#x}, {:#x}), OUTPUT: {:#x}, EXPECTED: {:#x}, ERROR: {}", - i0.to_bits(), - i1.to_bits(), - output.to_bits(), - expected.to_bits(), - error - ); - } - } else { - panic!( - "INPUT: ({:#x}, {:#x}), OUTPUT: PANIC!, EXPECTED: {:#x}", - i0.to_bits(), - i1.to_bits(), - expected.to_bits() - ); - } - } - } - )+ - } -} - -#[macro_export] -macro_rules! 
f64f64f64 { - ($lib:expr, $($fun:ident),+) => { - $( - #[test] - fn $fun() { - let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) - .chunks_exact(8) - .map(|chunk| { - let mut buf = [0; 8]; - buf.copy_from_slice(chunk); - f64::from_bits(u64::from_le(u64::from_le_bytes(buf))) - }) - .collect::>(); - - for ((i0, i1, i2), expected) in shared::F64F64F64.iter().zip(&expected) { - if let Ok(output) = panic::catch_unwind(|| libm::$fun(*i0, *i1, *i2)) { - if let Err(error) = libm::_eq(output, *expected) { - panic!( - "INPUT: ({:#x}, {:#x}, {:#x}), OUTPUT: {:#x}, EXPECTED: {:#x}, ERROR: {}", - i0.to_bits(), - i1.to_bits(), - i2.to_bits(), - output.to_bits(), - expected.to_bits(), - error - ); - } - } else { - panic!( - "INPUT: ({:#x}, {:#x}), OUTPUT: PANIC!, EXPECTED: {:#x}", - i0.to_bits(), - i1.to_bits(), - expected.to_bits() - ); - } - } - } - )+ - } -} - -#[macro_export] -macro_rules! f64i32 { - ($lib:expr, $($fun:ident),+) => { - $( - #[test] - fn $fun() { - let expected = include_bytes!(concat!("../bin/output/", $lib, ".", stringify!($fun))) - .chunks_exact(8) - .map(|chunk| { - let mut buf = [0; 8]; - buf.copy_from_slice(chunk); - f64::from_bits(u64::from_le(u64::from_le_bytes(buf))) - }) - .collect::>(); - - for ((i0, i1), expected) in shared::F64I32.iter().zip(&expected) { - if let Ok(output) = panic::catch_unwind(|| libm::$fun(*i0, *i1)) { - if let Err(error) = libm::_eq(output, *expected) { - panic!( - "INPUT: ({:#x}, {:#x}), OUTPUT: {:#x}, EXPECTED: {:#x}, ERROR: {}", - i0.to_bits(), - i1, - output.to_bits(), - expected.to_bits(), - error - ); - } - } else { - panic!( - "INPUT: ({:#x}, {:#x}), OUTPUT: PANIC!, EXPECTED: {:#x}", - i0.to_bits(), - i1, - expected.to_bits() - ); - } - } - } - )+ - } -} diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 6be458728..e705dde0f 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -625,3 +625,6 @@ mod private { impl Sealed for f32 {} impl Sealed for f64 {} } + +#[cfg(test)] 
+include!(concat!(env!("OUT_DIR"), "/tests.rs")); From 410b0633a6b9f117ea266a3ddb4a5868f59841b4 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 11:37:21 -0700 Subject: [PATCH 0201/1459] Overhaul tests * Move everything to azure pipelines * Inline docker configuration in this repo (no `cross`) * Delete `no-panic` example, use `#[no_panic]` instead. --- libm/.travis.yml | 58 -------- libm/Cargo.toml | 13 +- libm/azure-pipelines.yml | 140 +++++------------- libm/build.rs | 11 +- libm/ci/azure-test-all.yml | 39 +---- .../aarch64-unknown-linux-gnu/Dockerfile | 10 ++ .../arm-unknown-linux-gnueabi/Dockerfile | 9 ++ .../arm-unknown-linux-gnueabihf/Dockerfile | 9 ++ .../armv7-unknown-linux-gnueabihf/Dockerfile | 9 ++ .../docker/i686-unknown-linux-gnu/Dockerfile | 4 + .../docker/mips-unknown-linux-gnu/Dockerfile | 12 ++ .../mips64-unknown-linux-gnuabi64/Dockerfile | 15 ++ .../Dockerfile | 14 ++ .../mipsel-unknown-linux-gnu/Dockerfile | 12 ++ .../powerpc-unknown-linux-gnu/Dockerfile | 12 ++ .../powerpc64-unknown-linux-gnu/Dockerfile | 13 ++ .../powerpc64le-unknown-linux-gnu/Dockerfile | 13 ++ .../x86_64-unknown-linux-gnu/Dockerfile | 4 + libm/ci/install.sh | 25 ---- libm/ci/run-docker.sh | 36 +++++ libm/ci/run.sh | 12 ++ libm/ci/script.sh | 37 ----- libm/examples/no-panic.rs | 115 -------------- libm/src/lib.rs | 4 +- libm/src/math/acos.rs | 1 + libm/src/math/acosf.rs | 1 + libm/src/math/asin.rs | 1 + libm/src/math/asinf.rs | 1 + libm/src/math/atan.rs | 1 + libm/src/math/atan2.rs | 1 + libm/src/math/atan2f.rs | 1 + libm/src/math/atanf.rs | 1 + libm/src/math/cbrt.rs | 1 + libm/src/math/cbrtf.rs | 1 + libm/src/math/ceil.rs | 1 + libm/src/math/ceilf.rs | 1 + libm/src/math/cos.rs | 1 + libm/src/math/cosf.rs | 1 + libm/src/math/cosh.rs | 1 + libm/src/math/coshf.rs | 1 + libm/src/math/exp.rs | 1 + libm/src/math/exp2.rs | 1 + libm/src/math/exp2f.rs | 1 + libm/src/math/expf.rs | 1 + libm/src/math/expm1.rs | 1 + libm/src/math/expm1f.rs | 1 + libm/src/math/expo2.rs | 
1 + libm/src/math/fabs.rs | 1 + libm/src/math/fabsf.rs | 1 + libm/src/math/fdim.rs | 1 + libm/src/math/fdimf.rs | 1 + libm/src/math/floor.rs | 1 + libm/src/math/floorf.rs | 1 + libm/src/math/fma.rs | 1 + libm/src/math/fmaf.rs | 1 + libm/src/math/fmod.rs | 1 + libm/src/math/fmodf.rs | 1 + libm/src/math/hypot.rs | 1 + libm/src/math/hypotf.rs | 1 + libm/src/math/k_cos.rs | 1 + libm/src/math/k_cosf.rs | 1 + libm/src/math/k_expo2.rs | 1 + libm/src/math/k_expo2f.rs | 1 + libm/src/math/k_sin.rs | 1 + libm/src/math/k_sinf.rs | 1 + libm/src/math/k_tan.rs | 1 + libm/src/math/k_tanf.rs | 1 + libm/src/math/log.rs | 1 + libm/src/math/log10.rs | 1 + libm/src/math/log10f.rs | 1 + libm/src/math/log1p.rs | 1 + libm/src/math/log1pf.rs | 1 + libm/src/math/log2.rs | 1 + libm/src/math/log2f.rs | 1 + libm/src/math/logf.rs | 1 + libm/src/math/pow.rs | 1 + libm/src/math/powf.rs | 1 + libm/src/math/rem_pio2.rs | 1 + libm/src/math/rem_pio2_large.rs | 1 + libm/src/math/rem_pio2f.rs | 1 + libm/src/math/round.rs | 1 + libm/src/math/roundf.rs | 1 + libm/src/math/scalbn.rs | 1 + libm/src/math/scalbnf.rs | 1 + libm/src/math/sin.rs | 1 + libm/src/math/sinf.rs | 1 + libm/src/math/sinh.rs | 1 + libm/src/math/sinhf.rs | 1 + libm/src/math/sqrt.rs | 1 + libm/src/math/sqrtf.rs | 1 + libm/src/math/tan.rs | 1 + libm/src/math/tanf.rs | 1 + libm/src/math/tanh.rs | 1 + libm/src/math/tanhf.rs | 1 + libm/src/math/trunc.rs | 1 + libm/src/math/truncf.rs | 1 + 96 files changed, 313 insertions(+), 385 deletions(-) delete mode 100644 libm/.travis.yml create mode 100644 libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile create mode 100644 libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile create mode 100644 libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile create mode 100644 libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile create mode 100644 libm/ci/docker/i686-unknown-linux-gnu/Dockerfile create mode 100644 libm/ci/docker/mips-unknown-linux-gnu/Dockerfile create mode 100644 
libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile create mode 100644 libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile create mode 100644 libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile create mode 100644 libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile create mode 100644 libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile create mode 100644 libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile create mode 100644 libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile delete mode 100644 libm/ci/install.sh create mode 100755 libm/ci/run-docker.sh create mode 100755 libm/ci/run.sh delete mode 100644 libm/ci/script.sh delete mode 100644 libm/examples/no-panic.rs diff --git a/libm/.travis.yml b/libm/.travis.yml deleted file mode 100644 index 758316178..000000000 --- a/libm/.travis.yml +++ /dev/null @@ -1,58 +0,0 @@ -language: rust -services: docker -sudo: required - -matrix: - include: - - env: TARGET=aarch64-unknown-linux-gnu - rust: nightly - - env: TARGET=armv7-unknown-linux-gnueabihf - rust: nightly - - env: TARGET=i686-unknown-linux-gnu - rust: nightly - - env: TARGET=mips-unknown-linux-gnu - rust: nightly - - env: TARGET=mips64-unknown-linux-gnuabi64 - rust: nightly - - env: TARGET=mips64el-unknown-linux-gnuabi64 - rust: nightly - - env: TARGET=mipsel-unknown-linux-gnu - rust: nightly - - env: TARGET=powerpc-unknown-linux-gnu - rust: nightly - - env: TARGET=powerpc64-unknown-linux-gnu - rust: nightly - - env: TARGET=powerpc64le-unknown-linux-gnu - rust: nightly - - env: TARGET=x86_64-unknown-linux-gnu - rust: nightly - - env: TARGET=cargo-fmt - rust: beta - - - env: TARGET=wasm32-unknown-unknown - rust: nightly - install: rustup target add $TARGET - script: - - cargo build --target $TARGET - - cargo build --no-default-features --target $TARGET - -before_install: set -e - -install: - - bash ci/install.sh - -script: - - export PATH=$HOME/.local/bin:$PATH - - bash ci/script.sh - -after_script: set +e - -cache: cargo - -before_cache: - - chmod -R 
a+r $HOME/.cargo; - -branches: - only: - - staging - - trying diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 7d9890e3e..45fad8230 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -6,21 +6,30 @@ documentation = "https://docs.rs/libm" keywords = ["libm", "math"] license = "MIT OR Apache-2.0" name = "libm" -repository = "https://github.com/japaric/libm" +repository = "https://github.com/rust-lang-nursery/libm" version = "0.1.2" edition = "2018" [features] # only used to run our test suite -checked = [] default = ['stable'] stable = [] + +# Generate tests which are random inputs and the outputs are calculated with +# musl libc. musl-reference-tests = ['rand'] +# Used checked array indexing instead of unchecked array indexing in this +# library. +checked = [] + [workspace] members = [ "crates/compiler-builtins-smoke-test", ] +[dev-dependencies] +no-panic = "0.1.8" + [build-dependencies] rand = { version = "0.6.5", optional = true } diff --git a/libm/azure-pipelines.yml b/libm/azure-pipelines.yml index 36271ec1b..82a74452a 100644 --- a/libm/azure-pipelines.yml +++ b/libm/azure-pipelines.yml @@ -10,26 +10,18 @@ jobs: env: TOOLCHAIN: nightly - bash: rustup target add $TARGET - displayName: "Install rust cross target" - - bash: | - set -e - mkdir cross - curl -L https://github.com/rust-embedded/cross/releases/download/v0.1.14/cross-v0.1.14-x86_64-unknown-linux-musl.tar.gz | tar xzf - -C $HOME/.cargo/bin - displayName: "Install cross" - - bash: cross test --lib --features checked --target $TARGET --release - displayName: "Run lib tests" - - bash: cross test --tests --features checked --target $TARGET --release - displayName: "Run integration tests" + - template: ci/azure-install-rust.yml + - bash: cargo generate-lockfile && ./ci/run-docker.sh $TARGET strategy: matrix: aarch64: TARGET: aarch64-unknown-linux-gnu - armhv: + arm: + TARGET: arm-unknown-linux-gnueabi + armhf: TARGET: arm-unknown-linux-gnueabihf armv7: TARGET: armv7-unknown-linux-gnueabihf - i586: - 
TARGET: i586-unknown-linux-gnu i686: TARGET: i686-unknown-linux-gnu mips: @@ -47,97 +39,31 @@ jobs: x86_64: TARGET: x86_64-unknown-linux-gnu - # - job: Linux - # pool: - # vmImage: ubuntu-16.04 - # steps: - # - template: ci/azure-test-all.yml - # strategy: - # matrix: - # stable: - # TOOLCHAIN: stable - # beta: - # TOOLCHAIN: beta - # nightly: - # TOOLCHAIN: nightly - # - # - job: macOS - # pool: - # vmImage: macos-10.13 - # steps: - # - template: ci/azure-test-all.yml - # strategy: - # matrix: - # x86_64: - # TARGET: x86_64-apple-darwin - # - # - job: iOS - # pool: - # vmImage: macos-10.13 - # steps: - # - checkout: self - # submodules: true - # - template: ci/azure-install-rust.yml - # - script: rustup target add $TARGET - # displayName: "Install rust cross target" - # - bash: | - # set -e - # export SDK_PATH=`xcrun --show-sdk-path --sdk $SDK` - # export RUSTFLAGS="-C link-arg=-isysroot -C link-arg=$SDK_PATH" - # cargo test --no-run --target $TARGET - # displayName: "Build for iOS" - # strategy: - # matrix: - # aarch64: - # TARGET: aarch64-apple-ios - # SDK: iphoneos - # armv7: - # TARGET: armv7-apple-ios - # SDK: iphoneos - # armv7s: - # TARGET: armv7s-apple-ios - # SDK: iphoneos - # i386: - # TARGET: i386-apple-ios - # SDK: iphonesimulator - # x86_64: - # TARGET: x86_64-apple-ios - # SDK: iphonesimulator - # - # - job: wasm - # pool: - # vmImage: ubuntu-16.04 - # steps: - # - checkout: self - # submodules: true - # - template: ci/azure-install-rust.yml - # - script: rustup target add wasm32-unknown-unknown - # displayName: "Install rust cross target" - # - script: cargo build --target wasm32-unknown-unknown - # displayName: "Build for wasm" - # - # - job: Windows - # pool: - # vmImage: vs2017-win2016 - # steps: - # - template: ci/azure-test-all.yml - # strategy: - # matrix: - # x86_64-msvc: - # TARGET: x86_64-pc-windows-msvc - # i686-msvc: - # TARGET: i686-pc-windows-msvc - # x86_64-gnu: - # TARGET: x86_64-pc-windows-gnu - # i686-gnu: - # TARGET: 
i686-pc-windows-gnu - # - # - job: Windows_arm64 - # pool: - # vmImage: windows-2019 - # steps: - # - template: ci/azure-install-rust.yml - # - script: rustup target add aarch64-pc-windows-msvc - # displayName: "Install rust cross target" - # - script: cargo test --no-run --target aarch64-pc-windows-msvc - # displayName: "Build for arm64" + - job: wasm + pool: + vmImage: ubuntu-16.04 + steps: + - template: ci/azure-install-rust.yml + env: + TOOLCHAIN: nightly + - script: rustup target add wasm32-unknown-unknown + displayName: "Install rust wasm target" + - script: cargo build --target wasm32-unknown-unknown + displayName: "Build for wasm" + - script: cargo build --target wasm32-unknown-unknown --no-default-features + displayName: "Build for wasm (no default features)" + + - job: rustfmt + pool: + vmImage: ubuntu-16.04 + steps: + - template: ci/azure-install-rust.yml + - bash: rustup component add rustfmt + - bash: cargo fmt --all -- --check + + - job: compiler_builtins_works + pool: + vmImage: ubuntu-16.04 + steps: + - template: ci/azure-install-rust.yml + - bash: cargo build -p cb diff --git a/libm/build.rs b/libm/build.rs index 31b1bbea9..41dc920e9 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -1,8 +1,17 @@ +use std::env; + fn main() { println!("cargo:rerun-if-changed=build.rs"); #[cfg(feature = "musl-reference-tests")] musl_reference_tests::generate(); + + if !cfg!(feature = "checked") { + let lvl = env::var("OPT_LEVEL").unwrap(); + if lvl != "0" { + println!("cargo:rustc-cfg=assert_no_panic"); + } + } } #[cfg(feature = "musl-reference-tests")] @@ -335,7 +344,7 @@ mod musl_reference_tests { src.push_str("}"); } - let path = format!("{}/tests.rs", dst); + let path = format!("{}/musl-tests.rs", dst); fs::write(&path, src).unwrap(); // Try to make it somewhat pretty diff --git a/libm/ci/azure-test-all.yml b/libm/ci/azure-test-all.yml index b2b7124d3..36831bd54 100644 --- a/libm/ci/azure-test-all.yml +++ b/libm/ci/azure-test-all.yml @@ -1,41 +1,4 @@ steps: - - 
checkout: self - submodules: true - template: azure-install-rust.yml - - bash: cargo build --manifest-path backtrace-sys/Cargo.toml - displayName: "Build backtrace-sys" - - bash: cargo build - displayName: "Build backtrace" - - bash: cargo test - displayName: "Test backtrace" - - bash: cargo test --no-default-features - displayName: "Test backtrace (-default)" - - bash: cargo test --no-default-features --features 'std' - displayName: "Test backtrace (-default + std)" - - bash: cargo test --no-default-features --features 'libunwind std' - displayName: "Test backtrace (-default + libunwind)" - - bash: cargo test --no-default-features --features 'libunwind dladdr std' - displayName: "Test backtrace (-default + libunwind + dladdr)" - - bash: cargo test --no-default-features --features 'libunwind libbacktrace std' - displayName: "Test backtrace (-default + libunwind + libbacktrace)" - - bash: cargo test --no-default-features --features 'unix-backtrace std' - displayName: "Test backtrace (-default + unix-backtrace)" - - bash: cargo test --no-default-features --features 'unix-backtrace dladdr std' - displayName: "Test backtrace (-default + unix-backtrace + dladdr)" - - bash: cargo test --no-default-features --features 'unix-backtrace libbacktrace std' - displayName: "Test backtrace (-default + unix-backtrace + libbacktrace)" - - bash: cargo test --no-default-features --features 'serialize-serde std' - displayName: "Test backtrace (-default + serialize-serde + std)" - - bash: cargo test --no-default-features --features 'serialize-rustc std' - displayName: "Test backtrace (-default + serialize-rustc + std)" - - bash: cargo test --no-default-features --features 'serialize-rustc serialize-serde std' - displayName: "Test backtrace (-default + serialize-rustc + serialize-serde + std)" - - bash: cargo test --no-default-features --features 'cpp_demangle std' - displayName: "Test backtrace (-default + cpp_demangle + std)" - - bash: cargo test --no-default-features --features 
'gimli-symbolize std' - displayName: "Test backtrace (-default + gimli-symbolize + std)" - - bash: cargo test --no-default-features --features 'dbghelp std' - displayName: "Test backtrace (-default + dbghelp + std)" - - bash: cd ./cpp_smoke_test && cargo test - displayName: "Test cpp_smoke_test" + - bash: cargo generate-lockfile && ./ci/run-docker.sh $TARGET diff --git a/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000..9e2559f4a --- /dev/null +++ b/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -0,0 +1,10 @@ +FROM ubuntu:18.04 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-aarch64-linux-gnu libc6-dev-arm64-cross \ + qemu-user-static +ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ + CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-aarch64-static \ + QEMU_LD_PREFIX=/usr/aarch64-linux-gnu \ + RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile new file mode 100644 index 000000000..afab874bc --- /dev/null +++ b/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile @@ -0,0 +1,9 @@ +FROM ubuntu:18.04 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-linux-gnueabi libc6-dev-armel-cross qemu-user-static +ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc \ + CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_RUNNER=qemu-arm-static \ + QEMU_LD_PREFIX=/usr/arm-linux-gnueabi \ + RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile new file mode 100644 index 000000000..3ed3602b0 --- /dev/null +++ b/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile @@ -0,0 +1,9 @@ +FROM ubuntu:18.04 +RUN apt-get update && \ + apt-get install -y 
--no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-user-static +ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ + CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER=qemu-arm-static \ + QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf \ + RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile new file mode 100644 index 000000000..6617af155 --- /dev/null +++ b/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile @@ -0,0 +1,9 @@ +FROM ubuntu:18.04 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-user-static +ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ + CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER=qemu-arm-static \ + QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf \ + RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile b/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000..5783e28e1 --- /dev/null +++ b/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile @@ -0,0 +1,4 @@ +FROM ubuntu:18.04 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc-multilib libc6-dev ca-certificates diff --git a/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile b/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000..f47e8f522 --- /dev/null +++ b/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile @@ -0,0 +1,12 @@ +FROM ubuntu:18.04 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-mips-linux-gnu libc6-dev-mips-cross \ + binfmt-support qemu-user-static qemu-system-mips + +ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \ + CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER=qemu-mips-static \ + 
QEMU_LD_PREFIX=/usr/mips-linux-gnu \ + RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile new file mode 100644 index 000000000..8fa77c7bd --- /dev/null +++ b/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile @@ -0,0 +1,15 @@ +FROM ubuntu:18.04 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + gcc \ + gcc-mips64-linux-gnuabi64 \ + libc6-dev \ + libc6-dev-mips64-cross \ + qemu-user-static \ + qemu-system-mips +ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \ + CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER=qemu-mips64-static \ + CC_mips64_unknown_linux_gnuabi64=mips64-linux-gnuabi64-gcc \ + QEMU_LD_PREFIX=/usr/mips64-linux-gnuabi64 \ + RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile new file mode 100644 index 000000000..c6611d9ac --- /dev/null +++ b/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile @@ -0,0 +1,14 @@ +FROM ubuntu:18.04 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + gcc \ + gcc-mips64el-linux-gnuabi64 \ + libc6-dev \ + libc6-dev-mips64el-cross \ + qemu-user-static +ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc \ + CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER=qemu-mips64el-static \ + CC_mips64el_unknown_linux_gnuabi64=mips64el-linux-gnuabi64-gcc \ + QEMU_LD_PREFIX=/usr/mips64el-linux-gnuabi64 \ + RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile b/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000..0bc695624 --- /dev/null +++ b/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile @@ -0,0 +1,12 @@ +FROM ubuntu:18.04 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev 
ca-certificates \ + gcc-mipsel-linux-gnu libc6-dev-mipsel-cross \ + binfmt-support qemu-user-static + +ENV CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_GNU_LINKER=mipsel-linux-gnu-gcc \ + CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_GNU_RUNNER=qemu-mipsel-static \ + QEMU_LD_PREFIX=/usr/mipsel-linux-gnu \ + RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000..2d39fef61 --- /dev/null +++ b/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile @@ -0,0 +1,12 @@ +FROM ubuntu:18.04 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev qemu-user-static ca-certificates \ + gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \ + qemu-system-ppc + +ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \ + CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc-static \ + QEMU_LD_PREFIX=/usr/powerpc-linux-gnu \ + RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000..653cd3511 --- /dev/null +++ b/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile @@ -0,0 +1,13 @@ +FROM ubuntu:18.04 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-powerpc64-linux-gnu libc6-dev-ppc64-cross \ + binfmt-support qemu-user-static qemu-system-ppc + +ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc \ + CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64-static \ + CC_powerpc64_unknown_linux_gnu=powerpc64-linux-gnu-gcc \ + QEMU_LD_PREFIX=/usr/powerpc64-linux-gnu \ + RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000..63ea9af9d --- /dev/null +++ b/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile @@ 
-0,0 +1,13 @@ +FROM ubuntu:18.04 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev qemu-user-static ca-certificates \ + gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \ + qemu-system-ppc + +ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc \ + CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64le-static \ + QEMU_CPU=POWER8 \ + QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu \ + RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000..98000f4eb --- /dev/null +++ b/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -0,0 +1,4 @@ +FROM ubuntu:18.04 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates diff --git a/libm/ci/install.sh b/libm/ci/install.sh deleted file mode 100644 index af26e2d4c..000000000 --- a/libm/ci/install.sh +++ /dev/null @@ -1,25 +0,0 @@ -set -euxo pipefail - -main() { - if [ $TARGET = cargo-fmt ]; then - rustup component add rustfmt-preview - return - fi - - if ! hash cross >/dev/null 2>&1; then - cargo install cross - fi - - rustup target add x86_64-unknown-linux-musl - - if [ $TARGET != x86_64-unknown-linux-gnu ]; then - rustup target add $TARGET - fi - - mkdir -p ~/.local/bin - curl -L https://github.com/japaric/qemu-bin/raw/master/14.04/qemu-arm-2.12.0 > ~/.local/bin/qemu-arm - chmod +x ~/.local/bin/qemu-arm - qemu-arm --version -} - -main diff --git a/libm/ci/run-docker.sh b/libm/ci/run-docker.sh new file mode 100755 index 000000000..6b3066e53 --- /dev/null +++ b/libm/ci/run-docker.sh @@ -0,0 +1,36 @@ +# Small script to run tests for a target (or all targets) inside all the +# respective docker images. 
+ +set -ex + +run() { + local target=$1 + + echo $target + + # This directory needs to exist before calling docker, otherwise docker will create it but it + # will be owned by root + mkdir -p target + + docker build -t $target ci/docker/$target + docker run \ + --rm \ + --user $(id -u):$(id -g) \ + -e CARGO_HOME=/cargo \ + -e CARGO_TARGET_DIR=/target \ + -v $HOME/.cargo:/cargo \ + -v `pwd`/target:/target \ + -v `pwd`:/checkout:ro \ + -v `rustc --print sysroot`:/rust:ro \ + -w /checkout \ + $target \ + sh -c "HOME=/tmp PATH=\$PATH:/rust/bin ci/run.sh $target" +} + +if [ -z "$1" ]; then + for d in `ls ci/docker/`; do + run $d + done +else + run $1 +fi diff --git a/libm/ci/run.sh b/libm/ci/run.sh new file mode 100755 index 000000000..fabfd0ecb --- /dev/null +++ b/libm/ci/run.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +set -ex +TARGET=$1 + +cargo build --target $TARGET +cargo test --target $TARGET +cargo build --target $TARGET --release +cargo test --target $TARGET --release + +cargo test --features 'checked musl-reference-tests' --target $TARGET +cargo test --features 'checked musl-reference-tests' --target $TARGET --release diff --git a/libm/ci/script.sh b/libm/ci/script.sh deleted file mode 100644 index c3b6faa6c..000000000 --- a/libm/ci/script.sh +++ /dev/null @@ -1,37 +0,0 @@ -set -euxo pipefail - -main() { - if [ $TARGET = cargo-fmt ]; then - cargo fmt -- --check - return - fi - - # quick check - cargo check - - # check that we can source import libm into compiler-builtins - cargo check --package cb - - # generate tests - cargo run -p input-generator --target x86_64-unknown-linux-musl - cargo run -p musl-generator --target x86_64-unknown-linux-musl - cargo run -p newlib-generator - - # test that the functions don't contain invocations of `panic!` - case $TARGET in - armv7-unknown-linux-gnueabihf) - cross build --release --target $TARGET --example no-panic - ;; - esac - - # run unit tests - cross test --lib --features checked --target $TARGET --release - - # run generated 
tests - cross test --tests --features checked --target $TARGET --release - - # TODO need to fix overflow issues (cf. issue #4) - # cross test --target $TARGET -} - -main diff --git a/libm/examples/no-panic.rs b/libm/examples/no-panic.rs deleted file mode 100644 index fb79f99af..000000000 --- a/libm/examples/no-panic.rs +++ /dev/null @@ -1,115 +0,0 @@ -#![feature(lang_items)] -#![feature(panic_implementation)] -#![no_main] -#![no_std] - -extern crate libm; - -use core::panic::PanicInfo; -use core::ptr; - -macro_rules! force_eval { - ($e:expr) => { - unsafe { - core::ptr::read_volatile(&$e); - } - }; -} - -#[no_mangle] -pub fn main() { - force_eval!(libm::acos(random())); - force_eval!(libm::acosf(random())); - force_eval!(libm::asin(random())); - force_eval!(libm::asinf(random())); - force_eval!(libm::atan(random())); - force_eval!(libm::atan2(random(), random())); - force_eval!(libm::atan2f(random(), random())); - force_eval!(libm::atanf(random())); - force_eval!(libm::cbrt(random())); - force_eval!(libm::cbrtf(random())); - force_eval!(libm::ceil(random())); - force_eval!(libm::ceilf(random())); - force_eval!(libm::cos(random())); - force_eval!(libm::cosf(random())); - force_eval!(libm::cosh(random())); - force_eval!(libm::coshf(random())); - force_eval!(libm::exp(random())); - force_eval!(libm::exp2(random())); - force_eval!(libm::exp2f(random())); - force_eval!(libm::expf(random())); - force_eval!(libm::expm1(random())); - force_eval!(libm::expm1f(random())); - force_eval!(libm::fabs(random())); - force_eval!(libm::fabsf(random())); - force_eval!(libm::fdim(random(), random())); - force_eval!(libm::fdimf(random(), random())); - force_eval!(libm::floor(random())); - force_eval!(libm::floorf(random())); - force_eval!(libm::fma(random(), random(), random())); - force_eval!(libm::fmaf(random(), random(), random())); - force_eval!(libm::fmod(random(), random())); - force_eval!(libm::fmodf(random(), random())); - force_eval!(libm::hypot(random(), random())); - 
force_eval!(libm::hypotf(random(), random())); - force_eval!(libm::log(random())); - force_eval!(libm::log2(random())); - force_eval!(libm::log10(random())); - force_eval!(libm::log10f(random())); - force_eval!(libm::log1p(random())); - force_eval!(libm::log1pf(random())); - force_eval!(libm::log2f(random())); - force_eval!(libm::logf(random())); - force_eval!(libm::pow(random(), random())); - force_eval!(libm::powf(random(), random())); - force_eval!(libm::round(random())); - force_eval!(libm::roundf(random())); - force_eval!(libm::scalbn(random(), random())); - force_eval!(libm::scalbnf(random(), random())); - force_eval!(libm::sin(random())); - force_eval!(libm::sinf(random())); - force_eval!(libm::sinh(random())); - force_eval!(libm::sinhf(random())); - force_eval!(libm::sqrt(random())); - force_eval!(libm::sqrtf(random())); - force_eval!(libm::tan(random())); - force_eval!(libm::tanf(random())); - force_eval!(libm::tanh(random())); - force_eval!(libm::tanhf(random())); - force_eval!(libm::trunc(random())); - force_eval!(libm::truncf(random())); -} - -fn random() -> T -where - T: Copy, -{ - unsafe { - static mut X: usize = 0; - X += 8; - ptr::read_volatile(X as *const T) - } -} - -#[panic_implementation] -#[no_mangle] -pub fn panic(_info: &PanicInfo) -> ! 
{ - // loop {} - extern "C" { - fn thou_shalt_not_panic() -> !; - } - - unsafe { thou_shalt_not_panic() } -} - -#[link(name = "c")] -extern "C" {} - -#[lang = "eh_personality"] -fn eh() {} - -#[no_mangle] -pub extern "C" fn __aeabi_unwind_cpp_pr0() {} - -#[no_mangle] -pub extern "C" fn __aeabi_unwind_cpp_pr1() {} diff --git a/libm/src/lib.rs b/libm/src/lib.rs index e705dde0f..5e94541ab 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -626,5 +626,5 @@ mod private { impl Sealed for f64 {} } -#[cfg(test)] -include!(concat!(env!("OUT_DIR"), "/tests.rs")); +#[cfg(all(test, feature = "musl-reference-tests"))] +include!(concat!(env!("OUT_DIR"), "/musl-tests.rs")); diff --git a/libm/src/math/acos.rs b/libm/src/math/acos.rs index 276e361f3..a0d1450e3 100644 --- a/libm/src/math/acos.rs +++ b/libm/src/math/acos.rs @@ -56,6 +56,7 @@ fn r(z: f64) -> f64 { } #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn acos(x: f64) -> f64 { let x1p_120f = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ -120 let z: f64; diff --git a/libm/src/math/acosf.rs b/libm/src/math/acosf.rs index b12ed531a..d635ee8a6 100644 --- a/libm/src/math/acosf.rs +++ b/libm/src/math/acosf.rs @@ -30,6 +30,7 @@ fn r(z: f32) -> f32 { } #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn acosf(x: f32) -> f32 { let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120) diff --git a/libm/src/math/asin.rs b/libm/src/math/asin.rs index a0bb4918c..855300837 100644 --- a/libm/src/math/asin.rs +++ b/libm/src/math/asin.rs @@ -63,6 +63,7 @@ fn comp_r(z: f64) -> f64 { } #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn asin(mut x: f64) -> f64 { let z: f64; let r: f64; diff --git a/libm/src/math/asinf.rs b/libm/src/math/asinf.rs index 79c85d81d..979f1a654 100644 --- a/libm/src/math/asinf.rs +++ b/libm/src/math/asinf.rs @@ -32,6 +32,7 @@ fn r(z: f32) -> f32 { } #[inline] +#[cfg_attr(all(test, assert_no_panic), 
no_panic::no_panic)] pub fn asinf(mut x: f32) -> f32 { let x1p_120 = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ (-120) diff --git a/libm/src/math/atan.rs b/libm/src/math/atan.rs index cf6a62a54..a9cdc2933 100644 --- a/libm/src/math/atan.rs +++ b/libm/src/math/atan.rs @@ -61,6 +61,7 @@ const AT: [f64; 11] = [ ]; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atan(x: f64) -> f64 { let mut x = x; let mut ix = (x.to_bits() >> 32) as u32; diff --git a/libm/src/math/atan2.rs b/libm/src/math/atan2.rs index a91ddd84d..a702ec39f 100644 --- a/libm/src/math/atan2.rs +++ b/libm/src/math/atan2.rs @@ -44,6 +44,7 @@ const PI: f64 = 3.1415926535897931160E+00; /* 0x400921FB, 0x54442D18 */ const PI_LO: f64 = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atan2(y: f64, x: f64) -> f64 { if x.is_nan() || y.is_nan() { return x + y; diff --git a/libm/src/math/atan2f.rs b/libm/src/math/atan2f.rs index 211a992a0..94e3c7718 100644 --- a/libm/src/math/atan2f.rs +++ b/libm/src/math/atan2f.rs @@ -20,6 +20,7 @@ const PI: f32 = 3.1415927410e+00; /* 0x40490fdb */ const PI_LO: f32 = -8.7422776573e-08; /* 0xb3bbbd2e */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atan2f(y: f32, x: f32) -> f32 { if x.is_nan() || y.is_nan() { return x + y; diff --git a/libm/src/math/atanf.rs b/libm/src/math/atanf.rs index b05152e2b..5d9024022 100644 --- a/libm/src/math/atanf.rs +++ b/libm/src/math/atanf.rs @@ -38,6 +38,7 @@ const A_T: [f32; 5] = [ ]; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atanf(mut x: f32) -> f32 { let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120) diff --git a/libm/src/math/cbrt.rs b/libm/src/math/cbrt.rs index 8c37f0b26..ab11c497e 100644 --- a/libm/src/math/cbrt.rs +++ b/libm/src/math/cbrt.rs @@ -28,6 +28,7 @@ const P3: f64 = -0.758397934778766047437; /* 0xbfe844cb, 0xbee751d9 */ 
const P4: f64 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cbrt(x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 diff --git a/libm/src/math/cbrtf.rs b/libm/src/math/cbrtf.rs index 878372eef..19215b858 100644 --- a/libm/src/math/cbrtf.rs +++ b/libm/src/math/cbrtf.rs @@ -23,6 +23,7 @@ const B1: u32 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */ const B2: u32 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cbrtf(x: f32) -> f32 { let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24 diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index 5dbfa6a2c..c2b11e4e7 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -3,6 +3,7 @@ use core::f64; const TOINT: f64 = 1. / f64::EPSILON; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ceil(x: f64) -> f64 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f64.ceil` native instruction, so we can leverage this for both code size diff --git a/libm/src/math/ceilf.rs b/libm/src/math/ceilf.rs index c8cd4b5aa..5eb6a35a6 100644 --- a/libm/src/math/ceilf.rs +++ b/libm/src/math/ceilf.rs @@ -1,6 +1,7 @@ use core::f32; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ceilf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f32.ceil` native instruction, so we can leverage this for both code size diff --git a/libm/src/math/cos.rs b/libm/src/math/cos.rs index df16b5c36..fe5a89919 100644 --- a/libm/src/math/cos.rs +++ b/libm/src/math/cos.rs @@ -42,6 +42,7 @@ use super::{k_cos, k_sin, rem_pio2}; // TRIG(x) returns trig(x) nearly rounded // #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cos(x: f64) -> f64 { let ix = (f64::to_bits(x) >> 32) 
as u32 & 0x7fffffff; diff --git a/libm/src/math/cosf.rs b/libm/src/math/cosf.rs index 23faacdc2..615746a31 100644 --- a/libm/src/math/cosf.rs +++ b/libm/src/math/cosf.rs @@ -25,6 +25,7 @@ const C3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ const C4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cosf(x: f32) -> f32 { let x64 = x as f64; diff --git a/libm/src/math/cosh.rs b/libm/src/math/cosh.rs index f3f7fbfbe..b6ba338b5 100644 --- a/libm/src/math/cosh.rs +++ b/libm/src/math/cosh.rs @@ -3,6 +3,7 @@ use super::expm1; use super::k_expo2; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cosh(mut x: f64) -> f64 { /* |x| */ let mut ix = x.to_bits(); diff --git a/libm/src/math/coshf.rs b/libm/src/math/coshf.rs index bd468f5da..b37ee1f32 100644 --- a/libm/src/math/coshf.rs +++ b/libm/src/math/coshf.rs @@ -3,6 +3,7 @@ use super::expm1f; use super::k_expo2f; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn coshf(mut x: f32) -> f32 { let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 diff --git a/libm/src/math/exp.rs b/libm/src/math/exp.rs index cd63b8fb3..c32773186 100644 --- a/libm/src/math/exp.rs +++ b/libm/src/math/exp.rs @@ -78,6 +78,7 @@ const P4: f64 = -1.65339022054652515390e-06; /* 0xBEBBBD41, 0xC5D26BF1 */ const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp(mut x: f64) -> f64 { let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 let x1p_149 = f64::from_bits(0x36a0000000000000); // 0x1p-149 === 2 ^ -149 diff --git a/libm/src/math/exp2.rs b/libm/src/math/exp2.rs index 3952e9300..be6a003c6 100644 --- a/libm/src/math/exp2.rs +++ b/libm/src/math/exp2.rs @@ -319,6 +319,7 @@ static TBL: [u64; TBLSIZE * 2] = [ // Gal, S. and Bachelis, B. 
An Accurate Elementary Mathematical Library // for the IEEE Floating Point Standard. TOMS 17(1), 26-46 (1991). #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp2(mut x: f64) -> f64 { let redux = f64::from_bits(0x4338000000000000) / TBLSIZE as f64; let p1 = f64::from_bits(0x3fe62e42fefa39ef); diff --git a/libm/src/math/exp2f.rs b/libm/src/math/exp2f.rs index a3f6db8c5..32816104b 100644 --- a/libm/src/math/exp2f.rs +++ b/libm/src/math/exp2f.rs @@ -70,6 +70,7 @@ static EXP2FT: [u64; TBLSIZE] = [ // Tang, P. Table-driven Implementation of the Exponential Function // in IEEE Floating-Point Arithmetic. TOMS 15(2), 144-157 (1989). #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp2f(mut x: f32) -> f32 { let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32; let p1 = f32::from_bits(0x3f317218); diff --git a/libm/src/math/expf.rs b/libm/src/math/expf.rs index 8ecc3b6ab..e33425665 100644 --- a/libm/src/math/expf.rs +++ b/libm/src/math/expf.rs @@ -27,6 +27,7 @@ const P1: f32 = 1.6666625440e-1; /* 0xaaaa8f.0p-26 */ const P2: f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn expf(mut x: f32) -> f32 { let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 /*original 0x1p-149f ??????????? 
*/ diff --git a/libm/src/math/expm1.rs b/libm/src/math/expm1.rs index 9da064ee7..426163990 100644 --- a/libm/src/math/expm1.rs +++ b/libm/src/math/expm1.rs @@ -24,6 +24,7 @@ const Q4: f64 = 4.00821782732936239552e-06; /* 3ED0CFCA 86E65239 */ const Q5: f64 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn expm1(mut x: f64) -> f64 { let hi: f64; let lo: f64; diff --git a/libm/src/math/expm1f.rs b/libm/src/math/expm1f.rs index 8f581733a..4daa83c85 100644 --- a/libm/src/math/expm1f.rs +++ b/libm/src/math/expm1f.rs @@ -26,6 +26,7 @@ const Q1: f32 = -3.3333212137e-2; /* -0x888868.0p-28 */ const Q2: f32 = 1.5807170421e-3; /* 0xcf3010.0p-33 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn expm1f(mut x: f32) -> f32 { let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 diff --git a/libm/src/math/expo2.rs b/libm/src/math/expo2.rs index 39f9815c4..b5369fbbe 100644 --- a/libm/src/math/expo2.rs +++ b/libm/src/math/expo2.rs @@ -2,6 +2,7 @@ use super::{combine_words, exp}; /* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn expo2(x: f64) -> f64 { /* k is such that k*ln2 has minimal relative error and x - kln2 > log(DBL_MIN) */ const K: i32 = 2043; diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs index 7c804653c..0824bd593 100644 --- a/libm/src/math/fabs.rs +++ b/libm/src/math/fabs.rs @@ -1,6 +1,7 @@ use core::u64; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fabs(x: f64) -> f64 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f64.abs` native instruction, so we can leverage this for both code size diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs index 884c20f6c..859508f9b 100644 --- a/libm/src/math/fabsf.rs +++ b/libm/src/math/fabsf.rs @@ -1,4 +1,5 @@ #[inline] 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fabsf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f32.abs` native instruction, so we can leverage this for both code size diff --git a/libm/src/math/fdim.rs b/libm/src/math/fdim.rs index 1daae4ebc..32ae306e2 100644 --- a/libm/src/math/fdim.rs +++ b/libm/src/math/fdim.rs @@ -1,6 +1,7 @@ use core::f64; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fdim(x: f64, y: f64) -> f64 { if x.is_nan() { x diff --git a/libm/src/math/fdimf.rs b/libm/src/math/fdimf.rs index 953e0c8df..7db019326 100644 --- a/libm/src/math/fdimf.rs +++ b/libm/src/math/fdimf.rs @@ -1,6 +1,7 @@ use core::f32; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fdimf(x: f32, y: f32) -> f32 { if x.is_nan() { x diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index b14a48d55..f5ac8006f 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -3,6 +3,7 @@ use core::f64; const TOINT: f64 = 1. 
/ f64::EPSILON; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn floor(x: f64) -> f64 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f64.floor` native instruction, so we can leverage this for both code size diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs index 71b5953df..8699be060 100644 --- a/libm/src/math/floorf.rs +++ b/libm/src/math/floorf.rs @@ -1,6 +1,7 @@ use core::f32; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn floorf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f32.floor` native instruction, so we can leverage this for both code size diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index 99a27164a..38468ae75 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -49,6 +49,7 @@ fn mul(x: u64, y: u64) -> (u64, u64) { } #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fma(x: f64, y: f64, z: f64) -> f64 { let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63 diff --git a/libm/src/math/fmaf.rs b/libm/src/math/fmaf.rs index 25b04fc23..9e5a55f44 100644 --- a/libm/src/math/fmaf.rs +++ b/libm/src/math/fmaf.rs @@ -41,6 +41,7 @@ use super::fenv::{ * rounding occurs. 
*/ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 { let xy: f64; let mut result: f64; diff --git a/libm/src/math/fmod.rs b/libm/src/math/fmod.rs index 23f0c4846..ecc9b39a5 100644 --- a/libm/src/math/fmod.rs +++ b/libm/src/math/fmod.rs @@ -1,6 +1,7 @@ use core::u64; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmod(x: f64, y: f64) -> f64 { let mut uxi = x.to_bits(); let mut uyi = y.to_bits(); diff --git a/libm/src/math/fmodf.rs b/libm/src/math/fmodf.rs index d84cfeb01..98f51f455 100644 --- a/libm/src/math/fmodf.rs +++ b/libm/src/math/fmodf.rs @@ -2,6 +2,7 @@ use core::f32; use core::u32; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmodf(x: f32, y: f32) -> f32 { let mut uxi = x.to_bits(); let mut uyi = y.to_bits(); diff --git a/libm/src/math/hypot.rs b/libm/src/math/hypot.rs index f011415fd..dee9bbf42 100644 --- a/libm/src/math/hypot.rs +++ b/libm/src/math/hypot.rs @@ -19,6 +19,7 @@ fn sq(x: f64) -> (f64, f64) { } #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn hypot(mut x: f64, mut y: f64) -> f64 { let x1p700 = f64::from_bits(0x6bb0000000000000); // 0x1p700 === 2 ^ 700 let x1p_700 = f64::from_bits(0x1430000000000000); // 0x1p-700 === 2 ^ -700 diff --git a/libm/src/math/hypotf.rs b/libm/src/math/hypotf.rs index d59710ada..4636b8f1d 100644 --- a/libm/src/math/hypotf.rs +++ b/libm/src/math/hypotf.rs @@ -3,6 +3,7 @@ use core::f32; use super::sqrtf; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn hypotf(mut x: f32, mut y: f32) -> f32 { let x1p90 = f32::from_bits(0x6c800000); // 0x1p90f === 2 ^ 90 let x1p_90 = f32::from_bits(0x12800000); // 0x1p-90f === 2 ^ -90 diff --git a/libm/src/math/k_cos.rs b/libm/src/math/k_cos.rs index 693950d1d..8876fac21 100644 --- a/libm/src/math/k_cos.rs +++ b/libm/src/math/k_cos.rs @@ -52,6 +52,7 @@ const C6: f64 = -1.13596475577881948265e-11; /* 
0xBDA8FAE9, 0xBE8838D4 */ // under FreeBSD, so don't pessimize things by forcibly clipping // any extra precision in w. #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn k_cos(x: f64, y: f64) -> f64 { let z = x * x; let w = z * z; diff --git a/libm/src/math/k_cosf.rs b/libm/src/math/k_cosf.rs index 4aa10c0f0..9b48e190d 100644 --- a/libm/src/math/k_cosf.rs +++ b/libm/src/math/k_cosf.rs @@ -21,6 +21,7 @@ const C2: f64 = -0.00138867637746099294692; /* -0x16c087e80f1e27.0p-62 */ const C3: f64 = 0.0000243904487962774090654; /* 0x199342e0ee5069.0p-68 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn k_cosf(x: f64) -> f32 { let z = x * x; let w = z * z; diff --git a/libm/src/math/k_expo2.rs b/libm/src/math/k_expo2.rs index e295c7a53..0a9562eae 100644 --- a/libm/src/math/k_expo2.rs +++ b/libm/src/math/k_expo2.rs @@ -5,6 +5,7 @@ const K: i32 = 2043; /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_expo2(x: f64) -> f64 { let k_ln2 = f64::from_bits(0x40962066151add8b); /* note that k is odd and scale*scale overflows */ diff --git a/libm/src/math/k_expo2f.rs b/libm/src/math/k_expo2f.rs index ec2a2c5e2..68a7a5032 100644 --- a/libm/src/math/k_expo2f.rs +++ b/libm/src/math/k_expo2f.rs @@ -5,6 +5,7 @@ const K: i32 = 235; /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn k_expo2f(x: f32) -> f32 { let k_ln2 = f32::from_bits(0x4322e3bc); /* note that k is odd and scale*scale overflows */ diff --git a/libm/src/math/k_sin.rs b/libm/src/math/k_sin.rs index 3e07c3594..15718c4c9 100644 --- a/libm/src/math/k_sin.rs +++ b/libm/src/math/k_sin.rs @@ -44,6 +44,7 @@ const S6: f64 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ // then 3 2 // sin(x) = x + (S1*x + (x *(r-y/2)+y)) #[inline] 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn k_sin(x: f64, y: f64, iy: i32) -> f64 { let z = x * x; let w = z * z; diff --git a/libm/src/math/k_sinf.rs b/libm/src/math/k_sinf.rs index 1c5f5f98a..157fc104c 100644 --- a/libm/src/math/k_sinf.rs +++ b/libm/src/math/k_sinf.rs @@ -21,6 +21,7 @@ const S3: f64 = -0.000198393348360966317347; /* -0x1a00f9e2cae774.0p-65 */ const S4: f64 = 0.0000027183114939898219064; /* 0x16cd878c3b46a7.0p-71 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn k_sinf(x: f64) -> f32 { let z = x * x; let w = z * z; diff --git a/libm/src/math/k_tan.rs b/libm/src/math/k_tan.rs index e9ba21499..684e937b9 100644 --- a/libm/src/math/k_tan.rs +++ b/libm/src/math/k_tan.rs @@ -59,6 +59,7 @@ const PIO4: f64 = 7.85398163397448278999e-01; /* 3FE921FB, 54442D18 */ const PIO4_LO: f64 = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn k_tan(mut x: f64, mut y: f64, odd: i32) -> f64 { let hx = (f64::to_bits(x) >> 32) as u32; let big = (hx & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */ diff --git a/libm/src/math/k_tanf.rs b/libm/src/math/k_tanf.rs index b9ccf2570..96a591007 100644 --- a/libm/src/math/k_tanf.rs +++ b/libm/src/math/k_tanf.rs @@ -20,6 +20,7 @@ const T: [f64; 6] = [ ]; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn k_tanf(x: f64, odd: bool) -> f32 { let z = x * x; /* diff --git a/libm/src/math/log.rs b/libm/src/math/log.rs index 48e9fa79a..948065abf 100644 --- a/libm/src/math/log.rs +++ b/libm/src/math/log.rs @@ -71,6 +71,7 @@ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log(mut x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 diff --git a/libm/src/math/log10.rs 
b/libm/src/math/log10.rs index 7c7afefa3..100618a02 100644 --- a/libm/src/math/log10.rs +++ b/libm/src/math/log10.rs @@ -32,6 +32,7 @@ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log10(mut x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 diff --git a/libm/src/math/log10f.rs b/libm/src/math/log10f.rs index 82b87c044..9cf89deb9 100644 --- a/libm/src/math/log10f.rs +++ b/libm/src/math/log10f.rs @@ -26,6 +26,7 @@ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log10f(mut x: f32) -> f32 { let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 diff --git a/libm/src/math/log1p.rs b/libm/src/math/log1p.rs index f42669dee..fb35e90db 100644 --- a/libm/src/math/log1p.rs +++ b/libm/src/math/log1p.rs @@ -66,6 +66,7 @@ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log1p(x: f64) -> f64 { let mut ui: u64 = x.to_bits(); let hfsq: f64; diff --git a/libm/src/math/log1pf.rs b/libm/src/math/log1pf.rs index e6e1c14c8..9fc399d95 100644 --- a/libm/src/math/log1pf.rs +++ b/libm/src/math/log1pf.rs @@ -21,6 +21,7 @@ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log1pf(x: f32) -> f32 { let mut ui: u32 = x.to_bits(); let hfsq: f32; diff --git a/libm/src/math/log2.rs b/libm/src/math/log2.rs index 35eb9bf72..b513928cc 100644 --- a/libm/src/math/log2.rs +++ b/libm/src/math/log2.rs @@ -30,6 +30,7 @@ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 
D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log2(mut x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 diff --git a/libm/src/math/log2f.rs b/libm/src/math/log2f.rs index 8684b142f..07a00dc3d 100644 --- a/libm/src/math/log2f.rs +++ b/libm/src/math/log2f.rs @@ -24,6 +24,7 @@ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log2f(mut x: f32) -> f32 { let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 diff --git a/libm/src/math/logf.rs b/libm/src/math/logf.rs index 095191041..95195601c 100644 --- a/libm/src/math/logf.rs +++ b/libm/src/math/logf.rs @@ -22,6 +22,7 @@ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn logf(mut x: f32) -> f32 { let x1p25 = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 329b3955d..3b789dd9f 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -90,6 +90,7 @@ const IVLN2_H: f64 = 1.44269502162933349609e+00; /* 0x3ff71547_60000000 =24b 1/l const IVLN2_L: f64 = 1.92596299112661746887e-08; /* 0x3e54ae0b_f85ddf44 =1/ln2 tail*/ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn pow(x: f64, y: f64) -> f64 { let t1: f64; let t2: f64; diff --git a/libm/src/math/powf.rs b/libm/src/math/powf.rs index 8d0afe669..5bc5c08e9 100644 --- a/libm/src/math/powf.rs +++ b/libm/src/math/powf.rs @@ -44,6 +44,7 @@ const IVLN2_H: f32 = 1.4426879883e+00; const IVLN2_L: f32 = 7.0526075433e-06; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn powf(x: f32, y: f32) -> f32 { let mut z: f32; let mut ax: f32; diff --git 
a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 5c1685877..98d6b37be 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -43,6 +43,7 @@ const PIO2_3T: f64 = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ // // caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn rem_pio2(x: f64) -> (i32, f64, f64) { let x1p24 = f64::from_bits(0x4170000000000000); diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index 4d9146af9..8bab48569 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -223,6 +223,7 @@ const PIO2: [f64; 8] = [ /// more accurately, = 0 mod 8 ). Thus the number of operations are /// independent of the exponent of the input. #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24 let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24) diff --git a/libm/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs index 5e7a7d439..054c31184 100644 --- a/libm/src/math/rem_pio2f.rs +++ b/libm/src/math/rem_pio2f.rs @@ -32,6 +32,7 @@ const PIO2_1T: f64 = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ /// use double precision for everything except passing x /// use __rem_pio2_large() for large x #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn rem_pio2f(x: f32) -> (i32, f64) { let x64 = x as f64; diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs index 1a6e75448..9a9723cfb 100644 --- a/libm/src/math/round.rs +++ b/libm/src/math/round.rs @@ -3,6 +3,7 @@ use core::f64; const TOINT: f64 = 1.0 / f64::EPSILON; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn round(mut x: f64) -> f64 { let (f, i) = (x, x.to_bits()); let e: u64 = 
i >> 52 & 0x7ff; diff --git a/libm/src/math/roundf.rs b/libm/src/math/roundf.rs index 7dd79557a..839d9469a 100644 --- a/libm/src/math/roundf.rs +++ b/libm/src/math/roundf.rs @@ -3,6 +3,7 @@ use core::f32; const TOINT: f32 = 1.0 / f32::EPSILON; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn roundf(mut x: f32) -> f32 { let i = x.to_bits(); let e: u32 = i >> 23 & 0xff; diff --git a/libm/src/math/scalbn.rs b/libm/src/math/scalbn.rs index ad81072dd..2c4ab3660 100644 --- a/libm/src/math/scalbn.rs +++ b/libm/src/math/scalbn.rs @@ -1,4 +1,5 @@ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn scalbn(x: f64, mut n: i32) -> f64 { let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 let x1p53 = f64::from_bits(0x4340000000000000); // 0x1p53 === 2 ^ 53 diff --git a/libm/src/math/scalbnf.rs b/libm/src/math/scalbnf.rs index 901497e5e..4e9771175 100644 --- a/libm/src/math/scalbnf.rs +++ b/libm/src/math/scalbnf.rs @@ -1,4 +1,5 @@ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn scalbnf(mut x: f32, mut n: i32) -> f32 { let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 diff --git a/libm/src/math/sin.rs b/libm/src/math/sin.rs index e749094e6..b73074416 100644 --- a/libm/src/math/sin.rs +++ b/libm/src/math/sin.rs @@ -41,6 +41,7 @@ use super::{k_cos, k_sin, rem_pio2}; // Accuracy: // TRIG(x) returns trig(x) nearly rounded #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sin(x: f64) -> f64 { let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 diff --git a/libm/src/math/sinf.rs b/libm/src/math/sinf.rs index c9b02bcdc..b8fc8d6f4 100644 --- a/libm/src/math/sinf.rs +++ b/libm/src/math/sinf.rs @@ -25,6 +25,7 @@ const S3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ const S4_PIO2: f64 = 4. 
* FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sinf(x: f32) -> f32 { let x64 = x as f64; diff --git a/libm/src/math/sinh.rs b/libm/src/math/sinh.rs index 684e8e309..25ff3daac 100644 --- a/libm/src/math/sinh.rs +++ b/libm/src/math/sinh.rs @@ -5,6 +5,7 @@ use super::{expm1, expo2}; // = x + x^3/6 + o(x^5) // #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sinh(x: f64) -> f64 { // union {double f; uint64_t i;} u = {.f = x}; // uint32_t w; diff --git a/libm/src/math/sinhf.rs b/libm/src/math/sinhf.rs index 90c4b9312..fd0b2bfc8 100644 --- a/libm/src/math/sinhf.rs +++ b/libm/src/math/sinhf.rs @@ -2,6 +2,7 @@ use super::expm1f; use super::k_expo2f; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sinhf(x: f32) -> f32 { let mut h = 0.5f32; let mut ix = x.to_bits(); diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index b2387a26e..a05a521fb 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -81,6 +81,7 @@ use core::f64; const TINY: f64 = 1.0e-300; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrt(x: f64) -> f64 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f64.sqrt` native instruction, so we can leverage this for both code size diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index 33cafbcbd..b9365c617 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -16,6 +16,7 @@ const TINY: f32 = 1.0e-30; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrtf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f32.sqrt` native instruction, so we can leverage this for both code size diff --git a/libm/src/math/tan.rs b/libm/src/math/tan.rs index 5a5f178a5..e5c94cbb1 100644 --- a/libm/src/math/tan.rs +++ b/libm/src/math/tan.rs @@ -40,6 +40,7 @@ use super::{k_tan, rem_pio2}; 
// Accuracy: // TRIG(x) returns trig(x) nearly rounded #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tan(x: f64) -> f64 { let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 diff --git a/libm/src/math/tanf.rs b/libm/src/math/tanf.rs index 15a462d4e..c286cdeb4 100644 --- a/libm/src/math/tanf.rs +++ b/libm/src/math/tanf.rs @@ -25,6 +25,7 @@ const T3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ const T4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tanf(x: f32) -> f32 { let x64 = x as f64; diff --git a/libm/src/math/tanh.rs b/libm/src/math/tanh.rs index 1c3dd0be4..75d695cf7 100644 --- a/libm/src/math/tanh.rs +++ b/libm/src/math/tanh.rs @@ -5,6 +5,7 @@ use super::expm1; * = (1 - exp(-2*x))/(exp(-2*x) - 1 + 2) */ #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tanh(mut x: f64) -> f64 { let mut uf: f64 = x; let mut ui: u64 = f64::to_bits(uf); diff --git a/libm/src/math/tanhf.rs b/libm/src/math/tanhf.rs index 98a1b60c2..ac4657b5a 100644 --- a/libm/src/math/tanhf.rs +++ b/libm/src/math/tanhf.rs @@ -1,6 +1,7 @@ use super::expm1f; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tanhf(mut x: f32) -> f32 { /* x = |x| */ let mut ix = x.to_bits(); diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs index 8eecfcf53..1ee46fc7d 100644 --- a/libm/src/math/trunc.rs +++ b/libm/src/math/trunc.rs @@ -1,6 +1,7 @@ use core::f64; #[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn trunc(x: f64) -> f64 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f64.trunc` native instruction, so we can leverage this for both code size diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs index 0d74fea9c..f93383269 100644 --- a/libm/src/math/truncf.rs +++ b/libm/src/math/truncf.rs @@ -1,6 +1,7 @@ use core::f32; #[inline] 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn truncf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f32.trunc` native instruction, so we can leverage this for both code size From b93187c9e40d77d832f18411993611e359427fe8 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 11:38:26 -0700 Subject: [PATCH 0202/1459] Delete stray test --- libm/src/math/acosf.rs | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/libm/src/math/acosf.rs b/libm/src/math/acosf.rs index d635ee8a6..a6061ae80 100644 --- a/libm/src/math/acosf.rs +++ b/libm/src/math/acosf.rs @@ -74,17 +74,3 @@ pub fn acosf(x: f32) -> f32 { w = r(z) * s + c; 2. * (df + w) } - -#[cfg(test)] -mod tests { - #[test] - fn acosf() { - extern { - fn acosf(x: f32) -> f32; - } - unsafe { - crate::_eqf(super::acosf(1.0), acosf(1.0)).unwrap(); - } - } - // shared::f32!("musl", acosf); -} From 8f199b75de8b9c207fc3ce358305eb2ebe54d038 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 11:40:01 -0700 Subject: [PATCH 0203/1459] Fix azure config --- libm/azure-pipelines.yml | 5 ----- libm/ci/azure-test-all.yml | 4 ---- 2 files changed, 9 deletions(-) delete mode 100644 libm/ci/azure-test-all.yml diff --git a/libm/azure-pipelines.yml b/libm/azure-pipelines.yml index 82a74452a..9f4e3672d 100644 --- a/libm/azure-pipelines.yml +++ b/libm/azure-pipelines.yml @@ -7,10 +7,7 @@ jobs: vmImage: ubuntu-16.04 steps: - template: ci/azure-install-rust.yml - env: - TOOLCHAIN: nightly - bash: rustup target add $TARGET - - template: ci/azure-install-rust.yml - bash: cargo generate-lockfile && ./ci/run-docker.sh $TARGET strategy: matrix: @@ -44,8 +41,6 @@ jobs: vmImage: ubuntu-16.04 steps: - template: ci/azure-install-rust.yml - env: - TOOLCHAIN: nightly - script: rustup target add wasm32-unknown-unknown displayName: "Install rust wasm target" - script: cargo build --target wasm32-unknown-unknown diff --git a/libm/ci/azure-test-all.yml 
b/libm/ci/azure-test-all.yml deleted file mode 100644 index 36831bd54..000000000 --- a/libm/ci/azure-test-all.yml +++ /dev/null @@ -1,4 +0,0 @@ -steps: - - template: azure-install-rust.yml - - - bash: cargo generate-lockfile && ./ci/run-docker.sh $TARGET From e38a30a2d1b5104c1ba53e7783fdc28d1a045e45 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 11:44:58 -0700 Subject: [PATCH 0204/1459] Run rustfmt --- libm/build.rs | 28 ++++++++++++++-------------- libm/src/math/atan.rs | 3 ++- libm/src/math/expf.rs | 1 - libm/src/math/fma.rs | 8 ++++---- libm/src/math/pow.rs | 2 +- libm/src/math/powf.rs | 2 +- libm/src/math/rem_pio2.rs | 1 - libm/src/math/scalbn.rs | 2 +- 8 files changed, 23 insertions(+), 24 deletions(-) diff --git a/libm/build.rs b/libm/build.rs index 41dc920e9..23e1178e3 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -267,22 +267,17 @@ mod musl_reference_tests { .status() .unwrap(); assert!(status.success()); - let output = Command::new("./gen") - .current_dir(&dst) - .output() - .unwrap(); + let output = Command::new("./gen").current_dir(&dst).output().unwrap(); assert!(output.status.success()); assert!(output.stderr.is_empty()); // Map all the output bytes back to an `i64` and then shove it all into // the expected results. 
- let mut results = - output.stdout.chunks_exact(8) - .map(|buf| { - let mut exact = [0; 8]; - exact.copy_from_slice(buf); - i64::from_le_bytes(exact) - }); + let mut results = output.stdout.chunks_exact(8).map(|buf| { + let mut exact = [0; 8]; + exact.copy_from_slice(buf); + i64::from_le_bytes(exact) + }); for test in functions.iter_mut().flat_map(|f| f.tests.iter_mut()) { test.output = results.next().unwrap(); @@ -301,7 +296,10 @@ mod musl_reference_tests { src.push_str("fn "); src.push_str(&function.name); src.push_str("_matches_musl() {"); - src.push_str(&format!("static TESTS: &[([i64; {}], i64)]", function.args.len())); + src.push_str(&format!( + "static TESTS: &[([i64; {}], i64)]", + function.args.len() + )); src.push_str(" = &["); for test in function.tests.iter() { src.push_str("(["); @@ -336,9 +334,11 @@ mod musl_reference_tests { Ty::Bool => unreachable!(), }); - src.push_str(r#" + src.push_str( + r#" panic!("INPUT: {:?} EXPECTED: {:?} ACTUAL {:?}", test, expected, output); - "#); + "#, + ); src.push_str("}"); src.push_str("}"); diff --git a/libm/src/math/atan.rs b/libm/src/math/atan.rs index a9cdc2933..ad1d57c1f 100644 --- a/libm/src/math/atan.rs +++ b/libm/src/math/atan.rs @@ -148,7 +148,8 @@ mod tests { (-3.0_f64.sqrt() / 3.0, -f64::consts::FRAC_PI_6), (-1.0, -f64::consts::FRAC_PI_4), (-3.0_f64.sqrt(), -f64::consts::FRAC_PI_3), - ].iter() + ] + .iter() { assert!( (atan(*input) - answer) / answer < 1e-5, diff --git a/libm/src/math/expf.rs b/libm/src/math/expf.rs index e33425665..baade2552 100644 --- a/libm/src/math/expf.rs +++ b/libm/src/math/expf.rs @@ -31,7 +31,6 @@ const P2: f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */ pub fn expf(mut x: f32) -> f32 { let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 /*original 0x1p-149f ??????????? 
*/ - let mut hx = x.to_bits(); let sign = (hx >> 31) as i32; /* sign bit of x */ let signb: bool = sign != 0; diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index 38468ae75..acf99a5d4 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -166,13 +166,13 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { } if r == c { /* min normal after rounding, underflow depends - on arch behaviour which can be imitated by - a double to float conversion */ + on arch behaviour which can be imitated by + a double to float conversion */ let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32; return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64; } /* one bit is lost when scaled, add another top bit to - only round once at conversion if it is inexact */ + only round once at conversion if it is inexact */ if (rhi << 53) != 0 { i = (rhi >> 1 | (rhi & 1) | 1 << 62) as i64; if sign != 0 { @@ -182,7 +182,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { r = 2. * r - c; /* remove top bit */ /* raise underflow portably, such that it - cannot be optimized away */ + cannot be optimized away */ { let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r; r += (tiny * tiny) * (r - r); diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 3b789dd9f..b5b0407ef 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -262,7 +262,7 @@ pub fn pow(x: f64, y: f64) -> f64 { } /* now |1-x| is TINY <= 2**-20, suffice to compute - log(x) by x-x^2/2+x^3/3-x^4/4 */ + log(x) by x-x^2/2+x^3/3-x^4/4 */ let t: f64 = ax - 1.0; /* t has 20 trailing zeros */ let w: f64 = (t * t) * (0.5 - t * (0.3333333333333333333333 - t * 0.25)); let u: f64 = IVLN2_H * t; /* ivln2_h has 21 sig. 
bits */ diff --git a/libm/src/math/powf.rs b/libm/src/math/powf.rs index 5bc5c08e9..0a26573a3 100644 --- a/libm/src/math/powf.rs +++ b/libm/src/math/powf.rs @@ -198,7 +198,7 @@ pub fn powf(x: f32, y: f32) -> f32 { } /* now |1-x| is TINY <= 2**-20, suffice to compute - log(x) by x-x^2/2+x^3/3-x^4/4 */ + log(x) by x-x^2/2+x^3/3-x^4/4 */ t = ax - 1.; /* t has 20 trailing zeros */ w = (t * t) * (0.5 - t * (0.333333333333 - t * 0.25)); u = IVLN2_H * t; /* IVLN2_H has 16 sig. bits */ diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 98d6b37be..82faf5da9 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -10,7 +10,6 @@ // ==================================================== // // Optimized by Bruce D. Evans. */ - use super::rem_pio2_large; // #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 diff --git a/libm/src/math/scalbn.rs b/libm/src/math/scalbn.rs index 2c4ab3660..d8c8409ac 100644 --- a/libm/src/math/scalbn.rs +++ b/libm/src/math/scalbn.rs @@ -19,7 +19,7 @@ pub fn scalbn(x: f64, mut n: i32) -> f64 { } } else if n < -1022 { /* make sure final n < -53 to avoid double - rounding in the subnormal range */ + rounding in the subnormal range */ y *= x1p_1022 * x1p53; n += 1022 - 53; if n < -1022 { From d1ce95aea05c1f529bc15ac6a966398d0cd021d1 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 11:47:46 -0700 Subject: [PATCH 0205/1459] More azure config fixes --- libm/README.md | 2 ++ libm/azure-pipelines.yml | 9 +++++++++ libm/ci/run-docker.sh | 3 ++- libm/ci/run.sh | 6 +++--- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/libm/README.md b/libm/README.md index 24b816c35..568a049f0 100644 --- a/libm/README.md +++ b/libm/README.md @@ -1,5 +1,7 @@ # `libm` +[![Build Status](https://dev.azure.com/rust-lang/libm/_apis/build/status/rust-lang-nursery.libm?branchName=master)](https://dev.azure.com/rust-lang/libm/_build/latest?definitionId=7&branchName=master) + A port of [MUSL]'s libm to Rust. 
[MUSL]: https://www.musl-libc.org/ diff --git a/libm/azure-pipelines.yml b/libm/azure-pipelines.yml index 9f4e3672d..d8068e023 100644 --- a/libm/azure-pipelines.yml +++ b/libm/azure-pipelines.yml @@ -8,7 +8,11 @@ jobs: steps: - template: ci/azure-install-rust.yml - bash: rustup target add $TARGET + displayName: "add cross target" + - bash: rustup target add x86_64-unknown-linux-musl + displayName: "add musl target" - bash: cargo generate-lockfile && ./ci/run-docker.sh $TARGET + displayName: "run tests" strategy: matrix: aarch64: @@ -47,6 +51,8 @@ jobs: displayName: "Build for wasm" - script: cargo build --target wasm32-unknown-unknown --no-default-features displayName: "Build for wasm (no default features)" + variables: + TOOLCHAIN: nightly - job: rustfmt pool: @@ -54,7 +60,9 @@ jobs: steps: - template: ci/azure-install-rust.yml - bash: rustup component add rustfmt + displayName: "install rustfmt" - bash: cargo fmt --all -- --check + displayName: "check formatting" - job: compiler_builtins_works pool: @@ -62,3 +70,4 @@ jobs: steps: - template: ci/azure-install-rust.yml - bash: cargo build -p cb + displayName: "Check compiler-builtins still probably builds" diff --git a/libm/ci/run-docker.sh b/libm/ci/run-docker.sh index 6b3066e53..95bd3db48 100755 --- a/libm/ci/run-docker.sh +++ b/libm/ci/run-docker.sh @@ -22,9 +22,10 @@ run() { -v `pwd`/target:/target \ -v `pwd`:/checkout:ro \ -v `rustc --print sysroot`:/rust:ro \ + --init \ -w /checkout \ $target \ - sh -c "HOME=/tmp PATH=\$PATH:/rust/bin ci/run.sh $target" + sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/run.sh $target" } if [ -z "$1" ]; then diff --git a/libm/ci/run.sh b/libm/ci/run.sh index fabfd0ecb..d28811300 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -3,10 +3,10 @@ set -ex TARGET=$1 -cargo build --target $TARGET cargo test --target $TARGET -cargo build --target $TARGET --release cargo test --target $TARGET --release -cargo test --features 'checked musl-reference-tests' --target $TARGET +# FIXME(#4) 
overflow checks in non-release currently cause issues +#cargo test --features 'checked musl-reference-tests' --target $TARGET + cargo test --features 'checked musl-reference-tests' --target $TARGET --release From 73b743412a73c6a9a1e96638a40c4351bef735fd Mon Sep 17 00:00:00 2001 From: Rahul Butani Date: Mon, 23 Jul 2018 01:13:23 -0500 Subject: [PATCH 0206/1459] Add some tests for pow These probably aren't comprehensive but they cover all the edge cases identified in the original musl source. --- libm/src/math/pow.rs | 189 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index b5b0407ef..288e403c6 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -408,3 +408,192 @@ pub fn pow(x: f64, y: f64) -> f64 { return s * z; } + +/// Special cases: + +/// 20. (anything) ** 1 is (anything) +/// 21. (anything) ** -1 is 1/(anything) +/// 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) +/// 23. 
(-anything except 0 and inf) ** (non-integer) is NAN + +#[cfg(test)] +mod tests { + // #[macro_use] + extern crate std; + + use self::std::f64::consts::{E, PI}; + use self::std::f64::{EPSILON, INFINITY, MAX, MIN, MIN_POSITIVE, NAN, NEG_INFINITY}; + use super::pow; + + // const TESTCASES: &[f64] = &[1.0, 0.0, PI, -PI, E, -E, MIN, MAX, MIN_POSITIVE, NAN, INFINITY, NEG_INFINITY]; + + const POS_ZERO: &[f64] = &[0.0]; + const NEG_ZERO: &[f64] = &[-0.0]; + const POS_ONE: &[f64] = &[1.0]; + const NEG_ONE: &[f64] = &[-1.0]; + const POS_FLOATS: &[f64] = &[E, PI, MAX]; + const NEG_FLOATS: &[f64] = &[-E, -PI, MIN]; + const POS_SMALL_FLOATS: &[f64] = &[(1.0 / 2.0), MIN_POSITIVE, EPSILON]; + const NEG_SMALL_FLOATS: &[f64] = &[-(1.0 / 2.0), -MIN_POSITIVE, -EPSILON]; + const POS_EVENS: &[f64] = &[2.0, 6.0, 8.0, 10.0, 22.0, 100.0]; + const NEG_EVENS: &[f64] = &[-8.0, -2.0]; + const POS_ODDS: &[f64] = &[3.0, 7.0]; + const NEG_ODDS: &[f64] = &[-7.0, -3.0]; + const NANS: &[f64] = &[NAN]; + // const EDGES: &[f64] = &[MIN, MAX, MIN_POSITIVE, EPSILON]; + const POS_INF: &[f64] = &[INFINITY]; + const NEG_INF: &[f64] = &[NEG_INFINITY]; + + const ALL: &[&[f64]] = &[ + POS_ZERO, NEG_ZERO, NANS, NEG_SMALL_FLOATS, POS_SMALL_FLOATS, NEG_FLOATS, POS_FLOATS, NEG_EVENS, POS_EVENS, NEG_ODDS, POS_ODDS, + NEG_INF, POS_INF, NEG_ONE, POS_ONE, + ]; + const POS: &[&[f64]] = &[POS_ZERO, POS_ODDS, POS_ONE, POS_FLOATS, POS_EVENS, POS_INF]; + const NEG: &[&[f64]] = &[NEG_ZERO, NEG_ODDS, NEG_ONE, NEG_FLOATS, NEG_EVENS, NEG_INF]; + + fn pow_test(base: f64, exponent: f64, expected: f64) { + let res = pow(base, exponent); + assert!(if expected.is_nan() {res.is_nan()} else {pow(base, exponent) == expected}, + "{} ** {} was {} instead of {}", base, exponent, res, expected); + } + + fn test_sets_as_base(sets: &[&[f64]], exponent: f64, expected: f64) { + sets.iter() + .for_each(|s| s.iter().for_each(|val| pow_test(*val, exponent, expected))); + } + + fn test_sets_as_exponent(base: f64, sets: &[&[f64]], expected: 
f64) { + sets.iter() + .for_each(|s| s.iter().for_each(|val| pow_test(base, *val, expected))); + } + + fn test_sets(sets: &[&[f64]], computed: &Fn(f64) -> f64, expected: &Fn(f64) -> f64) { + sets.iter() + .for_each(|s| s.iter().for_each(|val| { + let exp = expected(*val); + let res = computed(*val); + + assert!(if exp.is_nan() {res.is_nan()} else {exp == res}, + "test for {} was {} instead of {}", val, res, exp); + })); + } + + /// 1. (anything) ** 0 is 1 + #[test] + fn zero_as_exponent() { + test_sets_as_base(ALL, 0.0, 1.0); + test_sets_as_base(ALL, -0.0, 1.0); + } + + /// 2. 1 ** (anything) is 1 + #[test] + fn one_as_base() { + test_sets_as_exponent(1.0, ALL, 1.0); + } + + /// 3. (anything except 1) ** NAN is NAN + /// 4. NAN ** (anything except 0) is NAN + #[test] + fn nan_inputs() { + // NAN as the base: + // (NAN ^ anything *but 0* should be NAN) + test_sets_as_exponent(NAN, &ALL[2..], NAN); + + // NAN as the exponent: + // (anything *but 1* ^ NAN should be NAN) + test_sets_as_base(&ALL[..(ALL.len() - 2)], NAN, NAN); + } + + /// 16. +INF ** (+anything except 0,NAN) is +INF + /// 17. +INF ** (-anything except 0,NAN) is +0 + /// 18. -INF ** (+odd integer) is -INF + /// 19. -INF ** (anything) = -0 ** (-anything), (anything except odd integer) + #[test] + fn infinity_as_base() { + // Positive Infinity as the base: + // (+Infinity ^ positive anything but 0 and NAN should be +Infinity) + test_sets_as_exponent(INFINITY, &POS[1..], INFINITY); + + // (+Infinity ^ negative anything except 0 and NAN should be 0.0) + test_sets_as_exponent(INFINITY, &NEG[1..], 0.0); + + // Negative Infinity as the base: + // (-Infinity ^ positive odd ints should be -Infinity) + test_sets_as_exponent(NEG_INFINITY, &[POS_ODDS], NEG_INFINITY); + + // (-Infinity ^ anything but odd ints should be == -0 ^ (-anything)) + // We can lump in pos/neg odd ints here because they don't seem to + // cause panics (div by zero) in release mode (I think). 
+ test_sets(ALL, &|v: f64| pow(NEG_INFINITY, v), &|v: f64| pow(-0.0, -v)); + } + + /// 5. +-(|x| > 1) ** +INF is +INF + /// 6. +-(|x| > 1) ** -INF is +0 + /// 7. +-(|x| < 1) ** +INF is +0 + /// 8. +-(|x| < 1) ** -INF is +INF + /// 9. -1 ** +-INF is 1 + #[test] + fn infinity_as_exponent() { + // Positive/Negative base greater than 1: + // (pos/neg > 1 ^ Infinity should be Infinity - note this excludes NAN as the base) + test_sets_as_base(&ALL[5..(ALL.len() - 2)], INFINITY, INFINITY); + + // (pos/neg > 1 ^ -Infinity should be 0.0) + test_sets_as_base(&ALL[5..(ALL.len() - 2)], NEG_INFINITY, 0.0); + + // Positive/Negative base less than 1: + let base_below_one = &[POS_ZERO, NEG_ZERO, NEG_SMALL_FLOATS, POS_SMALL_FLOATS]; + + // (pos/neg < 1 ^ Infinity should be 0.0 - this also excludes NAN as the base) + test_sets_as_base(base_below_one, INFINITY, 0.0); + + // (pos/neg < 1 ^ -Infinity should be Infinity) + test_sets_as_base(base_below_one, NEG_INFINITY, INFINITY); + + // Positive/Negative 1 as the base: + // (pos/neg 1 ^ Infinity should be 1) + test_sets_as_base(&[NEG_ONE, POS_ONE], INFINITY, 1.0); + + // (pos/neg 1 ^ -Infinity should be 1) + test_sets_as_base(&[NEG_ONE, POS_ONE], NEG_INFINITY, 1.0); + } + + /// 10. +0 ** (+anything except 0, NAN) is +0 + /// 11. -0 ** (+anything except 0, NAN, odd integer) is +0 + /// 12. +0 ** (-anything except 0, NAN) is +INF, raise divbyzero + /// 13. -0 ** (-anything except 0, NAN, odd integer) is +INF, raise divbyzero + /// 14. -0 ** (+odd integer) is -0 + /// 15. 
-0 ** (-odd integer) is -INF, raise divbyzero + #[test] + fn zero_as_base() { + // Positive Zero as the base: + // (+0 ^ anything positive but 0 and NAN should be +0) + test_sets_as_exponent(0.0, &POS[1..], 0.0); + + // (+0 ^ anything negative but 0 and NAN should be Infinity) + // (this should panic because we're dividing by zero but won't because release mode, I think) + test_sets_as_exponent(0.0, &NEG[1..], INFINITY); + + // Negative Zero as the base: + // (-0 ^ anything positive but 0, NAN, and odd ints should be +0) + test_sets_as_exponent(-0.0, &POS[3..], 0.0); + + // (-0 ^ anything negative but 0, NAN, and odd ints should be Infinity) + // (should panic because of divide by zero) + test_sets_as_exponent(-0.0, &NEG[3..], INFINITY); + + // (-0 ^ positive odd ints should be -0) + test_sets_as_exponent(-0.0, &[POS_ODDS], -0.0); + + // (-0 ^ negative odd ints should be -Infinity) + // (should panic because of divide by zero) + test_sets_as_exponent(-0.0, &[NEG_ODDS], NEG_INFINITY); + } + + #[test] + fn normal_cases() { + assert_eq!(pow(2.0, 20.0), (1 << 20) as f64); + assert_eq!(pow(-1.0, 9.0), -1.0); + assert!(pow(-1.0, 2.2).is_nan()); + } +} From a4858cf3dd9f4dee760402eac5f399e76f02fd15 Mon Sep 17 00:00:00 2001 From: Rahul Butani Date: Mon, 23 Jul 2018 02:54:53 -0500 Subject: [PATCH 0207/1459] Cleaned up + rustfmt'ed --- libm/src/math/pow.rs | 85 +++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 288e403c6..47505dc9a 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -409,24 +409,14 @@ pub fn pow(x: f64, y: f64) -> f64 { return s * z; } -/// Special cases: - -/// 20. (anything) ** 1 is (anything) -/// 21. (anything) ** -1 is 1/(anything) -/// 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) -/// 23. 
(-anything except 0 and inf) ** (non-integer) is NAN - #[cfg(test)] mod tests { - // #[macro_use] extern crate std; use self::std::f64::consts::{E, PI}; use self::std::f64::{EPSILON, INFINITY, MAX, MIN, MIN_POSITIVE, NAN, NEG_INFINITY}; use super::pow; - // const TESTCASES: &[f64] = &[1.0, 0.0, PI, -PI, E, -E, MIN, MAX, MIN_POSITIVE, NAN, INFINITY, NEG_INFINITY]; - const POS_ZERO: &[f64] = &[0.0]; const NEG_ZERO: &[f64] = &[-0.0]; const POS_ONE: &[f64] = &[1.0]; @@ -440,21 +430,43 @@ mod tests { const POS_ODDS: &[f64] = &[3.0, 7.0]; const NEG_ODDS: &[f64] = &[-7.0, -3.0]; const NANS: &[f64] = &[NAN]; - // const EDGES: &[f64] = &[MIN, MAX, MIN_POSITIVE, EPSILON]; const POS_INF: &[f64] = &[INFINITY]; const NEG_INF: &[f64] = &[NEG_INFINITY]; const ALL: &[&[f64]] = &[ - POS_ZERO, NEG_ZERO, NANS, NEG_SMALL_FLOATS, POS_SMALL_FLOATS, NEG_FLOATS, POS_FLOATS, NEG_EVENS, POS_EVENS, NEG_ODDS, POS_ODDS, - NEG_INF, POS_INF, NEG_ONE, POS_ONE, + POS_ZERO, + NEG_ZERO, + NANS, + NEG_SMALL_FLOATS, + POS_SMALL_FLOATS, + NEG_FLOATS, + POS_FLOATS, + NEG_EVENS, + POS_EVENS, + NEG_ODDS, + POS_ODDS, + NEG_INF, + POS_INF, + NEG_ONE, + POS_ONE, ]; const POS: &[&[f64]] = &[POS_ZERO, POS_ODDS, POS_ONE, POS_FLOATS, POS_EVENS, POS_INF]; const NEG: &[&[f64]] = &[NEG_ZERO, NEG_ODDS, NEG_ONE, NEG_FLOATS, NEG_EVENS, NEG_INF]; fn pow_test(base: f64, exponent: f64, expected: f64) { let res = pow(base, exponent); - assert!(if expected.is_nan() {res.is_nan()} else {pow(base, exponent) == expected}, - "{} ** {} was {} instead of {}", base, exponent, res, expected); + assert!( + if expected.is_nan() { + res.is_nan() + } else { + pow(base, exponent) == expected + }, + "{} ** {} was {} instead of {}", + base, + exponent, + res, + expected + ); } fn test_sets_as_base(sets: &[&[f64]], exponent: f64, expected: f64) { @@ -468,31 +480,37 @@ mod tests { } fn test_sets(sets: &[&[f64]], computed: &Fn(f64) -> f64, expected: &Fn(f64) -> f64) { - sets.iter() - .for_each(|s| s.iter().for_each(|val| { + 
sets.iter().for_each(|s| { + s.iter().for_each(|val| { let exp = expected(*val); let res = computed(*val); - assert!(if exp.is_nan() {res.is_nan()} else {exp == res}, - "test for {} was {} instead of {}", val, res, exp); - })); + assert!( + if exp.is_nan() { + res.is_nan() + } else { + exp == res + }, + "test for {} was {} instead of {}", + val, + res, + exp + ); + }) + }); } - /// 1. (anything) ** 0 is 1 #[test] fn zero_as_exponent() { test_sets_as_base(ALL, 0.0, 1.0); test_sets_as_base(ALL, -0.0, 1.0); } - /// 2. 1 ** (anything) is 1 #[test] fn one_as_base() { test_sets_as_exponent(1.0, ALL, 1.0); } - /// 3. (anything except 1) ** NAN is NAN - /// 4. NAN ** (anything except 0) is NAN #[test] fn nan_inputs() { // NAN as the base: @@ -504,10 +522,6 @@ mod tests { test_sets_as_base(&ALL[..(ALL.len() - 2)], NAN, NAN); } - /// 16. +INF ** (+anything except 0,NAN) is +INF - /// 17. +INF ** (-anything except 0,NAN) is +0 - /// 18. -INF ** (+odd integer) is -INF - /// 19. -INF ** (anything) = -0 ** (-anything), (anything except odd integer) #[test] fn infinity_as_base() { // Positive Infinity as the base: @@ -527,11 +541,6 @@ mod tests { test_sets(ALL, &|v: f64| pow(NEG_INFINITY, v), &|v: f64| pow(-0.0, -v)); } - /// 5. +-(|x| > 1) ** +INF is +INF - /// 6. +-(|x| > 1) ** -INF is +0 - /// 7. +-(|x| < 1) ** +INF is +0 - /// 8. +-(|x| < 1) ** -INF is +INF - /// 9. -1 ** +-INF is 1 #[test] fn infinity_as_exponent() { // Positive/Negative base greater than 1: @@ -539,7 +548,7 @@ mod tests { test_sets_as_base(&ALL[5..(ALL.len() - 2)], INFINITY, INFINITY); // (pos/neg > 1 ^ -Infinity should be 0.0) - test_sets_as_base(&ALL[5..(ALL.len() - 2)], NEG_INFINITY, 0.0); + test_sets_as_base(&ALL[5..ALL.len() - 2], NEG_INFINITY, 0.0); // Positive/Negative base less than 1: let base_below_one = &[POS_ZERO, NEG_ZERO, NEG_SMALL_FLOATS, POS_SMALL_FLOATS]; @@ -558,12 +567,6 @@ mod tests { test_sets_as_base(&[NEG_ONE, POS_ONE], NEG_INFINITY, 1.0); } - /// 10. 
+0 ** (+anything except 0, NAN) is +0 - /// 11. -0 ** (+anything except 0, NAN, odd integer) is +0 - /// 12. +0 ** (-anything except 0, NAN) is +INF, raise divbyzero - /// 13. -0 ** (-anything except 0, NAN, odd integer) is +INF, raise divbyzero - /// 14. -0 ** (+odd integer) is -0 - /// 15. -0 ** (-odd integer) is -INF, raise divbyzero #[test] fn zero_as_base() { // Positive Zero as the base: @@ -576,7 +579,7 @@ mod tests { // Negative Zero as the base: // (-0 ^ anything positive but 0, NAN, and odd ints should be +0) - test_sets_as_exponent(-0.0, &POS[3..], 0.0); + test_sets_as_exponent(-0.0, &POS[3..], 0.0); // (-0 ^ anything negative but 0, NAN, and odd ints should be Infinity) // (should panic because of divide by zero) From dfc934d0d81cb91f294e380301109b4197a7c15c Mon Sep 17 00:00:00 2001 From: Rahul Butani Date: Mon, 23 Jul 2018 09:45:22 -0500 Subject: [PATCH 0208/1459] Some additional tests --- libm/src/math/pow.rs | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 47505dc9a..c908c8461 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -421,12 +421,12 @@ mod tests { const NEG_ZERO: &[f64] = &[-0.0]; const POS_ONE: &[f64] = &[1.0]; const NEG_ONE: &[f64] = &[-1.0]; - const POS_FLOATS: &[f64] = &[E, PI, MAX]; - const NEG_FLOATS: &[f64] = &[-E, -PI, MIN]; + const POS_FLOATS: &[f64] = &[99.0 / 70.0, E, PI]; + const NEG_FLOATS: &[f64] = &[-99.0 / 70.0, -E, -PI]; const POS_SMALL_FLOATS: &[f64] = &[(1.0 / 2.0), MIN_POSITIVE, EPSILON]; const NEG_SMALL_FLOATS: &[f64] = &[-(1.0 / 2.0), -MIN_POSITIVE, -EPSILON]; - const POS_EVENS: &[f64] = &[2.0, 6.0, 8.0, 10.0, 22.0, 100.0]; - const NEG_EVENS: &[f64] = &[-8.0, -2.0]; + const POS_EVENS: &[f64] = &[2.0, 6.0, 8.0, 10.0, 22.0, 100.0, MAX]; + const NEG_EVENS: &[f64] = &[MIN, -100.0, -22.0, -10.0, -8.0, -6.0, -2.0]; const POS_ODDS: &[f64] = &[3.0, 7.0]; const NEG_ODDS: &[f64] = &[-7.0, -3.0]; const NANS: 
&[f64] = &[NAN]; @@ -574,7 +574,7 @@ mod tests { test_sets_as_exponent(0.0, &POS[1..], 0.0); // (+0 ^ anything negative but 0 and NAN should be Infinity) - // (this should panic because we're dividing by zero but won't because release mode, I think) + // (this should panic because we're dividing by zero) test_sets_as_exponent(0.0, &NEG[1..], INFINITY); // Negative Zero as the base: @@ -593,10 +593,39 @@ mod tests { test_sets_as_exponent(-0.0, &[NEG_ODDS], NEG_INFINITY); } + #[test] + fn special_cases() { + // / 20. (anything) ** 1 is (anything) + // One as the exponent: + // (anything ^ 1 should be anything - i.e. the base) + test_sets(ALL, &|v: f64| pow(v, 1.0), &|v: f64| v); + + // / 21. (anything) ** -1 is 1/(anything) + // Negative One as the exponent: + // (anything ^ -1 should be 1/anything) + test_sets(ALL, &|v: f64| pow(v, -1.0), &|v: f64| 1.0 / v); + + // / 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) + // Factoring -1 out: + // (negative anything ^ integer should be (-1 ^ integer) * (positive anything ^ integer)) + &[POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS].iter().for_each(|int_set| int_set.iter().for_each(|int| { + test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| pow(-1.0, *int) * pow(v, *int)); + })); + + // / 23. 
(-anything except 0 and inf) ** (non-integer) is NAN + // Negative base (imaginary results): + // (-anything except 0 and Infinity ^ non-integer should be NAN) + &NEG[1..(NEG.len()-1)].iter().for_each(|set| set.iter().for_each(|val| { + test_sets(&ALL[3..7], &|v: f64| pow(*val, v), &|_| NAN); + })); + + } + #[test] fn normal_cases() { assert_eq!(pow(2.0, 20.0), (1 << 20) as f64); assert_eq!(pow(-1.0, 9.0), -1.0); assert!(pow(-1.0, 2.2).is_nan()); + assert!(pow(-1.0, -1.14).is_nan()); } } From 3c2d5fcc96c4dce17feb5c6648df531fdf25d48c Mon Sep 17 00:00:00 2001 From: Rahul Butani Date: Mon, 23 Jul 2018 09:48:47 -0500 Subject: [PATCH 0209/1459] rustfmt'ed --- libm/src/math/pow.rs | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index c908c8461..7aaec7874 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -595,30 +595,33 @@ mod tests { #[test] fn special_cases() { - // / 20. (anything) ** 1 is (anything) // One as the exponent: // (anything ^ 1 should be anything - i.e. the base) test_sets(ALL, &|v: f64| pow(v, 1.0), &|v: f64| v); - // / 21. (anything) ** -1 is 1/(anything) // Negative One as the exponent: // (anything ^ -1 should be 1/anything) test_sets(ALL, &|v: f64| pow(v, -1.0), &|v: f64| 1.0 / v); - // / 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) // Factoring -1 out: // (negative anything ^ integer should be (-1 ^ integer) * (positive anything ^ integer)) - &[POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS].iter().for_each(|int_set| int_set.iter().for_each(|int| { - test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| pow(-1.0, *int) * pow(v, *int)); - })); + &[POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS] + .iter() + .for_each(|int_set| { + int_set.iter().for_each(|int| { + test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| { + pow(-1.0, *int) * pow(v, *int) + }); + }) + }); - // / 23. 
(-anything except 0 and inf) ** (non-integer) is NAN // Negative base (imaginary results): // (-anything except 0 and Infinity ^ non-integer should be NAN) - &NEG[1..(NEG.len()-1)].iter().for_each(|set| set.iter().for_each(|val| { - test_sets(&ALL[3..7], &|v: f64| pow(*val, v), &|_| NAN); - })); - + &NEG[1..(NEG.len() - 1)].iter().for_each(|set| { + set.iter().for_each(|val| { + test_sets(&ALL[3..7], &|v: f64| pow(*val, v), &|_| NAN); + }) + }); } #[test] From ca14b0e7cff7115b24f0454fce67edda29934546 Mon Sep 17 00:00:00 2001 From: Rahul Butani Date: Mon, 23 Jul 2018 11:14:41 -0500 Subject: [PATCH 0210/1459] Use core for constants instead of std (thanks @vks) --- libm/src/math/pow.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 7aaec7874..a1889bb6b 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -411,10 +411,10 @@ pub fn pow(x: f64, y: f64) -> f64 { #[cfg(test)] mod tests { - extern crate std; + extern crate core; - use self::std::f64::consts::{E, PI}; - use self::std::f64::{EPSILON, INFINITY, MAX, MIN, MIN_POSITIVE, NAN, NEG_INFINITY}; + use self::core::f64::consts::{E, PI}; + use self::core::f64::{EPSILON, INFINITY, MAX, MIN, MIN_POSITIVE, NAN, NEG_INFINITY}; use super::pow; const POS_ZERO: &[f64] = &[0.0]; From d3d235c11b6215a5abaeb6cc8d54e5095142036a Mon Sep 17 00:00:00 2001 From: Mark Barbone Date: Tue, 31 Jul 2018 20:56:46 -0400 Subject: [PATCH 0211/1459] Change sqrt to use wrapping operations --- libm/src/math/sqrt.rs | 63 ++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index a05a521fb..14404d4eb 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -77,6 +77,7 @@ */ use core::f64; +use core::num::Wrapping; const TINY: f64 = 1.0e-300; @@ -96,21 +97,21 @@ pub fn sqrt(x: f64) -> f64 { } } let mut z: f64; - let sign: u32 = 0x80000000; + let sign: Wrapping 
= Wrapping(0x80000000); let mut ix0: i32; let mut s0: i32; let mut q: i32; let mut m: i32; let mut t: i32; let mut i: i32; - let mut r: u32; - let mut t1: u32; - let mut s1: u32; - let mut ix1: u32; - let mut q1: u32; + let mut r: Wrapping; + let mut t1: Wrapping; + let mut s1: Wrapping; + let mut ix1: Wrapping; + let mut q1: Wrapping; ix0 = (x.to_bits() >> 32) as i32; - ix1 = x.to_bits() as u32; + ix1 = Wrapping(x.to_bits() as u32); /* take care of Inf and NaN */ if (ix0 & 0x7ff00000) == 0x7ff00000 { @@ -118,7 +119,7 @@ pub fn sqrt(x: f64) -> f64 { } /* take care of zero */ if ix0 <= 0 { - if ((ix0 & !(sign as i32)) | ix1 as i32) == 0 { + if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { return x; /* sqrt(+-0) = +-0 */ } if ix0 < 0 { @@ -131,7 +132,7 @@ pub fn sqrt(x: f64) -> f64 { /* subnormal x */ while ix0 == 0 { m -= 21; - ix0 |= (ix1 >> 11) as i32; + ix0 |= (ix1 >> 11).0 as i32; ix1 <<= 21; } i = 0; @@ -140,46 +141,46 @@ pub fn sqrt(x: f64) -> f64 { ix0 <<= 1; } m -= i - 1; - ix0 |= (ix1 >> (32 - i)) as i32; - ix1 <<= i; + ix0 |= (ix1 >> (32 - i) as usize).0 as i32; + ix1 = ix1 << i as usize; } m -= 1023; /* unbias exponent */ ix0 = (ix0 & 0x000fffff) | 0x00100000; if (m & 1) == 1 { /* odd m, double x to make it even */ - ix0 += ix0 + ((ix1 & sign) >> 31) as i32; + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; ix1 += ix1; } m >>= 1; /* m = [m/2] */ /* generate sqrt(x) bit by bit */ - ix0 += ix0 + ((ix1 & sign) >> 31) as i32; + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; ix1 += ix1; q = 0; /* [q,q1] = sqrt(x) */ - q1 = 0; + q1 = Wrapping(0); s0 = 0; - s1 = 0; - r = 0x00200000; /* r = moving bit from right to left */ + s1 = Wrapping(0); + r = Wrapping(0x00200000); /* r = moving bit from right to left */ - while r != 0 { - t = s0 + r as i32; + while r != Wrapping(0) { + t = s0 + r.0 as i32; if t <= ix0 { - s0 = t + r as i32; + s0 = t + r.0 as i32; ix0 -= t; - q += r as i32; + q += r.0 as i32; } - ix0 += ix0 + ((ix1 & sign) >> 31) as i32; + ix0 += ix0 + ((ix1 & 
sign) >> 31).0 as i32; ix1 += ix1; r >>= 1; } r = sign; - while r != 0 { + while r != Wrapping(0) { t1 = s1 + r; t = s0; if t < ix0 || (t == ix0 && t1 <= ix1) { s1 = t1 + r; - if (t1 & sign) == sign && (s1 & sign) == 0 { + if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { s0 += 1; } ix0 -= t; @@ -189,26 +190,26 @@ pub fn sqrt(x: f64) -> f64 { ix1 -= t1; q1 += r; } - ix0 += ix0 + ((ix1 & sign) >> 31) as i32; + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; ix1 += ix1; r >>= 1; } /* use floating add to find out rounding direction */ - if (ix0 as u32 | ix1) != 0 { + if (ix0 as u32 | ix1.0) != 0 { z = 1.0 - TINY; /* raise inexact flag */ if z >= 1.0 { z = 1.0 + TINY; - if q1 == 0xffffffff { - q1 = 0; + if q1.0 == 0xffffffff { + q1 = Wrapping(0); q += 1; } else if z > 1.0 { - if q1 == 0xfffffffe { + if q1.0 == 0xfffffffe { q += 1; } - q1 += 2; + q1 += Wrapping(2); } else { - q1 += q1 & 1; + q1 += q1 & Wrapping(1); } } } @@ -218,5 +219,5 @@ pub fn sqrt(x: f64) -> f64 { ix1 |= sign; } ix0 += m << 20; - f64::from_bits((ix0 as u64) << 32 | ix1 as u64) + f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) } From e714fae6da08ce57cacafc8b64d4b555b3df95ab Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 12:12:47 -0700 Subject: [PATCH 0212/1459] Update contributing docs --- libm/CONTRIBUTING.md | 48 ++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/libm/CONTRIBUTING.md b/libm/CONTRIBUTING.md index 680c40e80..a7e817e13 100644 --- a/libm/CONTRIBUTING.md +++ b/libm/CONTRIBUTING.md @@ -2,25 +2,25 @@ - Pick your favorite math function from the [issue tracker]. - Look for the C implementation of the function in the [MUSL source code][src]. -- Copy paste the C code into a Rust file in the `src/math` directory and adjust `src/math/mod.rs` - accordingly. Also, uncomment the corresponding trait method in `src/lib.rs`. -- Run `cargo watch check` and fix the compiler errors. 
-- Tweak the bottom of `test-generator/src/main.rs` to add your function to the test suite. -- If you can, run the full test suite locally (see the [testing](#testing) section below). If you - can't, no problem! Your PR will be fully tested automatically. Though you may still want to add - and run some unit tests. See the bottom of [`src/math/truncf.rs`] for an example of such tests; - you can run unit tests with the `cargo test --lib` command. -- Send us a pull request! Make sure to run `cargo fmt` on your code before sending the PR. Also - include "closes #42" in the PR description to close the corresponding issue. +- Copy paste the C code into a Rust file in the `src/math` directory and adjust + `src/math/mod.rs` accordingly. Also, uncomment the corresponding trait method + in `src/lib.rs`. +- Write some simple tests in your module (using `#[test]`) +- Run `cargo test` to make sure it works +- Run `cargo test --features musl-reference-tests` to compare your + implementation against musl's +- Send us a pull request! Make sure to run `cargo fmt` on your code before + sending the PR. Also include "closes #42" in the PR description to close the + corresponding issue. - :tada: -[issue tracker]: https://github.com/japaric/libm/issues +[issue tracker]: https://github.com/rust-lang-nursery/libm/issues [src]: https://git.musl-libc.org/cgit/musl/tree/src/math -[`src/math/truncf.rs`]: https://github.com/japaric/libm/blob/master/src/math/truncf.rs +[`src/math/truncf.rs`]: https://github.com/rust-lang-nursery/libm/blob/master/src/math/truncf.rs Check [PR #65] for an example. 
-[PR #65]: https://github.com/japaric/libm/pull/65 +[PR #65]: https://github.com/rust-lang-nursery/libm/pull/65 ## Tips and tricks @@ -78,18 +78,18 @@ let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 12 ## Testing -The test suite of this crate can only be run on x86_64 Linux systems using the following commands: +Normal tests can be executed with: -``` console -$ # The test suite depends on the `cross` tool so install it if you don't have it -$ cargo install cross - -$ # and the `cross` tool requires docker to be running -$ systemctl start docker +``` +cargo test +``` -$ # execute the test suite for the x86_64 target -$ TARGET=x86_64-unknown-linux-gnu bash ci/script.sh +If you'd like to run tests with randomized inputs that get compared against musl +itself, you'll need to be on a Linux system and then you can execute: -$ # execute the test suite for the ARMv7 target -$ TARGET=armv7-unknown-linux-gnueabihf bash ci/script.sh ``` +cargo test --features musl-reference-tests +``` + +Note that you may need to pass `--release` to Cargo if there are errors related +to integer overflow. 
From 1dbd7314e6ec3dce8f2a666fcd26e6f950563039 Mon Sep 17 00:00:00 2001 From: Mark Barbone Date: Wed, 1 Aug 2018 12:59:20 -0400 Subject: [PATCH 0213/1459] Modify atan2 to use `wrapping_` ops --- libm/src/math/atan2.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libm/src/math/atan2.rs b/libm/src/math/atan2.rs index a702ec39f..37dad35e1 100644 --- a/libm/src/math/atan2.rs +++ b/libm/src/math/atan2.rs @@ -92,12 +92,12 @@ pub fn atan2(y: f64, x: f64) -> f64 { } } /* |y/x| > 0x1p64 */ - if ix + (64 << 20) < iy || iy == 0x7ff00000 { + if ix.wrapping_add(64 << 20) < iy || iy == 0x7ff00000 { return if m & 1 != 0 { -PI / 2.0 } else { PI / 2.0 }; } /* z = atan(|y/x|) without spurious underflow */ - let z = if (m & 2 != 0) && iy + (64 << 20) < ix { + let z = if (m & 2 != 0) && iy.wrapping_add(64 << 20) < ix { /* |y/x| < 0x1p-64, x<0 */ 0.0 } else { From 294c8508fea23c7b78a41a4ec64f1b0aaf5b5704 Mon Sep 17 00:00:00 2001 From: Igor null Date: Tue, 16 Oct 2018 10:26:39 +0300 Subject: [PATCH 0214/1459] fixed uint underflow in floorf for negative exponents --- libm/src/math/floorf.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs index 8699be060..7d631df02 100644 --- a/libm/src/math/floorf.rs +++ b/libm/src/math/floorf.rs @@ -12,7 +12,7 @@ pub fn floorf(x: f32) -> f32 { } } let mut ui = x.to_bits(); - let e = (((ui >> 23) & 0xff) - 0x7f) as i32; + let e = (((ui >> 23) as i32) & 0xff) - 0x7f; if e >= 23 { return x; From 3a1c8f01d716cce38cd63e2ab65e0433e2abf56a Mon Sep 17 00:00:00 2001 From: Anna Bogus Date: Fri, 5 Oct 2018 07:37:23 +0200 Subject: [PATCH 0215/1459] fixed some clippy warnings --- libm/src/math/acos.rs | 6 +++--- libm/src/math/asin.rs | 8 ++++---- libm/src/math/atan.rs | 16 +++++++--------- libm/src/math/atan2.rs | 2 +- libm/src/math/atanf.rs | 16 +++++++--------- libm/src/math/ceil.rs | 2 +- libm/src/math/ceilf.rs | 2 +- libm/src/math/cosf.rs | 16 ++++++---------- 
libm/src/math/expo2.rs | 2 +- libm/src/math/fdim.rs | 8 +++----- libm/src/math/fdimf.rs | 8 +++----- libm/src/math/floor.rs | 2 +- libm/src/math/floorf.rs | 2 +- libm/src/math/fma.rs | 10 +++++----- libm/src/math/hypot.rs | 2 +- libm/src/math/log.rs | 2 +- libm/src/math/log10.rs | 2 +- libm/src/math/log10f.rs | 2 +- libm/src/math/log1p.rs | 2 +- libm/src/math/log1pf.rs | 2 +- libm/src/math/log2.rs | 2 +- libm/src/math/log2f.rs | 2 +- libm/src/math/pow.rs | 2 +- libm/src/math/powf.rs | 22 ++++++++++------------ libm/src/math/rem_pio2.rs | 24 +++++++++++------------- libm/src/math/sinf.rs | 2 +- libm/src/math/sinh.rs | 2 +- 27 files changed, 76 insertions(+), 92 deletions(-) diff --git a/libm/src/math/acos.rs b/libm/src/math/acos.rs index a0d1450e3..055888ffa 100644 --- a/libm/src/math/acos.rs +++ b/libm/src/math/acos.rs @@ -52,7 +52,7 @@ const QS4: f64 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ fn r(z: f64) -> f64 { let p: f64 = z * (PS0 + z * (PS1 + z * (PS2 + z * (PS3 + z * (PS4 + z * PS5))))); let q: f64 = 1.0 + z * (QS1 + z * (QS2 + z * (QS3 + z * QS4))); - return p / q; + p / q } #[inline] @@ -73,7 +73,7 @@ pub fn acos(x: f64) -> f64 { if ix >= 0x3ff00000 { let lx: u32 = x.to_bits() as u32; - if (ix - 0x3ff00000 | lx) == 0 { + if ((ix - 0x3ff00000) | lx) == 0 { /* acos(1)=0, acos(-1)=pi */ if (hx >> 31) != 0 { return 2. * PIO2_HI + x1p_120f; @@ -105,5 +105,5 @@ pub fn acos(x: f64) -> f64 { c = (z - df * df) / (s + df); w = r(z) * s + c; - return 2. * (df + w); + 2. 
* (df + w) } diff --git a/libm/src/math/asin.rs b/libm/src/math/asin.rs index 855300837..2aee72b28 100644 --- a/libm/src/math/asin.rs +++ b/libm/src/math/asin.rs @@ -59,7 +59,7 @@ const Q_S4: f64 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ fn comp_r(z: f64) -> f64 { let p = z * (P_S0 + z * (P_S1 + z * (P_S2 + z * (P_S3 + z * (P_S4 + z * P_S5))))); let q = 1.0 + z * (Q_S1 + z * (Q_S2 + z * (Q_S3 + z * Q_S4))); - return p / q; + p / q } #[inline] @@ -77,7 +77,7 @@ pub fn asin(mut x: f64) -> f64 { if ix >= 0x3ff00000 { let lx: u32; lx = get_low_word(x); - if (ix - 0x3ff00000 | lx) == 0 { + if ((ix - 0x3ff00000) | lx) == 0 { /* asin(1) = +-pi/2 with inexact */ return x * PIO2_HI + f64::from_bits(0x3870000000000000); } else { @@ -109,8 +109,8 @@ pub fn asin(mut x: f64) -> f64 { x = 0.5 * PIO2_HI - (2.0 * s * r - (PIO2_LO - 2.0 * c) - (0.5 * PIO2_HI - 2.0 * f)); } if hx >> 31 != 0 { - return -x; + -x } else { - return x; + x } } diff --git a/libm/src/math/atan.rs b/libm/src/math/atan.rs index ad1d57c1f..94594080c 100644 --- a/libm/src/math/atan.rs +++ b/libm/src/math/atan.rs @@ -102,16 +102,14 @@ pub fn atan(x: f64) -> f64 { x = (x - 1.) / (x + 1.); 1 } + } else if ix < 0x40038000 { + /* |x| < 2.4375 */ + x = (x - 1.5) / (1. + 1.5 * x); + 2 } else { - if ix < 0x40038000 { - /* |x| < 2.4375 */ - x = (x - 1.5) / (1. + 1.5 * x); - 2 - } else { - /* 2.4375 <= |x| < 2^66 */ - x = -1. / x; - 3 - } + /* 2.4375 <= |x| < 2^66 */ + x = -1. 
/ x; + 3 } }; diff --git a/libm/src/math/atan2.rs b/libm/src/math/atan2.rs index a702ec39f..5dc38099d 100644 --- a/libm/src/math/atan2.rs +++ b/libm/src/math/atan2.rs @@ -53,7 +53,7 @@ pub fn atan2(y: f64, x: f64) -> f64 { let lx = x.to_bits() as u32; let mut iy = (y.to_bits() >> 32) as u32; let ly = y.to_bits() as u32; - if (ix - 0x3ff00000 | lx) == 0 { + if ((ix - 0x3ff00000) | lx) == 0 { /* x = 1.0 */ return atan(y); } diff --git a/libm/src/math/atanf.rs b/libm/src/math/atanf.rs index 5d9024022..89b0afd6f 100644 --- a/libm/src/math/atanf.rs +++ b/libm/src/math/atanf.rs @@ -80,16 +80,14 @@ pub fn atanf(mut x: f32) -> f32 { x = (x - 1.) / (x + 1.); 1 } + } else if ix < 0x401c0000 { + /* |x| < 2.4375 */ + x = (x - 1.5) / (1. + 1.5 * x); + 2 } else { - if ix < 0x401c0000 { - /* |x| < 2.4375 */ - x = (x - 1.5) / (1. + 1.5 * x); - 2 - } else { - /* 2.4375 <= |x| < 2**26 */ - x = -1. / x; - 3 - } + /* 2.4375 <= |x| < 2**26 */ + x = -1. / x; + 3 } }; /* end of argument reduction */ diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index c2b11e4e7..d337db200 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -27,7 +27,7 @@ pub fn ceil(x: f64) -> f64 { x + TOINT - TOINT - x }; // special case because of non-nearest rounding modes - if e <= 0x3ff - 1 { + if e < 0x3ff { force_eval!(y); return if (u >> 63) != 0 { -0. } else { 1. 
}; } diff --git a/libm/src/math/ceilf.rs b/libm/src/math/ceilf.rs index 5eb6a35a6..88f9ecc44 100644 --- a/libm/src/math/ceilf.rs +++ b/libm/src/math/ceilf.rs @@ -35,5 +35,5 @@ pub fn ceilf(x: f32) -> f32 { return 1.0; } } - return f32::from_bits(ui); + f32::from_bits(ui) } diff --git a/libm/src/math/cosf.rs b/libm/src/math/cosf.rs index 615746a31..48d76c8ee 100644 --- a/libm/src/math/cosf.rs +++ b/libm/src/math/cosf.rs @@ -50,12 +50,10 @@ pub fn cosf(x: f32) -> f32 { if ix > 0x4016cbe3 { /* |x| ~> 3*pi/4 */ return -k_cosf(if sign { x64 + C2_PIO2 } else { x64 - C2_PIO2 }); + } else if sign { + return k_sinf(x64 + C1_PIO2); } else { - if sign { - return k_sinf(x64 + C1_PIO2); - } else { - return k_sinf(C1_PIO2 - x64); - } + return k_sinf(C1_PIO2 - x64); } } if ix <= 0x40e231d5 { @@ -63,12 +61,10 @@ pub fn cosf(x: f32) -> f32 { if ix > 0x40afeddf { /* |x| ~> 7*pi/4 */ return k_cosf(if sign { x64 + C4_PIO2 } else { x64 - C4_PIO2 }); + } else if sign { + return k_sinf(-x64 - C3_PIO2); } else { - if sign { - return k_sinf(-x64 - C3_PIO2); - } else { - return k_sinf(x64 - C3_PIO2); - } + return k_sinf(x64 - C3_PIO2); } } diff --git a/libm/src/math/expo2.rs b/libm/src/math/expo2.rs index b5369fbbe..9e60ca994 100644 --- a/libm/src/math/expo2.rs +++ b/libm/src/math/expo2.rs @@ -11,5 +11,5 @@ pub fn expo2(x: f64) -> f64 { /* note that k is odd and scale*scale overflows */ let scale = combine_words(((0x3ff + K / 2) as u32) << 20, 0); /* exp(x - k ln2) * 2**(k-1) */ - return exp(x - kln2) * scale * scale; + exp(x - kln2) * scale * scale } diff --git a/libm/src/math/fdim.rs b/libm/src/math/fdim.rs index 32ae306e2..d9aca8611 100644 --- a/libm/src/math/fdim.rs +++ b/libm/src/math/fdim.rs @@ -7,11 +7,9 @@ pub fn fdim(x: f64, y: f64) -> f64 { x } else if y.is_nan() { y + } else if x > y { + x - y } else { - if x > y { - x - y - } else { - 0.0 - } + 0.0 } } diff --git a/libm/src/math/fdimf.rs b/libm/src/math/fdimf.rs index 7db019326..bcda8ee94 100644 --- a/libm/src/math/fdimf.rs +++ 
b/libm/src/math/fdimf.rs @@ -7,11 +7,9 @@ pub fn fdimf(x: f32, y: f32) -> f32 { x } else if y.is_nan() { y + } else if x > y { + x - y } else { - if x > y { - x - y - } else { - 0.0 - } + 0.0 } } diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index f5ac8006f..c705ae501 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -26,7 +26,7 @@ pub fn floor(x: f64) -> f64 { x + TOINT - TOINT - x }; /* special case because of non-nearest rounding modes */ - if e <= 0x3ff - 1 { + if e < 0x3ff { force_eval!(y); return if (ui >> 63) != 0 { -1. } else { 0. }; } diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs index 8699be060..b02bae596 100644 --- a/libm/src/math/floorf.rs +++ b/libm/src/math/floorf.rs @@ -35,5 +35,5 @@ pub fn floorf(x: f32) -> f32 { return -1.0; } } - return f32::from_bits(ui); + f32::from_bits(ui) } diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index acf99a5d4..21c854cd0 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -83,7 +83,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { if d > 0 { if d < 64 { zlo = nz.m << d; - zhi = nz.m >> 64 - d; + zhi = nz.m >> (64 - d); } else { zlo = 0; zhi = nz.m; @@ -91,7 +91,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { d -= 64; if d == 0 { } else if d < 64 { - rlo = rhi << 64 - d | rlo >> d | ((rlo << 64 - d) != 0) as u64; + rlo = rhi << (64 - d) | rlo >> d | ((rlo << (64 - d)) != 0) as u64; rhi = rhi >> d; } else { rlo = 1; @@ -104,7 +104,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { if d == 0 { zlo = nz.m; } else if d < 64 { - zlo = nz.m >> d | ((nz.m << 64 - d) != 0) as u64; + zlo = nz.m >> d | ((nz.m << (64 - d)) != 0) as u64; } else { zlo = 1; } @@ -136,7 +136,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { e += 64; d = rhi.leading_zeros() as i32 - 1; /* note: d > 0 */ - rhi = rhi << d | rlo >> 64 - d | ((rlo << d) != 0) as u64; + rhi = rhi << d | rlo >> (64 - d) | ((rlo << d) != 0) as u64; } else if rlo != 0 { d = rlo.leading_zeros() as i32 - 1; 
if d < 0 { @@ -191,7 +191,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { } else { /* only round once when scaled */ d = 10; - i = ((rhi >> d | ((rhi << 64 - d) != 0) as u64) << d) as i64; + i = ((rhi >> d | ((rhi << (64 - d)) != 0) as u64) << d) as i64; if sign != 0 { i = -i; } diff --git a/libm/src/math/hypot.rs b/libm/src/math/hypot.rs index dee9bbf42..e53baf539 100644 --- a/libm/src/math/hypot.rs +++ b/libm/src/math/hypot.rs @@ -72,5 +72,5 @@ pub fn hypot(mut x: f64, mut y: f64) -> f64 { } let (hx, lx) = sq(x); let (hy, ly) = sq(y); - return z * sqrt(ly + lx + hy + hx); + z * sqrt(ly + lx + hy + hx) } diff --git a/libm/src/math/log.rs b/libm/src/math/log.rs index 948065abf..4126e413b 100644 --- a/libm/src/math/log.rs +++ b/libm/src/math/log.rs @@ -114,5 +114,5 @@ pub fn log(mut x: f64) -> f64 { let t2: f64 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); let r: f64 = t2 + t1; let dk: f64 = k as f64; - return s * (hfsq + r) + dk * LN2_LO - hfsq + f + dk * LN2_HI; + s * (hfsq + r) + dk * LN2_LO - hfsq + f + dk * LN2_HI } diff --git a/libm/src/math/log10.rs b/libm/src/math/log10.rs index 100618a02..c99696040 100644 --- a/libm/src/math/log10.rs +++ b/libm/src/math/log10.rs @@ -114,5 +114,5 @@ pub fn log10(mut x: f64) -> f64 { val_lo += (y - w) + val_hi; val_hi = w; - return val_lo + val_hi; + val_lo + val_hi } diff --git a/libm/src/math/log10f.rs b/libm/src/math/log10f.rs index 9cf89deb9..9845cda5d 100644 --- a/libm/src/math/log10f.rs +++ b/libm/src/math/log10f.rs @@ -88,5 +88,5 @@ pub fn log10f(mut x: f32) -> f32 { hi = f32::from_bits(ui); lo = f - hi - hfsq + s * (hfsq + r); dk = k as f32; - return dk * LOG10_2LO + (lo + hi) * IVLN10LO + lo * IVLN10HI + hi * IVLN10HI + dk * LOG10_2HI; + dk * LOG10_2LO + (lo + hi) * IVLN10LO + lo * IVLN10HI + hi * IVLN10HI + dk * LOG10_2HI } diff --git a/libm/src/math/log1p.rs b/libm/src/math/log1p.rs index fb35e90db..cd7045ac9 100644 --- a/libm/src/math/log1p.rs +++ b/libm/src/math/log1p.rs @@ -140,5 +140,5 @@ pub fn log1p(x: f64) 
-> f64 { t2 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); r = t2 + t1; dk = k as f64; - return s * (hfsq + r) + (dk * LN2_LO + c) - hfsq + f + dk * LN2_HI; + s * (hfsq + r) + (dk * LN2_LO + c) - hfsq + f + dk * LN2_HI } diff --git a/libm/src/math/log1pf.rs b/libm/src/math/log1pf.rs index 9fc399d95..8e9651357 100644 --- a/libm/src/math/log1pf.rs +++ b/libm/src/math/log1pf.rs @@ -95,5 +95,5 @@ pub fn log1pf(x: f32) -> f32 { r = t2 + t1; hfsq = 0.5 * f * f; dk = k as f32; - return s * (hfsq + r) + (dk * LN2_LO + c) - hfsq + f + dk * LN2_HI; + s * (hfsq + r) + (dk * LN2_LO + c) - hfsq + f + dk * LN2_HI } diff --git a/libm/src/math/log2.rs b/libm/src/math/log2.rs index b513928cc..a3d43e55c 100644 --- a/libm/src/math/log2.rs +++ b/libm/src/math/log2.rs @@ -103,5 +103,5 @@ pub fn log2(mut x: f64) -> f64 { val_lo += (y - w) + val_hi; val_hi = w; - return val_lo + val_hi; + val_lo + val_hi } diff --git a/libm/src/math/log2f.rs b/libm/src/math/log2f.rs index 07a00dc3d..53a37e503 100644 --- a/libm/src/math/log2f.rs +++ b/libm/src/math/log2f.rs @@ -84,5 +84,5 @@ pub fn log2f(mut x: f32) -> f32 { ui &= 0xfffff000; hi = f32::from_bits(ui); lo = f - hi - hfsq + s * (hfsq + r); - return (lo + hi) * IVLN2LO + lo * IVLN2HI + hi * IVLN2HI + k as f32; + (lo + hi) * IVLN2LO + lo * IVLN2HI + hi * IVLN2HI + k as f32 } diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index b5b0407ef..dca70f99b 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -406,5 +406,5 @@ pub fn pow(x: f64, y: f64) -> f64 { z = with_set_high_word(z, j as u32); } - return s * z; + s * z } diff --git a/libm/src/math/powf.rs b/libm/src/math/powf.rs index 0a26573a3..befbf44db 100644 --- a/libm/src/math/powf.rs +++ b/libm/src/math/powf.rs @@ -136,12 +136,11 @@ pub fn powf(x: f32, y: f32) -> f32 { return x * x; } - if hy == 0x3f000000 { - /* y is 0.5 */ - if hx >= 0 { - /* x >= +0 */ - return sqrtf(x); - } + if hy == 0x3f000000 + /* y is 0.5 */ + && hx >= 0 { + /* x >= +0 */ + return sqrtf(x); } ax = 
fabsf(x); @@ -296,11 +295,10 @@ pub fn powf(x: f32, y: f32) -> f32 { /* z < -150 */ // FIXME: check should be (uint32_t)j > 0xc3160000 return sn * TINY * TINY; /* underflow */ - } else if j as u32 == 0xc3160000 { - /* z == -150 */ - if p_l <= z - p_h { - return sn * TINY * TINY; /* underflow */ - } + } else if j as u32 == 0xc3160000 + /* z == -150 */ + && p_l <= z - p_h { + return sn * TINY * TINY; /* underflow */ } /* @@ -339,5 +337,5 @@ pub fn powf(x: f32, y: f32) -> f32 { } else { z = f32::from_bits(j as u32); } - return sn * z; + sn * z } diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 82faf5da9..951dd08b4 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -78,7 +78,7 @@ pub fn rem_pio2(x: f64) -> (i32, f64, f64) { } } let y1 = (r - y0) - w; - return (n, y0, y1); + (n, y0, y1) } if ix <= 0x400f6a7a { @@ -100,18 +100,16 @@ pub fn rem_pio2(x: f64) -> (i32, f64, f64) { let y1 = (z - y0) + PIO2_1T; return (-1, y0, y1); } + } else if sign == 0 { + let z = x - 2.0 * PIO2_1; + let y0 = z - 2.0 * PIO2_1T; + let y1 = (z - y0) - 2.0 * PIO2_1T; + return (2, y0, y1); } else { - if sign == 0 { - let z = x - 2.0 * PIO2_1; - let y0 = z - 2.0 * PIO2_1T; - let y1 = (z - y0) - 2.0 * PIO2_1T; - return (2, y0, y1); - } else { - let z = x + 2.0 * PIO2_1; - let y0 = z + 2.0 * PIO2_1T; - let y1 = (z - y0) + 2.0 * PIO2_1T; - return (-2, y0, y1); - } + let z = x + 2.0 * PIO2_1; + let y0 = z + 2.0 * PIO2_1T; + let y1 = (z - y0) + 2.0 * PIO2_1T; + return (-2, y0, y1); } } if ix <= 0x401c463b { @@ -185,5 +183,5 @@ pub fn rem_pio2(x: f64) -> (i32, f64, f64) { if sign != 0 { return (-n, -ty[0], -ty[1]); } - return (n, ty[0], ty[1]); + (n, ty[0], ty[1]) } diff --git a/libm/src/math/sinf.rs b/libm/src/math/sinf.rs index b8fc8d6f4..0c31099ed 100644 --- a/libm/src/math/sinf.rs +++ b/libm/src/math/sinf.rs @@ -88,7 +88,7 @@ pub fn sinf(x: f32) -> f32 { match n & 3 { 0 => k_sinf(y), 1 => k_cosf(y), - 2 => return k_sinf(-y), + 2 => k_sinf(-y), _ => 
-k_cosf(y), } } diff --git a/libm/src/math/sinh.rs b/libm/src/math/sinh.rs index 25ff3daac..d36de66c1 100644 --- a/libm/src/math/sinh.rs +++ b/libm/src/math/sinh.rs @@ -46,5 +46,5 @@ pub fn sinh(x: f64) -> f64 { /* |x| > log(DBL_MAX) or nan */ /* note: the result is stored to handle overflow */ t = 2.0 * h * expo2(absx); - return t; + t } From 14bef6f9a6c837bdbd6efaf30f992dea70b2e81d Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 12:21:55 -0700 Subject: [PATCH 0216/1459] Add bindings for ldexp/ldexpf Should help in fixing wasmerio/wasmer#407 --- libm/src/math/ldexp.rs | 5 +++++ libm/src/math/ldexpf.rs | 5 +++++ libm/src/math/mod.rs | 4 ++++ 3 files changed, 14 insertions(+) create mode 100644 libm/src/math/ldexp.rs create mode 100644 libm/src/math/ldexpf.rs diff --git a/libm/src/math/ldexp.rs b/libm/src/math/ldexp.rs new file mode 100644 index 000000000..780ddfc11 --- /dev/null +++ b/libm/src/math/ldexp.rs @@ -0,0 +1,5 @@ +#[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexp(x: f64, n: i32) -> f64 { + super::scalbn(x, n) +} diff --git a/libm/src/math/ldexpf.rs b/libm/src/math/ldexpf.rs new file mode 100644 index 000000000..70935a002 --- /dev/null +++ b/libm/src/math/ldexpf.rs @@ -0,0 +1,5 @@ +#[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf(x: f32, n: i32) -> f32 { + super::scalbnf(x, n) +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index e51b1511d..be0918ffd 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -104,6 +104,8 @@ mod fmod; mod fmodf; mod hypot; mod hypotf; +mod ldexp; +mod ldexpf; mod log; mod log10; mod log10f; @@ -166,6 +168,8 @@ pub use self::fmod::fmod; pub use self::fmodf::fmodf; pub use self::hypot::hypot; pub use self::hypotf::hypotf; +pub use self::ldexp::ldexp; +pub use self::ldexpf::ldexpf; pub use self::log::log; pub use self::log10::log10; pub use self::log10f::log10f; From 249a9ee94cfeb8c29f5b3bd116a299af07416947 Mon Sep 17 
00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 12:27:57 -0700 Subject: [PATCH 0217/1459] Update some URLs --- libm/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libm/README.md b/libm/README.md index 568a049f0..3df5b65ea 100644 --- a/libm/README.md +++ b/libm/README.md @@ -12,9 +12,9 @@ The short term goal of this library is to [enable math support (e.g. `sin`, `ata `wasm32-unknown-unknown` target][wasm] (cf. [rust-lang-nursery/compiler-builtins][pr]). The longer term goal is to enable [math support in the `core` crate][core]. -[wasm]: https://github.com/japaric/libm/milestone/1 +[wasm]: https://github.com/rust-lang-nursery/libm/milestone/1 [pr]: https://github.com/rust-lang-nursery/compiler-builtins/pull/248 -[core]: https://github.com/japaric/libm/milestone/2 +[core]: https://github.com/rust-lang-nursery/libm/milestone/2 ## Already usable From 7ebf00623da7aca65fa68020725b61d76ffc8553 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 12:34:05 -0700 Subject: [PATCH 0218/1459] Add a test that overflow does not happen --- libm/src/math/floorf.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs index 7d631df02..8a63874eb 100644 --- a/libm/src/math/floorf.rs +++ b/libm/src/math/floorf.rs @@ -37,3 +37,11 @@ pub fn floorf(x: f32) -> f32 { } return f32::from_bits(ui); } + +#[cfg(test)] +mod tests { + #[test] + fn no_overflow() { + assert_eq!(super::floorf(0.5), 0.0); + } +} From 2d3dde322176147d32091e6dba055945b1668beb Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 12:36:37 -0700 Subject: [PATCH 0219/1459] Run `cargo fmt` --- libm/src/math/powf.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/libm/src/math/powf.rs b/libm/src/math/powf.rs index befbf44db..015bade86 100644 --- a/libm/src/math/powf.rs +++ b/libm/src/math/powf.rs @@ -136,9 +136,10 @@ pub fn powf(x: f32, y: f32) -> f32 { return x * x; } - if hy 
== 0x3f000000 + if hy == 0x3f000000 /* y is 0.5 */ - && hx >= 0 { + && hx >= 0 + { /* x >= +0 */ return sqrtf(x); } @@ -295,9 +296,10 @@ pub fn powf(x: f32, y: f32) -> f32 { /* z < -150 */ // FIXME: check should be (uint32_t)j > 0xc3160000 return sn * TINY * TINY; /* underflow */ - } else if j as u32 == 0xc3160000 + } else if j as u32 == 0xc3160000 /* z == -150 */ - && p_l <= z - p_h { + && p_l <= z - p_h + { return sn * TINY * TINY; /* underflow */ } From 4012b9c4f514f8e15cf849d351fb202a66bb024e Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 12:40:39 -0700 Subject: [PATCH 0220/1459] Include `ldexp*` intrinsics on the wasm target Looks like LLVM optimizes programs like: fn foo(a: u8) -> f32 { 2.0f32.powf(a as f32) } to actually invoking `ldexpf`, so let's be sure to include bindings so there's not undefined symbols. --- libm | 2 +- src/math.rs | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/libm b/libm index 3559e7037..0ae442888 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 3559e703795d33e84a91da2a35f2f3baac47e872 +Subproject commit 0ae442888c7af72c0a335edd43dbbd74c751f119 diff --git a/src/math.rs b/src/math.rs index b61955539..1893f1c16 100644 --- a/src/math.rs +++ b/src/math.rs @@ -71,6 +71,8 @@ no_mangle! { fn sinhf(n: f32) -> f32; fn tanf(n: f32) -> f32; fn tanhf(n: f32) -> f32; + fn ldexp(f: f64, n: i32) -> f64; + fn ldexpf(f: f32, n: i32) -> f32; } #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] From 4bbc838ac77c89ccbba780a4234a8a3c25a8b882 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 12:47:51 -0700 Subject: [PATCH 0221/1459] Revert "Use the Rust implementation of udivsi3 on ARM" This reverts commit 681aaa914dea7cae8252c33023604ce6c91808bd. 
--- build.rs | 17 ++++------------- ci/run-docker.sh | 1 + src/int/sdiv.rs | 1 + src/int/udiv.rs | 3 +++ 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/build.rs b/build.rs index 835b423cd..27dac6026 100644 --- a/build.rs +++ b/build.rs @@ -289,6 +289,7 @@ mod c { "arm/clzdi2.S", "arm/clzsi2.S", "arm/divmodsi4.S", + "arm/divsi3.S", "arm/modsi3.S", "arm/switch16.S", "arm/switch32.S", @@ -296,20 +297,8 @@ mod c { "arm/switchu8.S", "arm/sync_synchronize.S", "arm/udivmodsi4.S", + "arm/udivsi3.S", "arm/umodsi3.S", - - // Exclude these two files for now even though we haven't - // translated their implementation into Rust yet (#173). - // They appear... buggy? The `udivsi3` implementation was - // the one that seemed buggy, but the `divsi3` file - // references a symbol from `udivsi3` so we compile them - // both with the Rust versions. - // - // Note that if these are added back they should be removed - // from thumbv6m below. - // - // "arm/divsi3.S", - // "arm/udivsi3.S", ], ); @@ -427,12 +416,14 @@ mod c { "clzdi2", "clzsi2", "divmodsi4", + "divsi3", "modsi3", "switch16", "switch32", "switch8", "switchu8", "udivmodsi4", + "udivsi3", "umodsi3", ], ); diff --git a/ci/run-docker.sh b/ci/run-docker.sh index 5c2e065cc..bed84ed81 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -24,6 +24,7 @@ run() { -v `pwd`:/checkout:ro \ -v `rustc --print sysroot`:/rust:ro \ -w /checkout \ + --init \ $target \ sh -c "HOME=/tmp PATH=\$PATH:/rust/bin ci/run.sh $target" } diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index a2e8aa96f..d7ae71ab2 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -57,6 +57,7 @@ impl Divmod for i32 {} impl Divmod for i64 {} intrinsics! 
{ + #[use_c_shim_if(all(target_arch = "arm", not(target_os = "ios"), not(thumbv6m)))] #[arm_aeabi_alias = __aeabi_idiv] pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 { a.div(b) diff --git a/src/int/udiv.rs b/src/int/udiv.rs index d873559bd..7b7f5b44d 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -152,6 +152,9 @@ macro_rules! udivmod_inner { } intrinsics! { + #[use_c_shim_if(all(target_arch = "arm", + not(target_os = "ios"), + not(thumbv6m)))] #[arm_aeabi_alias = __aeabi_uidiv] /// Returns `n / d` pub extern "C" fn __udivsi3(n: u32, d: u32) -> u32 { From 752e35a11f2e7eb29df84668bef0945e75cf3eb5 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 13:35:50 -0700 Subject: [PATCH 0222/1459] Fix duplicate symbol __clzsi2 Looks like our tests weren't quite testing compiler-builtins when it was compiled with unmangled symbols, so update the tests to catch this and then fix the compilation of the `__clzsi2` intrinsic to use the C version if it's compiled. --- ci/run.sh | 9 +++++++++ src/int/mod.rs | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/ci/run.sh b/ci/run.sh index 589553adb..27e2de22f 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -42,6 +42,10 @@ else $run --release $run --features c $run --features c --release + cargo build --target $1 + cargo build --target $1 --release + cargo build --target $1 --features c + cargo build --target $1 --release --features c fi PREFIX=$(echo $1 | sed -e 's/unknown-//')- @@ -68,6 +72,10 @@ fi # Look out for duplicated symbols when we include the compiler-rt (C) implementation for rlib in $(echo $path); do set +x + echo "================================================================" + echo checking $rlib for duplicate symbols + echo "================================================================" + stdout=$($PREFIX$NM -g --defined-only $rlib 2>&1) # NOTE On i586, It's normal that the get_pc_thunk symbol appears several @@ -86,6 +94,7 @@ for rlib in $(echo $path); do if test $? 
= 0; then exit 1 fi + set -ex done diff --git a/src/int/mod.rs b/src/int/mod.rs index 52a4227a0..07f72f84e 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -302,6 +302,7 @@ impl_wide_int!(u32, u64, 32); impl_wide_int!(u64, u128, 64); intrinsics! { + #[use_c_shim_if(/* always if C compilation is enabled */)] #[cfg(any( target_pointer_width = "16", target_pointer_width = "32", @@ -310,7 +311,7 @@ intrinsics! { pub extern "C" fn __clzsi2(x: usize) -> usize { // TODO: const this? Would require const-if // Note(Lokathor): the `intrinsics!` macro can't process mut inputs - let mut x = x; + let mut x = x; let mut y: usize; let mut n: usize = { #[cfg(target_pointer_width = "64")] From 77b1c7a88146dfdf66199b31827b53d77c06fb8d Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 13:44:20 -0700 Subject: [PATCH 0223/1459] Enable historically segfaulting tests on MIPS They seem to be passing locally so let's see what CI has to say. Closes #137 --- testcrate/build.rs | 104 +++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 55 deletions(-) diff --git a/testcrate/build.rs b/testcrate/build.rs index d862e0d0f..17ae3ef18 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -594,14 +594,12 @@ fn main() { }"); gen(|(a, b): (MyI128, MyI128)| Some(a.0.wrapping_mul(b.0)), "builtins::int::mul::__multi3(a, b)"); - if !target_arch_mips { // FIXME(#137) - gen(|(a, b): (MyI128, MyI128)| Some(a.0.overflowing_mul(b.0)), - "{ - let mut o = 2; - let c = builtins::int::mul::__muloti4(a, b, &mut o); - (c, match o { 0 => false, 1 => true, _ => panic!() }) - }"); - } + gen(|(a, b): (MyI128, MyI128)| Some(a.0.overflowing_mul(b.0)), + "{ + let mut o = 2; + let c = builtins::int::mul::__muloti4(a, b, &mut o); + (c, match o { 0 => false, 1 => true, _ => panic!() }) + }"); // int/sdiv.rs gen(|(a, b): (MyI64, MyI64)| { @@ -658,24 +656,22 @@ fn main() { } }, "builtins::int::sdiv::__moddi3(a, b)"); - if !target_arch_mips { // FIXME(#137) - gen(|(a, b): 
(MyI128, MyI128)| { - if b.0 == 0 { - None - } else { - Some(a.0 / b.0) - } - }, - "builtins::int::sdiv::__divti3(a, b)"); - gen(|(a, b): (MyI128, MyI128)| { - if b.0 == 0 { - None - } else { - Some(a.0 % b.0) - } - }, - "builtins::int::sdiv::__modti3(a, b)"); - } + gen(|(a, b): (MyI128, MyI128)| { + if b.0 == 0 { + None + } else { + Some(a.0 / b.0) + } + }, + "builtins::int::sdiv::__divti3(a, b)"); + gen(|(a, b): (MyI128, MyI128)| { + if b.0 == 0 { + None + } else { + Some(a.0 % b.0) + } + }, + "builtins::int::sdiv::__modti3(a, b)"); // int/shift.rs gen(|(a, b): (MyU64, MyU32)| Some(a.0 << (b.0 % 64)), @@ -746,35 +742,33 @@ fn main() { } }, "builtins::int::udiv::__umoddi3(a, b)"); - if !target_arch_mips { // FIXME(#137) - gen(|(a, b): (MyU128, MyU128)| { - if b.0 == 0 { - None - } else { - Some(a.0 / b.0) - } - }, - "builtins::int::udiv::__udivti3(a, b)"); - gen(|(a, b): (MyU128, MyU128)| { - if b.0 == 0 { - None - } else { - Some(a.0 % b.0) - } - }, - "builtins::int::udiv::__umodti3(a, b)"); - gen(|(a, b): (MyU128, MyU128)| { - if b.0 == 0 { - None - } else { - Some((a.0 / b.0, a.0 % b.0)) - } - }, - "{ - let mut r = 0; - (builtins::int::udiv::__udivmodti4(a, b, Some(&mut r)), r) - }"); - } + gen(|(a, b): (MyU128, MyU128)| { + if b.0 == 0 { + None + } else { + Some(a.0 / b.0) + } + }, + "builtins::int::udiv::__udivti3(a, b)"); + gen(|(a, b): (MyU128, MyU128)| { + if b.0 == 0 { + None + } else { + Some(a.0 % b.0) + } + }, + "builtins::int::udiv::__umodti3(a, b)"); + gen(|(a, b): (MyU128, MyU128)| { + if b.0 == 0 { + None + } else { + Some((a.0 / b.0, a.0 % b.0)) + } + }, + "{ + let mut r = 0; + (builtins::int::udiv::__udivmodti4(a, b, Some(&mut r)), r) + }"); } macro_rules! 
gen_float { From 80a998df921f572891107a51f47226e7b6858269 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 14:22:19 -0700 Subject: [PATCH 0224/1459] Fix duplicate floatdisf symbol on Windows MSVC --- src/float/conv.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/float/conv.rs b/src/float/conv.rs index 3171e4509..8d3e5fc6d 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -83,6 +83,7 @@ intrinsics! { #[use_c_shim_if(any( all(target_arch = "x86", not(target_env = "msvc")), all(target_arch = "x86_64", not(windows)), + all(target_arch = "x86_64", target_env = "msvc"), ))] #[arm_aeabi_alias = __aeabi_l2f] pub extern "C" fn __floatdisf(i: i64) -> f32 { From e3b53f914298e1fa718b6fe0319a0cf718896648 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 May 2019 15:18:37 -0700 Subject: [PATCH 0225/1459] Bump to 0.1.12 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index fdca20143..a05825a61 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.11" +version = "0.1.12" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From 66efaebbd0bd4f23b8d94f61ba9f235bcacabac4 Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Tue, 7 May 2019 07:16:24 +0300 Subject: [PATCH 0226/1459] tests, fixes, format --- libm/build.rs | 22 +- libm/ci/run-docker.sh | 0 libm/ci/run.sh | 0 libm/src/math/acosh.rs | 44 +- libm/src/math/acoshf.rs | 42 +- libm/src/math/asinef.rs | 188 +++++---- libm/src/math/asinh.rs | 70 ++-- libm/src/math/asinhf.rs | 68 ++-- libm/src/math/atan2.rs | 2 +- libm/src/math/atanh.rs | 65 ++- libm/src/math/atanhf.rs | 64 +-- libm/src/math/ceilf.rs | 2 +- libm/src/math/copysign.rs | 14 +- libm/src/math/copysignf.rs | 14 +- libm/src/math/erf.rs | 603 +++++++++++++-------------- libm/src/math/erff.rs | 428 +++++++++---------- 
libm/src/math/exp10.rs | 45 +- libm/src/math/exp10f.rs | 43 +- libm/src/math/fma.rs | 4 +- libm/src/math/fmod.rs | 4 +- libm/src/math/fmodf.rs | 4 +- libm/src/math/frexp.rs | 40 +- libm/src/math/frexpf.rs | 42 +- libm/src/math/ilogb.rs | 62 +-- libm/src/math/ilogbf.rs | 62 +-- libm/src/math/j0.rs | 814 +++++++++++++++++++------------------ libm/src/math/j0f.rs | 689 ++++++++++++++++--------------- libm/src/math/j1.rs | 801 ++++++++++++++++++------------------ libm/src/math/j1f.rs | 689 ++++++++++++++++--------------- libm/src/math/jn.rs | 681 ++++++++++++++++--------------- libm/src/math/jnf.rs | 514 +++++++++++------------ libm/src/math/lgamma.rs | 632 ++++++++++++++-------------- libm/src/math/lgammaf.rs | 502 ++++++++++++----------- libm/src/math/mod.rs | 10 +- libm/src/math/modf.rs | 67 +-- libm/src/math/modff.rs | 65 +-- libm/src/math/remquo.rs | 195 +++++---- libm/src/math/remquof.rs | 193 +++++---- libm/src/math/sincos.rs | 119 +++--- libm/src/math/sincosf.rs | 245 +++++------ libm/src/math/tgamma.rs | 386 ++++++++++-------- libm/src/math/tgammaf.rs | 10 +- 42 files changed, 4371 insertions(+), 4173 deletions(-) mode change 100755 => 100644 libm/ci/run-docker.sh mode change 100755 => 100644 libm/ci/run.sh diff --git a/libm/build.rs b/libm/build.rs index 23e1178e3..29521ab19 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -41,6 +41,19 @@ mod musl_reference_tests { "rem_pio2.rs", "rem_pio2_large.rs", "rem_pio2f.rs", + "remquo.rs", + "remquof.rs", + "lgamma.rs", // lgamma passed, lgamma_r has more than 1 result + "lgammaf.rs", // lgammaf passed, lgammaf_r has more than 1 result + "frexp.rs", // more than 1 result + "frexpf.rs", // more than 1 result + "sincos.rs", // more than 1 result + "sincosf.rs", // more than 1 result + "modf.rs", // more than 1 result + "modff.rs", // more than 1 result + "asinef.rs", // not exists + "jn.rs", // passed, but very slow + "jnf.rs", // passed, but very slow ]; struct Function { @@ -78,12 +91,9 @@ mod musl_reference_tests { 
let contents = fs::read_to_string(file).unwrap(); let mut functions = contents.lines().filter(|f| f.starts_with("pub fn")); - let function_to_test = functions.next().unwrap(); - if functions.next().is_some() { - panic!("more than one function in"); + while let Some(function_to_test) = functions.next() { + math.push(parse(function_to_test)); } - - math.push(parse(function_to_test)); } // Generate a bunch of random inputs for each function. This will @@ -330,7 +340,7 @@ mod musl_reference_tests { src.push_str(match function.ret { Ty::F32 => "if _eqf(output, f32::from_bits(*expected as u32)).is_ok() { continue }", Ty::F64 => "if _eq(output, f64::from_bits(*expected as u64)).is_ok() { continue }", - Ty::I32 => "if output as i64 == expected { continue }", + Ty::I32 => "if output as i64 == *expected { continue }", Ty::Bool => unreachable!(), }); diff --git a/libm/ci/run-docker.sh b/libm/ci/run-docker.sh old mode 100755 new mode 100644 diff --git a/libm/ci/run.sh b/libm/ci/run.sh old mode 100755 new mode 100644 diff --git a/libm/src/math/acosh.rs b/libm/src/math/acosh.rs index 3494e3405..95dc57d81 100644 --- a/libm/src/math/acosh.rs +++ b/libm/src/math/acosh.rs @@ -1,22 +1,22 @@ -use super::{log, log1p, sqrt}; - -const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa39ef*/ - -/* acosh(x) = log(x + sqrt(x*x-1)) */ -pub fn acosh(x: f64) -> f64 { - let u = x.to_bits(); - let e = ((u >> 52) as usize) & 0x7ff; - - /* x < 1 domain error is handled in the called functions */ - - if e < 0x3ff + 1 { - /* |x| < 2, up to 2ulp error in [1,1.125] */ - return log1p(x-1.0+sqrt((x-1.0)*(x-1.0)+2.0*(x-1.0))); - } - if e < 0x3ff + 26 { - /* |x| < 0x1p26 */ - return log(2.0*x-1.0/(x+sqrt(x*x-1.0))); - } - /* |x| >= 0x1p26 or nan */ - return log(x) + LN2; -} +use super::{log, log1p, sqrt}; + +const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa39ef*/ + +/* acosh(x) = log(x + sqrt(x*x-1)) */ +pub fn acosh(x: f64) -> f64 { + let u = 
x.to_bits(); + let e = ((u >> 52) as usize) & 0x7ff; + + /* x < 1 domain error is handled in the called functions */ + + if e < 0x3ff + 1 { + /* |x| < 2, up to 2ulp error in [1,1.125] */ + return log1p(x - 1.0 + sqrt((x - 1.0) * (x - 1.0) + 2.0 * (x - 1.0))); + } + if e < 0x3ff + 26 { + /* |x| < 0x1p26 */ + return log(2.0 * x - 1.0 / (x + sqrt(x * x - 1.0))); + } + /* |x| >= 0x1p26 or nan */ + return log(x) + LN2; +} diff --git a/libm/src/math/acoshf.rs b/libm/src/math/acoshf.rs index 1e298a9b3..f50a00324 100644 --- a/libm/src/math/acoshf.rs +++ b/libm/src/math/acoshf.rs @@ -1,21 +1,21 @@ -use super::{log1pf, logf, sqrtf}; - -const LN2: f32 = 0.693147180559945309417232121458176568; - -/* acosh(x) = log(x + sqrt(x*x-1)) */ -pub fn acoshf(x: f32) -> f32 { - let u = x.to_bits(); - let a = u & 0x7fffffff; - - if a < 0x3f800000+(1<<23) { - /* |x| < 2, invalid if x < 1 or nan */ - /* up to 2ulp error in [1,1.125] */ - return log1pf(x-1.0 + sqrtf((x-1.0)*(x-1.0)+2.0*(x-1.0))); - } - if a < 0x3f800000+(12<<23) { - /* |x| < 0x1p12 */ - return logf(2.0*x - 1.0/(x+sqrtf(x*x-1.0))); - } - /* x >= 0x1p12 */ - return logf(x) + LN2; -} +use super::{log1pf, logf, sqrtf}; + +const LN2: f32 = 0.693147180559945309417232121458176568; + +/* acosh(x) = log(x + sqrt(x*x-1)) */ +pub fn acoshf(x: f32) -> f32 { + let u = x.to_bits(); + let a = u & 0x7fffffff; + + if a < 0x3f800000 + (1 << 23) { + /* |x| < 2, invalid if x < 1 or nan */ + /* up to 2ulp error in [1,1.125] */ + return log1pf(x - 1.0 + sqrtf((x - 1.0) * (x - 1.0) + 2.0 * (x - 1.0))); + } + if a < 0x3f800000 + (12 << 23) { + /* |x| < 0x1p12 */ + return logf(2.0 * x - 1.0 / (x + sqrtf(x * x - 1.0))); + } + /* x >= 0x1p12 */ + return logf(x) + LN2; +} diff --git a/libm/src/math/asinef.rs b/libm/src/math/asinef.rs index d2cd82699..cd1428bc2 100644 --- a/libm/src/math/asinef.rs +++ b/libm/src/math/asinef.rs @@ -1,95 +1,93 @@ -/* @(#)z_asinef.c 1.0 98/08/13 */ -/****************************************************************** - * 
The following routines are coded directly from the algorithms - * and coefficients given in "Software Manual for the Elementary - * Functions" by William J. Cody, Jr. and William Waite, Prentice - * Hall, 1980. - ******************************************************************/ -/****************************************************************** - * Arcsine - * - * Input: - * x - floating point value - * acosine - indicates acos calculation - * - * Output: - * Arcsine of x. - * - * Description: - * This routine calculates arcsine / arccosine. - * - *****************************************************************/ - -use super::{fabsf, sqrtf}; - -const P: [f32; 2] = [ 0.933935835, -0.504400557 ]; -const Q: [f32; 2] = [ 0.560363004e+1, -0.554846723e+1 ]; -const A: [f32; 2] = [ 0.0, 0.785398163 ]; -const B: [f32; 2] = [ 1.570796326, 0.785398163 ]; -const Z_ROOTEPS_F: f32 = 1.7263349182589107e-4; - -pub fn asinef(x: f32, acosine: usize) -> f32 -{ - let flag: usize; - let i: usize; - let mut branch: bool = false; - let g: f32; - let mut res: f32 = 0.0; - let mut y: f32; - - /* Check for special values. */ - //i = numtestf (x); - if x.is_nan() || x.is_infinite() { - force_eval!(x); - return x; - } - - y = fabsf(x); - flag = acosine; - - if y > 0.5 { - i = 1 - flag; - - /* Check for range error. */ - if y > 1.0 { - return 0.0 / 0.0; - } - - g = (1.0 - y) / 2.0; - y = -2.0 * sqrtf(g); - branch = true; - } else { - i = flag; - if y < Z_ROOTEPS_F { - res = y; - g = 0.0; // pleasing the uninitialized variable - } else { - g = y * y; - } - } - - if y >= Z_ROOTEPS_F || branch { - /* Calculate the Taylor series. */ - let p = (P[1] * g + P[0]) * g; - let q = (g + Q[1]) * g + Q[0]; - let r = p / q; - - res = y + y * r; - } - - /* Calculate asine or acose. 
*/ - if flag == 0 { - res = (A[i] + res) + A[i]; - if x < 0.0 { - res = -res; - } - } else { - if x < 0.0 { - res = (B[i] + res) + B[i]; - } else { - res = (A[i] - res) + A[i]; - } - } - - return res; -} +/* @(#)z_asinef.c 1.0 98/08/13 */ +/****************************************************************** + * The following routines are coded directly from the algorithms + * and coefficients given in "Software Manual for the Elementary + * Functions" by William J. Cody, Jr. and William Waite, Prentice + * Hall, 1980. + ******************************************************************/ +/****************************************************************** + * Arcsine + * + * Input: + * x - floating point value + * acosine - indicates acos calculation + * + * Output: + * Arcsine of x. + * + * Description: + * This routine calculates arcsine / arccosine. + * + *****************************************************************/ + +use super::{fabsf, sqrtf}; + +const P: [f32; 2] = [ 0.933935835, -0.504400557 ]; +const Q: [f32; 2] = [ 0.560363004e+1, -0.554846723e+1 ]; +const A: [f32; 2] = [ 0.0, 0.785398163 ]; +const B: [f32; 2] = [ 1.570796326, 0.785398163 ]; +const Z_ROOTEPS_F: f32 = 1.7263349182589107e-4; + +pub fn asinef(x: f32, acosine: bool) -> f32 { + let i: usize; + let mut branch: bool = false; + let g: f32; + let mut res: f32 = 0.0; + let mut y: f32; + + /* Check for special values. */ + //i = numtestf (x); + if x.is_nan() || x.is_infinite() { + force_eval!(x); + return x; + } + + y = fabsf(x); + let flag = acosine; + + if y > 0.5 { + i = (!flag) as usize; + + /* Check for range error. */ + if y > 1.0 { + return 0.0 / 0.0; + } + + g = (1.0 - y) / 2.0; + y = -2.0 * sqrtf(g); + branch = true; + } else { + i = flag; + if y < Z_ROOTEPS_F { + res = y; + g = 0.0; // pleasing the uninitialized variable + } else { + g = y * y; + } + } + + if y >= Z_ROOTEPS_F || branch { + /* Calculate the Taylor series. 
*/ + let p = (P[1] * g + P[0]) * g; + let q = (g + Q[1]) * g + Q[0]; + let r = p / q; + + res = y + y * r; + } + + /* Calculate asine or acose. */ + if flag == 0 { + res = (A[i] + res) + A[i]; + if x < 0.0 { + res = -res; + } + } else { + if x < 0.0 { + res = (B[i] + res) + B[i]; + } else { + res = (A[i] - res) + A[i]; + } + } + + return res; +} diff --git a/libm/src/math/asinh.rs b/libm/src/math/asinh.rs index 09e894551..b29093b23 100644 --- a/libm/src/math/asinh.rs +++ b/libm/src/math/asinh.rs @@ -1,35 +1,35 @@ -use super::{log, log1p, sqrt}; - -const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa39ef*/ - -/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ -pub fn asinh(mut x: f64) -> f64 { - let mut u = x.to_bits(); - let e = ((u >> 52) as usize) & 0x7ff; - let sign = (u >> 63) != 0; - - /* |x| */ - u &= (!0) >> 1; - x = f64::from_bits(u); - - if e >= 0x3ff + 26 { - /* |x| >= 0x1p26 or inf or nan */ - x = log(x) + LN2; - } else if e >= 0x3ff + 1 { - /* |x| >= 2 */ - x = log(2.0*x + 1.0/(sqrt(x*x+1.0)+x)); - } else if e >= 0x3ff - 26 { - /* |x| >= 0x1p-26, up to 1.6ulp error in [0.125,0.5] */ - x = log1p(x + x*x/(sqrt(x*x+1.0)+1.0)); - } else { - /* |x| < 0x1p-26, raise inexact if x != 0 */ - let x1p120 = f64::from_bits(0x4770000000000000); - force_eval!(x + x1p120); - } - - if sign { - -x - } else { - x - } -} +use super::{log, log1p, sqrt}; + +const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa39ef*/ + +/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ +pub fn asinh(mut x: f64) -> f64 { + let mut u = x.to_bits(); + let e = ((u >> 52) as usize) & 0x7ff; + let sign = (u >> 63) != 0; + + /* |x| */ + u &= (!0) >> 1; + x = f64::from_bits(u); + + if e >= 0x3ff + 26 { + /* |x| >= 0x1p26 or inf or nan */ + x = log(x) + LN2; + } else if e >= 0x3ff + 1 { + /* |x| >= 2 */ + x = log(2.0 * x + 1.0 / (sqrt(x * x + 1.0) + x)); + } else if e >= 0x3ff - 26 { + /* |x| >= 0x1p-26, up to 1.6ulp 
error in [0.125,0.5] */ + x = log1p(x + x * x / (sqrt(x * x + 1.0) + 1.0)); + } else { + /* |x| < 0x1p-26, raise inexact if x != 0 */ + let x1p120 = f64::from_bits(0x4770000000000000); + force_eval!(x + x1p120); + } + + if sign { + -x + } else { + x + } +} diff --git a/libm/src/math/asinhf.rs b/libm/src/math/asinhf.rs index 236916d83..981243303 100644 --- a/libm/src/math/asinhf.rs +++ b/libm/src/math/asinhf.rs @@ -1,34 +1,34 @@ -use super::{logf, log1pf, sqrtf}; - -const LN2: f32 = 0.693147180559945309417232121458176568; - -/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ -pub fn asinhf(mut x: f32) -> f32 { - let u = x.to_bits(); - let i = u & 0x7fffffff; - let sign = (u >> 31) != 0; - - /* |x| */ - x = f32::from_bits(i); - - if i >= 0x3f800000 + (12<<23) { - /* |x| >= 0x1p12 or inf or nan */ - x = logf(x) + LN2; - } else if i >= 0x3f800000 + (1<<23) { - /* |x| >= 2 */ - x = logf(2.0*x + 1.0/(sqrtf(x*x+1.0)+x)); - } else if i >= 0x3f800000 - (12<<23) { - /* |x| >= 0x1p-12, up to 1.6ulp error in [0.125,0.5] */ - x = log1pf(x + x*x/(sqrtf(x*x+1.0)+1.0)); - } else { - /* |x| < 0x1p-12, raise inexact if x!=0 */ - let x1p120 = f32::from_bits(0x7b800000); - force_eval!(x + x1p120); - } - - if sign { - -x - } else { - x - } -} +use super::{log1pf, logf, sqrtf}; + +const LN2: f32 = 0.693147180559945309417232121458176568; + +/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ +pub fn asinhf(mut x: f32) -> f32 { + let u = x.to_bits(); + let i = u & 0x7fffffff; + let sign = (u >> 31) != 0; + + /* |x| */ + x = f32::from_bits(i); + + if i >= 0x3f800000 + (12 << 23) { + /* |x| >= 0x1p12 or inf or nan */ + x = logf(x) + LN2; + } else if i >= 0x3f800000 + (1 << 23) { + /* |x| >= 2 */ + x = logf(2.0 * x + 1.0 / (sqrtf(x * x + 1.0) + x)); + } else if i >= 0x3f800000 - (12 << 23) { + /* |x| >= 0x1p-12, up to 1.6ulp error in [0.125,0.5] */ + x = log1pf(x + x * x / (sqrtf(x * x + 1.0) + 1.0)); + } else { + /* |x| < 0x1p-12, raise inexact if x!=0 */ + 
let x1p120 = f32::from_bits(0x7b800000); + force_eval!(x + x1p120); + } + + if sign { + -x + } else { + x + } +} diff --git a/libm/src/math/atan2.rs b/libm/src/math/atan2.rs index 7ab636050..313bec4b9 100644 --- a/libm/src/math/atan2.rs +++ b/libm/src/math/atan2.rs @@ -53,7 +53,7 @@ pub fn atan2(y: f64, x: f64) -> f64 { let lx = x.to_bits() as u32; let mut iy = (y.to_bits() >> 32) as u32; let ly = y.to_bits() as u32; - if ((ix - 0x3ff00000) | lx) == 0 { + if ((ix.wrapping_sub(0x3ff00000)) | lx) == 0 { /* x = 1.0 */ return atan(y); } diff --git a/libm/src/math/atanh.rs b/libm/src/math/atanh.rs index ea444809c..2833715ab 100644 --- a/libm/src/math/atanh.rs +++ b/libm/src/math/atanh.rs @@ -1,33 +1,32 @@ -use super::{log1p}; - -/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ -pub fn atanh(mut x: f64) -> f64 { - let mut u = x.to_bits(); - let e = ((u >> 52) as usize) & 0x7ff; - let sign = (u >> 63) != 0; - - /* |x| */ - u &= 0x7fffffff; - x = f64::from_bits(u); - - if e < 0x3ff - 1 { - if e < 0x3ff - 32 { - /* handle underflow */ - if e == 0 { - force_eval!(x as f32); - } - } else { - /* |x| < 0.5, up to 1.7ulp error */ - x = 0.5*log1p(2.0*x + 2.0*x*x/(1.0-x)); - } - } else { - /* avoid overflow */ - x = 0.5*log1p(2.0*(x/(1.0-x))); - } - - if sign { - -x - } else { - x - } -} +use super::log1p; + +/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ +pub fn atanh(x: f64) -> f64 { + let u = x.to_bits(); + let e = ((u >> 52) as usize) & 0x7ff; + let sign = (u >> 63) != 0; + + /* |x| */ + let mut y = f64::from_bits(u & 0x7fff_ffff_ffff_ffff); + + if e < 0x3ff - 1 { + if e < 0x3ff - 32 { + /* handle underflow */ + if e == 0 { + force_eval!(y as f32); + } + } else { + /* |x| < 0.5, up to 1.7ulp error */ + y = 0.5 * log1p(2.0 * y + 2.0 * y * y / (1.0 - y)); + } + } else { + /* avoid overflow */ + y = 0.5 * log1p(2.0 * (y / (1.0 - y))); + } + + if sign { + -y + } else { + y + } +} diff --git a/libm/src/math/atanhf.rs 
b/libm/src/math/atanhf.rs index 77d451bf2..709a95551 100644 --- a/libm/src/math/atanhf.rs +++ b/libm/src/math/atanhf.rs @@ -1,32 +1,32 @@ -use super::{log1pf}; - -/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ -pub fn atanhf(mut x: f32) -> f32 { - let mut u = x.to_bits(); - let sign = (u >> 31) != 0; - - /* |x| */ - u &= 0x7fffffff; - x = f32::from_bits(u); - - if u < 0x3f800000 - (1<<23) { - if u < 0x3f800000 - (32<<23) { - /* handle underflow */ - if u < (1<<23) { - force_eval!((x*x) as f32); - } - } else { - /* |x| < 0.5, up to 1.7ulp error */ - x = 0.5*log1pf(2.0*x + 2.0*x*x/(1.0-x)); - } - } else { - /* avoid overflow */ - x = 0.5*log1pf(2.0*(x/(1.0-x))); - } - - if sign { - -x - } else { - x - } -} +use super::log1pf; + +/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ +pub fn atanhf(mut x: f32) -> f32 { + let mut u = x.to_bits(); + let sign = (u >> 31) != 0; + + /* |x| */ + u &= 0x7fffffff; + x = f32::from_bits(u); + + if u < 0x3f800000 - (1 << 23) { + if u < 0x3f800000 - (32 << 23) { + /* handle underflow */ + if u < (1 << 23) { + force_eval!((x * x) as f32); + } + } else { + /* |x| < 0.5, up to 1.7ulp error */ + x = 0.5 * log1pf(2.0 * x + 2.0 * x * x / (1.0 - x)); + } + } else { + /* avoid overflow */ + x = 0.5 * log1pf(2.0 * (x / (1.0 - x))); + } + + if sign { + -x + } else { + x + } +} diff --git a/libm/src/math/ceilf.rs b/libm/src/math/ceilf.rs index 88f9ecc44..0be53c5cf 100644 --- a/libm/src/math/ceilf.rs +++ b/libm/src/math/ceilf.rs @@ -12,7 +12,7 @@ pub fn ceilf(x: f32) -> f32 { } } let mut ui = x.to_bits(); - let e = (((ui >> 23) & 0xff) - 0x7f) as i32; + let e = (((ui >> 23) & 0xff).wrapping_sub(0x7f)) as i32; if e >= 23 { return x; diff --git a/libm/src/math/copysign.rs b/libm/src/math/copysign.rs index 74b761e74..9c5362a5a 100644 --- a/libm/src/math/copysign.rs +++ b/libm/src/math/copysign.rs @@ -1,7 +1,7 @@ -pub fn copysign(x: f64, y: f64) -> f64 { - let mut ux = x.to_bits(); - let uy = 
y.to_bits(); - ux &= (!0) >> 1; - ux |= uy & (1<<63); - f64::from_bits(ux) -} +pub fn copysign(x: f64, y: f64) -> f64 { + let mut ux = x.to_bits(); + let uy = y.to_bits(); + ux &= (!0) >> 1; + ux |= uy & (1 << 63); + f64::from_bits(ux) +} diff --git a/libm/src/math/copysignf.rs b/libm/src/math/copysignf.rs index a0a814bf6..b42fd39ad 100644 --- a/libm/src/math/copysignf.rs +++ b/libm/src/math/copysignf.rs @@ -1,7 +1,7 @@ -pub fn copysignf(x: f32, y: f32) -> f32 { - let mut ux = x.to_bits(); - let uy = y.to_bits(); - ux &= 0x7fffffff; - ux |= uy & 0x80000000; - f32::from_bits(ux) -} +pub fn copysignf(x: f32, y: f32) -> f32 { + let mut ux = x.to_bits(); + let uy = y.to_bits(); + ux &= 0x7fffffff; + ux |= uy & 0x80000000; + f32::from_bits(ux) +} diff --git a/libm/src/math/erf.rs b/libm/src/math/erf.rs index b3ad2ce05..d53a4c83c 100644 --- a/libm/src/math/erf.rs +++ b/libm/src/math/erf.rs @@ -1,297 +1,306 @@ -use super::{exp, fabs, get_high_word, with_set_low_word}; -/* origin: FreeBSD /usr/src/lib/msun/src/s_erf.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* double erf(double x) - * double erfc(double x) - * x - * 2 |\ - * erf(x) = --------- | exp(-t*t)dt - * sqrt(pi) \| - * 0 - * - * erfc(x) = 1-erf(x) - * Note that - * erf(-x) = -erf(x) - * erfc(-x) = 2 - erfc(x) - * - * Method: - * 1. For |x| in [0, 0.84375] - * erf(x) = x + x*R(x^2) - * erfc(x) = 1 - erf(x) if x in [-.84375,0.25] - * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375] - * where R = P/Q where P is an odd poly of degree 8 and - * Q is an odd poly of degree 10. - * -57.90 - * | R - (erf(x)-x)/x | <= 2 - * - * - * Remark. 
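The formula is derived by noting - * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....) - * and that - * 2/sqrt(pi) = 1.128379167095512573896158903121545171688 - * is close to one. The interval is chosen because the fix - * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is - * near 0.6174), and by some experiment, 0.84375 is chosen to - * guarantee the error is less than one ulp for erf. - * - * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and - * c = 0.84506291151 rounded to single (24 bits) - * erf(x) = sign(x) * (c + P1(s)/Q1(s)) - * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0 - * 1+(c+P1(s)/Q1(s)) if x < 0 - * |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06 - * Remark: here we use the taylor series expansion at x=1. - * erf(1+s) = erf(1) + s*Poly(s) - * = 0.845.. + P1(s)/Q1(s) - * That is, we use rational approximation to approximate - * erf(1+s) - (c = (single)0.84506291151) - * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25] - * where - * P1(s) = degree 6 poly in s - * Q1(s) = degree 6 poly in s - * - * 3. For x in [1.25,1/0.35(~2.857143)], - * erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1) - * erf(x) = 1 - erfc(x) - * where - * R1(z) = degree 7 poly in z, (z=1/x^2) - * S1(z) = degree 8 poly in z - * - * 4. For x in [1/0.35,28] - * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0 - * = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6 < x < 0 - * = 2.0 - tiny (if x <= -6) - * erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else - * erf(x) = sign(x)*(1.0 - tiny) - * where - * R2(z) = degree 6 poly in z, (z=1/x^2) - * S2(z) = degree 7 poly in z - * - * Note1: - * To compute exp(-x*x-0.5625+R/S), let s be a single - * precision number and s := x; then - * -x*x = -s*s + (s-x)*(s+x) - * exp(-x*x-0.5625+R/S) = - * exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S); - * Note2: - * Here 4 and 5 make use of the asymptotic series - * exp(-x*x) - * erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) ) - * x*sqrt(pi) - * We use rational approximation to approximate - * g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625 - * Here is the error bound for R1/S1 and R2/S2 - * |R1/S1 - f(x)| < 2**(-62.57) - * |R2/S2 - f(x)| < 2**(-61.52) - * - * 5. For inf > x >= 28 - * erf(x) = sign(x) *(1 - tiny) (raise inexact) - * erfc(x) = tiny*tiny (raise underflow) if x > 0 - * = 2 - tiny if x<0 - * - * 7. 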
Special case: - * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1, - * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2, - * erfc/erf(NaN) is NaN - */ - -const ERX: f64 = 8.45062911510467529297e-01; /* 0x3FEB0AC1, 0x60000000 */ -/* - * Coefficients for approximation to erf on [0,0.84375] - */ -const EFX8: f64 = 1.02703333676410069053e+00; /* 0x3FF06EBA, 0x8214DB69 */ -const PP0: f64 = 1.28379167095512558561e-01; /* 0x3FC06EBA, 0x8214DB68 */ -const PP1: f64 = -3.25042107247001499370e-01; /* 0xBFD4CD7D, 0x691CB913 */ -const PP2: f64 = -2.84817495755985104766e-02; /* 0xBF9D2A51, 0xDBD7194F */ -const PP3: f64 = -5.77027029648944159157e-03; /* 0xBF77A291, 0x236668E4 */ -const PP4: f64 = -2.37630166566501626084e-05; /* 0xBEF8EAD6, 0x120016AC */ -const QQ1: f64 = 3.97917223959155352819e-01; /* 0x3FD97779, 0xCDDADC09 */ -const QQ2: f64 = 6.50222499887672944485e-02; /* 0x3FB0A54C, 0x5536CEBA */ -const QQ3: f64 = 5.08130628187576562776e-03; /* 0x3F74D022, 0xC4D36B0F */ -const QQ4: f64 = 1.32494738004321644526e-04; /* 0x3F215DC9, 0x221C1A10 */ -const QQ5: f64 = -3.96022827877536812320e-06; /* 0xBED09C43, 0x42A26120 */ -/* - * Coefficients for approximation to erf in [0.84375,1.25] - */ -const PA0: f64 = -2.36211856075265944077e-03; /* 0xBF6359B8, 0xBEF77538 */ -const PA1: f64 = 4.14856118683748331666e-01; /* 0x3FDA8D00, 0xAD92B34D */ -const PA2: f64 = -3.72207876035701323847e-01; /* 0xBFD7D240, 0xFBB8C3F1 */ -const PA3: f64 = 3.18346619901161753674e-01; /* 0x3FD45FCA, 0x805120E4 */ -const PA4: f64 = -1.10894694282396677476e-01; /* 0xBFBC6398, 0x3D3E28EC */ -const PA5: f64 = 3.54783043256182359371e-02; /* 0x3FA22A36, 0x599795EB */ -const PA6: f64 = -2.16637559486879084300e-03; /* 0xBF61BF38, 0x0A96073F */ -const QA1: f64 = 1.06420880400844228286e-01; /* 0x3FBB3E66, 0x18EEE323 */ -const QA2: f64 = 5.40397917702171048937e-01; /* 0x3FE14AF0, 0x92EB6F33 */ -const QA3: f64 = 7.18286544141962662868e-02; /* 0x3FB2635C, 0xD99FE9A7 */ -const QA4: f64 = 1.26171219808761642112e-01; /* 0x3FC02660, 
0xE763351F */ -const QA5: f64 = 1.36370839120290507362e-02; /* 0x3F8BEDC2, 0x6B51DD1C */ -const QA6: f64 = 1.19844998467991074170e-02; /* 0x3F888B54, 0x5735151D */ -/* - * Coefficients for approximation to erfc in [1.25,1/0.35] - */ -const RA0: f64 = -9.86494403484714822705e-03; /* 0xBF843412, 0x600D6435 */ -const RA1: f64 = -6.93858572707181764372e-01; /* 0xBFE63416, 0xE4BA7360 */ -const RA2: f64 = -1.05586262253232909814e+01; /* 0xC0251E04, 0x41B0E726 */ -const RA3: f64 = -6.23753324503260060396e+01; /* 0xC04F300A, 0xE4CBA38D */ -const RA4: f64 = -1.62396669462573470355e+02; /* 0xC0644CB1, 0x84282266 */ -const RA5: f64 = -1.84605092906711035994e+02; /* 0xC067135C, 0xEBCCABB2 */ -const RA6: f64 = -8.12874355063065934246e+01; /* 0xC0545265, 0x57E4D2F2 */ -const RA7: f64 = -9.81432934416914548592e+00; /* 0xC023A0EF, 0xC69AC25C */ -const SA1: f64 = 1.96512716674392571292e+01; /* 0x4033A6B9, 0xBD707687 */ -const SA2: f64 = 1.37657754143519042600e+02; /* 0x4061350C, 0x526AE721 */ -const SA3: f64 = 4.34565877475229228821e+02; /* 0x407B290D, 0xD58A1A71 */ -const SA4: f64 = 6.45387271733267880336e+02; /* 0x40842B19, 0x21EC2868 */ -const SA5: f64 = 4.29008140027567833386e+02; /* 0x407AD021, 0x57700314 */ -const SA6: f64 = 1.08635005541779435134e+02; /* 0x405B28A3, 0xEE48AE2C */ -const SA7: f64 = 6.57024977031928170135e+00; /* 0x401A47EF, 0x8E484A93 */ -const SA8: f64 = -6.04244152148580987438e-02; /* 0xBFAEEFF2, 0xEE749A62 */ -/* - * Coefficients for approximation to erfc in [1/.35,28] - */ -const RB0: f64 = -9.86494292470009928597e-03; /* 0xBF843412, 0x39E86F4A */ -const RB1: f64 = -7.99283237680523006574e-01; /* 0xBFE993BA, 0x70C285DE */ -const RB2: f64 = -1.77579549177547519889e+01; /* 0xC031C209, 0x555F995A */ -const RB3: f64 = -1.60636384855821916062e+02; /* 0xC064145D, 0x43C5ED98 */ -const RB4: f64 = -6.37566443368389627722e+02; /* 0xC083EC88, 0x1375F228 */ -const RB5: f64 = -1.02509513161107724954e+03; /* 0xC0900461, 0x6A2E5992 */ -const RB6: f64 = 
-4.83519191608651397019e+02; /* 0xC07E384E, 0x9BDC383F */ -const SB1: f64 = 3.03380607434824582924e+01; /* 0x403E568B, 0x261D5190 */ -const SB2: f64 = 3.25792512996573918826e+02; /* 0x40745CAE, 0x221B9F0A */ -const SB3: f64 = 1.53672958608443695994e+03; /* 0x409802EB, 0x189D5118 */ -const SB4: f64 = 3.19985821950859553908e+03; /* 0x40A8FFB7, 0x688C246A */ -const SB5: f64 = 2.55305040643316442583e+03; /* 0x40A3F219, 0xCEDF3BE6 */ -const SB6: f64 = 4.74528541206955367215e+02; /* 0x407DA874, 0xE79FE763 */ -const SB7: f64 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */ - -fn erfc1(x: f64) -> f64 { - let s: f64; - let p: f64; - let q: f64; - - s = fabs(x) - 1.0; - p = PA0+s*(PA1+s*(PA2+s*(PA3+s*(PA4+s*(PA5+s*PA6))))); - q = 1.0+s*(QA1+s*(QA2+s*(QA3+s*(QA4+s*(QA5+s*QA6))))); - - 1.0 - ERX - p/q -} - -fn erfc2(ix: u32, mut x: f64) -> f64 { - let s: f64; - let r: f64; - let big_s: f64; - let z: f64; - - if ix < 0x3ff40000 { /* |x| < 1.25 */ - return erfc1(x); - } - - x = fabs(x); - s = 1.0/(x*x); - if ix < 0x4006db6d { /* |x| < 1/.35 ~ 2.85714 */ - r = RA0+s*(RA1+s*(RA2+s*(RA3+s*(RA4+s*( - RA5+s*(RA6+s*RA7)))))); - big_s = 1.0+s*(SA1+s*(SA2+s*(SA3+s*(SA4+s*( - SA5+s*(SA6+s*(SA7+s*SA8))))))); - } else { /* |x| > 1/.35 */ - r = RB0+s*(RB1+s*(RB2+s*(RB3+s*(RB4+s*( - RB5+s*RB6))))); - big_s = 1.0+s*(SB1+s*(SB2+s*(SB3+s*(SB4+s*( - SB5+s*(SB6+s*SB7)))))); - } - z = with_set_low_word(x, 0); - - exp(-z*z-0.5625)*exp((z-x)*(z+x)+r/big_s)/x -} - -pub fn erf(x: f64) -> f64 { - let r: f64; - let s: f64; - let z: f64; - let y: f64; - let mut ix: u32; - let sign: usize; - - ix = get_high_word(x); - sign = (ix>>31) as usize; - ix &= 0x7fffffff; - if ix >= 0x7ff00000 { - /* erf(nan)=nan, erf(+-inf)=+-1 */ - return 1.0-2.0*(sign as f64) + 1.0/x; - } - if ix < 0x3feb0000 { /* |x| < 0.84375 */ - if ix < 0x3e300000 { /* |x| < 2**-28 */ - /* avoid underflow */ - return 0.125*(8.0*x + EFX8*x); - } - z = x*x; - r = PP0+z*(PP1+z*(PP2+z*(PP3+z*PP4))); - s = 
1.0+z*(QQ1+z*(QQ2+z*(QQ3+z*(QQ4+z*QQ5)))); - y = r/s; - return x + x*y; - } - if ix < 0x40180000 { /* 0.84375 <= |x| < 6 */ - y = 1.0 - erfc2(ix,x); - } else { - let x1p_1022 = f64::from_bits(0x0010000000000000); - y = 1.0 - x1p_1022; - } - - if sign != 0 { - -y - } else { - y - } -} - -pub fn erfc(x: f64) -> f64 { - let r: f64; - let s: f64; - let z: f64; - let y: f64; - let mut ix: u32; - let sign: usize; - - ix = get_high_word(x); - sign = (ix>>31) as usize; - ix &= 0x7fffffff; - if ix >= 0x7ff00000 { - /* erfc(nan)=nan, erfc(+-inf)=0,2 */ - return 2.0*(sign as f64) + 1.0/x; - } - if ix < 0x3feb0000 { /* |x| < 0.84375 */ - if ix < 0x3c700000 { /* |x| < 2**-56 */ - return 1.0 - x; - } - z = x*x; - r = PP0+z*(PP1+z*(PP2+z*(PP3+z*PP4))); - s = 1.0+z*(QQ1+z*(QQ2+z*(QQ3+z*(QQ4+z*QQ5)))); - y = r/s; - if sign != 0 || ix < 0x3fd00000 { /* x < 1/4 */ - return 1.0 - (x+x*y); - } - return 0.5 - (x - 0.5 + x*y); - } - if ix < 0x403c0000 { /* 0.84375 <= |x| < 28 */ - if sign != 0 { - return 2.0 - erfc2(ix,x); - } else { - return erfc2(ix,x); - } - } - - let x1p_1022 = f64::from_bits(0x0010000000000000); - if sign != 0 { - 2.0 - x1p_1022 - } else { - x1p_1022*x1p_1022 - } -} +use super::{exp, fabs, get_high_word, with_set_low_word}; +/* origin: FreeBSD /usr/src/lib/msun/src/s_erf.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* double erf(double x) + * double erfc(double x) + * x + * 2 |\ + * erf(x) = --------- | exp(-t*t)dt + * sqrt(pi) \| + * 0 + * + * erfc(x) = 1-erf(x) + * Note that + * erf(-x) = -erf(x) + * erfc(-x) = 2 - erfc(x) + * + * Method: + * 1. 
For |x| in [0, 0.84375] + * erf(x) = x + x*R(x^2) + * erfc(x) = 1 - erf(x) if x in [-.84375,0.25] + * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375] + * where R = P/Q where P is an odd poly of degree 8 and + * Q is an odd poly of degree 10. + * -57.90 + * | R - (erf(x)-x)/x | <= 2 + * + * + * Remark. The formula is derived by noting + * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....) + * and that + * 2/sqrt(pi) = 1.128379167095512573896158903121545171688 + * is close to one. The interval is chosen because the fix + * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is + * near 0.6174), and by some experiment, 0.84375 is chosen to + * guarantee the error is less than one ulp for erf. + * + * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and + * c = 0.84506291151 rounded to single (24 bits) + * erf(x) = sign(x) * (c + P1(s)/Q1(s)) + * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0 + * 1+(c+P1(s)/Q1(s)) if x < 0 + * |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06 + * Remark: here we use the taylor series expansion at x=1. + * erf(1+s) = erf(1) + s*Poly(s) + * = 0.845.. + P1(s)/Q1(s) + * That is, we use rational approximation to approximate + * erf(1+s) - (c = (single)0.84506291151) + * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25] + * where + * P1(s) = degree 6 poly in s + * Q1(s) = degree 6 poly in s + * + * 3. For x in [1.25,1/0.35(~2.857143)], + * erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1) + * erf(x) = 1 - erfc(x) + * where + * R1(z) = degree 7 poly in z, (z=1/x^2) + * S1(z) = degree 8 poly in z + * + * 4. For x in [1/0.35,28] + * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0 + * = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0 + * = 2.0 - tiny (if x <= -6) + * erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else + * erf(x) = sign(x)*(1.0 - tiny) + * where + * R2(z) = degree 6 poly in z, (z=1/x^2) + * S2(z) = degree 7 poly in z + * + * Note1: + * To compute exp(-x*x-0.5625+R/S), let s be a single + * precision number and s := x; then + * -x*x = -s*s + (s-x)*(s+x) + * exp(-x*x-0.5625+R/S) = + * exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S); + * Note2: + * Here 4 and 5 make use of the asymptotic series + * exp(-x*x) + * erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) ) + * x*sqrt(pi) + * We use rational approximation to approximate + * g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625 + * Here is the error bound for R1/S1 and R2/S2 + * |R1/S1 - f(x)| < 2**(-62.57) + * |R2/S2 - f(x)| < 2**(-61.52) + * + * 5. For inf > x >= 28 + * erf(x) = sign(x) *(1 - tiny) (raise inexact) + * erfc(x) = tiny*tiny (raise underflow) if x > 0 + * = 2 - tiny if x<0 + * + * 7. 
Special case: + * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1, + * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2, + * erfc/erf(NaN) is NaN + */ + +const ERX: f64 = 8.45062911510467529297e-01; /* 0x3FEB0AC1, 0x60000000 */ +/* + * Coefficients for approximation to erf on [0,0.84375] + */ +const EFX8: f64 = 1.02703333676410069053e+00; /* 0x3FF06EBA, 0x8214DB69 */ +const PP0: f64 = 1.28379167095512558561e-01; /* 0x3FC06EBA, 0x8214DB68 */ +const PP1: f64 = -3.25042107247001499370e-01; /* 0xBFD4CD7D, 0x691CB913 */ +const PP2: f64 = -2.84817495755985104766e-02; /* 0xBF9D2A51, 0xDBD7194F */ +const PP3: f64 = -5.77027029648944159157e-03; /* 0xBF77A291, 0x236668E4 */ +const PP4: f64 = -2.37630166566501626084e-05; /* 0xBEF8EAD6, 0x120016AC */ +const QQ1: f64 = 3.97917223959155352819e-01; /* 0x3FD97779, 0xCDDADC09 */ +const QQ2: f64 = 6.50222499887672944485e-02; /* 0x3FB0A54C, 0x5536CEBA */ +const QQ3: f64 = 5.08130628187576562776e-03; /* 0x3F74D022, 0xC4D36B0F */ +const QQ4: f64 = 1.32494738004321644526e-04; /* 0x3F215DC9, 0x221C1A10 */ +const QQ5: f64 = -3.96022827877536812320e-06; /* 0xBED09C43, 0x42A26120 */ +/* + * Coefficients for approximation to erf in [0.84375,1.25] + */ +const PA0: f64 = -2.36211856075265944077e-03; /* 0xBF6359B8, 0xBEF77538 */ +const PA1: f64 = 4.14856118683748331666e-01; /* 0x3FDA8D00, 0xAD92B34D */ +const PA2: f64 = -3.72207876035701323847e-01; /* 0xBFD7D240, 0xFBB8C3F1 */ +const PA3: f64 = 3.18346619901161753674e-01; /* 0x3FD45FCA, 0x805120E4 */ +const PA4: f64 = -1.10894694282396677476e-01; /* 0xBFBC6398, 0x3D3E28EC */ +const PA5: f64 = 3.54783043256182359371e-02; /* 0x3FA22A36, 0x599795EB */ +const PA6: f64 = -2.16637559486879084300e-03; /* 0xBF61BF38, 0x0A96073F */ +const QA1: f64 = 1.06420880400844228286e-01; /* 0x3FBB3E66, 0x18EEE323 */ +const QA2: f64 = 5.40397917702171048937e-01; /* 0x3FE14AF0, 0x92EB6F33 */ +const QA3: f64 = 7.18286544141962662868e-02; /* 0x3FB2635C, 0xD99FE9A7 */ +const QA4: f64 = 1.26171219808761642112e-01; /* 0x3FC02660, 
0xE763351F */ +const QA5: f64 = 1.36370839120290507362e-02; /* 0x3F8BEDC2, 0x6B51DD1C */ +const QA6: f64 = 1.19844998467991074170e-02; /* 0x3F888B54, 0x5735151D */ +/* + * Coefficients for approximation to erfc in [1.25,1/0.35] + */ +const RA0: f64 = -9.86494403484714822705e-03; /* 0xBF843412, 0x600D6435 */ +const RA1: f64 = -6.93858572707181764372e-01; /* 0xBFE63416, 0xE4BA7360 */ +const RA2: f64 = -1.05586262253232909814e+01; /* 0xC0251E04, 0x41B0E726 */ +const RA3: f64 = -6.23753324503260060396e+01; /* 0xC04F300A, 0xE4CBA38D */ +const RA4: f64 = -1.62396669462573470355e+02; /* 0xC0644CB1, 0x84282266 */ +const RA5: f64 = -1.84605092906711035994e+02; /* 0xC067135C, 0xEBCCABB2 */ +const RA6: f64 = -8.12874355063065934246e+01; /* 0xC0545265, 0x57E4D2F2 */ +const RA7: f64 = -9.81432934416914548592e+00; /* 0xC023A0EF, 0xC69AC25C */ +const SA1: f64 = 1.96512716674392571292e+01; /* 0x4033A6B9, 0xBD707687 */ +const SA2: f64 = 1.37657754143519042600e+02; /* 0x4061350C, 0x526AE721 */ +const SA3: f64 = 4.34565877475229228821e+02; /* 0x407B290D, 0xD58A1A71 */ +const SA4: f64 = 6.45387271733267880336e+02; /* 0x40842B19, 0x21EC2868 */ +const SA5: f64 = 4.29008140027567833386e+02; /* 0x407AD021, 0x57700314 */ +const SA6: f64 = 1.08635005541779435134e+02; /* 0x405B28A3, 0xEE48AE2C */ +const SA7: f64 = 6.57024977031928170135e+00; /* 0x401A47EF, 0x8E484A93 */ +const SA8: f64 = -6.04244152148580987438e-02; /* 0xBFAEEFF2, 0xEE749A62 */ +/* + * Coefficients for approximation to erfc in [1/.35,28] + */ +const RB0: f64 = -9.86494292470009928597e-03; /* 0xBF843412, 0x39E86F4A */ +const RB1: f64 = -7.99283237680523006574e-01; /* 0xBFE993BA, 0x70C285DE */ +const RB2: f64 = -1.77579549177547519889e+01; /* 0xC031C209, 0x555F995A */ +const RB3: f64 = -1.60636384855821916062e+02; /* 0xC064145D, 0x43C5ED98 */ +const RB4: f64 = -6.37566443368389627722e+02; /* 0xC083EC88, 0x1375F228 */ +const RB5: f64 = -1.02509513161107724954e+03; /* 0xC0900461, 0x6A2E5992 */ +const RB6: f64 = 
-4.83519191608651397019e+02; /* 0xC07E384E, 0x9BDC383F */ +const SB1: f64 = 3.03380607434824582924e+01; /* 0x403E568B, 0x261D5190 */ +const SB2: f64 = 3.25792512996573918826e+02; /* 0x40745CAE, 0x221B9F0A */ +const SB3: f64 = 1.53672958608443695994e+03; /* 0x409802EB, 0x189D5118 */ +const SB4: f64 = 3.19985821950859553908e+03; /* 0x40A8FFB7, 0x688C246A */ +const SB5: f64 = 2.55305040643316442583e+03; /* 0x40A3F219, 0xCEDF3BE6 */ +const SB6: f64 = 4.74528541206955367215e+02; /* 0x407DA874, 0xE79FE763 */ +const SB7: f64 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */ + +fn erfc1(x: f64) -> f64 { + let s: f64; + let p: f64; + let q: f64; + + s = fabs(x) - 1.0; + p = PA0 + s * (PA1 + s * (PA2 + s * (PA3 + s * (PA4 + s * (PA5 + s * PA6))))); + q = 1.0 + s * (QA1 + s * (QA2 + s * (QA3 + s * (QA4 + s * (QA5 + s * QA6))))); + + 1.0 - ERX - p / q +} + +fn erfc2(ix: u32, mut x: f64) -> f64 { + let s: f64; + let r: f64; + let big_s: f64; + let z: f64; + + if ix < 0x3ff40000 { + /* |x| < 1.25 */ + return erfc1(x); + } + + x = fabs(x); + s = 1.0 / (x * x); + if ix < 0x4006db6d { + /* |x| < 1/.35 ~ 2.85714 */ + r = RA0 + s * (RA1 + s * (RA2 + s * (RA3 + s * (RA4 + s * (RA5 + s * (RA6 + s * RA7)))))); + big_s = 1.0 + + s * (SA1 + + s * (SA2 + s * (SA3 + s * (SA4 + s * (SA5 + s * (SA6 + s * (SA7 + s * SA8))))))); + } else { + /* |x| > 1/.35 */ + r = RB0 + s * (RB1 + s * (RB2 + s * (RB3 + s * (RB4 + s * (RB5 + s * RB6))))); + big_s = + 1.0 + s * (SB1 + s * (SB2 + s * (SB3 + s * (SB4 + s * (SB5 + s * (SB6 + s * SB7)))))); + } + z = with_set_low_word(x, 0); + + exp(-z * z - 0.5625) * exp((z - x) * (z + x) + r / big_s) / x +} + +pub fn erf(x: f64) -> f64 { + let r: f64; + let s: f64; + let z: f64; + let y: f64; + let mut ix: u32; + let sign: usize; + + ix = get_high_word(x); + sign = (ix >> 31) as usize; + ix &= 0x7fffffff; + if ix >= 0x7ff00000 { + /* erf(nan)=nan, erf(+-inf)=+-1 */ + return 1.0 - 2.0 * (sign as f64) + 1.0 / x; + } + if ix < 0x3feb0000 { + /* |x| < 
0.84375 */ + if ix < 0x3e300000 { + /* |x| < 2**-28 */ + /* avoid underflow */ + return 0.125 * (8.0 * x + EFX8 * x); + } + z = x * x; + r = PP0 + z * (PP1 + z * (PP2 + z * (PP3 + z * PP4))); + s = 1.0 + z * (QQ1 + z * (QQ2 + z * (QQ3 + z * (QQ4 + z * QQ5)))); + y = r / s; + return x + x * y; + } + if ix < 0x40180000 { + /* 0.84375 <= |x| < 6 */ + y = 1.0 - erfc2(ix, x); + } else { + let x1p_1022 = f64::from_bits(0x0010000000000000); + y = 1.0 - x1p_1022; + } + + if sign != 0 { + -y + } else { + y + } +} + +pub fn erfc(x: f64) -> f64 { + let r: f64; + let s: f64; + let z: f64; + let y: f64; + let mut ix: u32; + let sign: usize; + + ix = get_high_word(x); + sign = (ix >> 31) as usize; + ix &= 0x7fffffff; + if ix >= 0x7ff00000 { + /* erfc(nan)=nan, erfc(+-inf)=0,2 */ + return 2.0 * (sign as f64) + 1.0 / x; + } + if ix < 0x3feb0000 { + /* |x| < 0.84375 */ + if ix < 0x3c700000 { + /* |x| < 2**-56 */ + return 1.0 - x; + } + z = x * x; + r = PP0 + z * (PP1 + z * (PP2 + z * (PP3 + z * PP4))); + s = 1.0 + z * (QQ1 + z * (QQ2 + z * (QQ3 + z * (QQ4 + z * QQ5)))); + y = r / s; + if sign != 0 || ix < 0x3fd00000 { + /* x < 1/4 */ + return 1.0 - (x + x * y); + } + return 0.5 - (x - 0.5 + x * y); + } + if ix < 0x403c0000 { + /* 0.84375 <= |x| < 28 */ + if sign != 0 { + return 2.0 - erfc2(ix, x); + } else { + return erfc2(ix, x); + } + } + + let x1p_1022 = f64::from_bits(0x0010000000000000); + if sign != 0 { + 2.0 - x1p_1022 + } else { + x1p_1022 * x1p_1022 + } +} diff --git a/libm/src/math/erff.rs b/libm/src/math/erff.rs index 0aaa89767..ef67c335b 100644 --- a/libm/src/math/erff.rs +++ b/libm/src/math/erff.rs @@ -1,210 +1,218 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/s_erff.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. 
- * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -use super::{expf, fabsf}; - -const ERX: f32 = 8.4506291151e-01; /* 0x3f58560b */ -/* - * Coefficients for approximation to erf on [0,0.84375] - */ -const EFX8: f32 = 1.0270333290e+00; /* 0x3f8375d4 */ -const PP0: f32 = 1.2837916613e-01; /* 0x3e0375d4 */ -const PP1: f32 = -3.2504209876e-01; /* 0xbea66beb */ -const PP2: f32 = -2.8481749818e-02; /* 0xbce9528f */ -const PP3: f32 = -5.7702702470e-03; /* 0xbbbd1489 */ -const PP4: f32 = -2.3763017452e-05; /* 0xb7c756b1 */ -const QQ1: f32 = 3.9791721106e-01; /* 0x3ecbbbce */ -const QQ2: f32 = 6.5022252500e-02; /* 0x3d852a63 */ -const QQ3: f32 = 5.0813062117e-03; /* 0x3ba68116 */ -const QQ4: f32 = 1.3249473704e-04; /* 0x390aee49 */ -const QQ5: f32 = -3.9602282413e-06; /* 0xb684e21a */ -/* - * Coefficients for approximation to erf in [0.84375,1.25] - */ -const PA0: f32 = -2.3621185683e-03; /* 0xbb1acdc6 */ -const PA1: f32 = 4.1485610604e-01; /* 0x3ed46805 */ -const PA2: f32 = -3.7220788002e-01; /* 0xbebe9208 */ -const PA3: f32 = 3.1834661961e-01; /* 0x3ea2fe54 */ -const PA4: f32 = -1.1089469492e-01; /* 0xbde31cc2 */ -const PA5: f32 = 3.5478305072e-02; /* 0x3d1151b3 */ -const PA6: f32 = -2.1663755178e-03; /* 0xbb0df9c0 */ -const QA1: f32 = 1.0642088205e-01; /* 0x3dd9f331 */ -const QA2: f32 = 5.4039794207e-01; /* 0x3f0a5785 */ -const QA3: f32 = 7.1828655899e-02; /* 0x3d931ae7 */ -const QA4: f32 = 1.2617121637e-01; /* 0x3e013307 */ -const QA5: f32 = 1.3637083583e-02; /* 0x3c5f6e13 */ -const QA6: f32 = 1.1984500103e-02; /* 0x3c445aa3 */ -/* - * Coefficients for approximation to erfc in [1.25,1/0.35] - */ -const RA0: f32 = -9.8649440333e-03; /* 0xbc21a093 */ -const RA1: f32 = -6.9385856390e-01; /* 0xbf31a0b7 */ -const RA2: f32 = -1.0558626175e+01; /* 0xc128f022 */ -const RA3: f32 = -6.2375331879e+01; /* 0xc2798057 */ -const RA4: f32 = 
-1.6239666748e+02; /* 0xc322658c */ -const RA5: f32 = -1.8460508728e+02; /* 0xc3389ae7 */ -const RA6: f32 = -8.1287437439e+01; /* 0xc2a2932b */ -const RA7: f32 = -9.8143291473e+00; /* 0xc11d077e */ -const SA1: f32 = 1.9651271820e+01; /* 0x419d35ce */ -const SA2: f32 = 1.3765776062e+02; /* 0x4309a863 */ -const SA3: f32 = 4.3456588745e+02; /* 0x43d9486f */ -const SA4: f32 = 6.4538726807e+02; /* 0x442158c9 */ -const SA5: f32 = 4.2900814819e+02; /* 0x43d6810b */ -const SA6: f32 = 1.0863500214e+02; /* 0x42d9451f */ -const SA7: f32 = 6.5702495575e+00; /* 0x40d23f7c */ -const SA8: f32 = -6.0424413532e-02; /* 0xbd777f97 */ -/* - * Coefficients for approximation to erfc in [1/.35,28] - */ -const RB0: f32 = -9.8649431020e-03; /* 0xbc21a092 */ -const RB1: f32 = -7.9928326607e-01; /* 0xbf4c9dd4 */ -const RB2: f32 = -1.7757955551e+01; /* 0xc18e104b */ -const RB3: f32 = -1.6063638306e+02; /* 0xc320a2ea */ -const RB4: f32 = -6.3756646729e+02; /* 0xc41f6441 */ -const RB5: f32 = -1.0250950928e+03; /* 0xc480230b */ -const RB6: f32 = -4.8351919556e+02; /* 0xc3f1c275 */ -const SB1: f32 = 3.0338060379e+01; /* 0x41f2b459 */ -const SB2: f32 = 3.2579251099e+02; /* 0x43a2e571 */ -const SB3: f32 = 1.5367296143e+03; /* 0x44c01759 */ -const SB4: f32 = 3.1998581543e+03; /* 0x4547fdbb */ -const SB5: f32 = 2.5530502930e+03; /* 0x451f90ce */ -const SB6: f32 = 4.7452853394e+02; /* 0x43ed43a7 */ -const SB7: f32 = -2.2440952301e+01; /* 0xc1b38712 */ - -fn erfc1(x: f32) -> f32 { - let s: f32; - let p: f32; - let q: f32; - - s = fabsf(x) - 1.0; - p = PA0+s*(PA1+s*(PA2+s*(PA3+s*(PA4+s*(PA5+s*PA6))))); - q = 1.0+s*(QA1+s*(QA2+s*(QA3+s*(QA4+s*(QA5+s*QA6))))); - return 1.0 - ERX - p/q; -} - -fn erfc2(mut ix: u32, mut x: f32) -> f32 { - let s: f32; - let r: f32; - let big_s: f32; - let z: f32; - - if ix < 0x3fa00000 { /* |x| < 1.25 */ - return erfc1(x); - } - - x = fabsf(x); - s = 1.0/(x*x); - if ix < 0x4036db6d { /* |x| < 1/0.35 */ - r = RA0+s*(RA1+s*(RA2+s*(RA3+s*(RA4+s*( - RA5+s*(RA6+s*RA7)))))); - 
big_s = 1.0+s*(SA1+s*(SA2+s*(SA3+s*(SA4+s*( - SA5+s*(SA6+s*(SA7+s*SA8))))))); - } else { /* |x| >= 1/0.35 */ - r = RB0+s*(RB1+s*(RB2+s*(RB3+s*(RB4+s*( - RB5+s*RB6))))); - big_s = 1.0+s*(SB1+s*(SB2+s*(SB3+s*(SB4+s*( - SB5+s*(SB6+s*SB7)))))); - } - ix = x.to_bits(); - z = f32::from_bits(ix&0xffffe000); - - expf(-z*z - 0.5625) * expf((z-x)*(z+x) + r/big_s)/x -} - -pub fn erff(x: f32) -> f32 -{ - let r: f32; - let s: f32; - let z: f32; - let y: f32; - let mut ix: u32; - let sign: usize; - - ix = x.to_bits(); - sign = (ix>>31) as usize; - ix &= 0x7fffffff; - if ix >= 0x7f800000 { - /* erf(nan)=nan, erf(+-inf)=+-1 */ - return 1.0-2.0*(sign as f32) + 1.0/x; - } - if ix < 0x3f580000 { /* |x| < 0.84375 */ - if ix < 0x31800000 { /* |x| < 2**-28 */ - /*avoid underflow */ - return 0.125*(8.0*x + EFX8*x); - } - z = x*x; - r = PP0+z*(PP1+z*(PP2+z*(PP3+z*PP4))); - s = 1.0+z*(QQ1+z*(QQ2+z*(QQ3+z*(QQ4+z*QQ5)))); - y = r/s; - return x + x*y; - } - if ix < 0x40c00000 { /* |x| < 6 */ - y = 1.0 - erfc2(ix,x); - } else { - let x1p_120 = f32::from_bits(0x03800000); - y = 1.0 - x1p_120; - } - - if sign != 0 { - -y - } else { - y - } -} - -pub fn erfcf(x: f32) -> f32 { - let r: f32; - let s: f32; - let z: f32; - let y: f32; - let mut ix: u32; - let sign: usize; - - ix = x.to_bits(); - sign = (ix>>31) as usize; - ix &= 0x7fffffff; - if ix >= 0x7f800000 { - /* erfc(nan)=nan, erfc(+-inf)=0,2 */ - return 2.0*(sign as f32) + 1.0/x; - } - - if ix < 0x3f580000 { /* |x| < 0.84375 */ - if ix < 0x23800000 { /* |x| < 2**-56 */ - return 1.0 - x; - } - z = x*x; - r = PP0+z*(PP1+z*(PP2+z*(PP3+z*PP4))); - s = 1.0+z*(QQ1+z*(QQ2+z*(QQ3+z*(QQ4+z*QQ5)))); - y = r/s; - if sign != 0 || ix < 0x3e800000 { /* x < 1/4 */ - return 1.0 - (x+x*y); - } - return 0.5 - (x - 0.5 + x*y); - } - if ix < 0x41e00000 { /* |x| < 28 */ - if sign != 0 { - return 2.0 - erfc2(ix, x); - } else { - return erfc2(ix, x); - } - } - - let x1p_120 = f32::from_bits(0x03800000); - if sign != 0 { - 2.0 - x1p_120 - } else { - x1p_120*x1p_120 
- } -} +/* origin: FreeBSD /usr/src/lib/msun/src/s_erff.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{expf, fabsf}; + +const ERX: f32 = 8.4506291151e-01; /* 0x3f58560b */ +/* + * Coefficients for approximation to erf on [0,0.84375] + */ +const EFX8: f32 = 1.0270333290e+00; /* 0x3f8375d4 */ +const PP0: f32 = 1.2837916613e-01; /* 0x3e0375d4 */ +const PP1: f32 = -3.2504209876e-01; /* 0xbea66beb */ +const PP2: f32 = -2.8481749818e-02; /* 0xbce9528f */ +const PP3: f32 = -5.7702702470e-03; /* 0xbbbd1489 */ +const PP4: f32 = -2.3763017452e-05; /* 0xb7c756b1 */ +const QQ1: f32 = 3.9791721106e-01; /* 0x3ecbbbce */ +const QQ2: f32 = 6.5022252500e-02; /* 0x3d852a63 */ +const QQ3: f32 = 5.0813062117e-03; /* 0x3ba68116 */ +const QQ4: f32 = 1.3249473704e-04; /* 0x390aee49 */ +const QQ5: f32 = -3.9602282413e-06; /* 0xb684e21a */ +/* + * Coefficients for approximation to erf in [0.84375,1.25] + */ +const PA0: f32 = -2.3621185683e-03; /* 0xbb1acdc6 */ +const PA1: f32 = 4.1485610604e-01; /* 0x3ed46805 */ +const PA2: f32 = -3.7220788002e-01; /* 0xbebe9208 */ +const PA3: f32 = 3.1834661961e-01; /* 0x3ea2fe54 */ +const PA4: f32 = -1.1089469492e-01; /* 0xbde31cc2 */ +const PA5: f32 = 3.5478305072e-02; /* 0x3d1151b3 */ +const PA6: f32 = -2.1663755178e-03; /* 0xbb0df9c0 */ +const QA1: f32 = 1.0642088205e-01; /* 0x3dd9f331 */ +const QA2: f32 = 5.4039794207e-01; /* 0x3f0a5785 */ +const QA3: f32 = 7.1828655899e-02; /* 0x3d931ae7 */ +const QA4: f32 = 1.2617121637e-01; /* 0x3e013307 */ +const QA5: f32 = 1.3637083583e-02; /* 0x3c5f6e13 */ +const QA6: 
f32 = 1.1984500103e-02; /* 0x3c445aa3 */ +/* + * Coefficients for approximation to erfc in [1.25,1/0.35] + */ +const RA0: f32 = -9.8649440333e-03; /* 0xbc21a093 */ +const RA1: f32 = -6.9385856390e-01; /* 0xbf31a0b7 */ +const RA2: f32 = -1.0558626175e+01; /* 0xc128f022 */ +const RA3: f32 = -6.2375331879e+01; /* 0xc2798057 */ +const RA4: f32 = -1.6239666748e+02; /* 0xc322658c */ +const RA5: f32 = -1.8460508728e+02; /* 0xc3389ae7 */ +const RA6: f32 = -8.1287437439e+01; /* 0xc2a2932b */ +const RA7: f32 = -9.8143291473e+00; /* 0xc11d077e */ +const SA1: f32 = 1.9651271820e+01; /* 0x419d35ce */ +const SA2: f32 = 1.3765776062e+02; /* 0x4309a863 */ +const SA3: f32 = 4.3456588745e+02; /* 0x43d9486f */ +const SA4: f32 = 6.4538726807e+02; /* 0x442158c9 */ +const SA5: f32 = 4.2900814819e+02; /* 0x43d6810b */ +const SA6: f32 = 1.0863500214e+02; /* 0x42d9451f */ +const SA7: f32 = 6.5702495575e+00; /* 0x40d23f7c */ +const SA8: f32 = -6.0424413532e-02; /* 0xbd777f97 */ +/* + * Coefficients for approximation to erfc in [1/.35,28] + */ +const RB0: f32 = -9.8649431020e-03; /* 0xbc21a092 */ +const RB1: f32 = -7.9928326607e-01; /* 0xbf4c9dd4 */ +const RB2: f32 = -1.7757955551e+01; /* 0xc18e104b */ +const RB3: f32 = -1.6063638306e+02; /* 0xc320a2ea */ +const RB4: f32 = -6.3756646729e+02; /* 0xc41f6441 */ +const RB5: f32 = -1.0250950928e+03; /* 0xc480230b */ +const RB6: f32 = -4.8351919556e+02; /* 0xc3f1c275 */ +const SB1: f32 = 3.0338060379e+01; /* 0x41f2b459 */ +const SB2: f32 = 3.2579251099e+02; /* 0x43a2e571 */ +const SB3: f32 = 1.5367296143e+03; /* 0x44c01759 */ +const SB4: f32 = 3.1998581543e+03; /* 0x4547fdbb */ +const SB5: f32 = 2.5530502930e+03; /* 0x451f90ce */ +const SB6: f32 = 4.7452853394e+02; /* 0x43ed43a7 */ +const SB7: f32 = -2.2440952301e+01; /* 0xc1b38712 */ + +fn erfc1(x: f32) -> f32 { + let s: f32; + let p: f32; + let q: f32; + + s = fabsf(x) - 1.0; + p = PA0 + s * (PA1 + s * (PA2 + s * (PA3 + s * (PA4 + s * (PA5 + s * PA6))))); + q = 1.0 + s * (QA1 + s * (QA2 + s * 
(QA3 + s * (QA4 + s * (QA5 + s * QA6))))); + return 1.0 - ERX - p / q; +} + +fn erfc2(mut ix: u32, mut x: f32) -> f32 { + let s: f32; + let r: f32; + let big_s: f32; + let z: f32; + + if ix < 0x3fa00000 { + /* |x| < 1.25 */ + return erfc1(x); + } + + x = fabsf(x); + s = 1.0 / (x * x); + if ix < 0x4036db6d { + /* |x| < 1/0.35 */ + r = RA0 + s * (RA1 + s * (RA2 + s * (RA3 + s * (RA4 + s * (RA5 + s * (RA6 + s * RA7)))))); + big_s = 1.0 + + s * (SA1 + + s * (SA2 + s * (SA3 + s * (SA4 + s * (SA5 + s * (SA6 + s * (SA7 + s * SA8))))))); + } else { + /* |x| >= 1/0.35 */ + r = RB0 + s * (RB1 + s * (RB2 + s * (RB3 + s * (RB4 + s * (RB5 + s * RB6))))); + big_s = + 1.0 + s * (SB1 + s * (SB2 + s * (SB3 + s * (SB4 + s * (SB5 + s * (SB6 + s * SB7)))))); + } + ix = x.to_bits(); + z = f32::from_bits(ix & 0xffffe000); + + expf(-z * z - 0.5625) * expf((z - x) * (z + x) + r / big_s) / x +} + +pub fn erff(x: f32) -> f32 { + let r: f32; + let s: f32; + let z: f32; + let y: f32; + let mut ix: u32; + let sign: usize; + + ix = x.to_bits(); + sign = (ix >> 31) as usize; + ix &= 0x7fffffff; + if ix >= 0x7f800000 { + /* erf(nan)=nan, erf(+-inf)=+-1 */ + return 1.0 - 2.0 * (sign as f32) + 1.0 / x; + } + if ix < 0x3f580000 { + /* |x| < 0.84375 */ + if ix < 0x31800000 { + /* |x| < 2**-28 */ + /*avoid underflow */ + return 0.125 * (8.0 * x + EFX8 * x); + } + z = x * x; + r = PP0 + z * (PP1 + z * (PP2 + z * (PP3 + z * PP4))); + s = 1.0 + z * (QQ1 + z * (QQ2 + z * (QQ3 + z * (QQ4 + z * QQ5)))); + y = r / s; + return x + x * y; + } + if ix < 0x40c00000 { + /* |x| < 6 */ + y = 1.0 - erfc2(ix, x); + } else { + let x1p_120 = f32::from_bits(0x03800000); + y = 1.0 - x1p_120; + } + + if sign != 0 { + -y + } else { + y + } +} + +pub fn erfcf(x: f32) -> f32 { + let r: f32; + let s: f32; + let z: f32; + let y: f32; + let mut ix: u32; + let sign: usize; + + ix = x.to_bits(); + sign = (ix >> 31) as usize; + ix &= 0x7fffffff; + if ix >= 0x7f800000 { + /* erfc(nan)=nan, erfc(+-inf)=0,2 */ + return 2.0 * (sign as 
f32) + 1.0 / x; + } + + if ix < 0x3f580000 { + /* |x| < 0.84375 */ + if ix < 0x23800000 { + /* |x| < 2**-56 */ + return 1.0 - x; + } + z = x * x; + r = PP0 + z * (PP1 + z * (PP2 + z * (PP3 + z * PP4))); + s = 1.0 + z * (QQ1 + z * (QQ2 + z * (QQ3 + z * (QQ4 + z * QQ5)))); + y = r / s; + if sign != 0 || ix < 0x3e800000 { + /* x < 1/4 */ + return 1.0 - (x + x * y); + } + return 0.5 - (x - 0.5 + x * y); + } + if ix < 0x41e00000 { + /* |x| < 28 */ + if sign != 0 { + return 2.0 - erfc2(ix, x); + } else { + return erfc2(ix, x); + } + } + + let x1p_120 = f32::from_bits(0x03800000); + if sign != 0 { + 2.0 - x1p_120 + } else { + x1p_120 * x1p_120 + } +} diff --git a/libm/src/math/exp10.rs b/libm/src/math/exp10.rs index d12fa0be3..9537f76f1 100644 --- a/libm/src/math/exp10.rs +++ b/libm/src/math/exp10.rs @@ -1,24 +1,21 @@ -use super::{exp2, modf, pow}; - -const LN10: f64 = 3.32192809488736234787031942948939; -const P10: &[f64] = &[ - 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, - 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, - 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, - 1e10, 1e11, 1e12, 1e13, 1e14, 1e15 -]; - -pub fn exp10(x: f64) -> f64 -{ - let (mut y, n) = modf(x); - let u: u64 = n.to_bits(); - /* fabs(n) < 16 without raising invalid on nan */ - if (u>>52 & 0x7ff) < 0x3ff+4 { - if y == 0.0 { - return P10[((n as isize) + 15) as usize]; - } - y = exp2(LN10 * y); - return y * P10[((n as isize) + 15) as usize]; - } - return pow(10.0, x); -} +use super::{exp2, modf, pow}; + +const LN10: f64 = 3.32192809488736234787031942948939; +const P10: &[f64] = &[ + 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, +]; + +pub fn exp10(x: f64) -> f64 { + let (mut y, n) = modf(x); + let u: u64 = n.to_bits(); + /* fabs(n) < 16 without raising invalid on nan */ + if (u >> 52 & 0x7ff) < 0x3ff + 4 { + if y == 0.0 { + return P10[((n as isize) + 15) as 
usize]; + } + y = exp2(LN10 * y); + return y * P10[((n as isize) + 15) as usize]; + } + return pow(10.0, x); +} diff --git a/libm/src/math/exp10f.rs b/libm/src/math/exp10f.rs index 8fb88a52c..d45fff36e 100644 --- a/libm/src/math/exp10f.rs +++ b/libm/src/math/exp10f.rs @@ -1,22 +1,21 @@ -use super::{exp2, exp2f, modff}; - -const LN10_F32: f32 = 3.32192809488736234787031942948939; -const LN10_F64: f64 = 3.32192809488736234787031942948939; -const P10: &[f32] = &[ - 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, - 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7 -]; - -pub fn exp10f(x: f32) -> f32 { - let (mut y, n) = modff(x); - let u = n.to_bits(); - /* fabsf(n) < 8 without raising invalid on nan */ - if (u>>23 & 0xff) < 0x7f+3 { - if y == 0.0 { - return P10[((n as isize) + 7) as usize] - } - y = exp2f(LN10_F32 * y); - return y * P10[((n as isize) + 7) as usize]; - } - return exp2(LN10_F64 * (x as f64)) as f32; -} +use super::{exp2, exp2f, modff}; + +const LN10_F32: f32 = 3.32192809488736234787031942948939; +const LN10_F64: f64 = 3.32192809488736234787031942948939; +const P10: &[f32] = &[ + 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, +]; + +pub fn exp10f(x: f32) -> f32 { + let (mut y, n) = modff(x); + let u = n.to_bits(); + /* fabsf(n) < 8 without raising invalid on nan */ + if (u >> 23 & 0xff) < 0x7f + 3 { + if y == 0.0 { + return P10[((n as isize) + 7) as usize]; + } + y = exp2f(LN10_F32 * y); + return y * P10[((n as isize) + 7) as usize]; + } + return exp2(LN10_F64 * (x as f64)) as f32; +} diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index 21c854cd0..6b062481d 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -43,7 +43,7 @@ fn mul(x: u64, y: u64) -> (u64, u64) { t1 = xlo * ylo; t2 = xlo * yhi + xhi * ylo; t3 = xhi * yhi; - let lo = t1 + (t2 << 32); + let lo = t1.wrapping_add(t2 << 32); let hi = t3 + (t2 >> 32) + (t1 > lo) as u64; (hi, lo) } @@ -116,7 +116,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { let mut 
nonzero: i32 = 1; if samesign { /* r += z */ - rlo += zlo; + rlo = rlo.wrapping_add(zlo); rhi += zhi + (rlo < zlo) as u64; } else { /* r -= z */ diff --git a/libm/src/math/fmod.rs b/libm/src/math/fmod.rs index ecc9b39a5..2cdd8a9ba 100644 --- a/libm/src/math/fmod.rs +++ b/libm/src/math/fmod.rs @@ -46,7 +46,7 @@ pub fn fmod(x: f64, y: f64) -> f64 { /* x mod y */ while ex > ey { - i = uxi - uyi; + i = uxi.wrapping_sub(uyi); if i >> 63 == 0 { if i == 0 { return 0.0 * x; @@ -56,7 +56,7 @@ pub fn fmod(x: f64, y: f64) -> f64 { uxi <<= 1; ex -= 1; } - i = uxi - uyi; + i = uxi.wrapping_sub(uyi); if i >> 63 == 0 { if i == 0 { return 0.0 * x; diff --git a/libm/src/math/fmodf.rs b/libm/src/math/fmodf.rs index 98f51f455..3e6779a93 100644 --- a/libm/src/math/fmodf.rs +++ b/libm/src/math/fmodf.rs @@ -52,7 +52,7 @@ pub fn fmodf(x: f32, y: f32) -> f32 { /* x mod y */ while ex > ey { - i = uxi - uyi; + i = uxi.wrapping_sub(uyi); if i >> 31 == 0 { if i == 0 { return 0.0 * x; @@ -64,7 +64,7 @@ pub fn fmodf(x: f32, y: f32) -> f32 { ex -= 1; } - i = uxi - uyi; + i = uxi.wrapping_sub(uyi); if i >> 31 == 0 { if i == 0 { return 0.0 * x; diff --git a/libm/src/math/frexp.rs b/libm/src/math/frexp.rs index 45733a3aa..badad786a 100644 --- a/libm/src/math/frexp.rs +++ b/libm/src/math/frexp.rs @@ -1,20 +1,20 @@ -pub fn frexp(x: f64) -> (f64, isize) { - let mut y = x.to_bits(); - let ee = ((y>>52) & 0x7ff) as isize; - - if ee == 0 { - if x != 0.0 { - let x1p64 = f64::from_bits(0x43f0000000000000); - let (x, e) = frexp(x*x1p64); - return (x, e - 64); - } - return (x, 0); - } else if ee == 0x7ff { - return (x, 0); - } - - let e = ee - 0x3fe; - y &= 0x800fffffffffffff; - y |= 0x3fe0000000000000; - return (f64::from_bits(y), e); -} +pub fn frexp(x: f64) -> (f64, i32) { + let mut y = x.to_bits(); + let ee = ((y >> 52) & 0x7ff) as i32; + + if ee == 0 { + if x != 0.0 { + let x1p64 = f64::from_bits(0x43f0000000000000); + let (x, e) = frexp(x * x1p64); + return (x, e - 64); + } + return (x, 0); + } else if 
ee == 0x7ff { + return (x, 0); + } + + let e = ee - 0x3fe; + y &= 0x800fffffffffffff; + y |= 0x3fe0000000000000; + return (f64::from_bits(y), e); +} diff --git a/libm/src/math/frexpf.rs b/libm/src/math/frexpf.rs index 1c9dae0bb..2919c0ab0 100644 --- a/libm/src/math/frexpf.rs +++ b/libm/src/math/frexpf.rs @@ -1,21 +1,21 @@ -pub fn frexpf(x: f32) -> (f32, isize) { - let mut y = x.to_bits(); - let ee: isize = ((y>>23) & 0xff) as isize; - - if ee == 0 { - if x != 0.0 { - let x1p64 = f32::from_bits(0x5f800000); - let (x, e) = frexpf(x*x1p64); - return (x, e - 64); - } else { - return (x, 0); - } - } else if ee == 0xff { - return (x, 0); - } - - let e = ee - 0x7e; - y &= 0x807fffff; - y |= 0x3f000000; - return (f32::from_bits(y), e); -} +pub fn frexpf(x: f32) -> (f32, i32) { + let mut y = x.to_bits(); + let ee: i32 = ((y >> 23) & 0xff) as i32; + + if ee == 0 { + if x != 0.0 { + let x1p64 = f32::from_bits(0x5f800000); + let (x, e) = frexpf(x * x1p64); + return (x, e - 64); + } else { + return (x, 0); + } + } else if ee == 0xff { + return (x, 0); + } + + let e = ee - 0x7e; + y &= 0x807fffff; + y |= 0x3f000000; + (f32::from_bits(y), e) +} diff --git a/libm/src/math/ilogb.rs b/libm/src/math/ilogb.rs index 78fe030a1..8a1289ca4 100644 --- a/libm/src/math/ilogb.rs +++ b/libm/src/math/ilogb.rs @@ -1,31 +1,31 @@ -const FP_ILOGBNAN: isize = -1 - (((!0) >> 1)); -const FP_ILOGB0: isize = FP_ILOGBNAN; - -pub fn ilogb(x: f64) -> isize { - let mut i: u64 = x.to_bits(); - let e = ((i>>52) & 0x7ff) as isize; - - if e == 0 { - i <<= 12; - if i == 0 { - force_eval!(0.0/0.0); - return FP_ILOGB0; - } - /* subnormal x */ - let mut e = -0x3ff; - while (i>>63) == 0 { - e -= 1; - i <<= 1; - } - return e; - } - if e == 0x7ff { - force_eval!(0.0/0.0); - if (i<<12) != 0 { - return FP_ILOGBNAN; - } else { - return isize::max_value(); - } - } - return e - 0x3ff; -} +const FP_ILOGBNAN: i32 = -1 - ((!0) >> 1); +const FP_ILOGB0: i32 = FP_ILOGBNAN; + +pub fn ilogb(x: f64) -> i32 { + let mut i: u64 = 
x.to_bits(); + let e = ((i >> 52) & 0x7ff) as i32; + + if e == 0 { + i <<= 12; + if i == 0 { + force_eval!(0.0 / 0.0); + return FP_ILOGB0; + } + /* subnormal x */ + let mut e = -0x3ff; + while (i >> 63) == 0 { + e -= 1; + i <<= 1; + } + return e; + } + if e == 0x7ff { + force_eval!(0.0 / 0.0); + if (i << 12) != 0 { + return FP_ILOGBNAN; + } else { + return i32::max_value(); + } + } + return e - 0x3ff; +} diff --git a/libm/src/math/ilogbf.rs b/libm/src/math/ilogbf.rs index 9ca1c3606..1bf4670a8 100644 --- a/libm/src/math/ilogbf.rs +++ b/libm/src/math/ilogbf.rs @@ -1,31 +1,31 @@ -const FP_ILOGBNAN: isize = -1 - (((!0) >> 1)); -const FP_ILOGB0: isize = FP_ILOGBNAN; - -pub fn ilogbf(x: f32) -> isize { - let mut i = x.to_bits(); - let e = ((i>>23) & 0xff) as isize; - - if e == 0 { - i <<= 9; - if i == 0 { - force_eval!(0.0/0.0); - return FP_ILOGB0; - } - /* subnormal x */ - let mut e = -0x7f; - while (i>>31) == 0 { - e -= 1; - i <<= 1; - } - return e; - } - if e == 0xff { - force_eval!(0.0/0.0); - if (i<<9) != 0 { - return FP_ILOGBNAN; - } else { - return isize::max_value(); - } - } - return e - 0x7f; -} +const FP_ILOGBNAN: i32 = -1 - ((!0) >> 1); +const FP_ILOGB0: i32 = FP_ILOGBNAN; + +pub fn ilogbf(x: f32) -> i32 { + let mut i = x.to_bits(); + let e = ((i >> 23) & 0xff) as i32; + + if e == 0 { + i <<= 9; + if i == 0 { + force_eval!(0.0 / 0.0); + return FP_ILOGB0; + } + /* subnormal x */ + let mut e = -0x7f; + while (i >> 31) == 0 { + e -= 1; + i <<= 1; + } + return e; + } + if e == 0xff { + force_eval!(0.0 / 0.0); + if (i << 9) != 0 { + return FP_ILOGBNAN; + } else { + return i32::max_value(); + } + } + return e - 0x7f; +} diff --git a/libm/src/math/j0.rs b/libm/src/math/j0.rs index 02625b086..c4258ccca 100644 --- a/libm/src/math/j0.rs +++ b/libm/src/math/j0.rs @@ -1,392 +1,422 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_j0.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 
- * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* j0(x), y0(x) - * Bessel function of the first and second kinds of order zero. - * Method -- j0(x): - * 1. For tiny x, we use j0(x) = 1 - x^2/4 + x^4/64 - ... - * 2. Reduce x to |x| since j0(x)=j0(-x), and - * for x in (0,2) - * j0(x) = 1-z/4+ z^2*R0/S0, where z = x*x; - * (precision: |j0-1+z/4-z^2R0/S0 |<2**-63.67 ) - * for x in (2,inf) - * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) - * where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) - * as follow: - * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) - * = 1/sqrt(2) * (cos(x) + sin(x)) - * sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) - * = 1/sqrt(2) * (sin(x) - cos(x)) - * (To avoid cancellation, use - * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) - * to compute the worse one.) - * - * 3 Special cases - * j0(nan)= nan - * j0(0) = 1 - * j0(inf) = 0 - * - * Method -- y0(x): - * 1. For x<2. - * Since - * y0(x) = 2/pi*(j0(x)*(ln(x/2)+Euler) + x^2/4 - ...) - * therefore y0(x)-2/pi*j0(x)*ln(x) is an even function. - * We use the following function to approximate y0, - * y0(x) = U(z)/V(z) + (2/pi)*(j0(x)*ln(x)), z= x^2 - * where - * U(z) = u00 + u01*z + ... + u06*z^6 - * V(z) = 1 + v01*z + ... + v04*z^4 - * with absolute approximation error bounded by 2**-72. - * Note: For tiny x, U/V = u0 and j0(x)~1, hence - * y0(tiny) = u0 + (2/pi)*ln(tiny), (choose tiny<2**-27) - * 2. For x>=2. - * y0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)+q0(x)*sin(x0)) - * where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) - * by the method mentioned above. - * 3. Special cases: y0(0)=-inf, y0(x<0)=NaN, y0(inf)=0. 
- */ - -use super::{cos, get_low_word, get_high_word, fabs, log, sin, sqrt}; -const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ -const TPI: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ - -/* common method when |x|>=2 */ -fn common(ix: u32, x: f64, y0: bool) -> f64 { - let s: f64; - let mut c: f64; - let mut ss: f64; - let mut cc: f64; - let z: f64; - - /* - * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x-pi/4)-q0(x)*sin(x-pi/4)) - * y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x-pi/4)+q0(x)*cos(x-pi/4)) - * - * sin(x-pi/4) = (sin(x) - cos(x))/sqrt(2) - * cos(x-pi/4) = (sin(x) + cos(x))/sqrt(2) - * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) - */ - s = sin(x); - c = cos(x); - if y0 { - c = -c; - } - cc = s+c; - /* avoid overflow in 2*x, big ulp error when x>=0x1p1023 */ - if ix < 0x7fe00000 { - ss = s-c; - z = -cos(2.0*x); - if s*c < 0.0 { - cc = z/ss; - } else { - ss = z/cc; - } - if ix < 0x48000000 { - if y0 { - ss = -ss; - } - cc = pzero(x)*cc-qzero(x)*ss; - } - } - return INVSQRTPI*cc/sqrt(x); -} - -/* R0/S0 on [0, 2.00] */ -const R02: f64 = 1.56249999999999947958e-02; /* 0x3F8FFFFF, 0xFFFFFFFD */ -const R03: f64 = -1.89979294238854721751e-04; /* 0xBF28E6A5, 0xB61AC6E9 */ -const R04: f64 = 1.82954049532700665670e-06; /* 0x3EBEB1D1, 0x0C503919 */ -const R05: f64 = -4.61832688532103189199e-09; /* 0xBE33D5E7, 0x73D63FCE */ -const S01: f64 = 1.56191029464890010492e-02; /* 0x3F8FFCE8, 0x82C8C2A4 */ -const S02: f64 = 1.16926784663337450260e-04; /* 0x3F1EA6D2, 0xDD57DBF4 */ -const S03: f64 = 5.13546550207318111446e-07; /* 0x3EA13B54, 0xCE84D5A9 */ -const S04: f64 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */ - -pub fn j0(mut x: f64) -> f64 -{ - let z: f64; - let r: f64; - let s: f64; - let mut ix: u32; - - ix = get_high_word(x); - ix &= 0x7fffffff; - - /* j0(+-inf)=0, j0(nan)=nan */ - if ix >= 0x7ff00000 { - return 1.0/(x*x); - } - x = fabs(x); - - if ix >= 0x40000000 { /* |x| >= 2 */ - /* large ulp error near zeros: 2.4, 
5.52, 8.6537,.. */ - return common(ix,x,false); - } - - /* 1 - x*x/4 + x*x*R(x^2)/S(x^2) */ - if ix >= 0x3f200000 { /* |x| >= 2**-13 */ - /* up to 4ulp error close to 2 */ - z = x*x; - r = z*(R02+z*(R03+z*(R04+z*R05))); - s = 1.0+z*(S01+z*(S02+z*(S03+z*S04))); - return (1.0+x/2.0)*(1.0-x/2.0) + z*(r/s); - } - - /* 1 - x*x/4 */ - /* prevent underflow */ - /* inexact should be raised when x!=0, this is not done correctly */ - if ix >= 0x38000000 { /* |x| >= 2**-127 */ - x = 0.25*x*x; - } - return 1.0 - x; -} - -const U00: f64 = -7.38042951086872317523e-02; /* 0xBFB2E4D6, 0x99CBD01F */ -const U01: f64 = 1.76666452509181115538e-01; /* 0x3FC69D01, 0x9DE9E3FC */ -const U02: f64 = -1.38185671945596898896e-02; /* 0xBF8C4CE8, 0xB16CFA97 */ -const U03: f64 = 3.47453432093683650238e-04; /* 0x3F36C54D, 0x20B29B6B */ -const U04: f64 = -3.81407053724364161125e-06; /* 0xBECFFEA7, 0x73D25CAD */ -const U05: f64 = 1.95590137035022920206e-08; /* 0x3E550057, 0x3B4EABD4 */ -const U06: f64 = -3.98205194132103398453e-11; /* 0xBDC5E43D, 0x693FB3C8 */ -const V01: f64 = 1.27304834834123699328e-02; /* 0x3F8A1270, 0x91C9C71A */ -const V02: f64 = 7.60068627350353253702e-05; /* 0x3F13ECBB, 0xF578C6C1 */ -const V03: f64 = 2.59150851840457805467e-07; /* 0x3E91642D, 0x7FF202FD */ -const V04: f64 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */ - -pub fn y0(x: f64) -> f64 -{ - let z: f64; - let u: f64; - let v: f64; - let ix: u32; - let lx: u32; - - ix = get_high_word(x); - lx = get_low_word(x); - - /* y0(nan)=nan, y0(<0)=nan, y0(0)=-inf, y0(inf)=0 */ - if ((ix<<1) | lx) == 0 { - return -1.0/0.0; - } - if (ix>>31) != 0 { - return 0.0/0.0; - } - if ix >= 0x7ff00000 { - return 1.0/x; - } - - if ix >= 0x40000000 { /* x >= 2 */ - /* large ulp errors near zeros: 3.958, 7.086,.. 
*/ - return common(ix,x,true); - } - - /* U(x^2)/V(x^2) + (2/pi)*j0(x)*log(x) */ - if ix >= 0x3e400000 { /* x >= 2**-27 */ - /* large ulp error near the first zero, x ~= 0.89 */ - z = x*x; - u = U00+z*(U01+z*(U02+z*(U03+z*(U04+z*(U05+z*U06))))); - v = 1.0+z*(V01+z*(V02+z*(V03+z*V04))); - return u/v + TPI*(j0(x)*log(x)); - } - return U00 + TPI*log(x); -} - -/* The asymptotic expansions of pzero is - * 1 - 9/128 s^2 + 11025/98304 s^4 - ..., where s = 1/x. - * For x >= 2, We approximate pzero by - * pzero(x) = 1 + (R/S) - * where R = pR0 + pR1*s^2 + pR2*s^4 + ... + pR5*s^10 - * S = 1 + pS0*s^2 + ... + pS4*s^10 - * and - * | pzero(x)-1-R/S | <= 2 ** ( -60.26) - */ -const PR8: [f64; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ - 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ - -7.03124999999900357484e-02, /* 0xBFB1FFFF, 0xFFFFFD32 */ - -8.08167041275349795626e+00, /* 0xC02029D0, 0xB44FA779 */ - -2.57063105679704847262e+02, /* 0xC0701102, 0x7B19E863 */ - -2.48521641009428822144e+03, /* 0xC0A36A6E, 0xCD4DCAFC */ - -5.25304380490729545272e+03, /* 0xC0B4850B, 0x36CC643D */ -]; -const PS8: [f64; 5] = [ - 1.16534364619668181717e+02, /* 0x405D2233, 0x07A96751 */ - 3.83374475364121826715e+03, /* 0x40ADF37D, 0x50596938 */ - 4.05978572648472545552e+04, /* 0x40E3D2BB, 0x6EB6B05F */ - 1.16752972564375915681e+05, /* 0x40FC810F, 0x8F9FA9BD */ - 4.76277284146730962675e+04, /* 0x40E74177, 0x4F2C49DC */ -]; - -const PR5: [f64; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ - -1.14125464691894502584e-11, /* 0xBDA918B1, 0x47E495CC */ - -7.03124940873599280078e-02, /* 0xBFB1FFFF, 0xE69AFBC6 */ - -4.15961064470587782438e+00, /* 0xC010A370, 0xF90C6BBF */ - -6.76747652265167261021e+01, /* 0xC050EB2F, 0x5A7D1783 */ - -3.31231299649172967747e+02, /* 0xC074B3B3, 0x6742CC63 */ - -3.46433388365604912451e+02, /* 0xC075A6EF, 0x28A38BD7 */ -]; -const PS5: [f64; 5] = [ - 6.07539382692300335975e+01, /* 0x404E6081, 0x0C98C5DE */ - 1.05125230595704579173e+03, /* 0x40906D02, 0x5C7E2864 */ - 
5.97897094333855784498e+03, /* 0x40B75AF8, 0x8FBE1D60 */ - 9.62544514357774460223e+03, /* 0x40C2CCB8, 0xFA76FA38 */ - 2.40605815922939109441e+03, /* 0x40A2CC1D, 0xC70BE864 */ -]; - -const PR3: [f64; 6] = [/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ - -2.54704601771951915620e-09, /* 0xBE25E103, 0x6FE1AA86 */ - -7.03119616381481654654e-02, /* 0xBFB1FFF6, 0xF7C0E24B */ - -2.40903221549529611423e+00, /* 0xC00345B2, 0xAEA48074 */ - -2.19659774734883086467e+01, /* 0xC035F74A, 0x4CB94E14 */ - -5.80791704701737572236e+01, /* 0xC04D0A22, 0x420A1A45 */ - -3.14479470594888503854e+01, /* 0xC03F72AC, 0xA892D80F */ -]; -const PS3: [f64; 5] = [ - 3.58560338055209726349e+01, /* 0x4041ED92, 0x84077DD3 */ - 3.61513983050303863820e+02, /* 0x40769839, 0x464A7C0E */ - 1.19360783792111533330e+03, /* 0x4092A66E, 0x6D1061D6 */ - 1.12799679856907414432e+03, /* 0x40919FFC, 0xB8C39B7E */ - 1.73580930813335754692e+02, /* 0x4065B296, 0xFC379081 */ -]; - -const PR2: [f64; 6] = [/* for x in [2.8570,2]=1/[0.3499,0.5] */ - -8.87534333032526411254e-08, /* 0xBE77D316, 0xE927026D */ - -7.03030995483624743247e-02, /* 0xBFB1FF62, 0x495E1E42 */ - -1.45073846780952986357e+00, /* 0xBFF73639, 0x8A24A843 */ - -7.63569613823527770791e+00, /* 0xC01E8AF3, 0xEDAFA7F3 */ - -1.11931668860356747786e+01, /* 0xC02662E6, 0xC5246303 */ - -3.23364579351335335033e+00, /* 0xC009DE81, 0xAF8FE70F */ -]; -const PS2: [f64; 5] = [ - 2.22202997532088808441e+01, /* 0x40363865, 0x908B5959 */ - 1.36206794218215208048e+02, /* 0x4061069E, 0x0EE8878F */ - 2.70470278658083486789e+02, /* 0x4070E786, 0x42EA079B */ - 1.53875394208320329881e+02, /* 0x40633C03, 0x3AB6FAFF */ - 1.46576176948256193810e+01, /* 0x402D50B3, 0x44391809 */ -]; - -fn pzero(x: f64) -> f64 -{ - let p: &[f64; 6]; - let q: &[f64; 5]; - let z: f64; - let r: f64; - let s: f64; - let mut ix: u32; - - ix = get_high_word(x); - ix &= 0x7fffffff; - if ix >= 0x40200000 {p = &PR8; q = &PS8;} - else if ix >= 0x40122E8B {p = &PR5; q = &PS5;} - else if ix >= 0x4006DB6D {p = 
&PR3; q = &PS3;} - else /*ix >= 0x40000000*/{p = &PR2; q = &PS2;} - z = 1.0/(x*x); - r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); - s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); - return 1.0 + r/s; -} - - -/* For x >= 8, the asymptotic expansions of qzero is - * -1/8 s + 75/1024 s^3 - ..., where s = 1/x. - * We approximate pzero by - * qzero(x) = s*(-1.25 + (R/S)) - * where R = qR0 + qR1*s^2 + qR2*s^4 + ... + qR5*s^10 - * S = 1 + qS0*s^2 + ... + qS5*s^12 - * and - * | qzero(x)/s +1.25-R/S | <= 2 ** ( -61.22) - */ -const QR8: [f64; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ - 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ - 7.32421874999935051953e-02, /* 0x3FB2BFFF, 0xFFFFFE2C */ - 1.17682064682252693899e+01, /* 0x40278952, 0x5BB334D6 */ - 5.57673380256401856059e+02, /* 0x40816D63, 0x15301825 */ - 8.85919720756468632317e+03, /* 0x40C14D99, 0x3E18F46D */ - 3.70146267776887834771e+04, /* 0x40E212D4, 0x0E901566 */ -]; -const QS8: [f64; 6] = [ - 1.63776026895689824414e+02, /* 0x406478D5, 0x365B39BC */ - 8.09834494656449805916e+03, /* 0x40BFA258, 0x4E6B0563 */ - 1.42538291419120476348e+05, /* 0x41016652, 0x54D38C3F */ - 8.03309257119514397345e+05, /* 0x412883DA, 0x83A52B43 */ - 8.40501579819060512818e+05, /* 0x4129A66B, 0x28DE0B3D */ - -3.43899293537866615225e+05, /* 0xC114FD6D, 0x2C9530C5 */ -]; - -const QR5: [f64; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ - 1.84085963594515531381e-11, /* 0x3DB43D8F, 0x29CC8CD9 */ - 7.32421766612684765896e-02, /* 0x3FB2BFFF, 0xD172B04C */ - 5.83563508962056953777e+00, /* 0x401757B0, 0xB9953DD3 */ - 1.35111577286449829671e+02, /* 0x4060E392, 0x0A8788E9 */ - 1.02724376596164097464e+03, /* 0x40900CF9, 0x9DC8C481 */ - 1.98997785864605384631e+03, /* 0x409F17E9, 0x53C6E3A6 */ -]; -const QS5: [f64; 6] = [ - 8.27766102236537761883e+01, /* 0x4054B1B3, 0xFB5E1543 */ - 2.07781416421392987104e+03, /* 0x40A03BA0, 0xDA21C0CE */ - 1.88472887785718085070e+04, /* 0x40D267D2, 0x7B591E6D */ - 5.67511122894947329769e+04, /* 
0x40EBB5E3, 0x97E02372 */ - 3.59767538425114471465e+04, /* 0x40E19118, 0x1F7A54A0 */ - -5.35434275601944773371e+03, /* 0xC0B4EA57, 0xBEDBC609 */ -]; - -const QR3: [f64; 6] = [/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ - 4.37741014089738620906e-09, /* 0x3E32CD03, 0x6ADECB82 */ - 7.32411180042911447163e-02, /* 0x3FB2BFEE, 0x0E8D0842 */ - 3.34423137516170720929e+00, /* 0x400AC0FC, 0x61149CF5 */ - 4.26218440745412650017e+01, /* 0x40454F98, 0x962DAEDD */ - 1.70808091340565596283e+02, /* 0x406559DB, 0xE25EFD1F */ - 1.66733948696651168575e+02, /* 0x4064D77C, 0x81FA21E0 */ -]; -const QS3: [f64; 6] = [ - 4.87588729724587182091e+01, /* 0x40486122, 0xBFE343A6 */ - 7.09689221056606015736e+02, /* 0x40862D83, 0x86544EB3 */ - 3.70414822620111362994e+03, /* 0x40ACF04B, 0xE44DFC63 */ - 6.46042516752568917582e+03, /* 0x40B93C6C, 0xD7C76A28 */ - 2.51633368920368957333e+03, /* 0x40A3A8AA, 0xD94FB1C0 */ - -1.49247451836156386662e+02, /* 0xC062A7EB, 0x201CF40F */ -]; - -const QR2: [f64; 6] = [/* for x in [2.8570,2]=1/[0.3499,0.5] */ - 1.50444444886983272379e-07, /* 0x3E84313B, 0x54F76BDB */ - 7.32234265963079278272e-02, /* 0x3FB2BEC5, 0x3E883E34 */ - 1.99819174093815998816e+00, /* 0x3FFFF897, 0xE727779C */ - 1.44956029347885735348e+01, /* 0x402CFDBF, 0xAAF96FE5 */ - 3.16662317504781540833e+01, /* 0x403FAA8E, 0x29FBDC4A */ - 1.62527075710929267416e+01, /* 0x403040B1, 0x71814BB4 */ -]; -const QS2: [f64; 6] = [ - 3.03655848355219184498e+01, /* 0x403E5D96, 0xF7C07AED */ - 2.69348118608049844624e+02, /* 0x4070D591, 0xE4D14B40 */ - 8.44783757595320139444e+02, /* 0x408A6645, 0x22B3BF22 */ - 8.82935845112488550512e+02, /* 0x408B977C, 0x9C5CC214 */ - 2.12666388511798828631e+02, /* 0x406A9553, 0x0E001365 */ - -5.31095493882666946917e+00, /* 0xC0153E6A, 0xF8B32931 */ -]; - -fn qzero(x: f64) -> f64 -{ - let p: &[f64; 6]; - let q: &[f64; 6]; - let s: f64; - let r: f64; - let z: f64; - let mut ix: u32; - - ix = get_high_word(x); - ix &= 0x7fffffff; - if ix >= 0x40200000 {p = &QR8; q = &QS8;} - 
else if ix >= 0x40122E8B {p = &QR5; q = &QS5;} - else if ix >= 0x4006DB6D {p = &QR3; q = &QS3;} - else /*ix >= 0x40000000*/{p = &QR2; q = &QS2;} - z = 1.0/(x*x); - r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); - s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); - return (-0.125 + r/s)/x; -} +/* origin: FreeBSD /usr/src/lib/msun/src/e_j0.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* j0(x), y0(x) + * Bessel function of the first and second kinds of order zero. + * Method -- j0(x): + * 1. For tiny x, we use j0(x) = 1 - x^2/4 + x^4/64 - ... + * 2. Reduce x to |x| since j0(x)=j0(-x), and + * for x in (0,2) + * j0(x) = 1-z/4+ z^2*R0/S0, where z = x*x; + * (precision: |j0-1+z/4-z^2R0/S0 |<2**-63.67 ) + * for x in (2,inf) + * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) + * where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) + * as follow: + * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) + * = 1/sqrt(2) * (cos(x) + sin(x)) + * sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * (To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one.) + * + * 3 Special cases + * j0(nan)= nan + * j0(0) = 1 + * j0(inf) = 0 + * + * Method -- y0(x): + * 1. For x<2. + * Since + * y0(x) = 2/pi*(j0(x)*(ln(x/2)+Euler) + x^2/4 - ...) + * therefore y0(x)-2/pi*j0(x)*ln(x) is an even function. + * We use the following function to approximate y0, + * y0(x) = U(z)/V(z) + (2/pi)*(j0(x)*ln(x)), z= x^2 + * where + * U(z) = u00 + u01*z + ... + u06*z^6 + * V(z) = 1 + v01*z + ... + v04*z^4 + * with absolute approximation error bounded by 2**-72. 
+ * Note: For tiny x, U/V = u0 and j0(x)~1, hence + * y0(tiny) = u0 + (2/pi)*ln(tiny), (choose tiny<2**-27) + * 2. For x>=2. + * y0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)+q0(x)*sin(x0)) + * where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) + * by the method mentioned above. + * 3. Special cases: y0(0)=-inf, y0(x<0)=NaN, y0(inf)=0. + */ + +use super::{cos, fabs, get_high_word, get_low_word, log, sin, sqrt}; +const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ +const TPI: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ + +/* common method when |x|>=2 */ +fn common(ix: u32, x: f64, y0: bool) -> f64 { + let s: f64; + let mut c: f64; + let mut ss: f64; + let mut cc: f64; + let z: f64; + + /* + * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x-pi/4)-q0(x)*sin(x-pi/4)) + * y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x-pi/4)+q0(x)*cos(x-pi/4)) + * + * sin(x-pi/4) = (sin(x) - cos(x))/sqrt(2) + * cos(x-pi/4) = (sin(x) + cos(x))/sqrt(2) + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + */ + s = sin(x); + c = cos(x); + if y0 { + c = -c; + } + cc = s + c; + /* avoid overflow in 2*x, big ulp error when x>=0x1p1023 */ + if ix < 0x7fe00000 { + ss = s - c; + z = -cos(2.0 * x); + if s * c < 0.0 { + cc = z / ss; + } else { + ss = z / cc; + } + if ix < 0x48000000 { + if y0 { + ss = -ss; + } + cc = pzero(x) * cc - qzero(x) * ss; + } + } + return INVSQRTPI * cc / sqrt(x); +} + +/* R0/S0 on [0, 2.00] */ +const R02: f64 = 1.56249999999999947958e-02; /* 0x3F8FFFFF, 0xFFFFFFFD */ +const R03: f64 = -1.89979294238854721751e-04; /* 0xBF28E6A5, 0xB61AC6E9 */ +const R04: f64 = 1.82954049532700665670e-06; /* 0x3EBEB1D1, 0x0C503919 */ +const R05: f64 = -4.61832688532103189199e-09; /* 0xBE33D5E7, 0x73D63FCE */ +const S01: f64 = 1.56191029464890010492e-02; /* 0x3F8FFCE8, 0x82C8C2A4 */ +const S02: f64 = 1.16926784663337450260e-04; /* 0x3F1EA6D2, 0xDD57DBF4 */ +const S03: f64 = 5.13546550207318111446e-07; /* 0x3EA13B54, 0xCE84D5A9 */ +const S04: f64 = 
1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */ + +pub fn j0(mut x: f64) -> f64 { + let z: f64; + let r: f64; + let s: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + + /* j0(+-inf)=0, j0(nan)=nan */ + if ix >= 0x7ff00000 { + return 1.0 / (x * x); + } + x = fabs(x); + + if ix >= 0x40000000 { + /* |x| >= 2 */ + /* large ulp error near zeros: 2.4, 5.52, 8.6537,.. */ + return common(ix, x, false); + } + + /* 1 - x*x/4 + x*x*R(x^2)/S(x^2) */ + if ix >= 0x3f200000 { + /* |x| >= 2**-13 */ + /* up to 4ulp error close to 2 */ + z = x * x; + r = z * (R02 + z * (R03 + z * (R04 + z * R05))); + s = 1.0 + z * (S01 + z * (S02 + z * (S03 + z * S04))); + return (1.0 + x / 2.0) * (1.0 - x / 2.0) + z * (r / s); + } + + /* 1 - x*x/4 */ + /* prevent underflow */ + /* inexact should be raised when x!=0, this is not done correctly */ + if ix >= 0x38000000 { + /* |x| >= 2**-127 */ + x = 0.25 * x * x; + } + return 1.0 - x; +} + +const U00: f64 = -7.38042951086872317523e-02; /* 0xBFB2E4D6, 0x99CBD01F */ +const U01: f64 = 1.76666452509181115538e-01; /* 0x3FC69D01, 0x9DE9E3FC */ +const U02: f64 = -1.38185671945596898896e-02; /* 0xBF8C4CE8, 0xB16CFA97 */ +const U03: f64 = 3.47453432093683650238e-04; /* 0x3F36C54D, 0x20B29B6B */ +const U04: f64 = -3.81407053724364161125e-06; /* 0xBECFFEA7, 0x73D25CAD */ +const U05: f64 = 1.95590137035022920206e-08; /* 0x3E550057, 0x3B4EABD4 */ +const U06: f64 = -3.98205194132103398453e-11; /* 0xBDC5E43D, 0x693FB3C8 */ +const V01: f64 = 1.27304834834123699328e-02; /* 0x3F8A1270, 0x91C9C71A */ +const V02: f64 = 7.60068627350353253702e-05; /* 0x3F13ECBB, 0xF578C6C1 */ +const V03: f64 = 2.59150851840457805467e-07; /* 0x3E91642D, 0x7FF202FD */ +const V04: f64 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */ + +pub fn y0(x: f64) -> f64 { + let z: f64; + let u: f64; + let v: f64; + let ix: u32; + let lx: u32; + + ix = get_high_word(x); + lx = get_low_word(x); + + /* y0(nan)=nan, y0(<0)=nan, y0(0)=-inf, y0(inf)=0 */ + if 
((ix << 1) | lx) == 0 { + return -1.0 / 0.0; + } + if (ix >> 31) != 0 { + return 0.0 / 0.0; + } + if ix >= 0x7ff00000 { + return 1.0 / x; + } + + if ix >= 0x40000000 { + /* x >= 2 */ + /* large ulp errors near zeros: 3.958, 7.086,.. */ + return common(ix, x, true); + } + + /* U(x^2)/V(x^2) + (2/pi)*j0(x)*log(x) */ + if ix >= 0x3e400000 { + /* x >= 2**-27 */ + /* large ulp error near the first zero, x ~= 0.89 */ + z = x * x; + u = U00 + z * (U01 + z * (U02 + z * (U03 + z * (U04 + z * (U05 + z * U06))))); + v = 1.0 + z * (V01 + z * (V02 + z * (V03 + z * V04))); + return u / v + TPI * (j0(x) * log(x)); + } + return U00 + TPI * log(x); +} + +/* The asymptotic expansions of pzero is + * 1 - 9/128 s^2 + 11025/98304 s^4 - ..., where s = 1/x. + * For x >= 2, We approximate pzero by + * pzero(x) = 1 + (R/S) + * where R = pR0 + pR1*s^2 + pR2*s^4 + ... + pR5*s^10 + * S = 1 + pS0*s^2 + ... + pS4*s^10 + * and + * | pzero(x)-1-R/S | <= 2 ** ( -60.26) + */ +const PR8: [f64; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + -7.03124999999900357484e-02, /* 0xBFB1FFFF, 0xFFFFFD32 */ + -8.08167041275349795626e+00, /* 0xC02029D0, 0xB44FA779 */ + -2.57063105679704847262e+02, /* 0xC0701102, 0x7B19E863 */ + -2.48521641009428822144e+03, /* 0xC0A36A6E, 0xCD4DCAFC */ + -5.25304380490729545272e+03, /* 0xC0B4850B, 0x36CC643D */ +]; +const PS8: [f64; 5] = [ + 1.16534364619668181717e+02, /* 0x405D2233, 0x07A96751 */ + 3.83374475364121826715e+03, /* 0x40ADF37D, 0x50596938 */ + 4.05978572648472545552e+04, /* 0x40E3D2BB, 0x6EB6B05F */ + 1.16752972564375915681e+05, /* 0x40FC810F, 0x8F9FA9BD */ + 4.76277284146730962675e+04, /* 0x40E74177, 0x4F2C49DC */ +]; + +const PR5: [f64; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -1.14125464691894502584e-11, /* 0xBDA918B1, 0x47E495CC */ + -7.03124940873599280078e-02, /* 0xBFB1FFFF, 0xE69AFBC6 */ + -4.15961064470587782438e+00, /* 0xC010A370, 0xF90C6BBF */ + -6.76747652265167261021e+01, /* 
0xC050EB2F, 0x5A7D1783 */ + -3.31231299649172967747e+02, /* 0xC074B3B3, 0x6742CC63 */ + -3.46433388365604912451e+02, /* 0xC075A6EF, 0x28A38BD7 */ +]; +const PS5: [f64; 5] = [ + 6.07539382692300335975e+01, /* 0x404E6081, 0x0C98C5DE */ + 1.05125230595704579173e+03, /* 0x40906D02, 0x5C7E2864 */ + 5.97897094333855784498e+03, /* 0x40B75AF8, 0x8FBE1D60 */ + 9.62544514357774460223e+03, /* 0x40C2CCB8, 0xFA76FA38 */ + 2.40605815922939109441e+03, /* 0x40A2CC1D, 0xC70BE864 */ +]; + +const PR3: [f64; 6] = [ + /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + -2.54704601771951915620e-09, /* 0xBE25E103, 0x6FE1AA86 */ + -7.03119616381481654654e-02, /* 0xBFB1FFF6, 0xF7C0E24B */ + -2.40903221549529611423e+00, /* 0xC00345B2, 0xAEA48074 */ + -2.19659774734883086467e+01, /* 0xC035F74A, 0x4CB94E14 */ + -5.80791704701737572236e+01, /* 0xC04D0A22, 0x420A1A45 */ + -3.14479470594888503854e+01, /* 0xC03F72AC, 0xA892D80F */ +]; +const PS3: [f64; 5] = [ + 3.58560338055209726349e+01, /* 0x4041ED92, 0x84077DD3 */ + 3.61513983050303863820e+02, /* 0x40769839, 0x464A7C0E */ + 1.19360783792111533330e+03, /* 0x4092A66E, 0x6D1061D6 */ + 1.12799679856907414432e+03, /* 0x40919FFC, 0xB8C39B7E */ + 1.73580930813335754692e+02, /* 0x4065B296, 0xFC379081 */ +]; + +const PR2: [f64; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + -8.87534333032526411254e-08, /* 0xBE77D316, 0xE927026D */ + -7.03030995483624743247e-02, /* 0xBFB1FF62, 0x495E1E42 */ + -1.45073846780952986357e+00, /* 0xBFF73639, 0x8A24A843 */ + -7.63569613823527770791e+00, /* 0xC01E8AF3, 0xEDAFA7F3 */ + -1.11931668860356747786e+01, /* 0xC02662E6, 0xC5246303 */ + -3.23364579351335335033e+00, /* 0xC009DE81, 0xAF8FE70F */ +]; +const PS2: [f64; 5] = [ + 2.22202997532088808441e+01, /* 0x40363865, 0x908B5959 */ + 1.36206794218215208048e+02, /* 0x4061069E, 0x0EE8878F */ + 2.70470278658083486789e+02, /* 0x4070E786, 0x42EA079B */ + 1.53875394208320329881e+02, /* 0x40633C03, 0x3AB6FAFF */ + 1.46576176948256193810e+01, /* 0x402D50B3, 0x44391809 */ +]; + 
+fn pzero(x: f64) -> f64 { + let p: &[f64; 6]; + let q: &[f64; 5]; + let z: f64; + let r: f64; + let s: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + if ix >= 0x40200000 { + p = &PR8; + q = &PS8; + } else if ix >= 0x40122E8B { + p = &PR5; + q = &PS5; + } else if ix >= 0x4006DB6D { + p = &PR3; + q = &PS3; + } else + /*ix >= 0x40000000*/ + { + p = &PR2; + q = &PS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4])))); + return 1.0 + r / s; +} + +/* For x >= 8, the asymptotic expansions of qzero is + * -1/8 s + 75/1024 s^3 - ..., where s = 1/x. + * We approximate pzero by + * qzero(x) = s*(-1.25 + (R/S)) + * where R = qR0 + qR1*s^2 + qR2*s^4 + ... + qR5*s^10 + * S = 1 + qS0*s^2 + ... + qS5*s^12 + * and + * | qzero(x)/s +1.25-R/S | <= 2 ** ( -61.22) + */ +const QR8: [f64; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + 7.32421874999935051953e-02, /* 0x3FB2BFFF, 0xFFFFFE2C */ + 1.17682064682252693899e+01, /* 0x40278952, 0x5BB334D6 */ + 5.57673380256401856059e+02, /* 0x40816D63, 0x15301825 */ + 8.85919720756468632317e+03, /* 0x40C14D99, 0x3E18F46D */ + 3.70146267776887834771e+04, /* 0x40E212D4, 0x0E901566 */ +]; +const QS8: [f64; 6] = [ + 1.63776026895689824414e+02, /* 0x406478D5, 0x365B39BC */ + 8.09834494656449805916e+03, /* 0x40BFA258, 0x4E6B0563 */ + 1.42538291419120476348e+05, /* 0x41016652, 0x54D38C3F */ + 8.03309257119514397345e+05, /* 0x412883DA, 0x83A52B43 */ + 8.40501579819060512818e+05, /* 0x4129A66B, 0x28DE0B3D */ + -3.43899293537866615225e+05, /* 0xC114FD6D, 0x2C9530C5 */ +]; + +const QR5: [f64; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.84085963594515531381e-11, /* 0x3DB43D8F, 0x29CC8CD9 */ + 7.32421766612684765896e-02, /* 0x3FB2BFFF, 0xD172B04C */ + 5.83563508962056953777e+00, /* 0x401757B0, 0xB9953DD3 */ + 1.35111577286449829671e+02, /* 
0x4060E392, 0x0A8788E9 */ + 1.02724376596164097464e+03, /* 0x40900CF9, 0x9DC8C481 */ + 1.98997785864605384631e+03, /* 0x409F17E9, 0x53C6E3A6 */ +]; +const QS5: [f64; 6] = [ + 8.27766102236537761883e+01, /* 0x4054B1B3, 0xFB5E1543 */ + 2.07781416421392987104e+03, /* 0x40A03BA0, 0xDA21C0CE */ + 1.88472887785718085070e+04, /* 0x40D267D2, 0x7B591E6D */ + 5.67511122894947329769e+04, /* 0x40EBB5E3, 0x97E02372 */ + 3.59767538425114471465e+04, /* 0x40E19118, 0x1F7A54A0 */ + -5.35434275601944773371e+03, /* 0xC0B4EA57, 0xBEDBC609 */ +]; + +const QR3: [f64; 6] = [ + /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + 4.37741014089738620906e-09, /* 0x3E32CD03, 0x6ADECB82 */ + 7.32411180042911447163e-02, /* 0x3FB2BFEE, 0x0E8D0842 */ + 3.34423137516170720929e+00, /* 0x400AC0FC, 0x61149CF5 */ + 4.26218440745412650017e+01, /* 0x40454F98, 0x962DAEDD */ + 1.70808091340565596283e+02, /* 0x406559DB, 0xE25EFD1F */ + 1.66733948696651168575e+02, /* 0x4064D77C, 0x81FA21E0 */ +]; +const QS3: [f64; 6] = [ + 4.87588729724587182091e+01, /* 0x40486122, 0xBFE343A6 */ + 7.09689221056606015736e+02, /* 0x40862D83, 0x86544EB3 */ + 3.70414822620111362994e+03, /* 0x40ACF04B, 0xE44DFC63 */ + 6.46042516752568917582e+03, /* 0x40B93C6C, 0xD7C76A28 */ + 2.51633368920368957333e+03, /* 0x40A3A8AA, 0xD94FB1C0 */ + -1.49247451836156386662e+02, /* 0xC062A7EB, 0x201CF40F */ +]; + +const QR2: [f64; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.50444444886983272379e-07, /* 0x3E84313B, 0x54F76BDB */ + 7.32234265963079278272e-02, /* 0x3FB2BEC5, 0x3E883E34 */ + 1.99819174093815998816e+00, /* 0x3FFFF897, 0xE727779C */ + 1.44956029347885735348e+01, /* 0x402CFDBF, 0xAAF96FE5 */ + 3.16662317504781540833e+01, /* 0x403FAA8E, 0x29FBDC4A */ + 1.62527075710929267416e+01, /* 0x403040B1, 0x71814BB4 */ +]; +const QS2: [f64; 6] = [ + 3.03655848355219184498e+01, /* 0x403E5D96, 0xF7C07AED */ + 2.69348118608049844624e+02, /* 0x4070D591, 0xE4D14B40 */ + 8.44783757595320139444e+02, /* 0x408A6645, 0x22B3BF22 */ + 
8.82935845112488550512e+02, /* 0x408B977C, 0x9C5CC214 */ + 2.12666388511798828631e+02, /* 0x406A9553, 0x0E001365 */ + -5.31095493882666946917e+00, /* 0xC0153E6A, 0xF8B32931 */ +]; + +fn qzero(x: f64) -> f64 { + let p: &[f64; 6]; + let q: &[f64; 6]; + let s: f64; + let r: f64; + let z: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + if ix >= 0x40200000 { + p = &QR8; + q = &QS8; + } else if ix >= 0x40122E8B { + p = &QR5; + q = &QS5; + } else if ix >= 0x4006DB6D { + p = &QR3; + q = &QS3; + } else + /*ix >= 0x40000000*/ + { + p = &QR2; + q = &QS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); + return (-0.125 + r / s) / x; +} diff --git a/libm/src/math/j0f.rs b/libm/src/math/j0f.rs index e2faed0b2..91c03dbbc 100644 --- a/libm/src/math/j0f.rs +++ b/libm/src/math/j0f.rs @@ -1,330 +1,359 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_j0f.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. 
- * ==================================================== - */ - -use super::{cosf, fabsf, logf, sinf, sqrtf}; - -const INVSQRTPI: f32 = 5.6418961287e-01; /* 0x3f106ebb */ -const TPI: f32 = 6.3661974669e-01; /* 0x3f22f983 */ - -fn common(ix: u32, x: f32, y0: bool) -> f32 -{ - let z: f32; - let s: f32; - let mut c: f32; - let mut ss: f32; - let mut cc: f32; - /* - * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) - * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) - */ - s = sinf(x); - c = cosf(x); - if y0 { - c = -c; - } - cc = s+c; - if ix < 0x7f000000 { - ss = s-c; - z = -cosf(2.0*x); - if s*c < 0.0 { - cc = z/ss; - } else { - ss = z/cc; - } - if ix < 0x58800000 { - if y0 { - ss = -ss; - } - cc = pzerof(x)*cc-qzerof(x)*ss; - } - } - return INVSQRTPI*cc/sqrtf(x); -} - -/* R0/S0 on [0, 2.00] */ -const R02: f32 = 1.5625000000e-02; /* 0x3c800000 */ -const R03: f32 = -1.8997929874e-04; /* 0xb947352e */ -const R04: f32 = 1.8295404516e-06; /* 0x35f58e88 */ -const R05: f32 = -4.6183270541e-09; /* 0xb19eaf3c */ -const S01: f32 = 1.5619102865e-02; /* 0x3c7fe744 */ -const S02: f32 = 1.1692678527e-04; /* 0x38f53697 */ -const S03: f32 = 5.1354652442e-07; /* 0x3509daa6 */ -const S04: f32 = 1.1661400734e-09; /* 0x30a045e8 */ - -pub fn j0f(mut x: f32) -> f32 -{ - let z: f32; - let r: f32; - let s: f32; - let mut ix: u32; - - ix = x.to_bits(); - ix &= 0x7fffffff; - if ix >= 0x7f800000 { - return 1.0/(x*x); - } - x = fabsf(x); - - if ix >= 0x40000000 { /* |x| >= 2 */ - /* large ulp error near zeros */ - return common(ix, x, false); - } - if ix >= 0x3a000000 { /* |x| >= 2**-11 */ - /* up to 4ulp error near 2 */ - z = x*x; - r = z*(R02+z*(R03+z*(R04+z*R05))); - s = 1.0+z*(S01+z*(S02+z*(S03+z*S04))); - return (1.0+x/2.0)*(1.0-x/2.0) + z*(r/s); - } - if ix >= 0x21800000 { /* |x| >= 2**-60 */ - x = 0.25*x*x; - } - return 1.0 - x; -} - -const U00: f32 = -7.3804296553e-02; /* 0xbd9726b5 */ -const U01: f32 = 1.7666645348e-01; /* 0x3e34e80d */ -const U02: f32 = 
-1.3818567619e-02; /* 0xbc626746 */ -const U03: f32 = 3.4745343146e-04; /* 0x39b62a69 */ -const U04: f32 = -3.8140706238e-06; /* 0xb67ff53c */ -const U05: f32 = 1.9559013964e-08; /* 0x32a802ba */ -const U06: f32 = -3.9820518410e-11; /* 0xae2f21eb */ -const V01: f32 = 1.2730483897e-02; /* 0x3c509385 */ -const V02: f32 = 7.6006865129e-05; /* 0x389f65e0 */ -const V03: f32 = 2.5915085189e-07; /* 0x348b216c */ -const V04: f32 = 4.4111031494e-10; /* 0x2ff280c2 */ - -pub fn y0f(x: f32) -> f32 -{ - let z: f32; - let u: f32; - let v: f32; - let ix: u32; - - ix = x.to_bits(); - if (ix & 0x7fffffff) == 0 { - return -1.0/0.0; - } - if (ix>>31) !=0 { - return 0.0/0.0; - } - if ix >= 0x7f800000 { - return 1.0/x; - } - if ix >= 0x40000000 { /* |x| >= 2.0 */ - /* large ulp error near zeros */ - return common(ix,x,true); - } - if ix >= 0x39000000 { /* x >= 2**-13 */ - /* large ulp error at x ~= 0.89 */ - z = x*x; - u = U00+z*(U01+z*(U02+z*(U03+z*(U04+z*(U05+z*U06))))); - v = 1.0+z*(V01+z*(V02+z*(V03+z*V04))); - return u/v + TPI*(j0f(x)*logf(x)); - } - return U00 + TPI*logf(x); -} - -/* The asymptotic expansions of pzero is - * 1 - 9/128 s^2 + 11025/98304 s^4 - ..., where s = 1/x. - * For x >= 2, We approximate pzero by - * pzero(x) = 1 + (R/S) - * where R = pR0 + pR1*s^2 + pR2*s^4 + ... + pR5*s^10 - * S = 1 + pS0*s^2 + ... 
+ pS4*s^10 - * and - * | pzero(x)-1-R/S | <= 2 ** ( -60.26) - */ -const PR8: [f32; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ - 0.0000000000e+00, /* 0x00000000 */ - -7.0312500000e-02, /* 0xbd900000 */ - -8.0816707611e+00, /* 0xc1014e86 */ - -2.5706311035e+02, /* 0xc3808814 */ - -2.4852163086e+03, /* 0xc51b5376 */ - -5.2530439453e+03, /* 0xc5a4285a */ -]; -const PS8: [f32; 5] = [ - 1.1653436279e+02, /* 0x42e91198 */ - 3.8337448730e+03, /* 0x456f9beb */ - 4.0597855469e+04, /* 0x471e95db */ - 1.1675296875e+05, /* 0x47e4087c */ - 4.7627726562e+04, /* 0x473a0bba */ -]; -const PR5: [f32; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ - -1.1412546255e-11, /* 0xad48c58a */ - -7.0312492549e-02, /* 0xbd8fffff */ - -4.1596107483e+00, /* 0xc0851b88 */ - -6.7674766541e+01, /* 0xc287597b */ - -3.3123129272e+02, /* 0xc3a59d9b */ - -3.4643338013e+02, /* 0xc3ad3779 */ -]; -const PS5: [f32; 5] = [ - 6.0753936768e+01, /* 0x42730408 */ - 1.0512523193e+03, /* 0x44836813 */ - 5.9789707031e+03, /* 0x45bad7c4 */ - 9.6254453125e+03, /* 0x461665c8 */ - 2.4060581055e+03, /* 0x451660ee */ -]; - -const PR3: [f32; 6] = [/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ - -2.5470459075e-09, /* 0xb12f081b */ - -7.0311963558e-02, /* 0xbd8fffb8 */ - -2.4090321064e+00, /* 0xc01a2d95 */ - -2.1965976715e+01, /* 0xc1afba52 */ - -5.8079170227e+01, /* 0xc2685112 */ - -3.1447946548e+01, /* 0xc1fb9565 */ -]; -const PS3: [f32; 5] = [ - 3.5856033325e+01, /* 0x420f6c94 */ - 3.6151397705e+02, /* 0x43b4c1ca */ - 1.1936077881e+03, /* 0x44953373 */ - 1.1279968262e+03, /* 0x448cffe6 */ - 1.7358093262e+02, /* 0x432d94b8 */ -]; - -const PR2: [f32; 6] = [/* for x in [2.8570,2]=1/[0.3499,0.5] */ - -8.8753431271e-08, /* 0xb3be98b7 */ - -7.0303097367e-02, /* 0xbd8ffb12 */ - -1.4507384300e+00, /* 0xbfb9b1cc */ - -7.6356959343e+00, /* 0xc0f4579f */ - -1.1193166733e+01, /* 0xc1331736 */ - -3.2336456776e+00, /* 0xc04ef40d */ -]; -const PS2: [f32; 5] = [ - 2.2220300674e+01, /* 0x41b1c32d */ - 1.3620678711e+02, /* 
0x430834f0 */ - 2.7047027588e+02, /* 0x43873c32 */ - 1.5387539673e+02, /* 0x4319e01a */ - 1.4657617569e+01, /* 0x416a859a */ -]; - -fn pzerof(x: f32) -> f32 -{ - let p: &[f32; 6]; - let q: &[f32; 5]; - let z: f32; - let r: f32; - let s: f32; - let mut ix: u32; - - ix = x.to_bits(); - ix &= 0x7fffffff; - if ix >= 0x41000000 {p = &PR8; q = &PS8;} - else if ix >= 0x409173eb {p = &PR5; q = &PS5;} - else if ix >= 0x4036d917 {p = &PR3; q = &PS3;} - else /*ix >= 0x40000000*/{p = &PR2; q = &PS2;} - z = 1.0/(x*x); - r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); - s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); - return 1.0 + r/s; -} - - -/* For x >= 8, the asymptotic expansions of qzero is - * -1/8 s + 75/1024 s^3 - ..., where s = 1/x. - * We approximate pzero by - * qzero(x) = s*(-1.25 + (R/S)) - * where R = qR0 + qR1*s^2 + qR2*s^4 + ... + qR5*s^10 - * S = 1 + qS0*s^2 + ... + qS5*s^12 - * and - * | qzero(x)/s +1.25-R/S | <= 2 ** ( -61.22) - */ -const QR8: [f32; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ - 0.0000000000e+00, /* 0x00000000 */ - 7.3242187500e-02, /* 0x3d960000 */ - 1.1768206596e+01, /* 0x413c4a93 */ - 5.5767340088e+02, /* 0x440b6b19 */ - 8.8591972656e+03, /* 0x460a6cca */ - 3.7014625000e+04, /* 0x471096a0 */ -]; -const QS8: [f32; 6] = [ - 1.6377603149e+02, /* 0x4323c6aa */ - 8.0983447266e+03, /* 0x45fd12c2 */ - 1.4253829688e+05, /* 0x480b3293 */ - 8.0330925000e+05, /* 0x49441ed4 */ - 8.4050156250e+05, /* 0x494d3359 */ - -3.4389928125e+05, /* 0xc8a7eb69 */ -]; - -const QR5: [f32; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ - 1.8408595828e-11, /* 0x2da1ec79 */ - 7.3242180049e-02, /* 0x3d95ffff */ - 5.8356351852e+00, /* 0x40babd86 */ - 1.3511157227e+02, /* 0x43071c90 */ - 1.0272437744e+03, /* 0x448067cd */ - 1.9899779053e+03, /* 0x44f8bf4b */ -]; -const QS5: [f32; 6] = [ - 8.2776611328e+01, /* 0x42a58da0 */ - 2.0778142090e+03, /* 0x4501dd07 */ - 1.8847289062e+04, /* 0x46933e94 */ - 5.6751113281e+04, /* 0x475daf1d */ - 3.5976753906e+04, /* 
0x470c88c1 */ - -5.3543427734e+03, /* 0xc5a752be */ -]; - -const QR3: [f32; 6] = [/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ - 4.3774099900e-09, /* 0x3196681b */ - 7.3241114616e-02, /* 0x3d95ff70 */ - 3.3442313671e+00, /* 0x405607e3 */ - 4.2621845245e+01, /* 0x422a7cc5 */ - 1.7080809021e+02, /* 0x432acedf */ - 1.6673394775e+02, /* 0x4326bbe4 */ -]; -const QS3: [f32; 6] = [ - 4.8758872986e+01, /* 0x42430916 */ - 7.0968920898e+02, /* 0x44316c1c */ - 3.7041481934e+03, /* 0x4567825f */ - 6.4604252930e+03, /* 0x45c9e367 */ - 2.5163337402e+03, /* 0x451d4557 */ - -1.4924745178e+02, /* 0xc3153f59 */ -]; - -const QR2: [f32; 6] = [/* for x in [2.8570,2]=1/[0.3499,0.5] */ - 1.5044444979e-07, /* 0x342189db */ - 7.3223426938e-02, /* 0x3d95f62a */ - 1.9981917143e+00, /* 0x3fffc4bf */ - 1.4495602608e+01, /* 0x4167edfd */ - 3.1666231155e+01, /* 0x41fd5471 */ - 1.6252708435e+01, /* 0x4182058c */ -]; -const QS2: [f32; 6] = [ - 3.0365585327e+01, /* 0x41f2ecb8 */ - 2.6934811401e+02, /* 0x4386ac8f */ - 8.4478375244e+02, /* 0x44533229 */ - 8.8293585205e+02, /* 0x445cbbe5 */ - 2.1266638184e+02, /* 0x4354aa98 */ - -5.3109550476e+00, /* 0xc0a9f358 */ -]; - -fn qzerof(x: f32) -> f32 -{ - let p: &[f32; 6]; - let q: &[f32; 6]; - let s: f32; - let r: f32; - let z: f32; - let mut ix: u32; - - ix = x.to_bits(); - ix &= 0x7fffffff; - if ix >= 0x41000000 {p = &QR8; q = &QS8;} - else if ix >= 0x409173eb {p = &QR5; q = &QS5;} - else if ix >= 0x4036d917 {p = &QR3; q = &QS3;} - else /*ix >= 0x40000000*/{p = &QR2; q = &QS2;} - z = 1.0/(x*x); - r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); - s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); - return (-0.125 + r/s)/x; -} +/* origin: FreeBSD /usr/src/lib/msun/src/e_j0f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 
+ * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{cosf, fabsf, logf, sinf, sqrtf}; + +const INVSQRTPI: f32 = 5.6418961287e-01; /* 0x3f106ebb */ +const TPI: f32 = 6.3661974669e-01; /* 0x3f22f983 */ + +fn common(ix: u32, x: f32, y0: bool) -> f32 { + let z: f32; + let s: f32; + let mut c: f32; + let mut ss: f32; + let mut cc: f32; + /* + * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) + * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) + */ + s = sinf(x); + c = cosf(x); + if y0 { + c = -c; + } + cc = s + c; + if ix < 0x7f000000 { + ss = s - c; + z = -cosf(2.0 * x); + if s * c < 0.0 { + cc = z / ss; + } else { + ss = z / cc; + } + if ix < 0x58800000 { + if y0 { + ss = -ss; + } + cc = pzerof(x) * cc - qzerof(x) * ss; + } + } + return INVSQRTPI * cc / sqrtf(x); +} + +/* R0/S0 on [0, 2.00] */ +const R02: f32 = 1.5625000000e-02; /* 0x3c800000 */ +const R03: f32 = -1.8997929874e-04; /* 0xb947352e */ +const R04: f32 = 1.8295404516e-06; /* 0x35f58e88 */ +const R05: f32 = -4.6183270541e-09; /* 0xb19eaf3c */ +const S01: f32 = 1.5619102865e-02; /* 0x3c7fe744 */ +const S02: f32 = 1.1692678527e-04; /* 0x38f53697 */ +const S03: f32 = 5.1354652442e-07; /* 0x3509daa6 */ +const S04: f32 = 1.1661400734e-09; /* 0x30a045e8 */ + +pub fn j0f(mut x: f32) -> f32 { + let z: f32; + let r: f32; + let s: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x7f800000 { + return 1.0 / (x * x); + } + x = fabsf(x); + + if ix >= 0x40000000 { + /* |x| >= 2 */ + /* large ulp error near zeros */ + return common(ix, x, false); + } + if ix >= 0x3a000000 { + /* |x| >= 2**-11 */ + /* up to 4ulp error near 2 */ + z = x * x; + r = z * (R02 + z * (R03 + z * (R04 + z * R05))); + s = 1.0 + z * (S01 + z * (S02 + z * (S03 + z * S04))); + return (1.0 + x 
/ 2.0) * (1.0 - x / 2.0) + z * (r / s); + } + if ix >= 0x21800000 { + /* |x| >= 2**-60 */ + x = 0.25 * x * x; + } + return 1.0 - x; +} + +const U00: f32 = -7.3804296553e-02; /* 0xbd9726b5 */ +const U01: f32 = 1.7666645348e-01; /* 0x3e34e80d */ +const U02: f32 = -1.3818567619e-02; /* 0xbc626746 */ +const U03: f32 = 3.4745343146e-04; /* 0x39b62a69 */ +const U04: f32 = -3.8140706238e-06; /* 0xb67ff53c */ +const U05: f32 = 1.9559013964e-08; /* 0x32a802ba */ +const U06: f32 = -3.9820518410e-11; /* 0xae2f21eb */ +const V01: f32 = 1.2730483897e-02; /* 0x3c509385 */ +const V02: f32 = 7.6006865129e-05; /* 0x389f65e0 */ +const V03: f32 = 2.5915085189e-07; /* 0x348b216c */ +const V04: f32 = 4.4111031494e-10; /* 0x2ff280c2 */ + +pub fn y0f(x: f32) -> f32 { + let z: f32; + let u: f32; + let v: f32; + let ix: u32; + + ix = x.to_bits(); + if (ix & 0x7fffffff) == 0 { + return -1.0 / 0.0; + } + if (ix >> 31) != 0 { + return 0.0 / 0.0; + } + if ix >= 0x7f800000 { + return 1.0 / x; + } + if ix >= 0x40000000 { + /* |x| >= 2.0 */ + /* large ulp error near zeros */ + return common(ix, x, true); + } + if ix >= 0x39000000 { + /* x >= 2**-13 */ + /* large ulp error at x ~= 0.89 */ + z = x * x; + u = U00 + z * (U01 + z * (U02 + z * (U03 + z * (U04 + z * (U05 + z * U06))))); + v = 1.0 + z * (V01 + z * (V02 + z * (V03 + z * V04))); + return u / v + TPI * (j0f(x) * logf(x)); + } + return U00 + TPI * logf(x); +} + +/* The asymptotic expansions of pzero is + * 1 - 9/128 s^2 + 11025/98304 s^4 - ..., where s = 1/x. + * For x >= 2, We approximate pzero by + * pzero(x) = 1 + (R/S) + * where R = pR0 + pR1*s^2 + pR2*s^4 + ... + pR5*s^10 + * S = 1 + pS0*s^2 + ... 
+ pS4*s^10 + * and + * | pzero(x)-1-R/S | <= 2 ** ( -60.26) + */ +const PR8: [f32; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + -7.0312500000e-02, /* 0xbd900000 */ + -8.0816707611e+00, /* 0xc1014e86 */ + -2.5706311035e+02, /* 0xc3808814 */ + -2.4852163086e+03, /* 0xc51b5376 */ + -5.2530439453e+03, /* 0xc5a4285a */ +]; +const PS8: [f32; 5] = [ + 1.1653436279e+02, /* 0x42e91198 */ + 3.8337448730e+03, /* 0x456f9beb */ + 4.0597855469e+04, /* 0x471e95db */ + 1.1675296875e+05, /* 0x47e4087c */ + 4.7627726562e+04, /* 0x473a0bba */ +]; +const PR5: [f32; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -1.1412546255e-11, /* 0xad48c58a */ + -7.0312492549e-02, /* 0xbd8fffff */ + -4.1596107483e+00, /* 0xc0851b88 */ + -6.7674766541e+01, /* 0xc287597b */ + -3.3123129272e+02, /* 0xc3a59d9b */ + -3.4643338013e+02, /* 0xc3ad3779 */ +]; +const PS5: [f32; 5] = [ + 6.0753936768e+01, /* 0x42730408 */ + 1.0512523193e+03, /* 0x44836813 */ + 5.9789707031e+03, /* 0x45bad7c4 */ + 9.6254453125e+03, /* 0x461665c8 */ + 2.4060581055e+03, /* 0x451660ee */ +]; + +const PR3: [f32; 6] = [ + /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + -2.5470459075e-09, /* 0xb12f081b */ + -7.0311963558e-02, /* 0xbd8fffb8 */ + -2.4090321064e+00, /* 0xc01a2d95 */ + -2.1965976715e+01, /* 0xc1afba52 */ + -5.8079170227e+01, /* 0xc2685112 */ + -3.1447946548e+01, /* 0xc1fb9565 */ +]; +const PS3: [f32; 5] = [ + 3.5856033325e+01, /* 0x420f6c94 */ + 3.6151397705e+02, /* 0x43b4c1ca */ + 1.1936077881e+03, /* 0x44953373 */ + 1.1279968262e+03, /* 0x448cffe6 */ + 1.7358093262e+02, /* 0x432d94b8 */ +]; + +const PR2: [f32; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + -8.8753431271e-08, /* 0xb3be98b7 */ + -7.0303097367e-02, /* 0xbd8ffb12 */ + -1.4507384300e+00, /* 0xbfb9b1cc */ + -7.6356959343e+00, /* 0xc0f4579f */ + -1.1193166733e+01, /* 0xc1331736 */ + -3.2336456776e+00, /* 0xc04ef40d */ +]; +const PS2: [f32; 5] = [ + 2.2220300674e+01, /* 0x41b1c32d */ + 1.3620678711e+02, 
/* 0x430834f0 */ + 2.7047027588e+02, /* 0x43873c32 */ + 1.5387539673e+02, /* 0x4319e01a */ + 1.4657617569e+01, /* 0x416a859a */ +]; + +fn pzerof(x: f32) -> f32 { + let p: &[f32; 6]; + let q: &[f32; 5]; + let z: f32; + let r: f32; + let s: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x41000000 { + p = &PR8; + q = &PS8; + } else if ix >= 0x409173eb { + p = &PR5; + q = &PS5; + } else if ix >= 0x4036d917 { + p = &PR3; + q = &PS3; + } else + /*ix >= 0x40000000*/ + { + p = &PR2; + q = &PS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4])))); + return 1.0 + r / s; +} + +/* For x >= 8, the asymptotic expansions of qzero is + * -1/8 s + 75/1024 s^3 - ..., where s = 1/x. + * We approximate pzero by + * qzero(x) = s*(-1.25 + (R/S)) + * where R = qR0 + qR1*s^2 + qR2*s^4 + ... + qR5*s^10 + * S = 1 + qS0*s^2 + ... + qS5*s^12 + * and + * | qzero(x)/s +1.25-R/S | <= 2 ** ( -61.22) + */ +const QR8: [f32; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + 7.3242187500e-02, /* 0x3d960000 */ + 1.1768206596e+01, /* 0x413c4a93 */ + 5.5767340088e+02, /* 0x440b6b19 */ + 8.8591972656e+03, /* 0x460a6cca */ + 3.7014625000e+04, /* 0x471096a0 */ +]; +const QS8: [f32; 6] = [ + 1.6377603149e+02, /* 0x4323c6aa */ + 8.0983447266e+03, /* 0x45fd12c2 */ + 1.4253829688e+05, /* 0x480b3293 */ + 8.0330925000e+05, /* 0x49441ed4 */ + 8.4050156250e+05, /* 0x494d3359 */ + -3.4389928125e+05, /* 0xc8a7eb69 */ +]; + +const QR5: [f32; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.8408595828e-11, /* 0x2da1ec79 */ + 7.3242180049e-02, /* 0x3d95ffff */ + 5.8356351852e+00, /* 0x40babd86 */ + 1.3511157227e+02, /* 0x43071c90 */ + 1.0272437744e+03, /* 0x448067cd */ + 1.9899779053e+03, /* 0x44f8bf4b */ +]; +const QS5: [f32; 6] = [ + 8.2776611328e+01, /* 0x42a58da0 */ + 2.0778142090e+03, /* 0x4501dd07 */ + 1.8847289062e+04, /* 
0x46933e94 */ + 5.6751113281e+04, /* 0x475daf1d */ + 3.5976753906e+04, /* 0x470c88c1 */ + -5.3543427734e+03, /* 0xc5a752be */ +]; + +const QR3: [f32; 6] = [ + /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + 4.3774099900e-09, /* 0x3196681b */ + 7.3241114616e-02, /* 0x3d95ff70 */ + 3.3442313671e+00, /* 0x405607e3 */ + 4.2621845245e+01, /* 0x422a7cc5 */ + 1.7080809021e+02, /* 0x432acedf */ + 1.6673394775e+02, /* 0x4326bbe4 */ +]; +const QS3: [f32; 6] = [ + 4.8758872986e+01, /* 0x42430916 */ + 7.0968920898e+02, /* 0x44316c1c */ + 3.7041481934e+03, /* 0x4567825f */ + 6.4604252930e+03, /* 0x45c9e367 */ + 2.5163337402e+03, /* 0x451d4557 */ + -1.4924745178e+02, /* 0xc3153f59 */ +]; + +const QR2: [f32; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.5044444979e-07, /* 0x342189db */ + 7.3223426938e-02, /* 0x3d95f62a */ + 1.9981917143e+00, /* 0x3fffc4bf */ + 1.4495602608e+01, /* 0x4167edfd */ + 3.1666231155e+01, /* 0x41fd5471 */ + 1.6252708435e+01, /* 0x4182058c */ +]; +const QS2: [f32; 6] = [ + 3.0365585327e+01, /* 0x41f2ecb8 */ + 2.6934811401e+02, /* 0x4386ac8f */ + 8.4478375244e+02, /* 0x44533229 */ + 8.8293585205e+02, /* 0x445cbbe5 */ + 2.1266638184e+02, /* 0x4354aa98 */ + -5.3109550476e+00, /* 0xc0a9f358 */ +]; + +fn qzerof(x: f32) -> f32 { + let p: &[f32; 6]; + let q: &[f32; 6]; + let s: f32; + let r: f32; + let z: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x41000000 { + p = &QR8; + q = &QS8; + } else if ix >= 0x409173eb { + p = &QR5; + q = &QS5; + } else if ix >= 0x4036d917 { + p = &QR3; + q = &QS3; + } else + /*ix >= 0x40000000*/ + { + p = &QR2; + q = &QS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); + return (-0.125 + r / s) / x; +} diff --git a/libm/src/math/j1.rs b/libm/src/math/j1.rs index 92289a613..02a65ca5a 100644 --- a/libm/src/math/j1.rs +++ b/libm/src/math/j1.rs @@ -1,387 
+1,414 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_j1.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* j1(x), y1(x) - * Bessel function of the first and second kinds of order zero. - * Method -- j1(x): - * 1. For tiny x, we use j1(x) = x/2 - x^3/16 + x^5/384 - ... - * 2. Reduce x to |x| since j1(x)=-j1(-x), and - * for x in (0,2) - * j1(x) = x/2 + x*z*R0/S0, where z = x*x; - * (precision: |j1/x - 1/2 - R0/S0 |<2**-61.51 ) - * for x in (2,inf) - * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x1)-q1(x)*sin(x1)) - * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) - * where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1) - * as follow: - * cos(x1) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) - * = 1/sqrt(2) * (sin(x) - cos(x)) - * sin(x1) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) - * = -1/sqrt(2) * (sin(x) + cos(x)) - * (To avoid cancellation, use - * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) - * to compute the worse one.) - * - * 3 Special cases - * j1(nan)= nan - * j1(0) = 0 - * j1(inf) = 0 - * - * Method -- y1(x): - * 1. screen out x<=0 cases: y1(0)=-inf, y1(x<0)=NaN - * 2. For x<2. - * Since - * y1(x) = 2/pi*(j1(x)*(ln(x/2)+Euler)-1/x-x/2+5/64*x^3-...) - * therefore y1(x)-2/pi*j1(x)*ln(x)-1/x is an odd function. - * We use the following function to approximate y1, - * y1(x) = x*U(z)/V(z) + (2/pi)*(j1(x)*ln(x)-1/x), z= x^2 - * where for x in [0,2] (abs err less than 2**-65.89) - * U(z) = U0[0] + U0[1]*z + ... + U0[4]*z^4 - * V(z) = 1 + v0[0]*z + ... + v0[4]*z^5 - * Note: For tiny x, 1/x dominate y1 and hence - * y1(tiny) = -2/pi/tiny, (choose tiny<2**-54) - * 3. For x>=2. 
- * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) - * where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1) - * by method mentioned above. - */ - -use super::{cos, get_high_word, get_low_word, fabs, log, sin, sqrt}; - -const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ -const TPI: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ - -fn common(ix: u32, x: f64, y1: bool, sign: bool) -> f64 -{ - let z: f64; - let mut s: f64; - let c: f64; - let mut ss: f64; - let mut cc: f64; - - /* - * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x-3pi/4)-q1(x)*sin(x-3pi/4)) - * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x-3pi/4)+q1(x)*cos(x-3pi/4)) - * - * sin(x-3pi/4) = -(sin(x) + cos(x))/sqrt(2) - * cos(x-3pi/4) = (sin(x) - cos(x))/sqrt(2) - * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) - */ - s = sin(x); - if y1 { - s = -s; - } - c = cos(x); - cc = s-c; - if ix < 0x7fe00000 { - /* avoid overflow in 2*x */ - ss = -s-c; - z = cos(2.0*x); - if s*c > 0.0 { - cc = z/ss; - } else { - ss = z/cc; - } - if ix < 0x48000000 { - if y1 { - ss = -ss; - } - cc = pone(x)*cc-qone(x)*ss; - } - } - if sign { - cc = -cc; - } - return INVSQRTPI*cc/sqrt(x); -} - -/* R0/S0 on [0,2] */ -const R00: f64 = -6.25000000000000000000e-02; /* 0xBFB00000, 0x00000000 */ -const R01: f64 = 1.40705666955189706048e-03; /* 0x3F570D9F, 0x98472C61 */ -const R02: f64 = -1.59955631084035597520e-05; /* 0xBEF0C5C6, 0xBA169668 */ -const R03: f64 = 4.96727999609584448412e-08; /* 0x3E6AAAFA, 0x46CA0BD9 */ -const S01: f64 = 1.91537599538363460805e-02; /* 0x3F939D0B, 0x12637E53 */ -const S02: f64 = 1.85946785588630915560e-04; /* 0x3F285F56, 0xB9CDF664 */ -const S03: f64 = 1.17718464042623683263e-06; /* 0x3EB3BFF8, 0x333F8498 */ -const S04: f64 = 5.04636257076217042715e-09; /* 0x3E35AC88, 0xC97DFF2C */ -const S05: f64 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */ - -pub fn j1(x: f64) -> f64 -{ - let mut z: f64; - let r: f64; - let s: f64; - let mut ix: u32; - let sign: bool; - - 
ix = get_high_word(x); - sign = (ix>>31) != 0; - ix &= 0x7fffffff; - if ix >= 0x7ff00000 { - return 1.0/(x*x); - } - if ix >= 0x40000000 { /* |x| >= 2 */ - return common(ix, fabs(x), false, sign); - } - if ix >= 0x38000000 { /* |x| >= 2**-127 */ - z = x*x; - r = z*(R00+z*(R01+z*(R02+z*R03))); - s = 1.0+z*(S01+z*(S02+z*(S03+z*(S04+z*S05)))); - z = r/s; - } else { - /* avoid underflow, raise inexact if x!=0 */ - z = x; - } - return (0.5 + z)*x; -} - -const U0: [f64; 5] = [ - -1.96057090646238940668e-01, /* 0xBFC91866, 0x143CBC8A */ - 5.04438716639811282616e-02, /* 0x3FA9D3C7, 0x76292CD1 */ - -1.91256895875763547298e-03, /* 0xBF5F55E5, 0x4844F50F */ - 2.35252600561610495928e-05, /* 0x3EF8AB03, 0x8FA6B88E */ - -9.19099158039878874504e-08, /* 0xBE78AC00, 0x569105B8 */ -]; -const V0: [f64; 5] = [ - 1.99167318236649903973e-02, /* 0x3F94650D, 0x3F4DA9F0 */ - 2.02552581025135171496e-04, /* 0x3F2A8C89, 0x6C257764 */ - 1.35608801097516229404e-06, /* 0x3EB6C05A, 0x894E8CA6 */ - 6.22741452364621501295e-09, /* 0x3E3ABF1D, 0x5BA69A86 */ - 1.66559246207992079114e-11, /* 0x3DB25039, 0xDACA772A */ -]; - -pub fn y1(x: f64) -> f64 -{ - let z: f64; - let u: f64; - let v: f64; - let ix: u32; - let lx: u32; - - ix = get_high_word(x); - lx = get_low_word(x); - - /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */ - if (ix<<1 | lx) == 0 { - return -1.0/0.0; - } - if (ix>>31) != 0 { - return 0.0/0.0; - } - if ix >= 0x7ff00000 { - return 1.0/x; - } - - if ix >= 0x40000000 { /* x >= 2 */ - return common(ix, x, true, false); - } - if ix < 0x3c900000 { /* x < 2**-54 */ - return -TPI/x; - } - z = x*x; - u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4]))); - v = 1.0+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4])))); - return x*(u/v) + TPI*(j1(x)*log(x)-1.0/x); -} - -/* For x >= 8, the asymptotic expansions of pone is - * 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x. - * We approximate pone by - * pone(x) = 1 + (R/S) - * where R = pr0 + pr1*s^2 + pr2*s^4 + ... 
+ pr5*s^10 - * S = 1 + ps0*s^2 + ... + ps4*s^10 - * and - * | pone(x)-1-R/S | <= 2 ** ( -60.06) - */ - -const PR8: [f64; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ - 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ - 1.17187499999988647970e-01, /* 0x3FBDFFFF, 0xFFFFFCCE */ - 1.32394806593073575129e+01, /* 0x402A7A9D, 0x357F7FCE */ - 4.12051854307378562225e+02, /* 0x4079C0D4, 0x652EA590 */ - 3.87474538913960532227e+03, /* 0x40AE457D, 0xA3A532CC */ - 7.91447954031891731574e+03, /* 0x40BEEA7A, 0xC32782DD */ -]; -const PS8: [f64; 5] = [ - 1.14207370375678408436e+02, /* 0x405C8D45, 0x8E656CAC */ - 3.65093083420853463394e+03, /* 0x40AC85DC, 0x964D274F */ - 3.69562060269033463555e+04, /* 0x40E20B86, 0x97C5BB7F */ - 9.76027935934950801311e+04, /* 0x40F7D42C, 0xB28F17BB */ - 3.08042720627888811578e+04, /* 0x40DE1511, 0x697A0B2D */ -]; - -const PR5: [f64; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ - 1.31990519556243522749e-11, /* 0x3DAD0667, 0xDAE1CA7D */ - 1.17187493190614097638e-01, /* 0x3FBDFFFF, 0xE2C10043 */ - 6.80275127868432871736e+00, /* 0x401B3604, 0x6E6315E3 */ - 1.08308182990189109773e+02, /* 0x405B13B9, 0x452602ED */ - 5.17636139533199752805e+02, /* 0x40802D16, 0xD052D649 */ - 5.28715201363337541807e+02, /* 0x408085B8, 0xBB7E0CB7 */ -]; -const PS5: [f64; 5] = [ - 5.92805987221131331921e+01, /* 0x404DA3EA, 0xA8AF633D */ - 9.91401418733614377743e+02, /* 0x408EFB36, 0x1B066701 */ - 5.35326695291487976647e+03, /* 0x40B4E944, 0x5706B6FB */ - 7.84469031749551231769e+03, /* 0x40BEA4B0, 0xB8A5BB15 */ - 1.50404688810361062679e+03, /* 0x40978030, 0x036F5E51 */ -]; - -const PR3: [f64; 6] = [ - 3.02503916137373618024e-09, /* 0x3E29FC21, 0xA7AD9EDD */ - 1.17186865567253592491e-01, /* 0x3FBDFFF5, 0x5B21D17B */ - 3.93297750033315640650e+00, /* 0x400F76BC, 0xE85EAD8A */ - 3.51194035591636932736e+01, /* 0x40418F48, 0x9DA6D129 */ - 9.10550110750781271918e+01, /* 0x4056C385, 0x4D2C1837 */ - 4.85590685197364919645e+01, /* 0x4048478F, 0x8EA83EE5 */ -]; -const PS3: 
[f64; 5] = [ - 3.47913095001251519989e+01, /* 0x40416549, 0xA134069C */ - 3.36762458747825746741e+02, /* 0x40750C33, 0x07F1A75F */ - 1.04687139975775130551e+03, /* 0x40905B7C, 0x5037D523 */ - 8.90811346398256432622e+02, /* 0x408BD67D, 0xA32E31E9 */ - 1.03787932439639277504e+02, /* 0x4059F26D, 0x7C2EED53 */ -]; - -const PR2: [f64; 6] = [/* for x in [2.8570,2]=1/[0.3499,0.5] */ - 1.07710830106873743082e-07, /* 0x3E7CE9D4, 0xF65544F4 */ - 1.17176219462683348094e-01, /* 0x3FBDFF42, 0xBE760D83 */ - 2.36851496667608785174e+00, /* 0x4002F2B7, 0xF98FAEC0 */ - 1.22426109148261232917e+01, /* 0x40287C37, 0x7F71A964 */ - 1.76939711271687727390e+01, /* 0x4031B1A8, 0x177F8EE2 */ - 5.07352312588818499250e+00, /* 0x40144B49, 0xA574C1FE */ -]; -const PS2: [f64; 5] = [ - 2.14364859363821409488e+01, /* 0x40356FBD, 0x8AD5ECDC */ - 1.25290227168402751090e+02, /* 0x405F5293, 0x14F92CD5 */ - 2.32276469057162813669e+02, /* 0x406D08D8, 0xD5A2DBD9 */ - 1.17679373287147100768e+02, /* 0x405D6B7A, 0xDA1884A9 */ - 8.36463893371618283368e+00, /* 0x4020BAB1, 0xF44E5192 */ -]; - -fn pone(x: f64) -> f64 -{ - let p: &[f64; 6]; - let q: &[f64; 5]; - let z: f64; - let r: f64; - let s: f64; - let mut ix: u32; - - ix = get_high_word(x); - ix &= 0x7fffffff; - if ix >= 0x40200000 {p = &PR8; q = &PS8;} - else if ix >= 0x40122E8B {p = &PR5; q = &PS5;} - else if ix >= 0x4006DB6D {p = &PR3; q = &PS3;} - else /*ix >= 0x40000000*/{p = &PR2; q = &PS2;} - z = 1.0/(x*x); - r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); - s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); - return 1.0+ r/s; -} - -/* For x >= 8, the asymptotic expansions of qone is - * 3/8 s - 105/1024 s^3 - ..., where s = 1/x. - * We approximate pone by - * qone(x) = s*(0.375 + (R/S)) - * where R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10 - * S = 1 + qs1*s^2 + ... 
+ qs6*s^12 - * and - * | qone(x)/s -0.375-R/S | <= 2 ** ( -61.13) - */ - -const QR8: [f64; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ - 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ - -1.02539062499992714161e-01, /* 0xBFBA3FFF, 0xFFFFFDF3 */ - -1.62717534544589987888e+01, /* 0xC0304591, 0xA26779F7 */ - -7.59601722513950107896e+02, /* 0xC087BCD0, 0x53E4B576 */ - -1.18498066702429587167e+04, /* 0xC0C724E7, 0x40F87415 */ - -4.84385124285750353010e+04, /* 0xC0E7A6D0, 0x65D09C6A */ -]; -const QS8: [f64; 6] = [ - 1.61395369700722909556e+02, /* 0x40642CA6, 0xDE5BCDE5 */ - 7.82538599923348465381e+03, /* 0x40BE9162, 0xD0D88419 */ - 1.33875336287249578163e+05, /* 0x4100579A, 0xB0B75E98 */ - 7.19657723683240939863e+05, /* 0x4125F653, 0x72869C19 */ - 6.66601232617776375264e+05, /* 0x412457D2, 0x7719AD5C */ - -2.94490264303834643215e+05, /* 0xC111F969, 0x0EA5AA18 */ -]; - -const QR5: [f64; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ - -2.08979931141764104297e-11, /* 0xBDB6FA43, 0x1AA1A098 */ - -1.02539050241375426231e-01, /* 0xBFBA3FFF, 0xCB597FEF */ - -8.05644828123936029840e+00, /* 0xC0201CE6, 0xCA03AD4B */ - -1.83669607474888380239e+02, /* 0xC066F56D, 0x6CA7B9B0 */ - -1.37319376065508163265e+03, /* 0xC09574C6, 0x6931734F */ - -2.61244440453215656817e+03, /* 0xC0A468E3, 0x88FDA79D */ -]; -const QS5: [f64; 6] = [ - 8.12765501384335777857e+01, /* 0x405451B2, 0xFF5A11B2 */ - 1.99179873460485964642e+03, /* 0x409F1F31, 0xE77BF839 */ - 1.74684851924908907677e+04, /* 0x40D10F1F, 0x0D64CE29 */ - 4.98514270910352279316e+04, /* 0x40E8576D, 0xAABAD197 */ - 2.79480751638918118260e+04, /* 0x40DB4B04, 0xCF7C364B */ - -4.71918354795128470869e+03, /* 0xC0B26F2E, 0xFCFFA004 */ -]; - -const QR3: [f64; 6] = [ - -5.07831226461766561369e-09, /* 0xBE35CFA9, 0xD38FC84F */ - -1.02537829820837089745e-01, /* 0xBFBA3FEB, 0x51AEED54 */ - -4.61011581139473403113e+00, /* 0xC01270C2, 0x3302D9FF */ - -5.78472216562783643212e+01, /* 0xC04CEC71, 0xC25D16DA */ - -2.28244540737631695038e+02, 
/* 0xC06C87D3, 0x4718D55F */ - -2.19210128478909325622e+02, /* 0xC06B66B9, 0x5F5C1BF6 */ -]; -const QS3: [f64; 6] = [ - 4.76651550323729509273e+01, /* 0x4047D523, 0xCCD367E4 */ - 6.73865112676699709482e+02, /* 0x40850EEB, 0xC031EE3E */ - 3.38015286679526343505e+03, /* 0x40AA684E, 0x448E7C9A */ - 5.54772909720722782367e+03, /* 0x40B5ABBA, 0xA61D54A6 */ - 1.90311919338810798763e+03, /* 0x409DBC7A, 0x0DD4DF4B */ - -1.35201191444307340817e+02, /* 0xC060E670, 0x290A311F */ -]; - -const QR2: [f64; 6] = [/* for x in [2.8570,2]=1/[0.3499,0.5] */ - -1.78381727510958865572e-07, /* 0xBE87F126, 0x44C626D2 */ - -1.02517042607985553460e-01, /* 0xBFBA3E8E, 0x9148B010 */ - -2.75220568278187460720e+00, /* 0xC0060484, 0x69BB4EDA */ - -1.96636162643703720221e+01, /* 0xC033A9E2, 0xC168907F */ - -4.23253133372830490089e+01, /* 0xC04529A3, 0xDE104AAA */ - -2.13719211703704061733e+01, /* 0xC0355F36, 0x39CF6E52 */ -]; -const QS2: [f64; 6] = [ - 2.95333629060523854548e+01, /* 0x403D888A, 0x78AE64FF */ - 2.52981549982190529136e+02, /* 0x406F9F68, 0xDB821CBA */ - 7.57502834868645436472e+02, /* 0x4087AC05, 0xCE49A0F7 */ - 7.39393205320467245656e+02, /* 0x40871B25, 0x48D4C029 */ - 1.55949003336666123687e+02, /* 0x40637E5E, 0x3C3ED8D4 */ - -4.95949898822628210127e+00, /* 0xC013D686, 0xE71BE86B */ -]; - -fn qone(x: f64) -> f64 -{ - let p: &[f64; 6]; - let q: &[f64; 6]; - let s: f64; - let r: f64; - let z: f64; - let mut ix: u32; - - ix = get_high_word(x); - ix &= 0x7fffffff; - if ix >= 0x40200000 {p = &QR8; q = &QS8;} - else if ix >= 0x40122E8B {p = &QR5; q = &QS5;} - else if ix >= 0x4006DB6D {p = &QR3; q = &QS3;} - else /*ix >= 0x40000000*/{p = &QR2; q = &QS2;} - z = 1.0/(x*x); - r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); - s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); - return (0.375 + r/s)/x; -} +/* origin: FreeBSD /usr/src/lib/msun/src/e_j1.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. 
All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* j1(x), y1(x) + * Bessel function of the first and second kinds of order one. + * Method -- j1(x): + * 1. For tiny x, we use j1(x) = x/2 - x^3/16 + x^5/384 - ... + * 2. Reduce x to |x| since j1(x)=-j1(-x), and + * for x in (0,2) + * j1(x) = x/2 + x*z*R0/S0, where z = x*x; + * (precision: |j1/x - 1/2 - R0/S0 |<2**-61.51 ) + * for x in (2,inf) + * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x1)-q1(x)*sin(x1)) + * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) + * where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1) + * as follows: + * cos(x1) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * sin(x1) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (sin(x) + cos(x)) + * (To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one.) + * + * 3. Special cases + * j1(nan)= nan + * j1(0) = 0 + * j1(inf) = 0 + * + * Method -- y1(x): + * 1. screen out x<=0 cases: y1(0)=-inf, y1(x<0)=NaN + * 2. For x<2. + * Since + * y1(x) = 2/pi*(j1(x)*(ln(x/2)+Euler)-1/x-x/2+5/64*x^3-...) + * therefore y1(x)-2/pi*j1(x)*ln(x)-1/x is an odd function. + * We use the following function to approximate y1, + * y1(x) = x*U(z)/V(z) + (2/pi)*(j1(x)*ln(x)-1/x), z= x^2 + * where for x in [0,2] (abs err less than 2**-65.89) + * U(z) = U0[0] + U0[1]*z + ... + U0[4]*z^4 + * V(z) = 1 + V0[0]*z + ... + V0[4]*z^5 + * Note: For tiny x, 1/x dominates y1 and hence + * y1(tiny) = -2/pi/tiny, (choose tiny<2**-54) + * 3. For x>=2. + * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) + * where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1) + * by the method mentioned above.
+ */ + +use super::{cos, fabs, get_high_word, get_low_word, log, sin, sqrt}; + +const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ +const TPI: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ + +fn common(ix: u32, x: f64, y1: bool, sign: bool) -> f64 { + let z: f64; + let mut s: f64; + let c: f64; + let mut ss: f64; + let mut cc: f64; + + /* + * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x-3pi/4)-q1(x)*sin(x-3pi/4)) + * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x-3pi/4)+q1(x)*cos(x-3pi/4)) + * + * sin(x-3pi/4) = -(sin(x) + cos(x))/sqrt(2) + * cos(x-3pi/4) = (sin(x) - cos(x))/sqrt(2) + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + */ + s = sin(x); + if y1 { + s = -s; + } + c = cos(x); + cc = s - c; + if ix < 0x7fe00000 { + /* avoid overflow in 2*x */ + ss = -s - c; + z = cos(2.0 * x); + if s * c > 0.0 { + cc = z / ss; + } else { + ss = z / cc; + } + if ix < 0x48000000 { + if y1 { + ss = -ss; + } + cc = pone(x) * cc - qone(x) * ss; + } + } + if sign { + cc = -cc; + } + return INVSQRTPI * cc / sqrt(x); +} + +/* R0/S0 on [0,2] */ +const R00: f64 = -6.25000000000000000000e-02; /* 0xBFB00000, 0x00000000 */ +const R01: f64 = 1.40705666955189706048e-03; /* 0x3F570D9F, 0x98472C61 */ +const R02: f64 = -1.59955631084035597520e-05; /* 0xBEF0C5C6, 0xBA169668 */ +const R03: f64 = 4.96727999609584448412e-08; /* 0x3E6AAAFA, 0x46CA0BD9 */ +const S01: f64 = 1.91537599538363460805e-02; /* 0x3F939D0B, 0x12637E53 */ +const S02: f64 = 1.85946785588630915560e-04; /* 0x3F285F56, 0xB9CDF664 */ +const S03: f64 = 1.17718464042623683263e-06; /* 0x3EB3BFF8, 0x333F8498 */ +const S04: f64 = 5.04636257076217042715e-09; /* 0x3E35AC88, 0xC97DFF2C */ +const S05: f64 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */ + +pub fn j1(x: f64) -> f64 { + let mut z: f64; + let r: f64; + let s: f64; + let mut ix: u32; + let sign: bool; + + ix = get_high_word(x); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + if ix >= 0x7ff00000 { + return 1.0 / (x * x); + } + if ix >= 
0x40000000 { + /* |x| >= 2 */ + return common(ix, fabs(x), false, sign); + } + if ix >= 0x38000000 { + /* |x| >= 2**-127 */ + z = x * x; + r = z * (R00 + z * (R01 + z * (R02 + z * R03))); + s = 1.0 + z * (S01 + z * (S02 + z * (S03 + z * (S04 + z * S05)))); + z = r / s; + } else { + /* avoid underflow, raise inexact if x!=0 */ + z = x; + } + return (0.5 + z) * x; +} + +const U0: [f64; 5] = [ + -1.96057090646238940668e-01, /* 0xBFC91866, 0x143CBC8A */ + 5.04438716639811282616e-02, /* 0x3FA9D3C7, 0x76292CD1 */ + -1.91256895875763547298e-03, /* 0xBF5F55E5, 0x4844F50F */ + 2.35252600561610495928e-05, /* 0x3EF8AB03, 0x8FA6B88E */ + -9.19099158039878874504e-08, /* 0xBE78AC00, 0x569105B8 */ +]; +const V0: [f64; 5] = [ + 1.99167318236649903973e-02, /* 0x3F94650D, 0x3F4DA9F0 */ + 2.02552581025135171496e-04, /* 0x3F2A8C89, 0x6C257764 */ + 1.35608801097516229404e-06, /* 0x3EB6C05A, 0x894E8CA6 */ + 6.22741452364621501295e-09, /* 0x3E3ABF1D, 0x5BA69A86 */ + 1.66559246207992079114e-11, /* 0x3DB25039, 0xDACA772A */ +]; + +pub fn y1(x: f64) -> f64 { + let z: f64; + let u: f64; + let v: f64; + let ix: u32; + let lx: u32; + + ix = get_high_word(x); + lx = get_low_word(x); + + /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */ + if (ix << 1 | lx) == 0 { + return -1.0 / 0.0; + } + if (ix >> 31) != 0 { + return 0.0 / 0.0; + } + if ix >= 0x7ff00000 { + return 1.0 / x; + } + + if ix >= 0x40000000 { + /* x >= 2 */ + return common(ix, x, true, false); + } + if ix < 0x3c900000 { + /* x < 2**-54 */ + return -TPI / x; + } + z = x * x; + u = U0[0] + z * (U0[1] + z * (U0[2] + z * (U0[3] + z * U0[4]))); + v = 1.0 + z * (V0[0] + z * (V0[1] + z * (V0[2] + z * (V0[3] + z * V0[4])))); + return x * (u / v) + TPI * (j1(x) * log(x) - 1.0 / x); +} + +/* For x >= 8, the asymptotic expansions of pone is + * 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x. + * We approximate pone by + * pone(x) = 1 + (R/S) + * where R = pr0 + pr1*s^2 + pr2*s^4 + ... + pr5*s^10 + * S = 1 + ps0*s^2 + ... 
+ ps4*s^10 + * and + * | pone(x)-1-R/S | <= 2 ** ( -60.06) + */ + +const PR8: [f64; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + 1.17187499999988647970e-01, /* 0x3FBDFFFF, 0xFFFFFCCE */ + 1.32394806593073575129e+01, /* 0x402A7A9D, 0x357F7FCE */ + 4.12051854307378562225e+02, /* 0x4079C0D4, 0x652EA590 */ + 3.87474538913960532227e+03, /* 0x40AE457D, 0xA3A532CC */ + 7.91447954031891731574e+03, /* 0x40BEEA7A, 0xC32782DD */ +]; +const PS8: [f64; 5] = [ + 1.14207370375678408436e+02, /* 0x405C8D45, 0x8E656CAC */ + 3.65093083420853463394e+03, /* 0x40AC85DC, 0x964D274F */ + 3.69562060269033463555e+04, /* 0x40E20B86, 0x97C5BB7F */ + 9.76027935934950801311e+04, /* 0x40F7D42C, 0xB28F17BB */ + 3.08042720627888811578e+04, /* 0x40DE1511, 0x697A0B2D */ +]; + +const PR5: [f64; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.31990519556243522749e-11, /* 0x3DAD0667, 0xDAE1CA7D */ + 1.17187493190614097638e-01, /* 0x3FBDFFFF, 0xE2C10043 */ + 6.80275127868432871736e+00, /* 0x401B3604, 0x6E6315E3 */ + 1.08308182990189109773e+02, /* 0x405B13B9, 0x452602ED */ + 5.17636139533199752805e+02, /* 0x40802D16, 0xD052D649 */ + 5.28715201363337541807e+02, /* 0x408085B8, 0xBB7E0CB7 */ +]; +const PS5: [f64; 5] = [ + 5.92805987221131331921e+01, /* 0x404DA3EA, 0xA8AF633D */ + 9.91401418733614377743e+02, /* 0x408EFB36, 0x1B066701 */ + 5.35326695291487976647e+03, /* 0x40B4E944, 0x5706B6FB */ + 7.84469031749551231769e+03, /* 0x40BEA4B0, 0xB8A5BB15 */ + 1.50404688810361062679e+03, /* 0x40978030, 0x036F5E51 */ +]; + +const PR3: [f64; 6] = [ + 3.02503916137373618024e-09, /* 0x3E29FC21, 0xA7AD9EDD */ + 1.17186865567253592491e-01, /* 0x3FBDFFF5, 0x5B21D17B */ + 3.93297750033315640650e+00, /* 0x400F76BC, 0xE85EAD8A */ + 3.51194035591636932736e+01, /* 0x40418F48, 0x9DA6D129 */ + 9.10550110750781271918e+01, /* 0x4056C385, 0x4D2C1837 */ + 4.85590685197364919645e+01, /* 0x4048478F, 0x8EA83EE5 */ +]; +const PS3: [f64; 5] = [ + 
3.47913095001251519989e+01, /* 0x40416549, 0xA134069C */ + 3.36762458747825746741e+02, /* 0x40750C33, 0x07F1A75F */ + 1.04687139975775130551e+03, /* 0x40905B7C, 0x5037D523 */ + 8.90811346398256432622e+02, /* 0x408BD67D, 0xA32E31E9 */ + 1.03787932439639277504e+02, /* 0x4059F26D, 0x7C2EED53 */ +]; + +const PR2: [f64; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.07710830106873743082e-07, /* 0x3E7CE9D4, 0xF65544F4 */ + 1.17176219462683348094e-01, /* 0x3FBDFF42, 0xBE760D83 */ + 2.36851496667608785174e+00, /* 0x4002F2B7, 0xF98FAEC0 */ + 1.22426109148261232917e+01, /* 0x40287C37, 0x7F71A964 */ + 1.76939711271687727390e+01, /* 0x4031B1A8, 0x177F8EE2 */ + 5.07352312588818499250e+00, /* 0x40144B49, 0xA574C1FE */ +]; +const PS2: [f64; 5] = [ + 2.14364859363821409488e+01, /* 0x40356FBD, 0x8AD5ECDC */ + 1.25290227168402751090e+02, /* 0x405F5293, 0x14F92CD5 */ + 2.32276469057162813669e+02, /* 0x406D08D8, 0xD5A2DBD9 */ + 1.17679373287147100768e+02, /* 0x405D6B7A, 0xDA1884A9 */ + 8.36463893371618283368e+00, /* 0x4020BAB1, 0xF44E5192 */ +]; + +fn pone(x: f64) -> f64 { + let p: &[f64; 6]; + let q: &[f64; 5]; + let z: f64; + let r: f64; + let s: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + if ix >= 0x40200000 { + p = &PR8; + q = &PS8; + } else if ix >= 0x40122E8B { + p = &PR5; + q = &PS5; + } else if ix >= 0x4006DB6D { + p = &PR3; + q = &PS3; + } else + /*ix >= 0x40000000*/ + { + p = &PR2; + q = &PS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4])))); + return 1.0 + r / s; +} + +/* For x >= 8, the asymptotic expansions of qone is + * 3/8 s - 105/1024 s^3 - ..., where s = 1/x. + * We approximate pone by + * qone(x) = s*(0.375 + (R/S)) + * where R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10 + * S = 1 + qs1*s^2 + ... 
+ qs6*s^12 + * and + * | qone(x)/s -0.375-R/S | <= 2 ** ( -61.13) + */ + +const QR8: [f64; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + -1.02539062499992714161e-01, /* 0xBFBA3FFF, 0xFFFFFDF3 */ + -1.62717534544589987888e+01, /* 0xC0304591, 0xA26779F7 */ + -7.59601722513950107896e+02, /* 0xC087BCD0, 0x53E4B576 */ + -1.18498066702429587167e+04, /* 0xC0C724E7, 0x40F87415 */ + -4.84385124285750353010e+04, /* 0xC0E7A6D0, 0x65D09C6A */ +]; +const QS8: [f64; 6] = [ + 1.61395369700722909556e+02, /* 0x40642CA6, 0xDE5BCDE5 */ + 7.82538599923348465381e+03, /* 0x40BE9162, 0xD0D88419 */ + 1.33875336287249578163e+05, /* 0x4100579A, 0xB0B75E98 */ + 7.19657723683240939863e+05, /* 0x4125F653, 0x72869C19 */ + 6.66601232617776375264e+05, /* 0x412457D2, 0x7719AD5C */ + -2.94490264303834643215e+05, /* 0xC111F969, 0x0EA5AA18 */ +]; + +const QR5: [f64; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -2.08979931141764104297e-11, /* 0xBDB6FA43, 0x1AA1A098 */ + -1.02539050241375426231e-01, /* 0xBFBA3FFF, 0xCB597FEF */ + -8.05644828123936029840e+00, /* 0xC0201CE6, 0xCA03AD4B */ + -1.83669607474888380239e+02, /* 0xC066F56D, 0x6CA7B9B0 */ + -1.37319376065508163265e+03, /* 0xC09574C6, 0x6931734F */ + -2.61244440453215656817e+03, /* 0xC0A468E3, 0x88FDA79D */ +]; +const QS5: [f64; 6] = [ + 8.12765501384335777857e+01, /* 0x405451B2, 0xFF5A11B2 */ + 1.99179873460485964642e+03, /* 0x409F1F31, 0xE77BF839 */ + 1.74684851924908907677e+04, /* 0x40D10F1F, 0x0D64CE29 */ + 4.98514270910352279316e+04, /* 0x40E8576D, 0xAABAD197 */ + 2.79480751638918118260e+04, /* 0x40DB4B04, 0xCF7C364B */ + -4.71918354795128470869e+03, /* 0xC0B26F2E, 0xFCFFA004 */ +]; + +const QR3: [f64; 6] = [ + -5.07831226461766561369e-09, /* 0xBE35CFA9, 0xD38FC84F */ + -1.02537829820837089745e-01, /* 0xBFBA3FEB, 0x51AEED54 */ + -4.61011581139473403113e+00, /* 0xC01270C2, 0x3302D9FF */ + -5.78472216562783643212e+01, /* 0xC04CEC71, 0xC25D16DA */ + 
-2.28244540737631695038e+02, /* 0xC06C87D3, 0x4718D55F */ + -2.19210128478909325622e+02, /* 0xC06B66B9, 0x5F5C1BF6 */ +]; +const QS3: [f64; 6] = [ + 4.76651550323729509273e+01, /* 0x4047D523, 0xCCD367E4 */ + 6.73865112676699709482e+02, /* 0x40850EEB, 0xC031EE3E */ + 3.38015286679526343505e+03, /* 0x40AA684E, 0x448E7C9A */ + 5.54772909720722782367e+03, /* 0x40B5ABBA, 0xA61D54A6 */ + 1.90311919338810798763e+03, /* 0x409DBC7A, 0x0DD4DF4B */ + -1.35201191444307340817e+02, /* 0xC060E670, 0x290A311F */ +]; + +const QR2: [f64; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + -1.78381727510958865572e-07, /* 0xBE87F126, 0x44C626D2 */ + -1.02517042607985553460e-01, /* 0xBFBA3E8E, 0x9148B010 */ + -2.75220568278187460720e+00, /* 0xC0060484, 0x69BB4EDA */ + -1.96636162643703720221e+01, /* 0xC033A9E2, 0xC168907F */ + -4.23253133372830490089e+01, /* 0xC04529A3, 0xDE104AAA */ + -2.13719211703704061733e+01, /* 0xC0355F36, 0x39CF6E52 */ +]; +const QS2: [f64; 6] = [ + 2.95333629060523854548e+01, /* 0x403D888A, 0x78AE64FF */ + 2.52981549982190529136e+02, /* 0x406F9F68, 0xDB821CBA */ + 7.57502834868645436472e+02, /* 0x4087AC05, 0xCE49A0F7 */ + 7.39393205320467245656e+02, /* 0x40871B25, 0x48D4C029 */ + 1.55949003336666123687e+02, /* 0x40637E5E, 0x3C3ED8D4 */ + -4.95949898822628210127e+00, /* 0xC013D686, 0xE71BE86B */ +]; + +fn qone(x: f64) -> f64 { + let p: &[f64; 6]; + let q: &[f64; 6]; + let s: f64; + let r: f64; + let z: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + if ix >= 0x40200000 { + p = &QR8; + q = &QS8; + } else if ix >= 0x40122E8B { + p = &QR5; + q = &QS5; + } else if ix >= 0x4006DB6D { + p = &QR3; + q = &QS3; + } else + /*ix >= 0x40000000*/ + { + p = &QR2; + q = &QS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); + return (0.375 + r / s) / x; +} diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs 
index 7cf9c45b9..83ac1acff 100644 --- a/libm/src/math/j1f.rs +++ b/libm/src/math/j1f.rs @@ -1,331 +1,358 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_j1f.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -use super::{cosf, fabsf, logf, sinf, sqrtf}; - -const INVSQRTPI: f32 = 5.6418961287e-01; /* 0x3f106ebb */ -const TPI: f32 = 6.3661974669e-01; /* 0x3f22f983 */ - -fn common(ix: u32, x: f32, y1: bool, sign: bool) -> f32 -{ - let z: f64; - let mut s: f64; - let c: f64; - let mut ss: f64; - let mut cc: f64; - - s = sinf(x) as f64; - if y1 { - s = -s; - } - c = cosf(x) as f64; - cc = s-c; - if ix < 0x7f000000 { - ss = -s-c; - z = cosf(2.0*x) as f64; - if s*c > 0.0 { - cc = z/ss; - } else { - ss = z/cc; - } - if ix < 0x58800000 { - if y1 { - ss = -ss; - } - cc = (ponef(x) as f64)*cc-(qonef(x) as f64)*ss; - } - } - if sign { - cc = -cc; - } - return INVSQRTPI*(cc as f32)/sqrtf(x); -} - -/* R0/S0 on [0,2] */ -const R00: f32 = -6.2500000000e-02; /* 0xbd800000 */ -const R01: f32 = 1.4070566976e-03; /* 0x3ab86cfd */ -const R02: f32 = -1.5995563444e-05; /* 0xb7862e36 */ -const R03: f32 = 4.9672799207e-08; /* 0x335557d2 */ -const S01: f32 = 1.9153760746e-02; /* 0x3c9ce859 */ -const S02: f32 = 1.8594678841e-04; /* 0x3942fab6 */ -const S03: f32 = 1.1771846857e-06; /* 0x359dffc2 */ -const S04: f32 = 5.0463624390e-09; /* 0x31ad6446 */ -const S05: f32 = 1.2354227016e-11; /* 0x2d59567e */ - -pub fn j1f(x: f32) -> f32 -{ - let mut z: f32; - let r: f32; - let s: f32; - let mut ix: u32; - let sign: bool; - - ix = x.to_bits(); - sign = (ix>>31) != 0; - ix &= 
0x7fffffff; - if ix >= 0x7f800000 { - return 1.0/(x*x); - } - if ix >= 0x40000000 { /* |x| >= 2 */ - return common(ix, fabsf(x), false, sign); - } - if ix >= 0x39000000 { /* |x| >= 2**-13 */ - z = x*x; - r = z*(R00+z*(R01+z*(R02+z*R03))); - s = 1.0+z*(S01+z*(S02+z*(S03+z*(S04+z*S05)))); - z = 0.5 + r/s; - } else { - z = 0.5; - } - return z*x; -} - -const U0: [f32; 5] = [ - -1.9605709612e-01, /* 0xbe48c331 */ - 5.0443872809e-02, /* 0x3d4e9e3c */ - -1.9125689287e-03, /* 0xbafaaf2a */ - 2.3525259166e-05, /* 0x37c5581c */ - -9.1909917899e-08, /* 0xb3c56003 */ -]; -const V0: [f32; 5] = [ - 1.9916731864e-02, /* 0x3ca3286a */ - 2.0255257550e-04, /* 0x3954644b */ - 1.3560879779e-06, /* 0x35b602d4 */ - 6.2274145840e-09, /* 0x31d5f8eb */ - 1.6655924903e-11, /* 0x2d9281cf */ -]; - -pub fn y1f(x: f32) -> f32 -{ - let z: f32; - let u: f32; - let v: f32; - let ix: u32; - - ix = x.to_bits(); - if (ix & 0x7fffffff) == 0 { - return -1.0/0.0; - } - if (ix>>31) != 0{ - return 0.0/0.0; - } - if ix >= 0x7f800000 { - return 1.0/x; - } - if ix >= 0x40000000 { /* |x| >= 2.0 */ - return common(ix,x,true,false); - } - if ix < 0x33000000 { /* x < 2**-25 */ - return -TPI/x; - } - z = x*x; - u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4]))); - v = 1.0+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4])))); - return x*(u/v) + TPI*(j1f(x)*logf(x)-1.0/x); -} - -/* For x >= 8, the asymptotic expansions of pone is - * 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x. - * We approximate pone by - * pone(x) = 1 + (R/S) - * where R = pr0 + pr1*s^2 + pr2*s^4 + ... + pr5*s^10 - * S = 1 + ps0*s^2 + ... 
+ ps4*s^10 - * and - * | pone(x)-1-R/S | <= 2 ** ( -60.06) - */ - -const PR8: [f32; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ - 0.0000000000e+00, /* 0x00000000 */ - 1.1718750000e-01, /* 0x3df00000 */ - 1.3239480972e+01, /* 0x4153d4ea */ - 4.1205184937e+02, /* 0x43ce06a3 */ - 3.8747453613e+03, /* 0x45722bed */ - 7.9144794922e+03, /* 0x45f753d6 */ -]; -const PS8: [f32; 5] = [ - 1.1420736694e+02, /* 0x42e46a2c */ - 3.6509309082e+03, /* 0x45642ee5 */ - 3.6956207031e+04, /* 0x47105c35 */ - 9.7602796875e+04, /* 0x47bea166 */ - 3.0804271484e+04, /* 0x46f0a88b */ -]; - -const PR5: [f32; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ - 1.3199052094e-11, /* 0x2d68333f */ - 1.1718749255e-01, /* 0x3defffff */ - 6.8027510643e+00, /* 0x40d9b023 */ - 1.0830818176e+02, /* 0x42d89dca */ - 5.1763616943e+02, /* 0x440168b7 */ - 5.2871520996e+02, /* 0x44042dc6 */ -]; -const PS5: [f32; 5] = [ - 5.9280597687e+01, /* 0x426d1f55 */ - 9.9140142822e+02, /* 0x4477d9b1 */ - 5.3532670898e+03, /* 0x45a74a23 */ - 7.8446904297e+03, /* 0x45f52586 */ - 1.5040468750e+03, /* 0x44bc0180 */ -]; - -const PR3: [f32; 6] = [ - 3.0250391081e-09, /* 0x314fe10d */ - 1.1718686670e-01, /* 0x3defffab */ - 3.9329774380e+00, /* 0x407bb5e7 */ - 3.5119403839e+01, /* 0x420c7a45 */ - 9.1055007935e+01, /* 0x42b61c2a */ - 4.8559066772e+01, /* 0x42423c7c */ -]; -const PS3: [f32; 5] = [ - 3.4791309357e+01, /* 0x420b2a4d */ - 3.3676245117e+02, /* 0x43a86198 */ - 1.0468714600e+03, /* 0x4482dbe3 */ - 8.9081134033e+02, /* 0x445eb3ed */ - 1.0378793335e+02, /* 0x42cf936c */ -]; - -const PR2: [f32; 6] = [/* for x in [2.8570,2]=1/[0.3499,0.5] */ - 1.0771083225e-07, /* 0x33e74ea8 */ - 1.1717621982e-01, /* 0x3deffa16 */ - 2.3685150146e+00, /* 0x401795c0 */ - 1.2242610931e+01, /* 0x4143e1bc */ - 1.7693971634e+01, /* 0x418d8d41 */ - 5.0735230446e+00, /* 0x40a25a4d */ -]; -const PS2: [f32; 5] = [ - 2.1436485291e+01, /* 0x41ab7dec */ - 1.2529022980e+02, /* 0x42fa9499 */ - 2.3227647400e+02, /* 0x436846c7 */ - 1.1767937469e+02, /* 
0x42eb5bd7 */ - 8.3646392822e+00, /* 0x4105d590 */ -]; - -fn ponef(x: f32) -> f32 -{ - let p: &[f32; 6]; - let q: &[f32; 5]; - let z: f32; - let r: f32; - let s: f32; - let mut ix: u32; - - ix = x.to_bits(); - ix &= 0x7fffffff; - if ix >= 0x41000000 {p = &PR8; q = &PS8;} - else if ix >= 0x409173eb {p = &PR5; q = &PS5;} - else if ix >= 0x4036d917 {p = &PR3; q = &PS3;} - else /*ix >= 0x40000000*/{p = &PR2; q = &PS2;} - z = 1.0/(x*x); - r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); - s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); - return 1.0 + r/s; -} - -/* For x >= 8, the asymptotic expansions of qone is - * 3/8 s - 105/1024 s^3 - ..., where s = 1/x. - * We approximate pone by - * qone(x) = s*(0.375 + (R/S)) - * where R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10 - * S = 1 + qs1*s^2 + ... + qs6*s^12 - * and - * | qone(x)/s -0.375-R/S | <= 2 ** ( -61.13) - */ - -const QR8: [f32; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ - 0.0000000000e+00, /* 0x00000000 */ - -1.0253906250e-01, /* 0xbdd20000 */ - -1.6271753311e+01, /* 0xc1822c8d */ - -7.5960174561e+02, /* 0xc43de683 */ - -1.1849806641e+04, /* 0xc639273a */ - -4.8438511719e+04, /* 0xc73d3683 */ -]; -const QS8: [f32; 6] = [ - 1.6139537048e+02, /* 0x43216537 */ - 7.8253862305e+03, /* 0x45f48b17 */ - 1.3387534375e+05, /* 0x4802bcd6 */ - 7.1965775000e+05, /* 0x492fb29c */ - 6.6660125000e+05, /* 0x4922be94 */ - -2.9449025000e+05, /* 0xc88fcb48 */ -]; - -const QR5: [f32; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ - -2.0897993405e-11, /* 0xadb7d219 */ - -1.0253904760e-01, /* 0xbdd1fffe */ - -8.0564479828e+00, /* 0xc100e736 */ - -1.8366960144e+02, /* 0xc337ab6b */ - -1.3731937256e+03, /* 0xc4aba633 */ - -2.6124443359e+03, /* 0xc523471c */ -]; -const QS5: [f32; 6] = [ - 8.1276550293e+01, /* 0x42a28d98 */ - 1.9917987061e+03, /* 0x44f8f98f */ - 1.7468484375e+04, /* 0x468878f8 */ - 4.9851425781e+04, /* 0x4742bb6d */ - 2.7948074219e+04, /* 0x46da5826 */ - -4.7191835938e+03, /* 0xc5937978 */ -]; - -const QR3: [f32; 6] 
= [ - -5.0783124372e-09, /* 0xb1ae7d4f */ - -1.0253783315e-01, /* 0xbdd1ff5b */ - -4.6101160049e+00, /* 0xc0938612 */ - -5.7847221375e+01, /* 0xc267638e */ - -2.2824453735e+02, /* 0xc3643e9a */ - -2.1921012878e+02, /* 0xc35b35cb */ -]; -const QS3: [f32; 6] = [ - 4.7665153503e+01, /* 0x423ea91e */ - 6.7386511230e+02, /* 0x4428775e */ - 3.3801528320e+03, /* 0x45534272 */ - 5.5477290039e+03, /* 0x45ad5dd5 */ - 1.9031191406e+03, /* 0x44ede3d0 */ - -1.3520118713e+02, /* 0xc3073381 */ -]; - -const QR2: [f32; 6] = [ /* for x in [2.8570,2]=1/[0.3499,0.5] */ - -1.7838172539e-07, /* 0xb43f8932 */ - -1.0251704603e-01, /* 0xbdd1f475 */ - -2.7522056103e+00, /* 0xc0302423 */ - -1.9663616180e+01, /* 0xc19d4f16 */ - -4.2325313568e+01, /* 0xc2294d1f */ - -2.1371921539e+01, /* 0xc1aaf9b2 */ -]; -const QS2: [f32; 6] = [ - 2.9533363342e+01, /* 0x41ec4454 */ - 2.5298155212e+02, /* 0x437cfb47 */ - 7.5750280762e+02, /* 0x443d602e */ - 7.3939318848e+02, /* 0x4438d92a */ - 1.5594900513e+02, /* 0x431bf2f2 */ - -4.9594988823e+00, /* 0xc09eb437 */ -]; - -fn qonef(x: f32) -> f32 -{ - let p: &[f32; 6]; - let q: &[f32; 6]; - let s: f32; - let r: f32; - let z: f32; - let mut ix: u32; - - ix = x.to_bits(); - ix &= 0x7fffffff; - if ix >= 0x41000000 {p = &QR8; q = &QS8;} - else if ix >= 0x409173eb {p = &QR5; q = &QS5;} - else if ix >= 0x4036d917 {p = &QR3; q = &QS3;} - else /*ix >= 0x40000000*/{p = &QR2; q = &QS2;} - z = 1.0/(x*x); - r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); - s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); - return (0.375 + r/s)/x; -} +/* origin: FreeBSD /usr/src/lib/msun/src/e_j1f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. 
+ * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{cosf, fabsf, logf, sinf, sqrtf}; + +const INVSQRTPI: f32 = 5.6418961287e-01; /* 0x3f106ebb */ +const TPI: f32 = 6.3661974669e-01; /* 0x3f22f983 */ + +fn common(ix: u32, x: f32, y1: bool, sign: bool) -> f32 { + let z: f64; + let mut s: f64; + let c: f64; + let mut ss: f64; + let mut cc: f64; + + s = sinf(x) as f64; + if y1 { + s = -s; + } + c = cosf(x) as f64; + cc = s - c; + if ix < 0x7f000000 { + ss = -s - c; + z = cosf(2.0 * x) as f64; + if s * c > 0.0 { + cc = z / ss; + } else { + ss = z / cc; + } + if ix < 0x58800000 { + if y1 { + ss = -ss; + } + cc = (ponef(x) as f64) * cc - (qonef(x) as f64) * ss; + } + } + if sign { + cc = -cc; + } + return INVSQRTPI * (cc as f32) / sqrtf(x); +} + +/* R0/S0 on [0,2] */ +const R00: f32 = -6.2500000000e-02; /* 0xbd800000 */ +const R01: f32 = 1.4070566976e-03; /* 0x3ab86cfd */ +const R02: f32 = -1.5995563444e-05; /* 0xb7862e36 */ +const R03: f32 = 4.9672799207e-08; /* 0x335557d2 */ +const S01: f32 = 1.9153760746e-02; /* 0x3c9ce859 */ +const S02: f32 = 1.8594678841e-04; /* 0x3942fab6 */ +const S03: f32 = 1.1771846857e-06; /* 0x359dffc2 */ +const S04: f32 = 5.0463624390e-09; /* 0x31ad6446 */ +const S05: f32 = 1.2354227016e-11; /* 0x2d59567e */ + +pub fn j1f(x: f32) -> f32 { + let mut z: f32; + let r: f32; + let s: f32; + let mut ix: u32; + let sign: bool; + + ix = x.to_bits(); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + if ix >= 0x7f800000 { + return 1.0 / (x * x); + } + if ix >= 0x40000000 { + /* |x| >= 2 */ + return common(ix, fabsf(x), false, sign); + } + if ix >= 0x39000000 { + /* |x| >= 2**-13 */ + z = x * x; + r = z * (R00 + z * (R01 + z * (R02 + z * R03))); + s = 1.0 + z * (S01 + z * (S02 + z * (S03 + z * (S04 + z * S05)))); + z = 0.5 + r / s; + } else { + z = 0.5; + } + return z * x; +} + +const U0: [f32; 5] = [ 
+ -1.9605709612e-01, /* 0xbe48c331 */ + 5.0443872809e-02, /* 0x3d4e9e3c */ + -1.9125689287e-03, /* 0xbafaaf2a */ + 2.3525259166e-05, /* 0x37c5581c */ + -9.1909917899e-08, /* 0xb3c56003 */ +]; +const V0: [f32; 5] = [ + 1.9916731864e-02, /* 0x3ca3286a */ + 2.0255257550e-04, /* 0x3954644b */ + 1.3560879779e-06, /* 0x35b602d4 */ + 6.2274145840e-09, /* 0x31d5f8eb */ + 1.6655924903e-11, /* 0x2d9281cf */ +]; + +pub fn y1f(x: f32) -> f32 { + let z: f32; + let u: f32; + let v: f32; + let ix: u32; + + ix = x.to_bits(); + if (ix & 0x7fffffff) == 0 { + return -1.0 / 0.0; + } + if (ix >> 31) != 0 { + return 0.0 / 0.0; + } + if ix >= 0x7f800000 { + return 1.0 / x; + } + if ix >= 0x40000000 { + /* |x| >= 2.0 */ + return common(ix, x, true, false); + } + if ix < 0x33000000 { + /* x < 2**-25 */ + return -TPI / x; + } + z = x * x; + u = U0[0] + z * (U0[1] + z * (U0[2] + z * (U0[3] + z * U0[4]))); + v = 1.0 + z * (V0[0] + z * (V0[1] + z * (V0[2] + z * (V0[3] + z * V0[4])))); + return x * (u / v) + TPI * (j1f(x) * logf(x) - 1.0 / x); +} + +/* For x >= 8, the asymptotic expansions of pone is + * 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x. + * We approximate pone by + * pone(x) = 1 + (R/S) + * where R = pr0 + pr1*s^2 + pr2*s^4 + ... + pr5*s^10 + * S = 1 + ps0*s^2 + ... 
+ ps4*s^10 + * and + * | pone(x)-1-R/S | <= 2 ** ( -60.06) + */ + +const PR8: [f32; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + 1.1718750000e-01, /* 0x3df00000 */ + 1.3239480972e+01, /* 0x4153d4ea */ + 4.1205184937e+02, /* 0x43ce06a3 */ + 3.8747453613e+03, /* 0x45722bed */ + 7.9144794922e+03, /* 0x45f753d6 */ +]; +const PS8: [f32; 5] = [ + 1.1420736694e+02, /* 0x42e46a2c */ + 3.6509309082e+03, /* 0x45642ee5 */ + 3.6956207031e+04, /* 0x47105c35 */ + 9.7602796875e+04, /* 0x47bea166 */ + 3.0804271484e+04, /* 0x46f0a88b */ +]; + +const PR5: [f32; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.3199052094e-11, /* 0x2d68333f */ + 1.1718749255e-01, /* 0x3defffff */ + 6.8027510643e+00, /* 0x40d9b023 */ + 1.0830818176e+02, /* 0x42d89dca */ + 5.1763616943e+02, /* 0x440168b7 */ + 5.2871520996e+02, /* 0x44042dc6 */ +]; +const PS5: [f32; 5] = [ + 5.9280597687e+01, /* 0x426d1f55 */ + 9.9140142822e+02, /* 0x4477d9b1 */ + 5.3532670898e+03, /* 0x45a74a23 */ + 7.8446904297e+03, /* 0x45f52586 */ + 1.5040468750e+03, /* 0x44bc0180 */ +]; + +const PR3: [f32; 6] = [ + 3.0250391081e-09, /* 0x314fe10d */ + 1.1718686670e-01, /* 0x3defffab */ + 3.9329774380e+00, /* 0x407bb5e7 */ + 3.5119403839e+01, /* 0x420c7a45 */ + 9.1055007935e+01, /* 0x42b61c2a */ + 4.8559066772e+01, /* 0x42423c7c */ +]; +const PS3: [f32; 5] = [ + 3.4791309357e+01, /* 0x420b2a4d */ + 3.3676245117e+02, /* 0x43a86198 */ + 1.0468714600e+03, /* 0x4482dbe3 */ + 8.9081134033e+02, /* 0x445eb3ed */ + 1.0378793335e+02, /* 0x42cf936c */ +]; + +const PR2: [f32; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.0771083225e-07, /* 0x33e74ea8 */ + 1.1717621982e-01, /* 0x3deffa16 */ + 2.3685150146e+00, /* 0x401795c0 */ + 1.2242610931e+01, /* 0x4143e1bc */ + 1.7693971634e+01, /* 0x418d8d41 */ + 5.0735230446e+00, /* 0x40a25a4d */ +]; +const PS2: [f32; 5] = [ + 2.1436485291e+01, /* 0x41ab7dec */ + 1.2529022980e+02, /* 0x42fa9499 */ + 2.3227647400e+02, /* 0x436846c7 */ + 
1.1767937469e+02, /* 0x42eb5bd7 */ + 8.3646392822e+00, /* 0x4105d590 */ +]; + +fn ponef(x: f32) -> f32 { + let p: &[f32; 6]; + let q: &[f32; 5]; + let z: f32; + let r: f32; + let s: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x41000000 { + p = &PR8; + q = &PS8; + } else if ix >= 0x409173eb { + p = &PR5; + q = &PS5; + } else if ix >= 0x4036d917 { + p = &PR3; + q = &PS3; + } else + /*ix >= 0x40000000*/ + { + p = &PR2; + q = &PS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4])))); + return 1.0 + r / s; +} + +/* For x >= 8, the asymptotic expansions of qone is + * 3/8 s - 105/1024 s^3 - ..., where s = 1/x. + * We approximate pone by + * qone(x) = s*(0.375 + (R/S)) + * where R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10 + * S = 1 + qs1*s^2 + ... + qs6*s^12 + * and + * | qone(x)/s -0.375-R/S | <= 2 ** ( -61.13) + */ + +const QR8: [f32; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + -1.0253906250e-01, /* 0xbdd20000 */ + -1.6271753311e+01, /* 0xc1822c8d */ + -7.5960174561e+02, /* 0xc43de683 */ + -1.1849806641e+04, /* 0xc639273a */ + -4.8438511719e+04, /* 0xc73d3683 */ +]; +const QS8: [f32; 6] = [ + 1.6139537048e+02, /* 0x43216537 */ + 7.8253862305e+03, /* 0x45f48b17 */ + 1.3387534375e+05, /* 0x4802bcd6 */ + 7.1965775000e+05, /* 0x492fb29c */ + 6.6660125000e+05, /* 0x4922be94 */ + -2.9449025000e+05, /* 0xc88fcb48 */ +]; + +const QR5: [f32; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -2.0897993405e-11, /* 0xadb7d219 */ + -1.0253904760e-01, /* 0xbdd1fffe */ + -8.0564479828e+00, /* 0xc100e736 */ + -1.8366960144e+02, /* 0xc337ab6b */ + -1.3731937256e+03, /* 0xc4aba633 */ + -2.6124443359e+03, /* 0xc523471c */ +]; +const QS5: [f32; 6] = [ + 8.1276550293e+01, /* 0x42a28d98 */ + 1.9917987061e+03, /* 0x44f8f98f */ + 1.7468484375e+04, /* 0x468878f8 */ + 4.9851425781e+04, /* 0x4742bb6d */ + 
2.7948074219e+04, /* 0x46da5826 */ + -4.7191835938e+03, /* 0xc5937978 */ +]; + +const QR3: [f32; 6] = [ + -5.0783124372e-09, /* 0xb1ae7d4f */ + -1.0253783315e-01, /* 0xbdd1ff5b */ + -4.6101160049e+00, /* 0xc0938612 */ + -5.7847221375e+01, /* 0xc267638e */ + -2.2824453735e+02, /* 0xc3643e9a */ + -2.1921012878e+02, /* 0xc35b35cb */ +]; +const QS3: [f32; 6] = [ + 4.7665153503e+01, /* 0x423ea91e */ + 6.7386511230e+02, /* 0x4428775e */ + 3.3801528320e+03, /* 0x45534272 */ + 5.5477290039e+03, /* 0x45ad5dd5 */ + 1.9031191406e+03, /* 0x44ede3d0 */ + -1.3520118713e+02, /* 0xc3073381 */ +]; + +const QR2: [f32; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + -1.7838172539e-07, /* 0xb43f8932 */ + -1.0251704603e-01, /* 0xbdd1f475 */ + -2.7522056103e+00, /* 0xc0302423 */ + -1.9663616180e+01, /* 0xc19d4f16 */ + -4.2325313568e+01, /* 0xc2294d1f */ + -2.1371921539e+01, /* 0xc1aaf9b2 */ +]; +const QS2: [f32; 6] = [ + 2.9533363342e+01, /* 0x41ec4454 */ + 2.5298155212e+02, /* 0x437cfb47 */ + 7.5750280762e+02, /* 0x443d602e */ + 7.3939318848e+02, /* 0x4438d92a */ + 1.5594900513e+02, /* 0x431bf2f2 */ + -4.9594988823e+00, /* 0xc09eb437 */ +]; + +fn qonef(x: f32) -> f32 { + let p: &[f32; 6]; + let q: &[f32; 6]; + let s: f32; + let r: f32; + let z: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x41000000 { + p = &QR8; + q = &QS8; + } else if ix >= 0x409173eb { + p = &QR5; + q = &QS5; + } else if ix >= 0x4036d917 { + p = &QR3; + q = &QS3; + } else + /*ix >= 0x40000000*/ + { + p = &QR2; + q = &QS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); + return (0.375 + r / s) / x; +} diff --git a/libm/src/math/jn.rs b/libm/src/math/jn.rs index 7f7c06fee..70c980266 100644 --- a/libm/src/math/jn.rs +++ b/libm/src/math/jn.rs @@ -1,338 +1,343 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_jn.c */ -/* - * 
==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* - * jn(n, x), yn(n, x) - * floating point Bessel's function of the 1st and 2nd kind - * of order n - * - * Special cases: - * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; - * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. - * Note 2. About jn(n,x), yn(n,x) - * For n=0, j0(x) is called, - * for n=1, j1(x) is called, - * for n<=x, forward recursion is used starting - * from values of j0(x) and j1(x). - * for n>x, a continued fraction approximation to - * j(n,x)/j(n-1,x) is evaluated and then backward - * recursion is used starting from a supposed value - * for j(n,x). The resulting value of j(0,x) is - * compared with the actual value to correct the - * supposed value of j(n,x). - * - * yn(n,x) is similar in all respects, except - * that forward recursion is used for all - * values of n>1. 
- */ - -use super::{cos, fabs, get_high_word, get_low_word, log, j0, j1, sin, sqrt, y0, y1}; - -const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ - -pub fn jn(n: isize, mut x: f64) -> f64 -{ - let mut ix: u32; - let lx: u32; - let nm1: isize; - let mut i: isize; - let mut sign: bool; - let mut a: f64; - let mut b: f64; - let mut temp: f64; - - ix = get_high_word(x); - lx = get_low_word(x); - sign = (ix>>31) != 0; - ix &= 0x7fffffff; - - // -lx == !lx + 1 - if (ix | (lx|(!lx+1))>>31) > 0x7ff00000 { /* nan */ - return x; - } - - /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x) - * Thus, J(-n,x) = J(n,-x) - */ - /* nm1 = |n|-1 is used instead of |n| to handle n==INT_MIN */ - if n == 0 { - return j0(x); - } - if n < 0 { - nm1 = -(n+1); - x = -x; - sign = !sign; - } else { - nm1 = n-1; - } - if nm1 == 0 { - return j1(x); - } - - sign &= (n & 1) != 0; /* even n: 0, odd n: signbit(x) */ - x = fabs(x); - if (ix|lx) == 0 || ix == 0x7ff00000 { /* if x is 0 or inf */ - b = 0.0; - } else if (nm1 as f64) < x { - /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ - if ix >= 0x52d00000 { /* x > 2**302 */ - /* (x >> n**2) - * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) - * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) - * Let s=sin(x), c=cos(x), - * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then - * - * n sin(xn)*sqt2 cos(xn)*sqt2 - * ---------------------------------- - * 0 s-c c+s - * 1 -s-c -c+s - * 2 -s+c -c-s - * 3 s+c c-s - */ - temp = match nm1&3 { - 0 => -cos(x)+sin(x), - 1 => -cos(x)-sin(x), - 2 => cos(x)-sin(x), - 3 | _ => cos(x)+sin(x), - }; - b = INVSQRTPI*temp/sqrt(x); - } else { - a = j0(x); - b = j1(x); - i = 0; - while i < nm1 { - i += 1; - temp = b; - b = b*(2.0*(i as f64)/x) - a; /* avoid underflow */ - a = temp; - } - } - } else { - if ix < 0x3e100000 { /* x < 2**-29 */ - /* x is tiny, return the first Taylor expansion of J(n,x) - * J(n,x) = 1/n!*(x/2)^n - ... 
- */ - if nm1 > 32 { /* underflow */ - b = 0.0; - } else { - temp = x*0.5; - b = temp; - a = 1.0; - i = 2; - while i <= nm1 + 1 { - a *= i as f64; /* a = n! */ - b *= temp; /* b = (x/2)^n */ - i += 1; - } - b = b/a; - } - } else { - /* use backward recurrence */ - /* x x^2 x^2 - * J(n,x)/J(n-1,x) = ---- ------ ------ ..... - * 2n - 2(n+1) - 2(n+2) - * - * 1 1 1 - * (for large x) = ---- ------ ------ ..... - * 2n 2(n+1) 2(n+2) - * -- - ------ - ------ - - * x x x - * - * Let w = 2n/x and h=2/x, then the above quotient - * is equal to the continued fraction: - * 1 - * = ----------------------- - * 1 - * w - ----------------- - * 1 - * w+h - --------- - * w+2h - ... - * - * To determine how many terms needed, let - * Q(0) = w, Q(1) = w(w+h) - 1, - * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), - * When Q(k) > 1e4 good for single - * When Q(k) > 1e9 good for double - * When Q(k) > 1e17 good for quadruple - */ - /* determine k */ - let mut t: f64; - let mut q0: f64; - let mut q1: f64; - let mut w: f64; - let h: f64; - let mut z: f64; - let mut tmp: f64; - let nf: f64; - - let mut k: isize; - - nf = (nm1 as f64) + 1.0; - w = 2.0*nf/x; - h = 2.0/x; - z = w+h; - q0 = w; - q1 = w*z - 1.0; - k = 1; - while q1 < 1.0e9 { - k += 1; - z += h; - tmp = z*q1 - q0; - q0 = q1; - q1 = tmp; - } - t = 0.0; - i = k; - while i >= 0 { - t = 1.0/(2.0*((i as f64)+nf)/x - t); - i -= 1; - } - a = t; - b = 1.0; - /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) - * Hence, if n*(log(2n/x)) > ... 
- * single 8.8722839355e+01 - * double 7.09782712893383973096e+02 - * long double 1.1356523406294143949491931077970765006170e+04 - * then recurrent value may overflow and the result is - * likely underflow to zero - */ - tmp = nf*log(fabs(w)); - if tmp < 7.09782712893383973096e+02 { - i = nm1; - while i > 0 { - temp = b; - b = b*(2.0*(i as f64))/x - a; - a = temp; - i -= 1; - } - } else { - i = nm1; - while i > 0 { - temp = b; - b = b*(2.0*(i as f64))/x - a; - a = temp; - /* scale b to avoid spurious overflow */ - let x1p500 = f64::from_bits(0x5f30000000000000); // 0x1p500 == 2^500 - if b > x1p500 { - a /= b; - t /= b; - b = 1.0; - } - i -= 1; - } - } - z = j0(x); - w = j1(x); - if fabs(z) >= fabs(w) { - b = t*z/b; - } else { - b = t*w/a; - } - } - } - - if sign { - -b - } else { - b - } -} - - -pub fn yn(n: isize, x: f64) -> f64 -{ - let mut ix: u32; - let lx: u32; - let mut ib: u32; - let nm1: isize; - let mut sign: bool; - let mut i: isize; - let mut a: f64; - let mut b: f64; - let mut temp: f64; - - ix = get_high_word(x); - lx = get_low_word(x); - sign = (ix>>31) != 0; - ix &= 0x7fffffff; - - // -lx == !lx + 1 - if (ix | (lx|(!lx+1))>>31) > 0x7ff00000 { /* nan */ - return x; - } - if sign && (ix|lx) != 0 { /* x < 0 */ - return 0.0/0.0; - } - if ix == 0x7ff00000 { - return 0.0; - } - - if n == 0 { - return y0(x); - } - if n < 0 { - nm1 = -(n+1); - sign = (n&1) != 0; - } else { - nm1 = n-1; - sign = false; - } - if nm1 == 0 { - if sign { - return -y1(x); - } else { - return y1(x); - } - } - - if ix >= 0x52d00000 { /* x > 2**302 */ - /* (x >> n**2) - * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) - * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) - * Let s=sin(x), c=cos(x), - * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then - * - * n sin(xn)*sqt2 cos(xn)*sqt2 - * ---------------------------------- - * 0 s-c c+s - * 1 -s-c -c+s - * 2 -s+c -c-s - * 3 s+c c-s - */ - temp = match nm1&3 { - 0 => -sin(x)-cos(x), - 1 => -sin(x)+cos(x), - 2 => sin(x)+cos(x), - 3 | _ => sin(x)-cos(x), - }; - b 
= INVSQRTPI*temp/sqrt(x); - } else { - a = y0(x); - b = y1(x); - /* quit if b is -inf */ - ib = get_high_word(b); - i = 0; - while i < nm1 && ib != 0xfff00000 { - i += 1; - temp = b; - b = (2.0*(i as f64)/x)*b - a; - ib = get_high_word(b); - a = temp; - } - } - - if sign { - -b - } else { - b - } -} +/* origin: FreeBSD /usr/src/lib/msun/src/e_jn.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * jn(n, x), yn(n, x) + * floating point Bessel's function of the 1st and 2nd kind + * of order n + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. + * Note 2. About jn(n,x), yn(n,x) + * For n=0, j0(x) is called, + * for n=1, j1(x) is called, + * for n<=x, forward recursion is used starting + * from values of j0(x) and j1(x). + * for n>x, a continued fraction approximation to + * j(n,x)/j(n-1,x) is evaluated and then backward + * recursion is used starting from a supposed value + * for j(n,x). The resulting value of j(0,x) is + * compared with the actual value to correct the + * supposed value of j(n,x). + * + * yn(n,x) is similar in all respects, except + * that forward recursion is used for all + * values of n>1. 
+ */ + +use super::{cos, fabs, get_high_word, get_low_word, j0, j1, log, sin, sqrt, y0, y1}; + +const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ + +pub fn jn(n: i32, mut x: f64) -> f64 { + let mut ix: u32; + let lx: u32; + let nm1: i32; + let mut i: i32; + let mut sign: bool; + let mut a: f64; + let mut b: f64; + let mut temp: f64; + + ix = get_high_word(x); + lx = get_low_word(x); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + // -lx == !lx + 1 + if (ix | (lx | (!lx + 1)) >> 31) > 0x7ff00000 { + /* nan */ + return x; + } + + /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x) + * Thus, J(-n,x) = J(n,-x) + */ + /* nm1 = |n|-1 is used instead of |n| to handle n==INT_MIN */ + if n == 0 { + return j0(x); + } + if n < 0 { + nm1 = -(n + 1); + x = -x; + sign = !sign; + } else { + nm1 = n - 1; + } + if nm1 == 0 { + return j1(x); + } + + sign &= (n & 1) != 0; /* even n: 0, odd n: signbit(x) */ + x = fabs(x); + if (ix | lx) == 0 || ix == 0x7ff00000 { + /* if x is 0 or inf */ + b = 0.0; + } else if (nm1 as f64) < x { + /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ + if ix >= 0x52d00000 { + /* x > 2**302 */ + /* (x >> n**2) + * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Let s=sin(x), c=cos(x), + * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + * + * n sin(xn)*sqt2 cos(xn)*sqt2 + * ---------------------------------- + * 0 s-c c+s + * 1 -s-c -c+s + * 2 -s+c -c-s + * 3 s+c c-s + */ + temp = match nm1 & 3 { + 0 => -cos(x) + sin(x), + 1 => -cos(x) - sin(x), + 2 => cos(x) - sin(x), + 3 | _ => cos(x) + sin(x), + }; + b = INVSQRTPI * temp / sqrt(x); + } else { + a = j0(x); + b = j1(x); + i = 0; + while i < nm1 { + i += 1; + temp = b; + b = b * (2.0 * (i as f64) / x) - a; /* avoid underflow */ + a = temp; + } + } + } else { + if ix < 0x3e100000 { + /* x < 2**-29 */ + /* x is tiny, return the first Taylor expansion of J(n,x) + * J(n,x) = 1/n!*(x/2)^n - ... 
+ */ + if nm1 > 32 { + /* underflow */ + b = 0.0; + } else { + temp = x * 0.5; + b = temp; + a = 1.0; + i = 2; + while i <= nm1 + 1 { + a *= i as f64; /* a = n! */ + b *= temp; /* b = (x/2)^n */ + i += 1; + } + b = b / a; + } + } else { + /* use backward recurrence */ + /* x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h=2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... + * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k */ + let mut t: f64; + let mut q0: f64; + let mut q1: f64; + let mut w: f64; + let h: f64; + let mut z: f64; + let mut tmp: f64; + let nf: f64; + + let mut k: i32; + + nf = (nm1 as f64) + 1.0; + w = 2.0 * nf / x; + h = 2.0 / x; + z = w + h; + q0 = w; + q1 = w * z - 1.0; + k = 1; + while q1 < 1.0e9 { + k += 1; + z += h; + tmp = z * q1 - q0; + q0 = q1; + q1 = tmp; + } + t = 0.0; + i = k; + while i >= 0 { + t = 1.0 / (2.0 * ((i as f64) + nf) / x - t); + i -= 1; + } + a = t; + b = 1.0; + /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * Hence, if n*(log(2n/x)) > ... 
+ * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = nf * log(fabs(w)); + if tmp < 7.09782712893383973096e+02 { + i = nm1; + while i > 0 { + temp = b; + b = b * (2.0 * (i as f64)) / x - a; + a = temp; + i -= 1; + } + } else { + i = nm1; + while i > 0 { + temp = b; + b = b * (2.0 * (i as f64)) / x - a; + a = temp; + /* scale b to avoid spurious overflow */ + let x1p500 = f64::from_bits(0x5f30000000000000); // 0x1p500 == 2^500 + if b > x1p500 { + a /= b; + t /= b; + b = 1.0; + } + i -= 1; + } + } + z = j0(x); + w = j1(x); + if fabs(z) >= fabs(w) { + b = t * z / b; + } else { + b = t * w / a; + } + } + } + + if sign { + -b + } else { + b + } +} + +pub fn yn(n: i32, x: f64) -> f64 { + let mut ix: u32; + let lx: u32; + let mut ib: u32; + let nm1: i32; + let mut sign: bool; + let mut i: i32; + let mut a: f64; + let mut b: f64; + let mut temp: f64; + + ix = get_high_word(x); + lx = get_low_word(x); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + // -lx == !lx + 1 + if (ix | (lx | (!lx + 1)) >> 31) > 0x7ff00000 { + /* nan */ + return x; + } + if sign && (ix | lx) != 0 { + /* x < 0 */ + return 0.0 / 0.0; + } + if ix == 0x7ff00000 { + return 0.0; + } + + if n == 0 { + return y0(x); + } + if n < 0 { + nm1 = -(n + 1); + sign = (n & 1) != 0; + } else { + nm1 = n - 1; + sign = false; + } + if nm1 == 0 { + if sign { + return -y1(x); + } else { + return y1(x); + } + } + + if ix >= 0x52d00000 { + /* x > 2**302 */ + /* (x >> n**2) + * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Let s=sin(x), c=cos(x), + * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + * + * n sin(xn)*sqt2 cos(xn)*sqt2 + * ---------------------------------- + * 0 s-c c+s + * 1 -s-c -c+s + * 2 -s+c -c-s + * 3 s+c c-s + */ + temp = match nm1 & 3 { + 0 => -sin(x) - cos(x), + 1 => -sin(x) + cos(x), + 2 => sin(x) 
+ cos(x), + 3 | _ => sin(x) - cos(x), + }; + b = INVSQRTPI * temp / sqrt(x); + } else { + a = y0(x); + b = y1(x); + /* quit if b is -inf */ + ib = get_high_word(b); + i = 0; + while i < nm1 && ib != 0xfff00000 { + i += 1; + temp = b; + b = (2.0 * (i as f64) / x) * b - a; + ib = get_high_word(b); + a = temp; + } + } + + if sign { + -b + } else { + b + } +} diff --git a/libm/src/math/jnf.rs b/libm/src/math/jnf.rs index 4cd848a03..360f62e20 100644 --- a/libm/src/math/jnf.rs +++ b/libm/src/math/jnf.rs @@ -1,255 +1,259 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_jnf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -use super::{fabsf, j0f, j1f, logf, y0f, y1f}; - -pub fn jnf(n: isize, mut x: f32) -> f32 -{ - let mut ix: u32; - let mut nm1: isize; - let mut sign: bool; - let mut i: isize; - let mut a: f32; - let mut b: f32; - let mut temp: f32; - - ix = x.to_bits(); - sign = (ix>>31) != 0; - ix &= 0x7fffffff; - if ix > 0x7f800000 { /* nan */ - return x; - } - - /* J(-n,x) = J(n,-x), use |n|-1 to avoid overflow in -n */ - if n == 0 { - return j0f(x); - } - if n < 0 { - nm1 = -(n+1); - x = -x; - sign = !sign; - } else { - nm1 = n-1; - } - if nm1 == 0 { - return j1f(x); - } - - sign &= (n&1) != 0; /* even n: 0, odd n: signbit(x) */ - x = fabsf(x); - if ix == 0 || ix == 0x7f800000 { /* if x is 0 or inf */ - b = 0.0; - } else if (nm1 as f32) < x { - /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ - a = j0f(x); - b = j1f(x); - i = 0; - while i < nm1 { - i += 1; - temp = b; - b = b*(2.0*(i as f32)/x) - a; - a = temp; - } - } else { - if ix < 
0x35800000 { /* x < 2**-20 */ - /* x is tiny, return the first Taylor expansion of J(n,x) - * J(n,x) = 1/n!*(x/2)^n - ... - */ - if nm1 > 8 { /* underflow */ - nm1 = 8; - } - temp = 0.5 * x; - b = temp; - a = 1.0; - i = 2; - while i <= nm1 + 1 { - a *= i as f32; /* a = n! */ - b *= temp; /* b = (x/2)^n */ - i += 1; - } - b = b/a; - } else { - /* use backward recurrence */ - /* x x^2 x^2 - * J(n,x)/J(n-1,x) = ---- ------ ------ ..... - * 2n - 2(n+1) - 2(n+2) - * - * 1 1 1 - * (for large x) = ---- ------ ------ ..... - * 2n 2(n+1) 2(n+2) - * -- - ------ - ------ - - * x x x - * - * Let w = 2n/x and h=2/x, then the above quotient - * is equal to the continued fraction: - * 1 - * = ----------------------- - * 1 - * w - ----------------- - * 1 - * w+h - --------- - * w+2h - ... - * - * To determine how many terms needed, let - * Q(0) = w, Q(1) = w(w+h) - 1, - * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), - * When Q(k) > 1e4 good for single - * When Q(k) > 1e9 good for double - * When Q(k) > 1e17 good for quadruple - */ - /* determine k */ - let mut t: f32; - let mut q0: f32; - let mut q1: f32; - let mut w: f32; - let h: f32; - let mut z: f32; - let mut tmp: f32; - let nf: f32; - let mut k: isize; - - nf = (nm1 as f32)+1.0; - w = 2.0*(nf as f32)/x; - h = 2.0/x; - z = w+h; - q0 = w; - q1 = w*z - 1.0; - k = 1; - while q1 < 1.0e4 { - k += 1; - z += h; - tmp = z*q1 - q0; - q0 = q1; - q1 = tmp; - } - t = 0.0; - i = k; - while i >= 0 { - t = 1.0/(2.0*((i as f32)+nf)/x-t); - i -= 1; - } - a = t; - b = 1.0; - /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) - * Hence, if n*(log(2n/x)) > ... 
- * single 8.8722839355e+01 - * double 7.09782712893383973096e+02 - * long double 1.1356523406294143949491931077970765006170e+04 - * then recurrent value may overflow and the result is - * likely underflow to zero - */ - tmp = nf*logf(fabsf(w)); - if tmp < 88.721679688 { - i = nm1; - while i > 0 { - temp = b; - b = 2.0*(i as f32)*b/x - a; - a = temp; - i -= 1; - } - } else { - i = nm1; - while i > 0 { - temp = b; - b = 2.0*(i as f32)*b/x - a; - a = temp; - /* scale b to avoid spurious overflow */ - let x1p60 = f32::from_bits(0x5d800000); // 0x1p60 == 2^60 - if b > x1p60 { - a /= b; - t /= b; - b = 1.0; - } - i -= 1; - } - } - z = j0f(x); - w = j1f(x); - if fabsf(z) >= fabsf(w) { - b = t*z/b; - } else { - b = t*w/a; - } - } - } - - if sign { - -b - } else { - b - } -} - -pub fn ynf(n: isize, x: f32) -> f32 -{ - let mut ix: u32; - let mut ib: u32; - let nm1: isize; - let mut sign: bool; - let mut i: isize; - let mut a: f32; - let mut b: f32; - let mut temp: f32; - - ix = x.to_bits(); - sign = (ix>>31) != 0; - ix &= 0x7fffffff; - if ix > 0x7f800000 { /* nan */ - return x; - } - if sign && ix != 0 { /* x < 0 */ - return 0.0/0.0; - } - if ix == 0x7f800000 { - return 0.0; - } - - if n == 0 { - return y0f(x); - } - if n < 0 { - nm1 = -(n+1); - sign = (n&1) != 0; - } else { - nm1 = n-1; - sign = false; - } - if nm1 == 0 { - if sign { - return -y1f(x); - } else { - return y1f(x); - } - } - - a = y0f(x); - b = y1f(x); - /* quit if b is -inf */ - ib = b.to_bits(); - i = 0; - while i < nm1 && ib != 0xff800000 { - i += 1; - temp = b; - b = (2.0*(i as f32)/x)*b - a; - ib = b.to_bits(); - a = temp; - } - - if sign { - -b - } else { - b - } -} +/* origin: FreeBSD /usr/src/lib/msun/src/e_jnf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. 
+ * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{fabsf, j0f, j1f, logf, y0f, y1f}; + +pub fn jnf(n: i32, mut x: f32) -> f32 { + let mut ix: u32; + let mut nm1: i32; + let mut sign: bool; + let mut i: i32; + let mut a: f32; + let mut b: f32; + let mut temp: f32; + + ix = x.to_bits(); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + if ix > 0x7f800000 { + /* nan */ + return x; + } + + /* J(-n,x) = J(n,-x), use |n|-1 to avoid overflow in -n */ + if n == 0 { + return j0f(x); + } + if n < 0 { + nm1 = -(n + 1); + x = -x; + sign = !sign; + } else { + nm1 = n - 1; + } + if nm1 == 0 { + return j1f(x); + } + + sign &= (n & 1) != 0; /* even n: 0, odd n: signbit(x) */ + x = fabsf(x); + if ix == 0 || ix == 0x7f800000 { + /* if x is 0 or inf */ + b = 0.0; + } else if (nm1 as f32) < x { + /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ + a = j0f(x); + b = j1f(x); + i = 0; + while i < nm1 { + i += 1; + temp = b; + b = b * (2.0 * (i as f32) / x) - a; + a = temp; + } + } else { + if ix < 0x35800000 { + /* x < 2**-20 */ + /* x is tiny, return the first Taylor expansion of J(n,x) + * J(n,x) = 1/n!*(x/2)^n - ... + */ + if nm1 > 8 { + /* underflow */ + nm1 = 8; + } + temp = 0.5 * x; + b = temp; + a = 1.0; + i = 2; + while i <= nm1 + 1 { + a *= i as f32; /* a = n! */ + b *= temp; /* b = (x/2)^n */ + i += 1; + } + b = b / a; + } else { + /* use backward recurrence */ + /* x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h=2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... 
+ * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k */ + let mut t: f32; + let mut q0: f32; + let mut q1: f32; + let mut w: f32; + let h: f32; + let mut z: f32; + let mut tmp: f32; + let nf: f32; + let mut k: i32; + + nf = (nm1 as f32) + 1.0; + w = 2.0 * (nf as f32) / x; + h = 2.0 / x; + z = w + h; + q0 = w; + q1 = w * z - 1.0; + k = 1; + while q1 < 1.0e4 { + k += 1; + z += h; + tmp = z * q1 - q0; + q0 = q1; + q1 = tmp; + } + t = 0.0; + i = k; + while i >= 0 { + t = 1.0 / (2.0 * ((i as f32) + nf) / x - t); + i -= 1; + } + a = t; + b = 1.0; + /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * Hence, if n*(log(2n/x)) > ... + * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = nf * logf(fabsf(w)); + if tmp < 88.721679688 { + i = nm1; + while i > 0 { + temp = b; + b = 2.0 * (i as f32) * b / x - a; + a = temp; + i -= 1; + } + } else { + i = nm1; + while i > 0 { + temp = b; + b = 2.0 * (i as f32) * b / x - a; + a = temp; + /* scale b to avoid spurious overflow */ + let x1p60 = f32::from_bits(0x5d800000); // 0x1p60 == 2^60 + if b > x1p60 { + a /= b; + t /= b; + b = 1.0; + } + i -= 1; + } + } + z = j0f(x); + w = j1f(x); + if fabsf(z) >= fabsf(w) { + b = t * z / b; + } else { + b = t * w / a; + } + } + } + + if sign { + -b + } else { + b + } +} + +pub fn ynf(n: i32, x: f32) -> f32 { + let mut ix: u32; + let mut ib: u32; + let nm1: i32; + let mut sign: bool; + let mut i: i32; + let mut a: f32; + let mut b: f32; + let mut temp: f32; + + ix = x.to_bits(); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + if ix > 0x7f800000 { + /* nan */ + return x; + } + if sign && ix != 0 { + /* x < 0 */ + return 0.0 / 0.0; + } 
+ if ix == 0x7f800000 { + return 0.0; + } + + if n == 0 { + return y0f(x); + } + if n < 0 { + nm1 = -(n + 1); + sign = (n & 1) != 0; + } else { + nm1 = n - 1; + sign = false; + } + if nm1 == 0 { + if sign { + return -y1f(x); + } else { + return y1f(x); + } + } + + a = y0f(x); + b = y1f(x); + /* quit if b is -inf */ + ib = b.to_bits(); + i = 0; + while i < nm1 && ib != 0xff800000 { + i += 1; + temp = b; + b = (2.0 * (i as f32) / x) * b - a; + ib = b.to_bits(); + a = temp; + } + + if sign { + -b + } else { + b + } +} diff --git a/libm/src/math/lgamma.rs b/libm/src/math/lgamma.rs index 35b252652..b1a321e30 100644 --- a/libm/src/math/lgamma.rs +++ b/libm/src/math/lgamma.rs @@ -1,309 +1,323 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_lgamma_r.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - * - */ -/* lgamma_r(x, signgamp) - * Reentrant version of the logarithm of the Gamma function - * with user provide pointer for the sign of Gamma(x). - * - * Method: - * 1. Argument Reduction for 0 < x <= 8 - * Since gamma(1+s)=s*gamma(s), for x in [0,8], we may - * reduce x to a number in [1.5,2.5] by - * lgamma(1+s) = log(s) + lgamma(s) - * for example, - * lgamma(7.3) = log(6.3) + lgamma(6.3) - * = log(6.3*5.3) + lgamma(5.3) - * = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3) - * 2. Polynomial approximation of lgamma around its - * minimun ymin=1.461632144968362245 to maintain monotonicity. - * On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use - * Let z = x-ymin; - * lgamma(x) = -1.214862905358496078218 + z^2*poly(z) - * where - * poly(z) is a 14 degree polynomial. - * 2. 
Rational approximation in the primary interval [2,3] - * We use the following approximation: - * s = x-2.0; - * lgamma(x) = 0.5*s + s*P(s)/Q(s) - * with accuracy - * |P/Q - (lgamma(x)-0.5s)| < 2**-61.71 - * Our algorithms are based on the following observation - * - * zeta(2)-1 2 zeta(3)-1 3 - * lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ... - * 2 3 - * - * where Euler = 0.5771... is the Euler constant, which is very - * close to 0.5. - * - * 3. For x>=8, we have - * lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+.... - * (better formula: - * lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...) - * Let z = 1/x, then we approximation - * f(z) = lgamma(x) - (x-0.5)(log(x)-1) - * by - * 3 5 11 - * w = w0 + w1*z + w2*z + w3*z + ... + w6*z - * where - * |w - f(z)| < 2**-58.74 - * - * 4. For negative x, since (G is gamma function) - * -x*G(-x)*G(x) = PI/sin(PI*x), - * we have - * G(x) = PI/(sin(PI*x)*(-x)*G(-x)) - * since G(-x) is positive, sign(G(x)) = sign(sin(PI*x)) for x<0 - * Hence, for x<0, signgam = sign(sin(PI*x)) and - * lgamma(x) = log(|Gamma(x)|) - * = log(PI/(|x*sin(PI*x)|)) - lgamma(-x); - * Note: one should avoid compute PI*(-x) directly in the - * computation of sin(PI*(-x)). - * - * 5. Special Cases - * lgamma(2+s) ~ s*(1-Euler) for tiny s - * lgamma(1) = lgamma(2) = 0 - * lgamma(x) ~ -log(|x|) for tiny x - * lgamma(0) = lgamma(neg.integer) = inf and raise divide-by-zero - * lgamma(inf) = inf - * lgamma(-inf) = inf (bug for bug compatible with C99!?) 
- * - */ - -use super::{floor, k_cos, k_sin, log}; - -const PI: f64 = 3.14159265358979311600e+00; /* 0x400921FB, 0x54442D18 */ -const A0: f64 = 7.72156649015328655494e-02; /* 0x3FB3C467, 0xE37DB0C8 */ -const A1: f64 = 3.22467033424113591611e-01; /* 0x3FD4A34C, 0xC4A60FAD */ -const A2: f64 = 6.73523010531292681824e-02; /* 0x3FB13E00, 0x1A5562A7 */ -const A3: f64 = 2.05808084325167332806e-02; /* 0x3F951322, 0xAC92547B */ -const A4: f64 = 7.38555086081402883957e-03; /* 0x3F7E404F, 0xB68FEFE8 */ -const A5: f64 = 2.89051383673415629091e-03; /* 0x3F67ADD8, 0xCCB7926B */ -const A6: f64 = 1.19270763183362067845e-03; /* 0x3F538A94, 0x116F3F5D */ -const A7: f64 = 5.10069792153511336608e-04; /* 0x3F40B6C6, 0x89B99C00 */ -const A8: f64 = 2.20862790713908385557e-04; /* 0x3F2CF2EC, 0xED10E54D */ -const A9: f64 = 1.08011567247583939954e-04; /* 0x3F1C5088, 0x987DFB07 */ -const A10: f64 = 2.52144565451257326939e-05; /* 0x3EFA7074, 0x428CFA52 */ -const A11: f64 = 4.48640949618915160150e-05; /* 0x3F07858E, 0x90A45837 */ -const TC: f64 = 1.46163214496836224576e+00; /* 0x3FF762D8, 0x6356BE3F */ -const TF: f64 = -1.21486290535849611461e-01; /* 0xBFBF19B9, 0xBCC38A42 */ -/* tt = -(tail of TF) */ -const TT: f64 = -3.63867699703950536541e-18; /* 0xBC50C7CA, 0xA48A971F */ -const T0: f64 = 4.83836122723810047042e-01; /* 0x3FDEF72B, 0xC8EE38A2 */ -const T1: f64 = -1.47587722994593911752e-01; /* 0xBFC2E427, 0x8DC6C509 */ -const T2: f64 = 6.46249402391333854778e-02; /* 0x3FB08B42, 0x94D5419B */ -const T3: f64 = -3.27885410759859649565e-02; /* 0xBFA0C9A8, 0xDF35B713 */ -const T4: f64 = 1.79706750811820387126e-02; /* 0x3F9266E7, 0x970AF9EC */ -const T5: f64 = -1.03142241298341437450e-02; /* 0xBF851F9F, 0xBA91EC6A */ -const T6: f64 = 6.10053870246291332635e-03; /* 0x3F78FCE0, 0xE370E344 */ -const T7: f64 = -3.68452016781138256760e-03; /* 0xBF6E2EFF, 0xB3E914D7 */ -const T8: f64 = 2.25964780900612472250e-03; /* 0x3F6282D3, 0x2E15C915 */ -const T9: f64 = -1.40346469989232843813e-03; /* 0xBF56FE8E, 
0xBF2D1AF1 */ -const T10: f64 = 8.81081882437654011382e-04; /* 0x3F4CDF0C, 0xEF61A8E9 */ -const T11: f64 = -5.38595305356740546715e-04; /* 0xBF41A610, 0x9C73E0EC */ -const T12: f64 = 3.15632070903625950361e-04; /* 0x3F34AF6D, 0x6C0EBBF7 */ -const T13: f64 = -3.12754168375120860518e-04; /* 0xBF347F24, 0xECC38C38 */ -const T14: f64 = 3.35529192635519073543e-04; /* 0x3F35FD3E, 0xE8C2D3F4 */ -const U0: f64 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */ -const U1: f64 = 6.32827064025093366517e-01; /* 0x3FE4401E, 0x8B005DFF */ -const U2: f64 = 1.45492250137234768737e+00; /* 0x3FF7475C, 0xD119BD6F */ -const U3: f64 = 9.77717527963372745603e-01; /* 0x3FEF4976, 0x44EA8450 */ -const U4: f64 = 2.28963728064692451092e-01; /* 0x3FCD4EAE, 0xF6010924 */ -const U5: f64 = 1.33810918536787660377e-02; /* 0x3F8B678B, 0xBF2BAB09 */ -const V1: f64 = 2.45597793713041134822e+00; /* 0x4003A5D7, 0xC2BD619C */ -const V2: f64 = 2.12848976379893395361e+00; /* 0x40010725, 0xA42B18F5 */ -const V3: f64 = 7.69285150456672783825e-01; /* 0x3FE89DFB, 0xE45050AF */ -const V4: f64 = 1.04222645593369134254e-01; /* 0x3FBAAE55, 0xD6537C88 */ -const V5: f64 = 3.21709242282423911810e-03; /* 0x3F6A5ABB, 0x57D0CF61 */ -const S0: f64 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */ -const S1: f64 = 2.14982415960608852501e-01; /* 0x3FCB848B, 0x36E20878 */ -const S2: f64 = 3.25778796408930981787e-01; /* 0x3FD4D98F, 0x4F139F59 */ -const S3: f64 = 1.46350472652464452805e-01; /* 0x3FC2BB9C, 0xBEE5F2F7 */ -const S4: f64 = 2.66422703033638609560e-02; /* 0x3F9B481C, 0x7E939961 */ -const S5: f64 = 1.84028451407337715652e-03; /* 0x3F5E26B6, 0x7368F239 */ -const S6: f64 = 3.19475326584100867617e-05; /* 0x3F00BFEC, 0xDD17E945 */ -const R1: f64 = 1.39200533467621045958e+00; /* 0x3FF645A7, 0x62C4AB74 */ -const R2: f64 = 7.21935547567138069525e-01; /* 0x3FE71A18, 0x93D3DCDC */ -const R3: f64 = 1.71933865632803078993e-01; /* 0x3FC601ED, 0xCCFBDF27 */ -const R4: f64 = 1.86459191715652901344e-02; /* 
0x3F9317EA, 0x742ED475 */ -const R5: f64 = 7.77942496381893596434e-04; /* 0x3F497DDA, 0xCA41A95B */ -const R6: f64 = 7.32668430744625636189e-06; /* 0x3EDEBAF7, 0xA5B38140 */ -const W0: f64 = 4.18938533204672725052e-01; /* 0x3FDACFE3, 0x90C97D69 */ -const W1: f64 = 8.33333333333329678849e-02; /* 0x3FB55555, 0x5555553B */ -const W2: f64 = -2.77777777728775536470e-03; /* 0xBF66C16C, 0x16B02E5C */ -const W3: f64 = 7.93650558643019558500e-04; /* 0x3F4A019F, 0x98CF38B6 */ -const W4: f64 = -5.95187557450339963135e-04; /* 0xBF4380CB, 0x8C0FE741 */ -const W5: f64 = 8.36339918996282139126e-04; /* 0x3F4B67BA, 0x4CDAD5D1 */ -const W6: f64 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ - -/* sin(PI*x) assuming x > 2^-100, if sin(PI*x)==0 the sign is arbitrary */ -fn sin_pi(mut x: f64) -> f64 -{ - let mut n: isize; - - /* spurious inexact if odd int */ - x = 2.0*(x*0.5 - floor(x*0.5)); /* x mod 2.0 */ - - n = (x*4.0) as isize; - n = (n+1)/2; - x -= (n as f64)*0.5; - x *= PI; - - match n { - 1 => k_cos(x, 0.0), - 2 => k_sin(-x, 0.0, 0), - 3 => -k_cos(x, 0.0), - 0|_ => k_sin(x, 0.0, 0), - } -} - -pub fn lgamma(x: f64) -> f64 { - lgamma_r(x).0 -} - -pub fn lgamma_r(mut x: f64) -> (f64, isize) -{ - let u: u64 = x.to_bits(); - let mut t: f64; - let y: f64; - let mut z: f64; - let nadj: f64; - let p: f64; - let p1: f64; - let p2: f64; - let p3: f64; - let q: f64; - let mut r: f64; - let w: f64; - let ix: u32; - let sign: bool; - let i: isize; - let mut signgam: isize; - - /* purge off +-inf, NaN, +-0, tiny and negative arguments */ - signgam = 1; - sign = (u>>63) != 0; - ix = ((u>>32) as u32) & 0x7fffffff; - if ix >= 0x7ff00000 { - return (x*x, signgam); - } - if ix < (0x3ff-70)<<20 { /* |x|<2**-70, return -log(|x|) */ - if sign { - x = -x; - signgam = -1; - } - return (-log(x), signgam); - } - if sign { - x = -x; - t = sin_pi(x); - if t == 0.0 { /* -integer */ - return (1.0/(x-x), signgam); - } - if t > 0.0 { - signgam = -1; - } else { - t = -t; - } - nadj = 
log(PI/(t*x)); - } else { - nadj = 0.0; - } - - /* purge off 1 and 2 */ - if (ix == 0x3ff00000 || ix == 0x40000000) && (u & 0xffffffff) == 0 { - r = 0.0; - } - /* for x < 2.0 */ - else if ix < 0x40000000 { - if ix <= 0x3feccccc { /* lgamma(x) = lgamma(x+1)-log(x) */ - r = -log(x); - if ix >= 0x3FE76944 { - y = 1.0 - x; - i = 0; - } else if ix >= 0x3FCDA661 { - y = x - (TC-1.0); - i = 1; - } else { - y = x; - i = 2; - } - } else { - r = 0.0; - if ix >= 0x3FFBB4C3 { /* [1.7316,2] */ - y = 2.0 - x; - i = 0; - } else if ix >= 0x3FF3B4C4 { /* [1.23,1.73] */ - y = x - TC; - i = 1; - } else { - y = x - 1.0; - i = 2; - } - } - match i { - 0 => { - z = y*y; - p1 = A0+z*(A2+z*(A4+z*(A6+z*(A8+z*A10)))); - p2 = z*(A1+z*(A3+z*(A5+z*(A7+z*(A9+z*A11))))); - p = y*p1+p2; - r += p-0.5*y; - } - 1 => { - z = y*y; - w = z*y; - p1 = T0+w*(T3+w*(T6+w*(T9 +w*T12))); /* parallel comp */ - p2 = T1+w*(T4+w*(T7+w*(T10+w*T13))); - p3 = T2+w*(T5+w*(T8+w*(T11+w*T14))); - p = z*p1-(TT-w*(p2+y*p3)); - r += TF + p; - } - 2 => { - p1 = y*(U0+y*(U1+y*(U2+y*(U3+y*(U4+y*U5))))); - p2 = 1.0+y*(V1+y*(V2+y*(V3+y*(V4+y*V5)))); - r += -0.5*y + p1/p2; - } - #[cfg(feature = "checked")] - _ => unreachable!(), - #[cfg(not(feature = "checked"))] - _ => {} - } - } else if ix < 0x40200000 { /* x < 8.0 */ - i = x as isize; - y = x - (i as f64); - p = y*(S0+y*(S1+y*(S2+y*(S3+y*(S4+y*(S5+y*S6)))))); - q = 1.0+y*(R1+y*(R2+y*(R3+y*(R4+y*(R5+y*R6))))); - r = 0.5*y+p/q; - z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ - // TODO: In C, this was implemented using switch jumps with fallthrough. - // Does this implementation have performance problems? 
- if i >= 7 { z *= y + 6.0; } - if i >= 6 { z *= y + 5.0; } - if i >= 5 { z *= y + 4.0; } - if i >= 4 { z *= y + 3.0; } - if i >= 3 { - z *= y + 2.0; - r += log(z); - } - } else if ix < 0x43900000 { /* 8.0 <= x < 2**58 */ - t = log(x); - z = 1.0/x; - y = z*z; - w = W0+z*(W1+y*(W2+y*(W3+y*(W4+y*(W5+y*W6))))); - r = (x-0.5)*(t-1.0)+w; - } else { /* 2**58 <= x <= inf */ - r = x*(log(x)-1.0); - } - if sign { - r = nadj - r; - } - return (r, signgam); -} +/* origin: FreeBSD /usr/src/lib/msun/src/e_lgamma_r.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + */ +/* lgamma_r(x, signgamp) + * Reentrant version of the logarithm of the Gamma function + * with user provide pointer for the sign of Gamma(x). + * + * Method: + * 1. Argument Reduction for 0 < x <= 8 + * Since gamma(1+s)=s*gamma(s), for x in [0,8], we may + * reduce x to a number in [1.5,2.5] by + * lgamma(1+s) = log(s) + lgamma(s) + * for example, + * lgamma(7.3) = log(6.3) + lgamma(6.3) + * = log(6.3*5.3) + lgamma(5.3) + * = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3) + * 2. Polynomial approximation of lgamma around its + * minimun ymin=1.461632144968362245 to maintain monotonicity. + * On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use + * Let z = x-ymin; + * lgamma(x) = -1.214862905358496078218 + z^2*poly(z) + * where + * poly(z) is a 14 degree polynomial. + * 2. 
Rational approximation in the primary interval [2,3] + * We use the following approximation: + * s = x-2.0; + * lgamma(x) = 0.5*s + s*P(s)/Q(s) + * with accuracy + * |P/Q - (lgamma(x)-0.5s)| < 2**-61.71 + * Our algorithms are based on the following observation + * + * zeta(2)-1 2 zeta(3)-1 3 + * lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ... + * 2 3 + * + * where Euler = 0.5771... is the Euler constant, which is very + * close to 0.5. + * + * 3. For x>=8, we have + * lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+.... + * (better formula: + * lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...) + * Let z = 1/x, then we approximation + * f(z) = lgamma(x) - (x-0.5)(log(x)-1) + * by + * 3 5 11 + * w = w0 + w1*z + w2*z + w3*z + ... + w6*z + * where + * |w - f(z)| < 2**-58.74 + * + * 4. For negative x, since (G is gamma function) + * -x*G(-x)*G(x) = PI/sin(PI*x), + * we have + * G(x) = PI/(sin(PI*x)*(-x)*G(-x)) + * since G(-x) is positive, sign(G(x)) = sign(sin(PI*x)) for x<0 + * Hence, for x<0, signgam = sign(sin(PI*x)) and + * lgamma(x) = log(|Gamma(x)|) + * = log(PI/(|x*sin(PI*x)|)) - lgamma(-x); + * Note: one should avoid compute PI*(-x) directly in the + * computation of sin(PI*(-x)). + * + * 5. Special Cases + * lgamma(2+s) ~ s*(1-Euler) for tiny s + * lgamma(1) = lgamma(2) = 0 + * lgamma(x) ~ -log(|x|) for tiny x + * lgamma(0) = lgamma(neg.integer) = inf and raise divide-by-zero + * lgamma(inf) = inf + * lgamma(-inf) = inf (bug for bug compatible with C99!?) 
+ * + */ + +use super::{floor, k_cos, k_sin, log}; + +const PI: f64 = 3.14159265358979311600e+00; /* 0x400921FB, 0x54442D18 */ +const A0: f64 = 7.72156649015328655494e-02; /* 0x3FB3C467, 0xE37DB0C8 */ +const A1: f64 = 3.22467033424113591611e-01; /* 0x3FD4A34C, 0xC4A60FAD */ +const A2: f64 = 6.73523010531292681824e-02; /* 0x3FB13E00, 0x1A5562A7 */ +const A3: f64 = 2.05808084325167332806e-02; /* 0x3F951322, 0xAC92547B */ +const A4: f64 = 7.38555086081402883957e-03; /* 0x3F7E404F, 0xB68FEFE8 */ +const A5: f64 = 2.89051383673415629091e-03; /* 0x3F67ADD8, 0xCCB7926B */ +const A6: f64 = 1.19270763183362067845e-03; /* 0x3F538A94, 0x116F3F5D */ +const A7: f64 = 5.10069792153511336608e-04; /* 0x3F40B6C6, 0x89B99C00 */ +const A8: f64 = 2.20862790713908385557e-04; /* 0x3F2CF2EC, 0xED10E54D */ +const A9: f64 = 1.08011567247583939954e-04; /* 0x3F1C5088, 0x987DFB07 */ +const A10: f64 = 2.52144565451257326939e-05; /* 0x3EFA7074, 0x428CFA52 */ +const A11: f64 = 4.48640949618915160150e-05; /* 0x3F07858E, 0x90A45837 */ +const TC: f64 = 1.46163214496836224576e+00; /* 0x3FF762D8, 0x6356BE3F */ +const TF: f64 = -1.21486290535849611461e-01; /* 0xBFBF19B9, 0xBCC38A42 */ +/* tt = -(tail of TF) */ +const TT: f64 = -3.63867699703950536541e-18; /* 0xBC50C7CA, 0xA48A971F */ +const T0: f64 = 4.83836122723810047042e-01; /* 0x3FDEF72B, 0xC8EE38A2 */ +const T1: f64 = -1.47587722994593911752e-01; /* 0xBFC2E427, 0x8DC6C509 */ +const T2: f64 = 6.46249402391333854778e-02; /* 0x3FB08B42, 0x94D5419B */ +const T3: f64 = -3.27885410759859649565e-02; /* 0xBFA0C9A8, 0xDF35B713 */ +const T4: f64 = 1.79706750811820387126e-02; /* 0x3F9266E7, 0x970AF9EC */ +const T5: f64 = -1.03142241298341437450e-02; /* 0xBF851F9F, 0xBA91EC6A */ +const T6: f64 = 6.10053870246291332635e-03; /* 0x3F78FCE0, 0xE370E344 */ +const T7: f64 = -3.68452016781138256760e-03; /* 0xBF6E2EFF, 0xB3E914D7 */ +const T8: f64 = 2.25964780900612472250e-03; /* 0x3F6282D3, 0x2E15C915 */ +const T9: f64 = -1.40346469989232843813e-03; /* 0xBF56FE8E, 
0xBF2D1AF1 */ +const T10: f64 = 8.81081882437654011382e-04; /* 0x3F4CDF0C, 0xEF61A8E9 */ +const T11: f64 = -5.38595305356740546715e-04; /* 0xBF41A610, 0x9C73E0EC */ +const T12: f64 = 3.15632070903625950361e-04; /* 0x3F34AF6D, 0x6C0EBBF7 */ +const T13: f64 = -3.12754168375120860518e-04; /* 0xBF347F24, 0xECC38C38 */ +const T14: f64 = 3.35529192635519073543e-04; /* 0x3F35FD3E, 0xE8C2D3F4 */ +const U0: f64 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */ +const U1: f64 = 6.32827064025093366517e-01; /* 0x3FE4401E, 0x8B005DFF */ +const U2: f64 = 1.45492250137234768737e+00; /* 0x3FF7475C, 0xD119BD6F */ +const U3: f64 = 9.77717527963372745603e-01; /* 0x3FEF4976, 0x44EA8450 */ +const U4: f64 = 2.28963728064692451092e-01; /* 0x3FCD4EAE, 0xF6010924 */ +const U5: f64 = 1.33810918536787660377e-02; /* 0x3F8B678B, 0xBF2BAB09 */ +const V1: f64 = 2.45597793713041134822e+00; /* 0x4003A5D7, 0xC2BD619C */ +const V2: f64 = 2.12848976379893395361e+00; /* 0x40010725, 0xA42B18F5 */ +const V3: f64 = 7.69285150456672783825e-01; /* 0x3FE89DFB, 0xE45050AF */ +const V4: f64 = 1.04222645593369134254e-01; /* 0x3FBAAE55, 0xD6537C88 */ +const V5: f64 = 3.21709242282423911810e-03; /* 0x3F6A5ABB, 0x57D0CF61 */ +const S0: f64 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */ +const S1: f64 = 2.14982415960608852501e-01; /* 0x3FCB848B, 0x36E20878 */ +const S2: f64 = 3.25778796408930981787e-01; /* 0x3FD4D98F, 0x4F139F59 */ +const S3: f64 = 1.46350472652464452805e-01; /* 0x3FC2BB9C, 0xBEE5F2F7 */ +const S4: f64 = 2.66422703033638609560e-02; /* 0x3F9B481C, 0x7E939961 */ +const S5: f64 = 1.84028451407337715652e-03; /* 0x3F5E26B6, 0x7368F239 */ +const S6: f64 = 3.19475326584100867617e-05; /* 0x3F00BFEC, 0xDD17E945 */ +const R1: f64 = 1.39200533467621045958e+00; /* 0x3FF645A7, 0x62C4AB74 */ +const R2: f64 = 7.21935547567138069525e-01; /* 0x3FE71A18, 0x93D3DCDC */ +const R3: f64 = 1.71933865632803078993e-01; /* 0x3FC601ED, 0xCCFBDF27 */ +const R4: f64 = 1.86459191715652901344e-02; /* 
0x3F9317EA, 0x742ED475 */ +const R5: f64 = 7.77942496381893596434e-04; /* 0x3F497DDA, 0xCA41A95B */ +const R6: f64 = 7.32668430744625636189e-06; /* 0x3EDEBAF7, 0xA5B38140 */ +const W0: f64 = 4.18938533204672725052e-01; /* 0x3FDACFE3, 0x90C97D69 */ +const W1: f64 = 8.33333333333329678849e-02; /* 0x3FB55555, 0x5555553B */ +const W2: f64 = -2.77777777728775536470e-03; /* 0xBF66C16C, 0x16B02E5C */ +const W3: f64 = 7.93650558643019558500e-04; /* 0x3F4A019F, 0x98CF38B6 */ +const W4: f64 = -5.95187557450339963135e-04; /* 0xBF4380CB, 0x8C0FE741 */ +const W5: f64 = 8.36339918996282139126e-04; /* 0x3F4B67BA, 0x4CDAD5D1 */ +const W6: f64 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ + +/* sin(PI*x) assuming x > 2^-100, if sin(PI*x)==0 the sign is arbitrary */ +fn sin_pi(mut x: f64) -> f64 { + let mut n: i32; + + /* spurious inexact if odd int */ + x = 2.0 * (x * 0.5 - floor(x * 0.5)); /* x mod 2.0 */ + + n = (x * 4.0) as i32; + n = (n + 1) / 2; + x -= (n as f64) * 0.5; + x *= PI; + + match n { + 1 => k_cos(x, 0.0), + 2 => k_sin(-x, 0.0, 0), + 3 => -k_cos(x, 0.0), + 0 | _ => k_sin(x, 0.0, 0), + } +} + +pub fn lgamma(x: f64) -> f64 { + lgamma_r(x).0 +} + +pub fn lgamma_r(mut x: f64) -> (f64, i32) { + let u: u64 = x.to_bits(); + let mut t: f64; + let y: f64; + let mut z: f64; + let nadj: f64; + let p: f64; + let p1: f64; + let p2: f64; + let p3: f64; + let q: f64; + let mut r: f64; + let w: f64; + let ix: u32; + let sign: bool; + let i: i32; + let mut signgam: i32; + + /* purge off +-inf, NaN, +-0, tiny and negative arguments */ + signgam = 1; + sign = (u >> 63) != 0; + ix = ((u >> 32) as u32) & 0x7fffffff; + if ix >= 0x7ff00000 { + return (x * x, signgam); + } + if ix < (0x3ff - 70) << 20 { + /* |x|<2**-70, return -log(|x|) */ + if sign { + x = -x; + signgam = -1; + } + return (-log(x), signgam); + } + if sign { + x = -x; + t = sin_pi(x); + if t == 0.0 { + /* -integer */ + return (1.0 / (x - x), signgam); + } + if t > 0.0 { + signgam = -1; + } else { + t = -t; + } 
+ nadj = log(PI / (t * x)); + } else { + nadj = 0.0; + } + + /* purge off 1 and 2 */ + if (ix == 0x3ff00000 || ix == 0x40000000) && (u & 0xffffffff) == 0 { + r = 0.0; + } + /* for x < 2.0 */ + else if ix < 0x40000000 { + if ix <= 0x3feccccc { + /* lgamma(x) = lgamma(x+1)-log(x) */ + r = -log(x); + if ix >= 0x3FE76944 { + y = 1.0 - x; + i = 0; + } else if ix >= 0x3FCDA661 { + y = x - (TC - 1.0); + i = 1; + } else { + y = x; + i = 2; + } + } else { + r = 0.0; + if ix >= 0x3FFBB4C3 { + /* [1.7316,2] */ + y = 2.0 - x; + i = 0; + } else if ix >= 0x3FF3B4C4 { + /* [1.23,1.73] */ + y = x - TC; + i = 1; + } else { + y = x - 1.0; + i = 2; + } + } + match i { + 0 => { + z = y * y; + p1 = A0 + z * (A2 + z * (A4 + z * (A6 + z * (A8 + z * A10)))); + p2 = z * (A1 + z * (A3 + z * (A5 + z * (A7 + z * (A9 + z * A11))))); + p = y * p1 + p2; + r += p - 0.5 * y; + } + 1 => { + z = y * y; + w = z * y; + p1 = T0 + w * (T3 + w * (T6 + w * (T9 + w * T12))); /* parallel comp */ + p2 = T1 + w * (T4 + w * (T7 + w * (T10 + w * T13))); + p3 = T2 + w * (T5 + w * (T8 + w * (T11 + w * T14))); + p = z * p1 - (TT - w * (p2 + y * p3)); + r += TF + p; + } + 2 => { + p1 = y * (U0 + y * (U1 + y * (U2 + y * (U3 + y * (U4 + y * U5))))); + p2 = 1.0 + y * (V1 + y * (V2 + y * (V3 + y * (V4 + y * V5)))); + r += -0.5 * y + p1 / p2; + } + #[cfg(feature = "checked")] + _ => unreachable!(), + #[cfg(not(feature = "checked"))] + _ => {} + } + } else if ix < 0x40200000 { + /* x < 8.0 */ + i = x as i32; + y = x - (i as f64); + p = y * (S0 + y * (S1 + y * (S2 + y * (S3 + y * (S4 + y * (S5 + y * S6)))))); + q = 1.0 + y * (R1 + y * (R2 + y * (R3 + y * (R4 + y * (R5 + y * R6))))); + r = 0.5 * y + p / q; + z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ + // TODO: In C, this was implemented using switch jumps with fallthrough. + // Does this implementation have performance problems? 
+ if i >= 7 { + z *= y + 6.0; + } + if i >= 6 { + z *= y + 5.0; + } + if i >= 5 { + z *= y + 4.0; + } + if i >= 4 { + z *= y + 3.0; + } + if i >= 3 { + z *= y + 2.0; + r += log(z); + } + } else if ix < 0x43900000 { + /* 8.0 <= x < 2**58 */ + t = log(x); + z = 1.0 / x; + y = z * z; + w = W0 + z * (W1 + y * (W2 + y * (W3 + y * (W4 + y * (W5 + y * W6))))); + r = (x - 0.5) * (t - 1.0) + w; + } else { + /* 2**58 <= x <= inf */ + r = x * (log(x) - 1.0); + } + if sign { + r = nadj - r; + } + return (r, signgam); +} diff --git a/libm/src/math/lgammaf.rs b/libm/src/math/lgammaf.rs index 60effa316..8fe8060b5 100644 --- a/libm/src/math/lgammaf.rs +++ b/libm/src/math/lgammaf.rs @@ -1,244 +1,258 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_lgammaf_r.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. 
- * ==================================================== - */ - -use super::{floorf, k_cosf, k_sinf, logf}; - -const PI: f32 = 3.1415927410e+00; /* 0x40490fdb */ -const A0: f32 = 7.7215664089e-02; /* 0x3d9e233f */ -const A1: f32 = 3.2246702909e-01; /* 0x3ea51a66 */ -const A2: f32 = 6.7352302372e-02; /* 0x3d89f001 */ -const A3: f32 = 2.0580807701e-02; /* 0x3ca89915 */ -const A4: f32 = 7.3855509982e-03; /* 0x3bf2027e */ -const A5: f32 = 2.8905137442e-03; /* 0x3b3d6ec6 */ -const A6: f32 = 1.1927076848e-03; /* 0x3a9c54a1 */ -const A7: f32 = 5.1006977446e-04; /* 0x3a05b634 */ -const A8: f32 = 2.2086278477e-04; /* 0x39679767 */ -const A9: f32 = 1.0801156895e-04; /* 0x38e28445 */ -const A10: f32 = 2.5214456400e-05; /* 0x37d383a2 */ -const A11: f32 = 4.4864096708e-05; /* 0x383c2c75 */ -const TC: f32 = 1.4616321325e+00; /* 0x3fbb16c3 */ -const TF: f32 = -1.2148628384e-01; /* 0xbdf8cdcd */ -/* TT = -(tail of TF) */ -const TT: f32 = 6.6971006518e-09; /* 0x31e61c52 */ -const T0: f32 = 4.8383611441e-01; /* 0x3ef7b95e */ -const T1: f32 = -1.4758771658e-01; /* 0xbe17213c */ -const T2: f32 = 6.4624942839e-02; /* 0x3d845a15 */ -const T3: f32 = -3.2788541168e-02; /* 0xbd064d47 */ -const T4: f32 = 1.7970675603e-02; /* 0x3c93373d */ -const T5: f32 = -1.0314224288e-02; /* 0xbc28fcfe */ -const T6: f32 = 6.1005386524e-03; /* 0x3bc7e707 */ -const T7: f32 = -3.6845202558e-03; /* 0xbb7177fe */ -const T8: f32 = 2.2596477065e-03; /* 0x3b141699 */ -const T9: f32 = -1.4034647029e-03; /* 0xbab7f476 */ -const T10: f32 = 8.8108185446e-04; /* 0x3a66f867 */ -const T11: f32 = -5.3859531181e-04; /* 0xba0d3085 */ -const T12: f32 = 3.1563205994e-04; /* 0x39a57b6b */ -const T13: f32 = -3.1275415677e-04; /* 0xb9a3f927 */ -const T14: f32 = 3.3552918467e-04; /* 0x39afe9f7 */ -const U0: f32 = -7.7215664089e-02; /* 0xbd9e233f */ -const U1: f32 = 6.3282704353e-01; /* 0x3f2200f4 */ -const U2: f32 = 1.4549225569e+00; /* 0x3fba3ae7 */ -const U3: f32 = 9.7771751881e-01; /* 0x3f7a4bb2 */ -const U4: f32 = 
2.2896373272e-01; /* 0x3e6a7578 */ -const U5: f32 = 1.3381091878e-02; /* 0x3c5b3c5e */ -const V1: f32 = 2.4559779167e+00; /* 0x401d2ebe */ -const V2: f32 = 2.1284897327e+00; /* 0x4008392d */ -const V3: f32 = 7.6928514242e-01; /* 0x3f44efdf */ -const V4: f32 = 1.0422264785e-01; /* 0x3dd572af */ -const V5: f32 = 3.2170924824e-03; /* 0x3b52d5db */ -const S0: f32 = -7.7215664089e-02; /* 0xbd9e233f */ -const S1: f32 = 2.1498242021e-01; /* 0x3e5c245a */ -const S2: f32 = 3.2577878237e-01; /* 0x3ea6cc7a */ -const S3: f32 = 1.4635047317e-01; /* 0x3e15dce6 */ -const S4: f32 = 2.6642270386e-02; /* 0x3cda40e4 */ -const S5: f32 = 1.8402845599e-03; /* 0x3af135b4 */ -const S6: f32 = 3.1947532989e-05; /* 0x3805ff67 */ -const R1: f32 = 1.3920053244e+00; /* 0x3fb22d3b */ -const R2: f32 = 7.2193557024e-01; /* 0x3f38d0c5 */ -const R3: f32 = 1.7193385959e-01; /* 0x3e300f6e */ -const R4: f32 = 1.8645919859e-02; /* 0x3c98bf54 */ -const R5: f32 = 7.7794247773e-04; /* 0x3a4beed6 */ -const R6: f32 = 7.3266842264e-06; /* 0x36f5d7bd */ -const W0: f32 = 4.1893854737e-01; /* 0x3ed67f1d */ -const W1: f32 = 8.3333335817e-02; /* 0x3daaaaab */ -const W2: f32 = -2.7777778450e-03; /* 0xbb360b61 */ -const W3: f32 = 7.9365057172e-04; /* 0x3a500cfd */ -const W4: f32 = -5.9518753551e-04; /* 0xba1c065c */ -const W5: f32 = 8.3633989561e-04; /* 0x3a5b3dd2 */ -const W6: f32 = -1.6309292987e-03; /* 0xbad5c4e8 */ - -/* sin(PI*x) assuming x > 2^-100, if sin(PI*x)==0 the sign is arbitrary */ -fn sin_pi(mut x: f32) -> f32 -{ - let mut y: f64; - let mut n: isize; - - /* spurious inexact if odd int */ - x = 2.0*(x*0.5 - floorf(x*0.5)); /* x mod 2.0 */ - - n = (x*4.0) as isize; - n = (n+1)/2; - y = (x as f64) - (n as f64)*0.5; - y *= 3.14159265358979323846; - match n { - 1 => k_cosf(y), - 2 => k_sinf(-y), - 3 => -k_cosf(y), - 0|_ => k_sinf(y), - } -} - -pub fn lgammaf(x: f32) -> f32 { - lgammaf_r(x).0 -} - -pub fn lgammaf_r(mut x: f32) -> (f32, isize) -{ - let u = x.to_bits(); - let mut t: f32; - let y: f32; - let 
mut z: f32; - let nadj: f32; - let p: f32; - let p1: f32; - let p2: f32; - let p3: f32; - let q: f32; - let mut r: f32; - let w: f32; - let ix: u32; - let i: isize; - let sign: bool; - let mut signgam: isize; - - /* purge off +-inf, NaN, +-0, tiny and negative arguments */ - signgam = 1; - sign = (u>>31) != 0; - ix = u & 0x7fffffff; - if ix >= 0x7f800000 { - return (x*x, signgam); - } - if ix < 0x35000000 { /* |x| < 2**-21, return -log(|x|) */ - if sign { - signgam = -1; - x = -x; - } - return (-logf(x), signgam); - } - if sign { - x = -x; - t = sin_pi(x); - if t == 0.0 { /* -integer */ - return (1.0/(x-x), signgam); - } - if t > 0.0 { - signgam = -1; - } else { - t = -t; - } - nadj = logf(PI/(t*x)); - } else { - nadj = 0.0; - } - - /* purge off 1 and 2 */ - if ix == 0x3f800000 || ix == 0x40000000 { - r = 0.0; - } - /* for x < 2.0 */ - else if ix < 0x40000000 { - if ix <= 0x3f666666 { /* lgamma(x) = lgamma(x+1)-log(x) */ - r = -logf(x); - if ix >= 0x3f3b4a20 { - y = 1.0 - x; - i = 0; - } else if ix >= 0x3e6d3308 { - y = x - (TC-1.0); - i = 1; - } else { - y = x; - i = 2; - } - } else { - r = 0.0; - if ix >= 0x3fdda618 { /* [1.7316,2] */ - y = 2.0 - x; - i = 0; - } else if ix >= 0x3F9da620 { /* [1.23,1.73] */ - y = x - TC; - i = 1; - } else { - y = x - 1.0; - i = 2; - } - } - match i { - 0 => { - z = y*y; - p1 = A0+z*(A2+z*(A4+z*(A6+z*(A8+z*A10)))); - p2 = z*(A1+z*(A3+z*(A5+z*(A7+z*(A9+z*A11))))); - p = y*p1+p2; - r += p - 0.5*y; - } - 1 => { - z = y*y; - w = z*y; - p1 = T0+w*(T3+w*(T6+w*(T9 +w*T12))); /* parallel comp */ - p2 = T1+w*(T4+w*(T7+w*(T10+w*T13))); - p3 = T2+w*(T5+w*(T8+w*(T11+w*T14))); - p = z*p1-(TT-w*(p2+y*p3)); - r += TF + p; - } - 2 => { - p1 = y*(U0+y*(U1+y*(U2+y*(U3+y*(U4+y*U5))))); - p2 = 1.0+y*(V1+y*(V2+y*(V3+y*(V4+y*V5)))); - r += -0.5*y + p1/p2; - } - #[cfg(feature = "checked")] - _ => unreachable!(), - #[cfg(not(feature = "checked"))] - _ => {} - } - } else if ix < 0x41000000 { /* x < 8.0 */ - i = x as isize; - y = x - (i as f32); - p = 
y*(S0+y*(S1+y*(S2+y*(S3+y*(S4+y*(S5+y*S6)))))); - q = 1.0+y*(R1+y*(R2+y*(R3+y*(R4+y*(R5+y*R6))))); - r = 0.5*y+p/q; - z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ - // TODO: In C, this was implemented using switch jumps with fallthrough. - // Does this implementation have performance problems? - if i >= 7 { z *= y + 6.0; } - if i >= 6 { z *= y + 5.0; } - if i >= 5 { z *= y + 4.0; } - if i >= 4 { z *= y + 3.0; } - if i >= 3 { - z *= y + 2.0; - r += logf(z); - } - } else if ix < 0x5c800000 { /* 8.0 <= x < 2**58 */ - t = logf(x); - z = 1.0/x; - y = z*z; - w = W0+z*(W1+y*(W2+y*(W3+y*(W4+y*(W5+y*W6))))); - r = (x-0.5)*(t-1.0)+w; - } else { /* 2**58 <= x <= inf */ - r = x*(logf(x)-1.0); - } - if sign { - r = nadj - r; - } - return (r, signgam); -} +/* origin: FreeBSD /usr/src/lib/msun/src/e_lgammaf_r.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + +use super::{floorf, k_cosf, k_sinf, logf}; + +const PI: f32 = 3.1415927410e+00; /* 0x40490fdb */ +const A0: f32 = 7.7215664089e-02; /* 0x3d9e233f */ +const A1: f32 = 3.2246702909e-01; /* 0x3ea51a66 */ +const A2: f32 = 6.7352302372e-02; /* 0x3d89f001 */ +const A3: f32 = 2.0580807701e-02; /* 0x3ca89915 */ +const A4: f32 = 7.3855509982e-03; /* 0x3bf2027e */ +const A5: f32 = 2.8905137442e-03; /* 0x3b3d6ec6 */ +const A6: f32 = 1.1927076848e-03; /* 0x3a9c54a1 */ +const A7: f32 = 5.1006977446e-04; /* 0x3a05b634 */ +const A8: f32 = 2.2086278477e-04; /* 0x39679767 */ +const A9: f32 = 1.0801156895e-04; /* 0x38e28445 */ +const A10: f32 = 2.5214456400e-05; /* 0x37d383a2 */ +const A11: f32 = 4.4864096708e-05; /* 0x383c2c75 */ +const TC: f32 = 1.4616321325e+00; /* 0x3fbb16c3 */ +const TF: f32 = -1.2148628384e-01; /* 0xbdf8cdcd */ +/* TT = -(tail of TF) */ +const TT: f32 = 6.6971006518e-09; /* 0x31e61c52 */ +const T0: f32 = 4.8383611441e-01; /* 0x3ef7b95e */ +const T1: f32 = -1.4758771658e-01; /* 0xbe17213c */ +const T2: f32 = 6.4624942839e-02; /* 0x3d845a15 */ +const T3: f32 = -3.2788541168e-02; /* 0xbd064d47 */ +const T4: f32 = 1.7970675603e-02; /* 0x3c93373d */ +const T5: f32 = -1.0314224288e-02; /* 0xbc28fcfe */ +const T6: f32 = 6.1005386524e-03; /* 0x3bc7e707 */ +const T7: f32 = -3.6845202558e-03; /* 0xbb7177fe */ +const T8: f32 = 2.2596477065e-03; /* 0x3b141699 */ +const T9: f32 = -1.4034647029e-03; /* 0xbab7f476 */ +const T10: f32 = 8.8108185446e-04; /* 0x3a66f867 */ +const T11: f32 = -5.3859531181e-04; /* 0xba0d3085 */ +const T12: f32 = 3.1563205994e-04; /* 0x39a57b6b */ +const T13: f32 = -3.1275415677e-04; /* 0xb9a3f927 */ +const T14: f32 = 3.3552918467e-04; /* 0x39afe9f7 */ +const U0: f32 = -7.7215664089e-02; /* 0xbd9e233f */ +const U1: f32 = 6.3282704353e-01; /* 0x3f2200f4 */ +const U2: f32 = 1.4549225569e+00; /* 0x3fba3ae7 */ +const U3: f32 = 9.7771751881e-01; /* 0x3f7a4bb2 */ +const U4: f32 = 
2.2896373272e-01; /* 0x3e6a7578 */ +const U5: f32 = 1.3381091878e-02; /* 0x3c5b3c5e */ +const V1: f32 = 2.4559779167e+00; /* 0x401d2ebe */ +const V2: f32 = 2.1284897327e+00; /* 0x4008392d */ +const V3: f32 = 7.6928514242e-01; /* 0x3f44efdf */ +const V4: f32 = 1.0422264785e-01; /* 0x3dd572af */ +const V5: f32 = 3.2170924824e-03; /* 0x3b52d5db */ +const S0: f32 = -7.7215664089e-02; /* 0xbd9e233f */ +const S1: f32 = 2.1498242021e-01; /* 0x3e5c245a */ +const S2: f32 = 3.2577878237e-01; /* 0x3ea6cc7a */ +const S3: f32 = 1.4635047317e-01; /* 0x3e15dce6 */ +const S4: f32 = 2.6642270386e-02; /* 0x3cda40e4 */ +const S5: f32 = 1.8402845599e-03; /* 0x3af135b4 */ +const S6: f32 = 3.1947532989e-05; /* 0x3805ff67 */ +const R1: f32 = 1.3920053244e+00; /* 0x3fb22d3b */ +const R2: f32 = 7.2193557024e-01; /* 0x3f38d0c5 */ +const R3: f32 = 1.7193385959e-01; /* 0x3e300f6e */ +const R4: f32 = 1.8645919859e-02; /* 0x3c98bf54 */ +const R5: f32 = 7.7794247773e-04; /* 0x3a4beed6 */ +const R6: f32 = 7.3266842264e-06; /* 0x36f5d7bd */ +const W0: f32 = 4.1893854737e-01; /* 0x3ed67f1d */ +const W1: f32 = 8.3333335817e-02; /* 0x3daaaaab */ +const W2: f32 = -2.7777778450e-03; /* 0xbb360b61 */ +const W3: f32 = 7.9365057172e-04; /* 0x3a500cfd */ +const W4: f32 = -5.9518753551e-04; /* 0xba1c065c */ +const W5: f32 = 8.3633989561e-04; /* 0x3a5b3dd2 */ +const W6: f32 = -1.6309292987e-03; /* 0xbad5c4e8 */ + +/* sin(PI*x) assuming x > 2^-100, if sin(PI*x)==0 the sign is arbitrary */ +fn sin_pi(mut x: f32) -> f32 { + let mut y: f64; + let mut n: isize; + + /* spurious inexact if odd int */ + x = 2.0 * (x * 0.5 - floorf(x * 0.5)); /* x mod 2.0 */ + + n = (x * 4.0) as isize; + n = (n + 1) / 2; + y = (x as f64) - (n as f64) * 0.5; + y *= 3.14159265358979323846; + match n { + 1 => k_cosf(y), + 2 => k_sinf(-y), + 3 => -k_cosf(y), + 0 | _ => k_sinf(y), + } +} + +pub fn lgammaf(x: f32) -> f32 { + lgammaf_r(x).0 +} + +pub fn lgammaf_r(mut x: f32) -> (f32, isize) { + let u = x.to_bits(); + let mut t: f32; + let 
y: f32; + let mut z: f32; + let nadj: f32; + let p: f32; + let p1: f32; + let p2: f32; + let p3: f32; + let q: f32; + let mut r: f32; + let w: f32; + let ix: u32; + let i: isize; + let sign: bool; + let mut signgam: isize; + + /* purge off +-inf, NaN, +-0, tiny and negative arguments */ + signgam = 1; + sign = (u >> 31) != 0; + ix = u & 0x7fffffff; + if ix >= 0x7f800000 { + return (x * x, signgam); + } + if ix < 0x35000000 { + /* |x| < 2**-21, return -log(|x|) */ + if sign { + signgam = -1; + x = -x; + } + return (-logf(x), signgam); + } + if sign { + x = -x; + t = sin_pi(x); + if t == 0.0 { + /* -integer */ + return (1.0 / (x - x), signgam); + } + if t > 0.0 { + signgam = -1; + } else { + t = -t; + } + nadj = logf(PI / (t * x)); + } else { + nadj = 0.0; + } + + /* purge off 1 and 2 */ + if ix == 0x3f800000 || ix == 0x40000000 { + r = 0.0; + } + /* for x < 2.0 */ + else if ix < 0x40000000 { + if ix <= 0x3f666666 { + /* lgamma(x) = lgamma(x+1)-log(x) */ + r = -logf(x); + if ix >= 0x3f3b4a20 { + y = 1.0 - x; + i = 0; + } else if ix >= 0x3e6d3308 { + y = x - (TC - 1.0); + i = 1; + } else { + y = x; + i = 2; + } + } else { + r = 0.0; + if ix >= 0x3fdda618 { + /* [1.7316,2] */ + y = 2.0 - x; + i = 0; + } else if ix >= 0x3F9da620 { + /* [1.23,1.73] */ + y = x - TC; + i = 1; + } else { + y = x - 1.0; + i = 2; + } + } + match i { + 0 => { + z = y * y; + p1 = A0 + z * (A2 + z * (A4 + z * (A6 + z * (A8 + z * A10)))); + p2 = z * (A1 + z * (A3 + z * (A5 + z * (A7 + z * (A9 + z * A11))))); + p = y * p1 + p2; + r += p - 0.5 * y; + } + 1 => { + z = y * y; + w = z * y; + p1 = T0 + w * (T3 + w * (T6 + w * (T9 + w * T12))); /* parallel comp */ + p2 = T1 + w * (T4 + w * (T7 + w * (T10 + w * T13))); + p3 = T2 + w * (T5 + w * (T8 + w * (T11 + w * T14))); + p = z * p1 - (TT - w * (p2 + y * p3)); + r += TF + p; + } + 2 => { + p1 = y * (U0 + y * (U1 + y * (U2 + y * (U3 + y * (U4 + y * U5))))); + p2 = 1.0 + y * (V1 + y * (V2 + y * (V3 + y * (V4 + y * V5)))); + r += -0.5 * y + p1 / p2; + } 
+ #[cfg(feature = "checked")] + _ => unreachable!(), + #[cfg(not(feature = "checked"))] + _ => {} + } + } else if ix < 0x41000000 { + /* x < 8.0 */ + i = x as isize; + y = x - (i as f32); + p = y * (S0 + y * (S1 + y * (S2 + y * (S3 + y * (S4 + y * (S5 + y * S6)))))); + q = 1.0 + y * (R1 + y * (R2 + y * (R3 + y * (R4 + y * (R5 + y * R6))))); + r = 0.5 * y + p / q; + z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ + // TODO: In C, this was implemented using switch jumps with fallthrough. + // Does this implementation have performance problems? + if i >= 7 { + z *= y + 6.0; + } + if i >= 6 { + z *= y + 5.0; + } + if i >= 5 { + z *= y + 4.0; + } + if i >= 4 { + z *= y + 3.0; + } + if i >= 3 { + z *= y + 2.0; + r += logf(z); + } + } else if ix < 0x5c800000 { + /* 8.0 <= x < 2**58 */ + t = logf(x); + z = 1.0 / x; + y = z * z; + w = W0 + z * (W1 + y * (W2 + y * (W3 + y * (W4 + y * (W5 + y * W6))))); + r = (x - 0.5) * (t - 1.0) + w; + } else { + /* 2**58 <= x <= inf */ + r = x * (logf(x) - 1.0); + } + if sign { + r = nadj - r; + } + return (r, signgam); +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 6e53b020c..b70b0cd6b 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -118,8 +118,6 @@ mod frexp; mod frexpf; mod hypot; mod hypotf; -mod ldexp; -mod ldexpf; mod ilogb; mod ilogbf; mod j0; @@ -128,6 +126,8 @@ mod j1; mod j1f; mod jn; mod jnf; +mod ldexp; +mod ldexpf; mod lgamma; mod lgammaf; mod log; @@ -192,8 +192,8 @@ pub use self::cosh::cosh; pub use self::coshf::coshf; pub use self::erf::erf; pub use self::erf::erfc; -pub use self::erff::erff; pub use self::erff::erfcf; +pub use self::erff::erff; pub use self::exp::exp; pub use self::exp10::exp10; pub use self::exp10f::exp10f; @@ -216,8 +216,6 @@ pub use self::frexp::frexp; pub use self::frexpf::frexpf; pub use self::hypot::hypot; pub use self::hypotf::hypotf; -pub use self::ldexp::ldexp; -pub use self::ldexpf::ldexpf; pub use self::ilogb::ilogb; pub use self::ilogbf::ilogbf; pub use 
self::j0::j0; @@ -232,6 +230,8 @@ pub use self::jn::jn; pub use self::jn::yn; pub use self::jnf::jnf; pub use self::jnf::ynf; +pub use self::ldexp::ldexp; +pub use self::ldexpf::ldexpf; pub use self::lgamma::lgamma; pub use self::lgamma::lgamma_r; pub use self::lgammaf::lgammaf; diff --git a/libm/src/math/modf.rs b/libm/src/math/modf.rs index 1ff8ee116..a37f8b918 100644 --- a/libm/src/math/modf.rs +++ b/libm/src/math/modf.rs @@ -1,33 +1,34 @@ -pub fn modf(x: f64) -> (f64, f64) { - let rv2: f64; - let mut u = x.to_bits(); - let mask: u64; - let e = ((u>>52 & 0x7ff) as isize) - 0x3ff; - - /* no fractional part */ - if e >= 52 { - rv2 = x; - if e == 0x400 && (u<<12) != 0 { /* nan */ - return (x, rv2); - } - u &= 1<<63; - return (f64::from_bits(u), rv2); - } - - /* no integral part*/ - if e < 0 { - u &= 1<<63; - rv2 = f64::from_bits(u); - return (x, rv2); - } - - mask = ((!0)>>12)>>e; - if (u & mask) == 0 { - rv2 = x; - u &= 1<<63; - return (f64::from_bits(u), rv2); - } - u &= !mask; - rv2 = f64::from_bits(u); - return (x - rv2, rv2); -} +pub fn modf(x: f64) -> (f64, f64) { + let rv2: f64; + let mut u = x.to_bits(); + let mask: u64; + let e = ((u >> 52 & 0x7ff) as isize) - 0x3ff; + + /* no fractional part */ + if e >= 52 { + rv2 = x; + if e == 0x400 && (u << 12) != 0 { + /* nan */ + return (x, rv2); + } + u &= 1 << 63; + return (f64::from_bits(u), rv2); + } + + /* no integral part*/ + if e < 0 { + u &= 1 << 63; + rv2 = f64::from_bits(u); + return (x, rv2); + } + + mask = ((!0) >> 12) >> e; + if (u & mask) == 0 { + rv2 = x; + u &= 1 << 63; + return (f64::from_bits(u), rv2); + } + u &= !mask; + rv2 = f64::from_bits(u); + return (x - rv2, rv2); +} diff --git a/libm/src/math/modff.rs b/libm/src/math/modff.rs index 5250e8d38..4ce9052e7 100644 --- a/libm/src/math/modff.rs +++ b/libm/src/math/modff.rs @@ -1,32 +1,33 @@ -pub fn modff(x: f32) -> (f32, f32) { - let rv2: f32; - let mut u: u32 = x.to_bits(); - let mask: u32; - let e = ((u>>23 & 0xff) as isize) - 0x7f; - - /* no 
fractional part */ - if e >= 23 { - rv2 = x; - if e == 0x80 && (u<<9) != 0 { /* nan */ - return (x, rv2); - } - u &= 0x80000000; - return (f32::from_bits(u), rv2); - } - /* no integral part */ - if e < 0 { - u &= 0x80000000; - rv2 = f32::from_bits(u); - return (x, rv2); - } - - mask = 0x007fffff>>e; - if (u & mask) == 0 { - rv2 = x; - u &= 0x80000000; - return (f32::from_bits(u), rv2); - } - u &= !mask; - rv2 = f32::from_bits(u); - return (x - rv2, rv2); -} +pub fn modff(x: f32) -> (f32, f32) { + let rv2: f32; + let mut u: u32 = x.to_bits(); + let mask: u32; + let e = ((u >> 23 & 0xff) as isize) - 0x7f; + + /* no fractional part */ + if e >= 23 { + rv2 = x; + if e == 0x80 && (u << 9) != 0 { + /* nan */ + return (x, rv2); + } + u &= 0x80000000; + return (f32::from_bits(u), rv2); + } + /* no integral part */ + if e < 0 { + u &= 0x80000000; + rv2 = f32::from_bits(u); + return (x, rv2); + } + + mask = 0x007fffff >> e; + if (u & mask) == 0 { + rv2 = x; + u &= 0x80000000; + return (f32::from_bits(u), rv2); + } + u &= !mask; + rv2 = f32::from_bits(u); + return (x - rv2, rv2); +} diff --git a/libm/src/math/remquo.rs b/libm/src/math/remquo.rs index 98f4b3858..3681b947c 100644 --- a/libm/src/math/remquo.rs +++ b/libm/src/math/remquo.rs @@ -1,98 +1,97 @@ -pub fn remquo(mut x: f64, mut y: f64) -> (f64, isize) -{ - let ux: u64 = x.to_bits(); - let mut uy: u64 = y.to_bits(); - let mut ex = ((ux>>52) & 0x7ff) as isize; - let mut ey = ((uy>>52) & 0x7ff) as isize; - let sx = (ux>>63) != 0; - let sy = (uy>>63) != 0; - let mut q: u32; - let mut i: u64; - let mut uxi: u64 = ux; - - if (uy<<1) == 0 || y.is_nan() || ex == 0x7ff { - return ((x*y)/(x*y), 0); - } - if (ux<<1) == 0 { - return (x, 0); - } - - /* normalize x and y */ - if ex == 0 { - i = uxi << 12; - while (i>>63) == 0 { - ex -= 1; - i <<= 1; - } - uxi <<= -ex + 1; - } else { - uxi &= (!0) >> 12; - uxi |= 1 << 52; - } - if ey == 0 { - i = uy<<12; - while (i>>63) == 0 { - ey -= 1; - i <<= 1; - } - uy <<= -ey + 1; - } else { - 
uy &= (!0) >> 12; - uy |= 1 << 52; - } - - q = 0; - - if ex+1 != ey { - if ex < ey { - return (x, 0); - } - /* x mod y */ - while ex > ey { - i = uxi - uy; - if (i>>63) == 0 { - uxi = i; - q += 1; - } - uxi <<= 1; - q <<= 1; - ex -= 1; - } - i = uxi - uy; - if (i>>63) == 0 { - uxi = i; - q += 1; - } - if uxi == 0 { - ex = -60; - } else { - while (uxi>>52) == 0 { - uxi <<= 1; - ex -= 1; - } - } - } - - /* scale result and decide between |x| and |x|-|y| */ - if ex > 0 { - uxi -= 1 << 52; - uxi |= (ex as u64) << 52; - } else { - uxi >>= -ex + 1; - } - x = f64::from_bits(uxi); - if sy { - y = -y; - } - if ex == ey || (ex+1 == ey && (2.0*x > y || (2.0*x == y && (q%2) != 0))) { - x -= y; - q += 1; - } - q &= 0x7fffffff; - let quo = if sx ^ sy { -(q as isize) } else { q as isize }; - if sx { - (-x, quo) - } else { - (x, quo) - } -} +pub fn remquo(mut x: f64, mut y: f64) -> (f64, isize) { + let ux: u64 = x.to_bits(); + let mut uy: u64 = y.to_bits(); + let mut ex = ((ux >> 52) & 0x7ff) as isize; + let mut ey = ((uy >> 52) & 0x7ff) as isize; + let sx = (ux >> 63) != 0; + let sy = (uy >> 63) != 0; + let mut q: u32; + let mut i: u64; + let mut uxi: u64 = ux; + + if (uy << 1) == 0 || y.is_nan() || ex == 0x7ff { + return ((x * y) / (x * y), 0); + } + if (ux << 1) == 0 { + return (x, 0); + } + + /* normalize x and y */ + if ex == 0 { + i = uxi << 12; + while (i >> 63) == 0 { + ex -= 1; + i <<= 1; + } + uxi <<= -ex + 1; + } else { + uxi &= (!0) >> 12; + uxi |= 1 << 52; + } + if ey == 0 { + i = uy << 12; + while (i >> 63) == 0 { + ey -= 1; + i <<= 1; + } + uy <<= -ey + 1; + } else { + uy &= (!0) >> 12; + uy |= 1 << 52; + } + + q = 0; + + if ex + 1 != ey { + if ex < ey { + return (x, 0); + } + /* x mod y */ + while ex > ey { + i = uxi - uy; + if (i >> 63) == 0 { + uxi = i; + q += 1; + } + uxi <<= 1; + q <<= 1; + ex -= 1; + } + i = uxi - uy; + if (i >> 63) == 0 { + uxi = i; + q += 1; + } + if uxi == 0 { + ex = -60; + } else { + while (uxi >> 52) == 0 { + uxi <<= 1; + ex -= 1; + } + } 
+ } + + /* scale result and decide between |x| and |x|-|y| */ + if ex > 0 { + uxi -= 1 << 52; + uxi |= (ex as u64) << 52; + } else { + uxi >>= -ex + 1; + } + x = f64::from_bits(uxi); + if sy { + y = -y; + } + if ex == ey || (ex + 1 == ey && (2.0 * x > y || (2.0 * x == y && (q % 2) != 0))) { + x -= y; + q += 1; + } + q &= 0x7fffffff; + let quo = if sx ^ sy { -(q as isize) } else { q as isize }; + if sx { + (-x, quo) + } else { + (x, quo) + } +} diff --git a/libm/src/math/remquof.rs b/libm/src/math/remquof.rs index 4307e1906..40ded5d69 100644 --- a/libm/src/math/remquof.rs +++ b/libm/src/math/remquof.rs @@ -1,97 +1,96 @@ -pub fn remquof(mut x: f32, mut y: f32) -> (f32, isize) -{ - let ux: u32 = x.to_bits(); - let mut uy: u32 = y.to_bits(); - let mut ex = ((ux>>23) & 0xff) as isize; - let mut ey = ((uy>>23) & 0xff) as isize; - let sx = (ux>>31) != 0; - let sy = (uy>>31) != 0; - let mut q: u32; - let mut i: u32; - let mut uxi: u32 = ux; - - if (uy<<1) == 0 || y.is_nan() || ex == 0xff { - return ((x*y)/(x*y), 0); - } - if (ux<<1) == 0 { - return (x, 0); - } - - /* normalize x and y */ - if ex == 0 { - i = uxi<<9; - while (i>>31) == 0 { - ex -= 1; - i <<= 1; - } - uxi <<= -ex + 1; - } else { - uxi &= (!0) >> 9; - uxi |= 1 << 23; - } - if ey == 0 { - i = uy<<9; - while (i>>31) == 0 { - ey -= 1; - i <<= 1; - } - uy <<= -ey + 1; - } else { - uy &= (!0) >> 9; - uy |= 1 << 23; - } - - q = 0; - if ex+1 != ey { - if ex < ey { - return (x, 0); - } - /* x mod y */ - while ex > ey { - i = uxi - uy; - if (i>>31) == 0 { - uxi = i; - q += 1; - } - uxi <<= 1; - q <<= 1; - ex -= 1; - } - i = uxi - uy; - if (i>>31) == 0 { - uxi = i; - q += 1; - } - if uxi == 0 { - ex = -30; - } else { - while (uxi>>23) == 0 { - uxi <<= 1; - ex -= 1; - } - } - } - - /* scale result and decide between |x| and |x|-|y| */ - if ex > 0 { - uxi -= 1 << 23; - uxi |= (ex as u32) << 23; - } else { - uxi >>= -ex + 1; - } - x = f32::from_bits(uxi); - if sy { - y = -y; - } - if ex == ey || (ex+1 == ey && (2.0*x > y 
|| (2.0*x == y && (q%2) != 0))) { - x -= y; - q += 1; - } - q &= 0x7fffffff; - let quo = if sx^sy { -(q as isize) } else { q as isize }; - if sx { - (-x, quo) - } else { - (x, quo) - } -} +pub fn remquof(mut x: f32, mut y: f32) -> (f32, isize) { + let ux: u32 = x.to_bits(); + let mut uy: u32 = y.to_bits(); + let mut ex = ((ux >> 23) & 0xff) as isize; + let mut ey = ((uy >> 23) & 0xff) as isize; + let sx = (ux >> 31) != 0; + let sy = (uy >> 31) != 0; + let mut q: u32; + let mut i: u32; + let mut uxi: u32 = ux; + + if (uy << 1) == 0 || y.is_nan() || ex == 0xff { + return ((x * y) / (x * y), 0); + } + if (ux << 1) == 0 { + return (x, 0); + } + + /* normalize x and y */ + if ex == 0 { + i = uxi << 9; + while (i >> 31) == 0 { + ex -= 1; + i <<= 1; + } + uxi <<= -ex + 1; + } else { + uxi &= (!0) >> 9; + uxi |= 1 << 23; + } + if ey == 0 { + i = uy << 9; + while (i >> 31) == 0 { + ey -= 1; + i <<= 1; + } + uy <<= -ey + 1; + } else { + uy &= (!0) >> 9; + uy |= 1 << 23; + } + + q = 0; + if ex + 1 != ey { + if ex < ey { + return (x, 0); + } + /* x mod y */ + while ex > ey { + i = uxi - uy; + if (i >> 31) == 0 { + uxi = i; + q += 1; + } + uxi <<= 1; + q <<= 1; + ex -= 1; + } + i = uxi - uy; + if (i >> 31) == 0 { + uxi = i; + q += 1; + } + if uxi == 0 { + ex = -30; + } else { + while (uxi >> 23) == 0 { + uxi <<= 1; + ex -= 1; + } + } + } + + /* scale result and decide between |x| and |x|-|y| */ + if ex > 0 { + uxi -= 1 << 23; + uxi |= (ex as u32) << 23; + } else { + uxi >>= -ex + 1; + } + x = f32::from_bits(uxi); + if sy { + y = -y; + } + if ex == ey || (ex + 1 == ey && (2.0 * x > y || (2.0 * x == y && (q % 2) != 0))) { + x -= y; + q += 1; + } + q &= 0x7fffffff; + let quo = if sx ^ sy { -(q as isize) } else { q as isize }; + if sx { + (-x, quo) + } else { + (x, quo) + } +} diff --git a/libm/src/math/sincos.rs b/libm/src/math/sincos.rs index c15ee4661..750908df4 100644 --- a/libm/src/math/sincos.rs +++ b/libm/src/math/sincos.rs @@ -1,60 +1,59 @@ -/* origin: FreeBSD 
/usr/src/lib/msun/src/s_sin.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -use super::{get_high_word, k_cos, k_sin, rem_pio2}; - -pub fn sincos(x: f64) -> (f64, f64) -{ - let s: f64; - let c: f64; - let mut ix: u32; - - ix = get_high_word(x); - ix &= 0x7fffffff; - - /* |x| ~< pi/4 */ - if ix <= 0x3fe921fb { - /* if |x| < 2**-27 * sqrt(2) */ - if ix < 0x3e46a09e { - /* raise inexact if x!=0 and underflow if subnormal */ - let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120 == 2^120 - if ix < 0x00100000 { - force_eval!(x/x1p120); - } else { - force_eval!(x+x1p120); - } - return (x, 1.0); - } - return (k_sin(x, 0.0, 0), k_cos(x, 0.0)); - } - - /* sincos(Inf or NaN) is NaN */ - if ix >= 0x7ff00000 { - let rv = x - x; - return (rv, rv); - } - - /* argument reduction needed */ - let (n, y0, y1) = rem_pio2(x); - s = k_sin(y0, y1, 1); - c = k_cos(y0, y1); - match n&3 { - 0 => (s, c), - 1 => (c, -s), - 2 => (-s, -c), - 3 => (-c, s), - #[cfg(feature = "checked")] - _ => unreachable!(), - #[cfg(not(feature = "checked"))] - _ => (0.0, 1.0), - } -} +/* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + +use super::{get_high_word, k_cos, k_sin, rem_pio2}; + +pub fn sincos(x: f64) -> (f64, f64) { + let s: f64; + let c: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + + /* |x| ~< pi/4 */ + if ix <= 0x3fe921fb { + /* if |x| < 2**-27 * sqrt(2) */ + if ix < 0x3e46a09e { + /* raise inexact if x!=0 and underflow if subnormal */ + let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120 == 2^120 + if ix < 0x00100000 { + force_eval!(x / x1p120); + } else { + force_eval!(x + x1p120); + } + return (x, 1.0); + } + return (k_sin(x, 0.0, 0), k_cos(x, 0.0)); + } + + /* sincos(Inf or NaN) is NaN */ + if ix >= 0x7ff00000 { + let rv = x - x; + return (rv, rv); + } + + /* argument reduction needed */ + let (n, y0, y1) = rem_pio2(x); + s = k_sin(y0, y1, 1); + c = k_cos(y0, y1); + match n & 3 { + 0 => (s, c), + 1 => (c, -s), + 2 => (-s, -c), + 3 => (-c, s), + #[cfg(feature = "checked")] + _ => unreachable!(), + #[cfg(not(feature = "checked"))] + _ => (0.0, 1.0), + } +} diff --git a/libm/src/math/sincosf.rs b/libm/src/math/sincosf.rs index 911421d63..bb9a00392 100644 --- a/libm/src/math/sincosf.rs +++ b/libm/src/math/sincosf.rs @@ -1,122 +1,123 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - * Optimized by Bruce D. Evans. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -use super::{k_cosf, k_sinf, rem_pio2f}; - -/* Small multiples of pi/2 rounded to double precision. 
*/ -const PI_2: f32 = 0.5 * 3.1415926535897931160E+00; -const S1PIO2: f32 = 1.0*PI_2; /* 0x3FF921FB, 0x54442D18 */ -const S2PIO2: f32 = 2.0*PI_2; /* 0x400921FB, 0x54442D18 */ -const S3PIO2: f32 = 3.0*PI_2; /* 0x4012D97C, 0x7F3321D2 */ -const S4PIO2: f32 = 4.0*PI_2; /* 0x401921FB, 0x54442D18 */ - -pub fn sincosf(x: f32) -> (f32, f32) -{ - let s: f32; - let c: f32; - let mut ix: u32; - let sign: bool; - - ix = x.to_bits(); - sign = (ix >> 31) != 0; - ix &= 0x7fffffff; - - /* |x| ~<= pi/4 */ - if ix <= 0x3f490fda { - /* |x| < 2**-12 */ - if ix < 0x39800000 { - /* raise inexact if x!=0 and underflow if subnormal */ - - let x1p120 = f32::from_bits(0x7b800000); // 0x1p120 == 2^120 - if ix < 0x00100000 { - force_eval!(x/x1p120); - } else { - force_eval!(x+x1p120); - } - return (x, 1.0); - } - return (k_sinf(x as f64), k_cosf(x as f64)); - } - - /* |x| ~<= 5*pi/4 */ - if ix <= 0x407b53d1 { - if ix <= 0x4016cbe3 { /* |x| ~<= 3pi/4 */ - if sign { - s = -k_cosf((x + S1PIO2) as f64); - c = k_sinf((x + S1PIO2) as f64); - } else { - s = k_cosf((S1PIO2 - x) as f64); - c = k_sinf((S1PIO2 - x) as f64); - } - } - /* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */ - else { - if sign { - s = k_sinf((x + S2PIO2) as f64); - c = k_cosf((x + S2PIO2) as f64); - } else { - s = k_sinf((x - S2PIO2) as f64); - c = k_cosf((x - S2PIO2) as f64); - } - } - - return (s, c); - } - - /* |x| ~<= 9*pi/4 */ - if ix <= 0x40e231d5 { - if ix <= 0x40afeddf { /* |x| ~<= 7*pi/4 */ - if sign { - s = k_cosf((x + S3PIO2) as f64); - c = -k_sinf((x + S3PIO2) as f64); - } else { - s = -k_cosf((x - S3PIO2) as f64); - c = k_sinf((x - S3PIO2) as f64); - } - } else { - if sign { - s = k_cosf((x + S4PIO2) as f64); - c = k_sinf((x + S4PIO2) as f64); - } else { - s = k_cosf((x - S4PIO2) as f64); - c = k_sinf((x - S4PIO2) as f64); - } - } - - return (s, c); - } - - /* sin(Inf or NaN) is NaN */ - if ix >= 0x7f800000 { - let rv = x - x; - return (rv, rv); - } - - /* general argument reduction needed */ - let (n, y) = 
rem_pio2f(x); - s = k_sinf(y); - c = k_cosf(y); - match n&3 { - 0 => (s, c), - 1 => (c, -s), - 2 => (-s, -c), - 3 => (-c, s), - #[cfg(feature = "checked")] - _ => unreachable!(), - #[cfg(not(feature = "checked"))] - _ => (0.0, 1.0), - } -} +/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{k_cosf, k_sinf, rem_pio2f}; + +/* Small multiples of pi/2 rounded to double precision. */ +const PI_2: f32 = 0.5 * 3.1415926535897931160E+00; +const S1PIO2: f32 = 1.0 * PI_2; /* 0x3FF921FB, 0x54442D18 */ +const S2PIO2: f32 = 2.0 * PI_2; /* 0x400921FB, 0x54442D18 */ +const S3PIO2: f32 = 3.0 * PI_2; /* 0x4012D97C, 0x7F3321D2 */ +const S4PIO2: f32 = 4.0 * PI_2; /* 0x401921FB, 0x54442D18 */ + +pub fn sincosf(x: f32) -> (f32, f32) { + let s: f32; + let c: f32; + let mut ix: u32; + let sign: bool; + + ix = x.to_bits(); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + /* |x| ~<= pi/4 */ + if ix <= 0x3f490fda { + /* |x| < 2**-12 */ + if ix < 0x39800000 { + /* raise inexact if x!=0 and underflow if subnormal */ + + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120 == 2^120 + if ix < 0x00100000 { + force_eval!(x / x1p120); + } else { + force_eval!(x + x1p120); + } + return (x, 1.0); + } + return (k_sinf(x as f64), k_cosf(x as f64)); + } + + /* |x| ~<= 5*pi/4 */ + if ix <= 0x407b53d1 { + if ix <= 0x4016cbe3 { + /* |x| ~<= 3pi/4 */ + if sign { + s = -k_cosf((x + S1PIO2) as f64); + c = k_sinf((x + S1PIO2) as f64); + } else { + s = k_cosf((S1PIO2 - x) as f64); + c = 
k_sinf((S1PIO2 - x) as f64); + } + } + /* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */ + else { + if sign { + s = k_sinf((x + S2PIO2) as f64); + c = k_cosf((x + S2PIO2) as f64); + } else { + s = k_sinf((x - S2PIO2) as f64); + c = k_cosf((x - S2PIO2) as f64); + } + } + + return (s, c); + } + + /* |x| ~<= 9*pi/4 */ + if ix <= 0x40e231d5 { + if ix <= 0x40afeddf { + /* |x| ~<= 7*pi/4 */ + if sign { + s = k_cosf((x + S3PIO2) as f64); + c = -k_sinf((x + S3PIO2) as f64); + } else { + s = -k_cosf((x - S3PIO2) as f64); + c = k_sinf((x - S3PIO2) as f64); + } + } else { + if sign { + s = k_cosf((x + S4PIO2) as f64); + c = k_sinf((x + S4PIO2) as f64); + } else { + s = k_cosf((x - S4PIO2) as f64); + c = k_sinf((x - S4PIO2) as f64); + } + } + + return (s, c); + } + + /* sin(Inf or NaN) is NaN */ + if ix >= 0x7f800000 { + let rv = x - x; + return (rv, rv); + } + + /* general argument reduction needed */ + let (n, y) = rem_pio2f(x); + s = k_sinf(y); + c = k_cosf(y); + match n & 3 { + 0 => (s, c), + 1 => (c, -s), + 2 => (-s, -c), + 3 => (-c, s), + #[cfg(feature = "checked")] + _ => unreachable!(), + #[cfg(not(feature = "checked"))] + _ => (0.0, 1.0), + } +} diff --git a/libm/src/math/tgamma.rs b/libm/src/math/tgamma.rs index 598f46f1c..f8ccf669a 100644 --- a/libm/src/math/tgamma.rs +++ b/libm/src/math/tgamma.rs @@ -1,179 +1,207 @@ -/* -"A Precision Approximation of the Gamma Function" - Cornelius Lanczos (1964) -"Lanczos Implementation of the Gamma Function" - Paul Godfrey (2001) -"An Analysis of the Lanczos Gamma Approximation" - Glendon Ralph Pugh (2004) - -approximation method: - - (x - 0.5) S(x) -Gamma(x) = (x + g - 0.5) * ---------------- - exp(x + g - 0.5) - -with - a1 a2 a3 aN -S(x) ~= [ a0 + ----- + ----- + ----- + ... + ----- ] - x + 1 x + 2 x + 3 x + N - -with a0, a1, a2, a3,.. aN constants which depend on g. 
- -for x < 0 the following reflection formula is used: - -Gamma(x)*Gamma(-x) = -pi/(x sin(pi x)) - -most ideas and constants are from boost and python -*/ -extern crate core; -use super::{exp, floor, k_cos, k_sin, pow}; - -const PI: f64 = 3.141592653589793238462643383279502884; - -/* sin(pi x) with x > 0x1p-100, if sin(pi*x)==0 the sign is arbitrary */ -fn sinpi(mut x: f64) -> f64 -{ - let mut n: isize; - - /* argument reduction: x = |x| mod 2 */ - /* spurious inexact when x is odd int */ - x = x * 0.5; - x = 2.0 * (x - floor(x)); - - /* reduce x into [-.25,.25] */ - n = (4.0 * x) as isize; - n = (n+1)/2; - x -= (n as f64) * 0.5; - - x *= PI; - match n { - 1 => k_cos(x, 0.0), - 2 => k_sin(-x, 0.0, 0), - 3 => -k_cos(x, 0.0), - 0|_ => k_sin(x, 0.0, 0), - } -} - -const N: usize = 12; -//static const double g = 6.024680040776729583740234375; -const GMHALF: f64 = 5.524680040776729583740234375; -const SNUM: [f64; N+1] = [ - 23531376880.410759688572007674451636754734846804940, - 42919803642.649098768957899047001988850926355848959, - 35711959237.355668049440185451547166705960488635843, - 17921034426.037209699919755754458931112671403265390, - 6039542586.3520280050642916443072979210699388420708, - 1439720407.3117216736632230727949123939715485786772, - 248874557.86205415651146038641322942321632125127801, - 31426415.585400194380614231628318205362874684987640, - 2876370.6289353724412254090516208496135991145378768, - 186056.26539522349504029498971604569928220784236328, - 8071.6720023658162106380029022722506138218516325024, - 210.82427775157934587250973392071336271166969580291, - 2.5066282746310002701649081771338373386264310793408, -]; -const SDEN: [f64; N+1] = [ - 0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, - 45995730.0, 13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0, -]; -/* n! 
for small integer n */ -const FACT: [f64; 23] = [ - 1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0, 3628800.0, - 39916800.0, 479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, - 20922789888000.0, 355687428096000.0, 6402373705728000.0, 121645100408832000.0, - 2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0, -]; - -/* S(x) rational function for positive x */ -fn s(x: f64) -> f64 -{ - let mut num: f64 = 0.0; - let mut den: f64 = 0.0; - - /* to avoid overflow handle large x differently */ - if x < 8.0 { - for i in (0..=N).rev() { - num = num * x + SNUM[i]; - den = den * x + SDEN[i]; - } - } else { - for i in 0..=N { - num = num / x + SNUM[i]; - den = den / x + SDEN[i]; - } - } - return num/den; -} - -pub fn tgamma(mut x: f64) -> f64 -{ - let u: u64 = x.to_bits(); - let absx: f64; - let mut y: f64; - let mut dy: f64; - let mut z: f64; - let mut r: f64; - let ix: u32 = ((u >> 32) as u32) & 0x7fffffff; - let sign: bool = (u>>64) != 0; - - /* special cases */ - if ix >= 0x7ff00000 { - /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */ - return x + core::f64::INFINITY; - } - if ix < ((0x3ff-54)<<20) { - /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */ - return 1.0/x; - } - - /* integer arguments */ - /* raise inexact when non-integer */ - if x == floor(x) { - if sign { - return 0.0/0.0; - } - if x <= FACT.len() as f64 { - return FACT[(x as usize) - 1]; - } - } - - /* x >= 172: tgamma(x)=inf with overflow */ - /* x =< -184: tgamma(x)=+-0 with underflow */ - if ix >= 0x40670000 { /* |x| >= 184 */ - if sign { - let x1p_126 = f64::from_bits(0x3810000000000000); // 0x1p-126 == 2^-126 - force_eval!((x1p_126/x) as f32); - if floor(x) * 0.5 == floor(x * 0.5) { - return 0.0; - } else { - return -0.0; - } - } - let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 == 2^1023 - x *= x1p1023; - return x; - } - - absx = if sign { -x } else { x }; - - /* handle the error of x + g - 0.5 */ - y = absx + 
GMHALF; - if absx > GMHALF { - dy = y - absx; - dy -= GMHALF; - } else { - dy = y - GMHALF; - dy -= absx; - } - - z = absx - 0.5; - r = s(absx) * exp(-y); - if x < 0.0 { - /* reflection formula for negative x */ - /* sinpi(absx) is not 0, integers are already handled */ - r = -PI / (sinpi(absx) * absx * r); - dy = -dy; - z = -z; - } - r += dy * (GMHALF+0.5) * r / y; - z = pow(y, 0.5*z); - y = r * z * z; - return y; -} +/* +"A Precision Approximation of the Gamma Function" - Cornelius Lanczos (1964) +"Lanczos Implementation of the Gamma Function" - Paul Godfrey (2001) +"An Analysis of the Lanczos Gamma Approximation" - Glendon Ralph Pugh (2004) + +approximation method: + + (x - 0.5) S(x) +Gamma(x) = (x + g - 0.5) * ---------------- + exp(x + g - 0.5) + +with + a1 a2 a3 aN +S(x) ~= [ a0 + ----- + ----- + ----- + ... + ----- ] + x + 1 x + 2 x + 3 x + N + +with a0, a1, a2, a3,.. aN constants which depend on g. + +for x < 0 the following reflection formula is used: + +Gamma(x)*Gamma(-x) = -pi/(x sin(pi x)) + +most ideas and constants are from boost and python +*/ +extern crate core; +use super::{exp, floor, k_cos, k_sin, pow}; + +const PI: f64 = 3.141592653589793238462643383279502884; + +/* sin(pi x) with x > 0x1p-100, if sin(pi*x)==0 the sign is arbitrary */ +fn sinpi(mut x: f64) -> f64 { + let mut n: isize; + + /* argument reduction: x = |x| mod 2 */ + /* spurious inexact when x is odd int */ + x = x * 0.5; + x = 2.0 * (x - floor(x)); + + /* reduce x into [-.25,.25] */ + n = (4.0 * x) as isize; + n = (n + 1) / 2; + x -= (n as f64) * 0.5; + + x *= PI; + match n { + 1 => k_cos(x, 0.0), + 2 => k_sin(-x, 0.0, 0), + 3 => -k_cos(x, 0.0), + 0 | _ => k_sin(x, 0.0, 0), + } +} + +const N: usize = 12; +//static const double g = 6.024680040776729583740234375; +const GMHALF: f64 = 5.524680040776729583740234375; +const SNUM: [f64; N + 1] = [ + 23531376880.410759688572007674451636754734846804940, + 42919803642.649098768957899047001988850926355848959, + 
35711959237.355668049440185451547166705960488635843, + 17921034426.037209699919755754458931112671403265390, + 6039542586.3520280050642916443072979210699388420708, + 1439720407.3117216736632230727949123939715485786772, + 248874557.86205415651146038641322942321632125127801, + 31426415.585400194380614231628318205362874684987640, + 2876370.6289353724412254090516208496135991145378768, + 186056.26539522349504029498971604569928220784236328, + 8071.6720023658162106380029022722506138218516325024, + 210.82427775157934587250973392071336271166969580291, + 2.5066282746310002701649081771338373386264310793408, +]; +const SDEN: [f64; N + 1] = [ + 0.0, + 39916800.0, + 120543840.0, + 150917976.0, + 105258076.0, + 45995730.0, + 13339535.0, + 2637558.0, + 357423.0, + 32670.0, + 1925.0, + 66.0, + 1.0, +]; +/* n! for small integer n */ +const FACT: [f64; 23] = [ + 1.0, + 1.0, + 2.0, + 6.0, + 24.0, + 120.0, + 720.0, + 5040.0, + 40320.0, + 362880.0, + 3628800.0, + 39916800.0, + 479001600.0, + 6227020800.0, + 87178291200.0, + 1307674368000.0, + 20922789888000.0, + 355687428096000.0, + 6402373705728000.0, + 121645100408832000.0, + 2432902008176640000.0, + 51090942171709440000.0, + 1124000727777607680000.0, +]; + +/* S(x) rational function for positive x */ +fn s(x: f64) -> f64 { + let mut num: f64 = 0.0; + let mut den: f64 = 0.0; + + /* to avoid overflow handle large x differently */ + if x < 8.0 { + for i in (0..=N).rev() { + num = num * x + SNUM[i]; + den = den * x + SDEN[i]; + } + } else { + for i in 0..=N { + num = num / x + SNUM[i]; + den = den / x + SDEN[i]; + } + } + return num / den; +} + +pub fn tgamma(mut x: f64) -> f64 { + let u: u64 = x.to_bits(); + let absx: f64; + let mut y: f64; + let mut dy: f64; + let mut z: f64; + let mut r: f64; + let ix: u32 = ((u >> 32) as u32) & 0x7fffffff; + let sign: bool = (u >> 63) != 0; + + /* special cases */ + if ix >= 0x7ff00000 { + /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */ + return x + core::f64::INFINITY; + } + if 
ix < ((0x3ff - 54) << 20) { + /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */ + return 1.0 / x; + } + + /* integer arguments */ + /* raise inexact when non-integer */ + if x == floor(x) { + if sign { + return 0.0 / 0.0; + } + if x <= FACT.len() as f64 { + return FACT[(x as usize) - 1]; + } + } + + /* x >= 172: tgamma(x)=inf with overflow */ + /* x =< -184: tgamma(x)=+-0 with underflow */ + if ix >= 0x40670000 { + /* |x| >= 184 */ + if sign { + let x1p_126 = f64::from_bits(0x3810000000000000); // 0x1p-126 == 2^-126 + force_eval!((x1p_126 / x) as f32); + if floor(x) * 0.5 == floor(x * 0.5) { + return 0.0; + } else { + return -0.0; + } + } + let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 == 2^1023 + x *= x1p1023; + return x; + } + + absx = if sign { -x } else { x }; + + /* handle the error of x + g - 0.5 */ + y = absx + GMHALF; + if absx > GMHALF { + dy = y - absx; + dy -= GMHALF; + } else { + dy = y - GMHALF; + dy -= absx; + } + + z = absx - 0.5; + r = s(absx) * exp(-y); + if x < 0.0 { + /* reflection formula for negative x */ + /* sinpi(absx) is not 0, integers are already handled */ + r = -PI / (sinpi(absx) * absx * r); + dy = -dy; + z = -z; + } + r += dy * (GMHALF + 0.5) * r / y; + z = pow(y, 0.5 * z); + y = r * z * z; + return y; +} diff --git a/libm/src/math/tgammaf.rs b/libm/src/math/tgammaf.rs index b9c799ce7..a8f161f0c 100644 --- a/libm/src/math/tgammaf.rs +++ b/libm/src/math/tgammaf.rs @@ -1,5 +1,5 @@ -use super::{tgamma}; - -pub fn tgammaf(x: f32) -> f32 { - tgamma(x as f64) as f32 -} +use super::tgamma; + +pub fn tgammaf(x: f32) -> f32 { + tgamma(x as f64) as f32 +} From db82d4cb604e75967814acc0398012be0f8cdaaa Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Tue, 7 May 2019 09:07:26 +0300 Subject: [PATCH 0227/1459] sh +x --- libm/ci/run-docker.sh | 0 libm/ci/run.sh | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 libm/ci/run-docker.sh mode change 100644 => 100755 libm/ci/run.sh diff --git 
a/libm/ci/run-docker.sh b/libm/ci/run-docker.sh old mode 100644 new mode 100755 diff --git a/libm/ci/run.sh b/libm/ci/run.sh old mode 100644 new mode 100755 From ad4156ef1fb1fd4dd7b9f5455bd3e15464e8fc8b Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Thu, 9 May 2019 06:58:58 +0300 Subject: [PATCH 0228/1459] rm asine, mv lgamma --- libm/build.rs | 9 +- libm/src/math/asinef.rs | 93 ----------- libm/src/math/lgamma.rs | 320 +------------------------------------ libm/src/math/lgamma_r.rs | 319 ++++++++++++++++++++++++++++++++++++ libm/src/math/lgammaf.rs | 255 +---------------------------- libm/src/math/lgammaf_r.rs | 254 +++++++++++++++++++++++++++++ libm/src/math/mod.rs | 6 +- libm/src/math/modf.rs | 2 +- libm/src/math/modff.rs | 2 +- libm/src/math/remquo.rs | 8 +- libm/src/math/remquof.rs | 8 +- 11 files changed, 593 insertions(+), 683 deletions(-) delete mode 100644 libm/src/math/asinef.rs create mode 100644 libm/src/math/lgamma_r.rs create mode 100644 libm/src/math/lgammaf_r.rs diff --git a/libm/build.rs b/libm/build.rs index 29521ab19..4d739a121 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -41,17 +41,16 @@ mod musl_reference_tests { "rem_pio2.rs", "rem_pio2_large.rs", "rem_pio2f.rs", - "remquo.rs", - "remquof.rs", - "lgamma.rs", // lgamma passed, lgamma_r has more than 1 result - "lgammaf.rs", // lgammaf passed, lgammaf_r has more than 1 result + "remquo.rs", // more than 1 result + "remquof.rs", // more than 1 result + "lgamma_r.rs", // more than 1 result + "lgammaf_r.rs", // more than 1 result "frexp.rs", // more than 1 result "frexpf.rs", // more than 1 result "sincos.rs", // more than 1 result "sincosf.rs", // more than 1 result "modf.rs", // more than 1 result "modff.rs", // more than 1 result - "asinef.rs", // not exists "jn.rs", // passed, but very slow "jnf.rs", // passed, but very slow ]; diff --git a/libm/src/math/asinef.rs b/libm/src/math/asinef.rs deleted file mode 100644 index cd1428bc2..000000000 --- a/libm/src/math/asinef.rs +++ /dev/null 
@@ -1,93 +0,0 @@ -/* @(#)z_asinef.c 1.0 98/08/13 */ -/****************************************************************** - * The following routines are coded directly from the algorithms - * and coefficients given in "Software Manual for the Elementary - * Functions" by William J. Cody, Jr. and William Waite, Prentice - * Hall, 1980. - ******************************************************************/ -/****************************************************************** - * Arcsine - * - * Input: - * x - floating point value - * acosine - indicates acos calculation - * - * Output: - * Arcsine of x. - * - * Description: - * This routine calculates arcsine / arccosine. - * - *****************************************************************/ - -use super::{fabsf, sqrtf}; - -const P: [f32; 2] = [ 0.933935835, -0.504400557 ]; -const Q: [f32; 2] = [ 0.560363004e+1, -0.554846723e+1 ]; -const A: [f32; 2] = [ 0.0, 0.785398163 ]; -const B: [f32; 2] = [ 1.570796326, 0.785398163 ]; -const Z_ROOTEPS_F: f32 = 1.7263349182589107e-4; - -pub fn asinef(x: f32, acosine: bool) -> f32 { - let i: usize; - let mut branch: bool = false; - let g: f32; - let mut res: f32 = 0.0; - let mut y: f32; - - /* Check for special values. */ - //i = numtestf (x); - if x.is_nan() || x.is_infinite() { - force_eval!(x); - return x; - } - - y = fabsf(x); - let flag = acosine; - - if y > 0.5 { - i = (!flag) as usize; - - /* Check for range error. */ - if y > 1.0 { - return 0.0 / 0.0; - } - - g = (1.0 - y) / 2.0; - y = -2.0 * sqrtf(g); - branch = true; - } else { - i = flag; - if y < Z_ROOTEPS_F { - res = y; - g = 0.0; // pleasing the uninitialized variable - } else { - g = y * y; - } - } - - if y >= Z_ROOTEPS_F || branch { - /* Calculate the Taylor series. */ - let p = (P[1] * g + P[0]) * g; - let q = (g + Q[1]) * g + Q[0]; - let r = p / q; - - res = y + y * r; - } - - /* Calculate asine or acose. 
*/ - if flag == 0 { - res = (A[i] + res) + A[i]; - if x < 0.0 { - res = -res; - } - } else { - if x < 0.0 { - res = (B[i] + res) + B[i]; - } else { - res = (A[i] - res) + A[i]; - } - } - - return res; -} diff --git a/libm/src/math/lgamma.rs b/libm/src/math/lgamma.rs index b1a321e30..5bc87e85e 100644 --- a/libm/src/math/lgamma.rs +++ b/libm/src/math/lgamma.rs @@ -1,323 +1,5 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_lgamma_r.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - * - */ -/* lgamma_r(x, signgamp) - * Reentrant version of the logarithm of the Gamma function - * with user provide pointer for the sign of Gamma(x). - * - * Method: - * 1. Argument Reduction for 0 < x <= 8 - * Since gamma(1+s)=s*gamma(s), for x in [0,8], we may - * reduce x to a number in [1.5,2.5] by - * lgamma(1+s) = log(s) + lgamma(s) - * for example, - * lgamma(7.3) = log(6.3) + lgamma(6.3) - * = log(6.3*5.3) + lgamma(5.3) - * = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3) - * 2. Polynomial approximation of lgamma around its - * minimun ymin=1.461632144968362245 to maintain monotonicity. - * On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use - * Let z = x-ymin; - * lgamma(x) = -1.214862905358496078218 + z^2*poly(z) - * where - * poly(z) is a 14 degree polynomial. - * 2. Rational approximation in the primary interval [2,3] - * We use the following approximation: - * s = x-2.0; - * lgamma(x) = 0.5*s + s*P(s)/Q(s) - * with accuracy - * |P/Q - (lgamma(x)-0.5s)| < 2**-61.71 - * Our algorithms are based on the following observation - * - * zeta(2)-1 2 zeta(3)-1 3 - * lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ... 
- * 2 3 - * - * where Euler = 0.5771... is the Euler constant, which is very - * close to 0.5. - * - * 3. For x>=8, we have - * lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+.... - * (better formula: - * lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...) - * Let z = 1/x, then we approximation - * f(z) = lgamma(x) - (x-0.5)(log(x)-1) - * by - * 3 5 11 - * w = w0 + w1*z + w2*z + w3*z + ... + w6*z - * where - * |w - f(z)| < 2**-58.74 - * - * 4. For negative x, since (G is gamma function) - * -x*G(-x)*G(x) = PI/sin(PI*x), - * we have - * G(x) = PI/(sin(PI*x)*(-x)*G(-x)) - * since G(-x) is positive, sign(G(x)) = sign(sin(PI*x)) for x<0 - * Hence, for x<0, signgam = sign(sin(PI*x)) and - * lgamma(x) = log(|Gamma(x)|) - * = log(PI/(|x*sin(PI*x)|)) - lgamma(-x); - * Note: one should avoid compute PI*(-x) directly in the - * computation of sin(PI*(-x)). - * - * 5. Special Cases - * lgamma(2+s) ~ s*(1-Euler) for tiny s - * lgamma(1) = lgamma(2) = 0 - * lgamma(x) ~ -log(|x|) for tiny x - * lgamma(0) = lgamma(neg.integer) = inf and raise divide-by-zero - * lgamma(inf) = inf - * lgamma(-inf) = inf (bug for bug compatible with C99!?) 
- * - */ - -use super::{floor, k_cos, k_sin, log}; - -const PI: f64 = 3.14159265358979311600e+00; /* 0x400921FB, 0x54442D18 */ -const A0: f64 = 7.72156649015328655494e-02; /* 0x3FB3C467, 0xE37DB0C8 */ -const A1: f64 = 3.22467033424113591611e-01; /* 0x3FD4A34C, 0xC4A60FAD */ -const A2: f64 = 6.73523010531292681824e-02; /* 0x3FB13E00, 0x1A5562A7 */ -const A3: f64 = 2.05808084325167332806e-02; /* 0x3F951322, 0xAC92547B */ -const A4: f64 = 7.38555086081402883957e-03; /* 0x3F7E404F, 0xB68FEFE8 */ -const A5: f64 = 2.89051383673415629091e-03; /* 0x3F67ADD8, 0xCCB7926B */ -const A6: f64 = 1.19270763183362067845e-03; /* 0x3F538A94, 0x116F3F5D */ -const A7: f64 = 5.10069792153511336608e-04; /* 0x3F40B6C6, 0x89B99C00 */ -const A8: f64 = 2.20862790713908385557e-04; /* 0x3F2CF2EC, 0xED10E54D */ -const A9: f64 = 1.08011567247583939954e-04; /* 0x3F1C5088, 0x987DFB07 */ -const A10: f64 = 2.52144565451257326939e-05; /* 0x3EFA7074, 0x428CFA52 */ -const A11: f64 = 4.48640949618915160150e-05; /* 0x3F07858E, 0x90A45837 */ -const TC: f64 = 1.46163214496836224576e+00; /* 0x3FF762D8, 0x6356BE3F */ -const TF: f64 = -1.21486290535849611461e-01; /* 0xBFBF19B9, 0xBCC38A42 */ -/* tt = -(tail of TF) */ -const TT: f64 = -3.63867699703950536541e-18; /* 0xBC50C7CA, 0xA48A971F */ -const T0: f64 = 4.83836122723810047042e-01; /* 0x3FDEF72B, 0xC8EE38A2 */ -const T1: f64 = -1.47587722994593911752e-01; /* 0xBFC2E427, 0x8DC6C509 */ -const T2: f64 = 6.46249402391333854778e-02; /* 0x3FB08B42, 0x94D5419B */ -const T3: f64 = -3.27885410759859649565e-02; /* 0xBFA0C9A8, 0xDF35B713 */ -const T4: f64 = 1.79706750811820387126e-02; /* 0x3F9266E7, 0x970AF9EC */ -const T5: f64 = -1.03142241298341437450e-02; /* 0xBF851F9F, 0xBA91EC6A */ -const T6: f64 = 6.10053870246291332635e-03; /* 0x3F78FCE0, 0xE370E344 */ -const T7: f64 = -3.68452016781138256760e-03; /* 0xBF6E2EFF, 0xB3E914D7 */ -const T8: f64 = 2.25964780900612472250e-03; /* 0x3F6282D3, 0x2E15C915 */ -const T9: f64 = -1.40346469989232843813e-03; /* 0xBF56FE8E, 
0xBF2D1AF1 */ -const T10: f64 = 8.81081882437654011382e-04; /* 0x3F4CDF0C, 0xEF61A8E9 */ -const T11: f64 = -5.38595305356740546715e-04; /* 0xBF41A610, 0x9C73E0EC */ -const T12: f64 = 3.15632070903625950361e-04; /* 0x3F34AF6D, 0x6C0EBBF7 */ -const T13: f64 = -3.12754168375120860518e-04; /* 0xBF347F24, 0xECC38C38 */ -const T14: f64 = 3.35529192635519073543e-04; /* 0x3F35FD3E, 0xE8C2D3F4 */ -const U0: f64 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */ -const U1: f64 = 6.32827064025093366517e-01; /* 0x3FE4401E, 0x8B005DFF */ -const U2: f64 = 1.45492250137234768737e+00; /* 0x3FF7475C, 0xD119BD6F */ -const U3: f64 = 9.77717527963372745603e-01; /* 0x3FEF4976, 0x44EA8450 */ -const U4: f64 = 2.28963728064692451092e-01; /* 0x3FCD4EAE, 0xF6010924 */ -const U5: f64 = 1.33810918536787660377e-02; /* 0x3F8B678B, 0xBF2BAB09 */ -const V1: f64 = 2.45597793713041134822e+00; /* 0x4003A5D7, 0xC2BD619C */ -const V2: f64 = 2.12848976379893395361e+00; /* 0x40010725, 0xA42B18F5 */ -const V3: f64 = 7.69285150456672783825e-01; /* 0x3FE89DFB, 0xE45050AF */ -const V4: f64 = 1.04222645593369134254e-01; /* 0x3FBAAE55, 0xD6537C88 */ -const V5: f64 = 3.21709242282423911810e-03; /* 0x3F6A5ABB, 0x57D0CF61 */ -const S0: f64 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */ -const S1: f64 = 2.14982415960608852501e-01; /* 0x3FCB848B, 0x36E20878 */ -const S2: f64 = 3.25778796408930981787e-01; /* 0x3FD4D98F, 0x4F139F59 */ -const S3: f64 = 1.46350472652464452805e-01; /* 0x3FC2BB9C, 0xBEE5F2F7 */ -const S4: f64 = 2.66422703033638609560e-02; /* 0x3F9B481C, 0x7E939961 */ -const S5: f64 = 1.84028451407337715652e-03; /* 0x3F5E26B6, 0x7368F239 */ -const S6: f64 = 3.19475326584100867617e-05; /* 0x3F00BFEC, 0xDD17E945 */ -const R1: f64 = 1.39200533467621045958e+00; /* 0x3FF645A7, 0x62C4AB74 */ -const R2: f64 = 7.21935547567138069525e-01; /* 0x3FE71A18, 0x93D3DCDC */ -const R3: f64 = 1.71933865632803078993e-01; /* 0x3FC601ED, 0xCCFBDF27 */ -const R4: f64 = 1.86459191715652901344e-02; /* 
0x3F9317EA, 0x742ED475 */ -const R5: f64 = 7.77942496381893596434e-04; /* 0x3F497DDA, 0xCA41A95B */ -const R6: f64 = 7.32668430744625636189e-06; /* 0x3EDEBAF7, 0xA5B38140 */ -const W0: f64 = 4.18938533204672725052e-01; /* 0x3FDACFE3, 0x90C97D69 */ -const W1: f64 = 8.33333333333329678849e-02; /* 0x3FB55555, 0x5555553B */ -const W2: f64 = -2.77777777728775536470e-03; /* 0xBF66C16C, 0x16B02E5C */ -const W3: f64 = 7.93650558643019558500e-04; /* 0x3F4A019F, 0x98CF38B6 */ -const W4: f64 = -5.95187557450339963135e-04; /* 0xBF4380CB, 0x8C0FE741 */ -const W5: f64 = 8.36339918996282139126e-04; /* 0x3F4B67BA, 0x4CDAD5D1 */ -const W6: f64 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ - -/* sin(PI*x) assuming x > 2^-100, if sin(PI*x)==0 the sign is arbitrary */ -fn sin_pi(mut x: f64) -> f64 { - let mut n: i32; - - /* spurious inexact if odd int */ - x = 2.0 * (x * 0.5 - floor(x * 0.5)); /* x mod 2.0 */ - - n = (x * 4.0) as i32; - n = (n + 1) / 2; - x -= (n as f64) * 0.5; - x *= PI; - - match n { - 1 => k_cos(x, 0.0), - 2 => k_sin(-x, 0.0, 0), - 3 => -k_cos(x, 0.0), - 0 | _ => k_sin(x, 0.0, 0), - } -} +use super::lgamma_r; pub fn lgamma(x: f64) -> f64 { lgamma_r(x).0 } - -pub fn lgamma_r(mut x: f64) -> (f64, i32) { - let u: u64 = x.to_bits(); - let mut t: f64; - let y: f64; - let mut z: f64; - let nadj: f64; - let p: f64; - let p1: f64; - let p2: f64; - let p3: f64; - let q: f64; - let mut r: f64; - let w: f64; - let ix: u32; - let sign: bool; - let i: i32; - let mut signgam: i32; - - /* purge off +-inf, NaN, +-0, tiny and negative arguments */ - signgam = 1; - sign = (u >> 63) != 0; - ix = ((u >> 32) as u32) & 0x7fffffff; - if ix >= 0x7ff00000 { - return (x * x, signgam); - } - if ix < (0x3ff - 70) << 20 { - /* |x|<2**-70, return -log(|x|) */ - if sign { - x = -x; - signgam = -1; - } - return (-log(x), signgam); - } - if sign { - x = -x; - t = sin_pi(x); - if t == 0.0 { - /* -integer */ - return (1.0 / (x - x), signgam); - } - if t > 0.0 { - signgam = -1; - } else 
{ - t = -t; - } - nadj = log(PI / (t * x)); - } else { - nadj = 0.0; - } - - /* purge off 1 and 2 */ - if (ix == 0x3ff00000 || ix == 0x40000000) && (u & 0xffffffff) == 0 { - r = 0.0; - } - /* for x < 2.0 */ - else if ix < 0x40000000 { - if ix <= 0x3feccccc { - /* lgamma(x) = lgamma(x+1)-log(x) */ - r = -log(x); - if ix >= 0x3FE76944 { - y = 1.0 - x; - i = 0; - } else if ix >= 0x3FCDA661 { - y = x - (TC - 1.0); - i = 1; - } else { - y = x; - i = 2; - } - } else { - r = 0.0; - if ix >= 0x3FFBB4C3 { - /* [1.7316,2] */ - y = 2.0 - x; - i = 0; - } else if ix >= 0x3FF3B4C4 { - /* [1.23,1.73] */ - y = x - TC; - i = 1; - } else { - y = x - 1.0; - i = 2; - } - } - match i { - 0 => { - z = y * y; - p1 = A0 + z * (A2 + z * (A4 + z * (A6 + z * (A8 + z * A10)))); - p2 = z * (A1 + z * (A3 + z * (A5 + z * (A7 + z * (A9 + z * A11))))); - p = y * p1 + p2; - r += p - 0.5 * y; - } - 1 => { - z = y * y; - w = z * y; - p1 = T0 + w * (T3 + w * (T6 + w * (T9 + w * T12))); /* parallel comp */ - p2 = T1 + w * (T4 + w * (T7 + w * (T10 + w * T13))); - p3 = T2 + w * (T5 + w * (T8 + w * (T11 + w * T14))); - p = z * p1 - (TT - w * (p2 + y * p3)); - r += TF + p; - } - 2 => { - p1 = y * (U0 + y * (U1 + y * (U2 + y * (U3 + y * (U4 + y * U5))))); - p2 = 1.0 + y * (V1 + y * (V2 + y * (V3 + y * (V4 + y * V5)))); - r += -0.5 * y + p1 / p2; - } - #[cfg(feature = "checked")] - _ => unreachable!(), - #[cfg(not(feature = "checked"))] - _ => {} - } - } else if ix < 0x40200000 { - /* x < 8.0 */ - i = x as i32; - y = x - (i as f64); - p = y * (S0 + y * (S1 + y * (S2 + y * (S3 + y * (S4 + y * (S5 + y * S6)))))); - q = 1.0 + y * (R1 + y * (R2 + y * (R3 + y * (R4 + y * (R5 + y * R6))))); - r = 0.5 * y + p / q; - z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ - // TODO: In C, this was implemented using switch jumps with fallthrough. - // Does this implementation have performance problems? 
- if i >= 7 { - z *= y + 6.0; - } - if i >= 6 { - z *= y + 5.0; - } - if i >= 5 { - z *= y + 4.0; - } - if i >= 4 { - z *= y + 3.0; - } - if i >= 3 { - z *= y + 2.0; - r += log(z); - } - } else if ix < 0x43900000 { - /* 8.0 <= x < 2**58 */ - t = log(x); - z = 1.0 / x; - y = z * z; - w = W0 + z * (W1 + y * (W2 + y * (W3 + y * (W4 + y * (W5 + y * W6))))); - r = (x - 0.5) * (t - 1.0) + w; - } else { - /* 2**58 <= x <= inf */ - r = x * (log(x) - 1.0); - } - if sign { - r = nadj - r; - } - return (r, signgam); -} diff --git a/libm/src/math/lgamma_r.rs b/libm/src/math/lgamma_r.rs new file mode 100644 index 000000000..382a501fc --- /dev/null +++ b/libm/src/math/lgamma_r.rs @@ -0,0 +1,319 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_lgamma_r.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + */ +/* lgamma_r(x, signgamp) + * Reentrant version of the logarithm of the Gamma function + * with user provide pointer for the sign of Gamma(x). + * + * Method: + * 1. Argument Reduction for 0 < x <= 8 + * Since gamma(1+s)=s*gamma(s), for x in [0,8], we may + * reduce x to a number in [1.5,2.5] by + * lgamma(1+s) = log(s) + lgamma(s) + * for example, + * lgamma(7.3) = log(6.3) + lgamma(6.3) + * = log(6.3*5.3) + lgamma(5.3) + * = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3) + * 2. Polynomial approximation of lgamma around its + * minimun ymin=1.461632144968362245 to maintain monotonicity. + * On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use + * Let z = x-ymin; + * lgamma(x) = -1.214862905358496078218 + z^2*poly(z) + * where + * poly(z) is a 14 degree polynomial. + * 2. 
Rational approximation in the primary interval [2,3] + * We use the following approximation: + * s = x-2.0; + * lgamma(x) = 0.5*s + s*P(s)/Q(s) + * with accuracy + * |P/Q - (lgamma(x)-0.5s)| < 2**-61.71 + * Our algorithms are based on the following observation + * + * zeta(2)-1 2 zeta(3)-1 3 + * lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ... + * 2 3 + * + * where Euler = 0.5771... is the Euler constant, which is very + * close to 0.5. + * + * 3. For x>=8, we have + * lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+.... + * (better formula: + * lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...) + * Let z = 1/x, then we approximation + * f(z) = lgamma(x) - (x-0.5)(log(x)-1) + * by + * 3 5 11 + * w = w0 + w1*z + w2*z + w3*z + ... + w6*z + * where + * |w - f(z)| < 2**-58.74 + * + * 4. For negative x, since (G is gamma function) + * -x*G(-x)*G(x) = PI/sin(PI*x), + * we have + * G(x) = PI/(sin(PI*x)*(-x)*G(-x)) + * since G(-x) is positive, sign(G(x)) = sign(sin(PI*x)) for x<0 + * Hence, for x<0, signgam = sign(sin(PI*x)) and + * lgamma(x) = log(|Gamma(x)|) + * = log(PI/(|x*sin(PI*x)|)) - lgamma(-x); + * Note: one should avoid compute PI*(-x) directly in the + * computation of sin(PI*(-x)). + * + * 5. Special Cases + * lgamma(2+s) ~ s*(1-Euler) for tiny s + * lgamma(1) = lgamma(2) = 0 + * lgamma(x) ~ -log(|x|) for tiny x + * lgamma(0) = lgamma(neg.integer) = inf and raise divide-by-zero + * lgamma(inf) = inf + * lgamma(-inf) = inf (bug for bug compatible with C99!?) 
+ * + */ + +use super::{floor, k_cos, k_sin, log}; + +const PI: f64 = 3.14159265358979311600e+00; /* 0x400921FB, 0x54442D18 */ +const A0: f64 = 7.72156649015328655494e-02; /* 0x3FB3C467, 0xE37DB0C8 */ +const A1: f64 = 3.22467033424113591611e-01; /* 0x3FD4A34C, 0xC4A60FAD */ +const A2: f64 = 6.73523010531292681824e-02; /* 0x3FB13E00, 0x1A5562A7 */ +const A3: f64 = 2.05808084325167332806e-02; /* 0x3F951322, 0xAC92547B */ +const A4: f64 = 7.38555086081402883957e-03; /* 0x3F7E404F, 0xB68FEFE8 */ +const A5: f64 = 2.89051383673415629091e-03; /* 0x3F67ADD8, 0xCCB7926B */ +const A6: f64 = 1.19270763183362067845e-03; /* 0x3F538A94, 0x116F3F5D */ +const A7: f64 = 5.10069792153511336608e-04; /* 0x3F40B6C6, 0x89B99C00 */ +const A8: f64 = 2.20862790713908385557e-04; /* 0x3F2CF2EC, 0xED10E54D */ +const A9: f64 = 1.08011567247583939954e-04; /* 0x3F1C5088, 0x987DFB07 */ +const A10: f64 = 2.52144565451257326939e-05; /* 0x3EFA7074, 0x428CFA52 */ +const A11: f64 = 4.48640949618915160150e-05; /* 0x3F07858E, 0x90A45837 */ +const TC: f64 = 1.46163214496836224576e+00; /* 0x3FF762D8, 0x6356BE3F */ +const TF: f64 = -1.21486290535849611461e-01; /* 0xBFBF19B9, 0xBCC38A42 */ +/* tt = -(tail of TF) */ +const TT: f64 = -3.63867699703950536541e-18; /* 0xBC50C7CA, 0xA48A971F */ +const T0: f64 = 4.83836122723810047042e-01; /* 0x3FDEF72B, 0xC8EE38A2 */ +const T1: f64 = -1.47587722994593911752e-01; /* 0xBFC2E427, 0x8DC6C509 */ +const T2: f64 = 6.46249402391333854778e-02; /* 0x3FB08B42, 0x94D5419B */ +const T3: f64 = -3.27885410759859649565e-02; /* 0xBFA0C9A8, 0xDF35B713 */ +const T4: f64 = 1.79706750811820387126e-02; /* 0x3F9266E7, 0x970AF9EC */ +const T5: f64 = -1.03142241298341437450e-02; /* 0xBF851F9F, 0xBA91EC6A */ +const T6: f64 = 6.10053870246291332635e-03; /* 0x3F78FCE0, 0xE370E344 */ +const T7: f64 = -3.68452016781138256760e-03; /* 0xBF6E2EFF, 0xB3E914D7 */ +const T8: f64 = 2.25964780900612472250e-03; /* 0x3F6282D3, 0x2E15C915 */ +const T9: f64 = -1.40346469989232843813e-03; /* 0xBF56FE8E, 
0xBF2D1AF1 */ +const T10: f64 = 8.81081882437654011382e-04; /* 0x3F4CDF0C, 0xEF61A8E9 */ +const T11: f64 = -5.38595305356740546715e-04; /* 0xBF41A610, 0x9C73E0EC */ +const T12: f64 = 3.15632070903625950361e-04; /* 0x3F34AF6D, 0x6C0EBBF7 */ +const T13: f64 = -3.12754168375120860518e-04; /* 0xBF347F24, 0xECC38C38 */ +const T14: f64 = 3.35529192635519073543e-04; /* 0x3F35FD3E, 0xE8C2D3F4 */ +const U0: f64 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */ +const U1: f64 = 6.32827064025093366517e-01; /* 0x3FE4401E, 0x8B005DFF */ +const U2: f64 = 1.45492250137234768737e+00; /* 0x3FF7475C, 0xD119BD6F */ +const U3: f64 = 9.77717527963372745603e-01; /* 0x3FEF4976, 0x44EA8450 */ +const U4: f64 = 2.28963728064692451092e-01; /* 0x3FCD4EAE, 0xF6010924 */ +const U5: f64 = 1.33810918536787660377e-02; /* 0x3F8B678B, 0xBF2BAB09 */ +const V1: f64 = 2.45597793713041134822e+00; /* 0x4003A5D7, 0xC2BD619C */ +const V2: f64 = 2.12848976379893395361e+00; /* 0x40010725, 0xA42B18F5 */ +const V3: f64 = 7.69285150456672783825e-01; /* 0x3FE89DFB, 0xE45050AF */ +const V4: f64 = 1.04222645593369134254e-01; /* 0x3FBAAE55, 0xD6537C88 */ +const V5: f64 = 3.21709242282423911810e-03; /* 0x3F6A5ABB, 0x57D0CF61 */ +const S0: f64 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */ +const S1: f64 = 2.14982415960608852501e-01; /* 0x3FCB848B, 0x36E20878 */ +const S2: f64 = 3.25778796408930981787e-01; /* 0x3FD4D98F, 0x4F139F59 */ +const S3: f64 = 1.46350472652464452805e-01; /* 0x3FC2BB9C, 0xBEE5F2F7 */ +const S4: f64 = 2.66422703033638609560e-02; /* 0x3F9B481C, 0x7E939961 */ +const S5: f64 = 1.84028451407337715652e-03; /* 0x3F5E26B6, 0x7368F239 */ +const S6: f64 = 3.19475326584100867617e-05; /* 0x3F00BFEC, 0xDD17E945 */ +const R1: f64 = 1.39200533467621045958e+00; /* 0x3FF645A7, 0x62C4AB74 */ +const R2: f64 = 7.21935547567138069525e-01; /* 0x3FE71A18, 0x93D3DCDC */ +const R3: f64 = 1.71933865632803078993e-01; /* 0x3FC601ED, 0xCCFBDF27 */ +const R4: f64 = 1.86459191715652901344e-02; /* 
0x3F9317EA, 0x742ED475 */ +const R5: f64 = 7.77942496381893596434e-04; /* 0x3F497DDA, 0xCA41A95B */ +const R6: f64 = 7.32668430744625636189e-06; /* 0x3EDEBAF7, 0xA5B38140 */ +const W0: f64 = 4.18938533204672725052e-01; /* 0x3FDACFE3, 0x90C97D69 */ +const W1: f64 = 8.33333333333329678849e-02; /* 0x3FB55555, 0x5555553B */ +const W2: f64 = -2.77777777728775536470e-03; /* 0xBF66C16C, 0x16B02E5C */ +const W3: f64 = 7.93650558643019558500e-04; /* 0x3F4A019F, 0x98CF38B6 */ +const W4: f64 = -5.95187557450339963135e-04; /* 0xBF4380CB, 0x8C0FE741 */ +const W5: f64 = 8.36339918996282139126e-04; /* 0x3F4B67BA, 0x4CDAD5D1 */ +const W6: f64 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ + +/* sin(PI*x) assuming x > 2^-100, if sin(PI*x)==0 the sign is arbitrary */ +fn sin_pi(mut x: f64) -> f64 { + let mut n: i32; + + /* spurious inexact if odd int */ + x = 2.0 * (x * 0.5 - floor(x * 0.5)); /* x mod 2.0 */ + + n = (x * 4.0) as i32; + n = (n + 1) / 2; + x -= (n as f64) * 0.5; + x *= PI; + + match n { + 1 => k_cos(x, 0.0), + 2 => k_sin(-x, 0.0, 0), + 3 => -k_cos(x, 0.0), + 0 | _ => k_sin(x, 0.0, 0), + } +} + +pub fn lgamma_r(mut x: f64) -> (f64, i32) { + let u: u64 = x.to_bits(); + let mut t: f64; + let y: f64; + let mut z: f64; + let nadj: f64; + let p: f64; + let p1: f64; + let p2: f64; + let p3: f64; + let q: f64; + let mut r: f64; + let w: f64; + let ix: u32; + let sign: bool; + let i: i32; + let mut signgam: i32; + + /* purge off +-inf, NaN, +-0, tiny and negative arguments */ + signgam = 1; + sign = (u >> 63) != 0; + ix = ((u >> 32) as u32) & 0x7fffffff; + if ix >= 0x7ff00000 { + return (x * x, signgam); + } + if ix < (0x3ff - 70) << 20 { + /* |x|<2**-70, return -log(|x|) */ + if sign { + x = -x; + signgam = -1; + } + return (-log(x), signgam); + } + if sign { + x = -x; + t = sin_pi(x); + if t == 0.0 { + /* -integer */ + return (1.0 / (x - x), signgam); + } + if t > 0.0 { + signgam = -1; + } else { + t = -t; + } + nadj = log(PI / (t * x)); + } else { + nadj = 0.0; 
+ } + + /* purge off 1 and 2 */ + if (ix == 0x3ff00000 || ix == 0x40000000) && (u & 0xffffffff) == 0 { + r = 0.0; + } + /* for x < 2.0 */ + else if ix < 0x40000000 { + if ix <= 0x3feccccc { + /* lgamma(x) = lgamma(x+1)-log(x) */ + r = -log(x); + if ix >= 0x3FE76944 { + y = 1.0 - x; + i = 0; + } else if ix >= 0x3FCDA661 { + y = x - (TC - 1.0); + i = 1; + } else { + y = x; + i = 2; + } + } else { + r = 0.0; + if ix >= 0x3FFBB4C3 { + /* [1.7316,2] */ + y = 2.0 - x; + i = 0; + } else if ix >= 0x3FF3B4C4 { + /* [1.23,1.73] */ + y = x - TC; + i = 1; + } else { + y = x - 1.0; + i = 2; + } + } + match i { + 0 => { + z = y * y; + p1 = A0 + z * (A2 + z * (A4 + z * (A6 + z * (A8 + z * A10)))); + p2 = z * (A1 + z * (A3 + z * (A5 + z * (A7 + z * (A9 + z * A11))))); + p = y * p1 + p2; + r += p - 0.5 * y; + } + 1 => { + z = y * y; + w = z * y; + p1 = T0 + w * (T3 + w * (T6 + w * (T9 + w * T12))); /* parallel comp */ + p2 = T1 + w * (T4 + w * (T7 + w * (T10 + w * T13))); + p3 = T2 + w * (T5 + w * (T8 + w * (T11 + w * T14))); + p = z * p1 - (TT - w * (p2 + y * p3)); + r += TF + p; + } + 2 => { + p1 = y * (U0 + y * (U1 + y * (U2 + y * (U3 + y * (U4 + y * U5))))); + p2 = 1.0 + y * (V1 + y * (V2 + y * (V3 + y * (V4 + y * V5)))); + r += -0.5 * y + p1 / p2; + } + #[cfg(feature = "checked")] + _ => unreachable!(), + #[cfg(not(feature = "checked"))] + _ => {} + } + } else if ix < 0x40200000 { + /* x < 8.0 */ + i = x as i32; + y = x - (i as f64); + p = y * (S0 + y * (S1 + y * (S2 + y * (S3 + y * (S4 + y * (S5 + y * S6)))))); + q = 1.0 + y * (R1 + y * (R2 + y * (R3 + y * (R4 + y * (R5 + y * R6))))); + r = 0.5 * y + p / q; + z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ + // TODO: In C, this was implemented using switch jumps with fallthrough. + // Does this implementation have performance problems? 
+ if i >= 7 { + z *= y + 6.0; + } + if i >= 6 { + z *= y + 5.0; + } + if i >= 5 { + z *= y + 4.0; + } + if i >= 4 { + z *= y + 3.0; + } + if i >= 3 { + z *= y + 2.0; + r += log(z); + } + } else if ix < 0x43900000 { + /* 8.0 <= x < 2**58 */ + t = log(x); + z = 1.0 / x; + y = z * z; + w = W0 + z * (W1 + y * (W2 + y * (W3 + y * (W4 + y * (W5 + y * W6))))); + r = (x - 0.5) * (t - 1.0) + w; + } else { + /* 2**58 <= x <= inf */ + r = x * (log(x) - 1.0); + } + if sign { + r = nadj - r; + } + return (r, signgam); +} diff --git a/libm/src/math/lgammaf.rs b/libm/src/math/lgammaf.rs index 8fe8060b5..dfdc87f96 100644 --- a/libm/src/math/lgammaf.rs +++ b/libm/src/math/lgammaf.rs @@ -1,258 +1,5 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_lgammaf_r.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. 
- * ==================================================== - */ - -use super::{floorf, k_cosf, k_sinf, logf}; - -const PI: f32 = 3.1415927410e+00; /* 0x40490fdb */ -const A0: f32 = 7.7215664089e-02; /* 0x3d9e233f */ -const A1: f32 = 3.2246702909e-01; /* 0x3ea51a66 */ -const A2: f32 = 6.7352302372e-02; /* 0x3d89f001 */ -const A3: f32 = 2.0580807701e-02; /* 0x3ca89915 */ -const A4: f32 = 7.3855509982e-03; /* 0x3bf2027e */ -const A5: f32 = 2.8905137442e-03; /* 0x3b3d6ec6 */ -const A6: f32 = 1.1927076848e-03; /* 0x3a9c54a1 */ -const A7: f32 = 5.1006977446e-04; /* 0x3a05b634 */ -const A8: f32 = 2.2086278477e-04; /* 0x39679767 */ -const A9: f32 = 1.0801156895e-04; /* 0x38e28445 */ -const A10: f32 = 2.5214456400e-05; /* 0x37d383a2 */ -const A11: f32 = 4.4864096708e-05; /* 0x383c2c75 */ -const TC: f32 = 1.4616321325e+00; /* 0x3fbb16c3 */ -const TF: f32 = -1.2148628384e-01; /* 0xbdf8cdcd */ -/* TT = -(tail of TF) */ -const TT: f32 = 6.6971006518e-09; /* 0x31e61c52 */ -const T0: f32 = 4.8383611441e-01; /* 0x3ef7b95e */ -const T1: f32 = -1.4758771658e-01; /* 0xbe17213c */ -const T2: f32 = 6.4624942839e-02; /* 0x3d845a15 */ -const T3: f32 = -3.2788541168e-02; /* 0xbd064d47 */ -const T4: f32 = 1.7970675603e-02; /* 0x3c93373d */ -const T5: f32 = -1.0314224288e-02; /* 0xbc28fcfe */ -const T6: f32 = 6.1005386524e-03; /* 0x3bc7e707 */ -const T7: f32 = -3.6845202558e-03; /* 0xbb7177fe */ -const T8: f32 = 2.2596477065e-03; /* 0x3b141699 */ -const T9: f32 = -1.4034647029e-03; /* 0xbab7f476 */ -const T10: f32 = 8.8108185446e-04; /* 0x3a66f867 */ -const T11: f32 = -5.3859531181e-04; /* 0xba0d3085 */ -const T12: f32 = 3.1563205994e-04; /* 0x39a57b6b */ -const T13: f32 = -3.1275415677e-04; /* 0xb9a3f927 */ -const T14: f32 = 3.3552918467e-04; /* 0x39afe9f7 */ -const U0: f32 = -7.7215664089e-02; /* 0xbd9e233f */ -const U1: f32 = 6.3282704353e-01; /* 0x3f2200f4 */ -const U2: f32 = 1.4549225569e+00; /* 0x3fba3ae7 */ -const U3: f32 = 9.7771751881e-01; /* 0x3f7a4bb2 */ -const U4: f32 = 
2.2896373272e-01; /* 0x3e6a7578 */ -const U5: f32 = 1.3381091878e-02; /* 0x3c5b3c5e */ -const V1: f32 = 2.4559779167e+00; /* 0x401d2ebe */ -const V2: f32 = 2.1284897327e+00; /* 0x4008392d */ -const V3: f32 = 7.6928514242e-01; /* 0x3f44efdf */ -const V4: f32 = 1.0422264785e-01; /* 0x3dd572af */ -const V5: f32 = 3.2170924824e-03; /* 0x3b52d5db */ -const S0: f32 = -7.7215664089e-02; /* 0xbd9e233f */ -const S1: f32 = 2.1498242021e-01; /* 0x3e5c245a */ -const S2: f32 = 3.2577878237e-01; /* 0x3ea6cc7a */ -const S3: f32 = 1.4635047317e-01; /* 0x3e15dce6 */ -const S4: f32 = 2.6642270386e-02; /* 0x3cda40e4 */ -const S5: f32 = 1.8402845599e-03; /* 0x3af135b4 */ -const S6: f32 = 3.1947532989e-05; /* 0x3805ff67 */ -const R1: f32 = 1.3920053244e+00; /* 0x3fb22d3b */ -const R2: f32 = 7.2193557024e-01; /* 0x3f38d0c5 */ -const R3: f32 = 1.7193385959e-01; /* 0x3e300f6e */ -const R4: f32 = 1.8645919859e-02; /* 0x3c98bf54 */ -const R5: f32 = 7.7794247773e-04; /* 0x3a4beed6 */ -const R6: f32 = 7.3266842264e-06; /* 0x36f5d7bd */ -const W0: f32 = 4.1893854737e-01; /* 0x3ed67f1d */ -const W1: f32 = 8.3333335817e-02; /* 0x3daaaaab */ -const W2: f32 = -2.7777778450e-03; /* 0xbb360b61 */ -const W3: f32 = 7.9365057172e-04; /* 0x3a500cfd */ -const W4: f32 = -5.9518753551e-04; /* 0xba1c065c */ -const W5: f32 = 8.3633989561e-04; /* 0x3a5b3dd2 */ -const W6: f32 = -1.6309292987e-03; /* 0xbad5c4e8 */ - -/* sin(PI*x) assuming x > 2^-100, if sin(PI*x)==0 the sign is arbitrary */ -fn sin_pi(mut x: f32) -> f32 { - let mut y: f64; - let mut n: isize; - - /* spurious inexact if odd int */ - x = 2.0 * (x * 0.5 - floorf(x * 0.5)); /* x mod 2.0 */ - - n = (x * 4.0) as isize; - n = (n + 1) / 2; - y = (x as f64) - (n as f64) * 0.5; - y *= 3.14159265358979323846; - match n { - 1 => k_cosf(y), - 2 => k_sinf(-y), - 3 => -k_cosf(y), - 0 | _ => k_sinf(y), - } -} +use super::lgammaf_r; pub fn lgammaf(x: f32) -> f32 { lgammaf_r(x).0 } - -pub fn lgammaf_r(mut x: f32) -> (f32, isize) { - let u = x.to_bits(); - let 
mut t: f32; - let y: f32; - let mut z: f32; - let nadj: f32; - let p: f32; - let p1: f32; - let p2: f32; - let p3: f32; - let q: f32; - let mut r: f32; - let w: f32; - let ix: u32; - let i: isize; - let sign: bool; - let mut signgam: isize; - - /* purge off +-inf, NaN, +-0, tiny and negative arguments */ - signgam = 1; - sign = (u >> 31) != 0; - ix = u & 0x7fffffff; - if ix >= 0x7f800000 { - return (x * x, signgam); - } - if ix < 0x35000000 { - /* |x| < 2**-21, return -log(|x|) */ - if sign { - signgam = -1; - x = -x; - } - return (-logf(x), signgam); - } - if sign { - x = -x; - t = sin_pi(x); - if t == 0.0 { - /* -integer */ - return (1.0 / (x - x), signgam); - } - if t > 0.0 { - signgam = -1; - } else { - t = -t; - } - nadj = logf(PI / (t * x)); - } else { - nadj = 0.0; - } - - /* purge off 1 and 2 */ - if ix == 0x3f800000 || ix == 0x40000000 { - r = 0.0; - } - /* for x < 2.0 */ - else if ix < 0x40000000 { - if ix <= 0x3f666666 { - /* lgamma(x) = lgamma(x+1)-log(x) */ - r = -logf(x); - if ix >= 0x3f3b4a20 { - y = 1.0 - x; - i = 0; - } else if ix >= 0x3e6d3308 { - y = x - (TC - 1.0); - i = 1; - } else { - y = x; - i = 2; - } - } else { - r = 0.0; - if ix >= 0x3fdda618 { - /* [1.7316,2] */ - y = 2.0 - x; - i = 0; - } else if ix >= 0x3F9da620 { - /* [1.23,1.73] */ - y = x - TC; - i = 1; - } else { - y = x - 1.0; - i = 2; - } - } - match i { - 0 => { - z = y * y; - p1 = A0 + z * (A2 + z * (A4 + z * (A6 + z * (A8 + z * A10)))); - p2 = z * (A1 + z * (A3 + z * (A5 + z * (A7 + z * (A9 + z * A11))))); - p = y * p1 + p2; - r += p - 0.5 * y; - } - 1 => { - z = y * y; - w = z * y; - p1 = T0 + w * (T3 + w * (T6 + w * (T9 + w * T12))); /* parallel comp */ - p2 = T1 + w * (T4 + w * (T7 + w * (T10 + w * T13))); - p3 = T2 + w * (T5 + w * (T8 + w * (T11 + w * T14))); - p = z * p1 - (TT - w * (p2 + y * p3)); - r += TF + p; - } - 2 => { - p1 = y * (U0 + y * (U1 + y * (U2 + y * (U3 + y * (U4 + y * U5))))); - p2 = 1.0 + y * (V1 + y * (V2 + y * (V3 + y * (V4 + y * V5)))); - r += -0.5 * 
y + p1 / p2; - } - #[cfg(feature = "checked")] - _ => unreachable!(), - #[cfg(not(feature = "checked"))] - _ => {} - } - } else if ix < 0x41000000 { - /* x < 8.0 */ - i = x as isize; - y = x - (i as f32); - p = y * (S0 + y * (S1 + y * (S2 + y * (S3 + y * (S4 + y * (S5 + y * S6)))))); - q = 1.0 + y * (R1 + y * (R2 + y * (R3 + y * (R4 + y * (R5 + y * R6))))); - r = 0.5 * y + p / q; - z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ - // TODO: In C, this was implemented using switch jumps with fallthrough. - // Does this implementation have performance problems? - if i >= 7 { - z *= y + 6.0; - } - if i >= 6 { - z *= y + 5.0; - } - if i >= 5 { - z *= y + 4.0; - } - if i >= 4 { - z *= y + 3.0; - } - if i >= 3 { - z *= y + 2.0; - r += logf(z); - } - } else if ix < 0x5c800000 { - /* 8.0 <= x < 2**58 */ - t = logf(x); - z = 1.0 / x; - y = z * z; - w = W0 + z * (W1 + y * (W2 + y * (W3 + y * (W4 + y * (W5 + y * W6))))); - r = (x - 0.5) * (t - 1.0) + w; - } else { - /* 2**58 <= x <= inf */ - r = x * (logf(x) - 1.0); - } - if sign { - r = nadj - r; - } - return (r, signgam); -} diff --git a/libm/src/math/lgammaf_r.rs b/libm/src/math/lgammaf_r.rs new file mode 100644 index 000000000..0745359a2 --- /dev/null +++ b/libm/src/math/lgammaf_r.rs @@ -0,0 +1,254 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_lgammaf_r.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + +use super::{floorf, k_cosf, k_sinf, logf}; + +const PI: f32 = 3.1415927410e+00; /* 0x40490fdb */ +const A0: f32 = 7.7215664089e-02; /* 0x3d9e233f */ +const A1: f32 = 3.2246702909e-01; /* 0x3ea51a66 */ +const A2: f32 = 6.7352302372e-02; /* 0x3d89f001 */ +const A3: f32 = 2.0580807701e-02; /* 0x3ca89915 */ +const A4: f32 = 7.3855509982e-03; /* 0x3bf2027e */ +const A5: f32 = 2.8905137442e-03; /* 0x3b3d6ec6 */ +const A6: f32 = 1.1927076848e-03; /* 0x3a9c54a1 */ +const A7: f32 = 5.1006977446e-04; /* 0x3a05b634 */ +const A8: f32 = 2.2086278477e-04; /* 0x39679767 */ +const A9: f32 = 1.0801156895e-04; /* 0x38e28445 */ +const A10: f32 = 2.5214456400e-05; /* 0x37d383a2 */ +const A11: f32 = 4.4864096708e-05; /* 0x383c2c75 */ +const TC: f32 = 1.4616321325e+00; /* 0x3fbb16c3 */ +const TF: f32 = -1.2148628384e-01; /* 0xbdf8cdcd */ +/* TT = -(tail of TF) */ +const TT: f32 = 6.6971006518e-09; /* 0x31e61c52 */ +const T0: f32 = 4.8383611441e-01; /* 0x3ef7b95e */ +const T1: f32 = -1.4758771658e-01; /* 0xbe17213c */ +const T2: f32 = 6.4624942839e-02; /* 0x3d845a15 */ +const T3: f32 = -3.2788541168e-02; /* 0xbd064d47 */ +const T4: f32 = 1.7970675603e-02; /* 0x3c93373d */ +const T5: f32 = -1.0314224288e-02; /* 0xbc28fcfe */ +const T6: f32 = 6.1005386524e-03; /* 0x3bc7e707 */ +const T7: f32 = -3.6845202558e-03; /* 0xbb7177fe */ +const T8: f32 = 2.2596477065e-03; /* 0x3b141699 */ +const T9: f32 = -1.4034647029e-03; /* 0xbab7f476 */ +const T10: f32 = 8.8108185446e-04; /* 0x3a66f867 */ +const T11: f32 = -5.3859531181e-04; /* 0xba0d3085 */ +const T12: f32 = 3.1563205994e-04; /* 0x39a57b6b */ +const T13: f32 = -3.1275415677e-04; /* 0xb9a3f927 */ +const T14: f32 = 3.3552918467e-04; /* 0x39afe9f7 */ +const U0: f32 = -7.7215664089e-02; /* 0xbd9e233f */ +const U1: f32 = 6.3282704353e-01; /* 0x3f2200f4 */ +const U2: f32 = 1.4549225569e+00; /* 0x3fba3ae7 */ +const U3: f32 = 9.7771751881e-01; /* 0x3f7a4bb2 */ +const U4: f32 = 
2.2896373272e-01; /* 0x3e6a7578 */ +const U5: f32 = 1.3381091878e-02; /* 0x3c5b3c5e */ +const V1: f32 = 2.4559779167e+00; /* 0x401d2ebe */ +const V2: f32 = 2.1284897327e+00; /* 0x4008392d */ +const V3: f32 = 7.6928514242e-01; /* 0x3f44efdf */ +const V4: f32 = 1.0422264785e-01; /* 0x3dd572af */ +const V5: f32 = 3.2170924824e-03; /* 0x3b52d5db */ +const S0: f32 = -7.7215664089e-02; /* 0xbd9e233f */ +const S1: f32 = 2.1498242021e-01; /* 0x3e5c245a */ +const S2: f32 = 3.2577878237e-01; /* 0x3ea6cc7a */ +const S3: f32 = 1.4635047317e-01; /* 0x3e15dce6 */ +const S4: f32 = 2.6642270386e-02; /* 0x3cda40e4 */ +const S5: f32 = 1.8402845599e-03; /* 0x3af135b4 */ +const S6: f32 = 3.1947532989e-05; /* 0x3805ff67 */ +const R1: f32 = 1.3920053244e+00; /* 0x3fb22d3b */ +const R2: f32 = 7.2193557024e-01; /* 0x3f38d0c5 */ +const R3: f32 = 1.7193385959e-01; /* 0x3e300f6e */ +const R4: f32 = 1.8645919859e-02; /* 0x3c98bf54 */ +const R5: f32 = 7.7794247773e-04; /* 0x3a4beed6 */ +const R6: f32 = 7.3266842264e-06; /* 0x36f5d7bd */ +const W0: f32 = 4.1893854737e-01; /* 0x3ed67f1d */ +const W1: f32 = 8.3333335817e-02; /* 0x3daaaaab */ +const W2: f32 = -2.7777778450e-03; /* 0xbb360b61 */ +const W3: f32 = 7.9365057172e-04; /* 0x3a500cfd */ +const W4: f32 = -5.9518753551e-04; /* 0xba1c065c */ +const W5: f32 = 8.3633989561e-04; /* 0x3a5b3dd2 */ +const W6: f32 = -1.6309292987e-03; /* 0xbad5c4e8 */ + +/* sin(PI*x) assuming x > 2^-100, if sin(PI*x)==0 the sign is arbitrary */ +fn sin_pi(mut x: f32) -> f32 { + let mut y: f64; + let mut n: isize; + + /* spurious inexact if odd int */ + x = 2.0 * (x * 0.5 - floorf(x * 0.5)); /* x mod 2.0 */ + + n = (x * 4.0) as isize; + n = (n + 1) / 2; + y = (x as f64) - (n as f64) * 0.5; + y *= 3.14159265358979323846; + match n { + 1 => k_cosf(y), + 2 => k_sinf(-y), + 3 => -k_cosf(y), + 0 | _ => k_sinf(y), + } +} + +pub fn lgammaf_r(mut x: f32) -> (f32, i32) { + let u = x.to_bits(); + let mut t: f32; + let y: f32; + let mut z: f32; + let nadj: f32; + let p: f32; + 
let p1: f32; + let p2: f32; + let p3: f32; + let q: f32; + let mut r: f32; + let w: f32; + let ix: u32; + let i: i32; + let sign: bool; + let mut signgam: i32; + + /* purge off +-inf, NaN, +-0, tiny and negative arguments */ + signgam = 1; + sign = (u >> 31) != 0; + ix = u & 0x7fffffff; + if ix >= 0x7f800000 { + return (x * x, signgam); + } + if ix < 0x35000000 { + /* |x| < 2**-21, return -log(|x|) */ + if sign { + signgam = -1; + x = -x; + } + return (-logf(x), signgam); + } + if sign { + x = -x; + t = sin_pi(x); + if t == 0.0 { + /* -integer */ + return (1.0 / (x - x), signgam); + } + if t > 0.0 { + signgam = -1; + } else { + t = -t; + } + nadj = logf(PI / (t * x)); + } else { + nadj = 0.0; + } + + /* purge off 1 and 2 */ + if ix == 0x3f800000 || ix == 0x40000000 { + r = 0.0; + } + /* for x < 2.0 */ + else if ix < 0x40000000 { + if ix <= 0x3f666666 { + /* lgamma(x) = lgamma(x+1)-log(x) */ + r = -logf(x); + if ix >= 0x3f3b4a20 { + y = 1.0 - x; + i = 0; + } else if ix >= 0x3e6d3308 { + y = x - (TC - 1.0); + i = 1; + } else { + y = x; + i = 2; + } + } else { + r = 0.0; + if ix >= 0x3fdda618 { + /* [1.7316,2] */ + y = 2.0 - x; + i = 0; + } else if ix >= 0x3F9da620 { + /* [1.23,1.73] */ + y = x - TC; + i = 1; + } else { + y = x - 1.0; + i = 2; + } + } + match i { + 0 => { + z = y * y; + p1 = A0 + z * (A2 + z * (A4 + z * (A6 + z * (A8 + z * A10)))); + p2 = z * (A1 + z * (A3 + z * (A5 + z * (A7 + z * (A9 + z * A11))))); + p = y * p1 + p2; + r += p - 0.5 * y; + } + 1 => { + z = y * y; + w = z * y; + p1 = T0 + w * (T3 + w * (T6 + w * (T9 + w * T12))); /* parallel comp */ + p2 = T1 + w * (T4 + w * (T7 + w * (T10 + w * T13))); + p3 = T2 + w * (T5 + w * (T8 + w * (T11 + w * T14))); + p = z * p1 - (TT - w * (p2 + y * p3)); + r += TF + p; + } + 2 => { + p1 = y * (U0 + y * (U1 + y * (U2 + y * (U3 + y * (U4 + y * U5))))); + p2 = 1.0 + y * (V1 + y * (V2 + y * (V3 + y * (V4 + y * V5)))); + r += -0.5 * y + p1 / p2; + } + #[cfg(feature = "checked")] + _ => unreachable!(), + 
#[cfg(not(feature = "checked"))] + _ => {} + } + } else if ix < 0x41000000 { + /* x < 8.0 */ + i = x as i32; + y = x - (i as f32); + p = y * (S0 + y * (S1 + y * (S2 + y * (S3 + y * (S4 + y * (S5 + y * S6)))))); + q = 1.0 + y * (R1 + y * (R2 + y * (R3 + y * (R4 + y * (R5 + y * R6))))); + r = 0.5 * y + p / q; + z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ + // TODO: In C, this was implemented using switch jumps with fallthrough. + // Does this implementation have performance problems? + if i >= 7 { + z *= y + 6.0; + } + if i >= 6 { + z *= y + 5.0; + } + if i >= 5 { + z *= y + 4.0; + } + if i >= 4 { + z *= y + 3.0; + } + if i >= 3 { + z *= y + 2.0; + r += logf(z); + } + } else if ix < 0x5c800000 { + /* 8.0 <= x < 2**58 */ + t = logf(x); + z = 1.0 / x; + y = z * z; + w = W0 + z * (W1 + y * (W2 + y * (W3 + y * (W4 + y * (W5 + y * W6))))); + r = (x - 0.5) * (t - 1.0) + w; + } else { + /* 2**58 <= x <= inf */ + r = x * (logf(x) - 1.0); + } + if sign { + r = nadj - r; + } + return (r, signgam); +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index b70b0cd6b..c4d247414 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -129,7 +129,9 @@ mod jnf; mod ldexp; mod ldexpf; mod lgamma; +mod lgamma_r; mod lgammaf; +mod lgammaf_r; mod log; mod log10; mod log10f; @@ -233,9 +235,9 @@ pub use self::jnf::ynf; pub use self::ldexp::ldexp; pub use self::ldexpf::ldexpf; pub use self::lgamma::lgamma; -pub use self::lgamma::lgamma_r; +pub use self::lgamma_r::lgamma_r; pub use self::lgammaf::lgammaf; -pub use self::lgammaf::lgammaf_r; +pub use self::lgammaf_r::lgammaf_r; pub use self::log::log; pub use self::log10::log10; pub use self::log10f::log10f; diff --git a/libm/src/math/modf.rs b/libm/src/math/modf.rs index a37f8b918..bcab33a81 100644 --- a/libm/src/math/modf.rs +++ b/libm/src/math/modf.rs @@ -2,7 +2,7 @@ pub fn modf(x: f64) -> (f64, f64) { let rv2: f64; let mut u = x.to_bits(); let mask: u64; - let e = ((u >> 52 & 0x7ff) as isize) - 0x3ff; + let e = ((u >> 
52 & 0x7ff) as i32) - 0x3ff; /* no fractional part */ if e >= 52 { diff --git a/libm/src/math/modff.rs b/libm/src/math/modff.rs index 4ce9052e7..56ece12e3 100644 --- a/libm/src/math/modff.rs +++ b/libm/src/math/modff.rs @@ -2,7 +2,7 @@ pub fn modff(x: f32) -> (f32, f32) { let rv2: f32; let mut u: u32 = x.to_bits(); let mask: u32; - let e = ((u >> 23 & 0xff) as isize) - 0x7f; + let e = ((u >> 23 & 0xff) as i32) - 0x7f; /* no fractional part */ if e >= 23 { diff --git a/libm/src/math/remquo.rs b/libm/src/math/remquo.rs index 3681b947c..507f8db34 100644 --- a/libm/src/math/remquo.rs +++ b/libm/src/math/remquo.rs @@ -1,8 +1,8 @@ -pub fn remquo(mut x: f64, mut y: f64) -> (f64, isize) { +pub fn remquo(mut x: f64, mut y: f64) -> (f64, i32) { let ux: u64 = x.to_bits(); let mut uy: u64 = y.to_bits(); - let mut ex = ((ux >> 52) & 0x7ff) as isize; - let mut ey = ((uy >> 52) & 0x7ff) as isize; + let mut ex = ((ux >> 52) & 0x7ff) as i32; + let mut ey = ((uy >> 52) & 0x7ff) as i32; let sx = (ux >> 63) != 0; let sy = (uy >> 63) != 0; let mut q: u32; @@ -88,7 +88,7 @@ pub fn remquo(mut x: f64, mut y: f64) -> (f64, isize) { q += 1; } q &= 0x7fffffff; - let quo = if sx ^ sy { -(q as isize) } else { q as isize }; + let quo = if sx ^ sy { -(q as i32) } else { q as i32 }; if sx { (-x, quo) } else { diff --git a/libm/src/math/remquof.rs b/libm/src/math/remquof.rs index 40ded5d69..6aa4974ed 100644 --- a/libm/src/math/remquof.rs +++ b/libm/src/math/remquof.rs @@ -1,8 +1,8 @@ -pub fn remquof(mut x: f32, mut y: f32) -> (f32, isize) { +pub fn remquof(mut x: f32, mut y: f32) -> (f32, i32) { let ux: u32 = x.to_bits(); let mut uy: u32 = y.to_bits(); - let mut ex = ((ux >> 23) & 0xff) as isize; - let mut ey = ((uy >> 23) & 0xff) as isize; + let mut ex = ((ux >> 23) & 0xff) as i32; + let mut ey = ((uy >> 23) & 0xff) as i32; let sx = (ux >> 31) != 0; let sy = (uy >> 31) != 0; let mut q: u32; @@ -87,7 +87,7 @@ pub fn remquof(mut x: f32, mut y: f32) -> (f32, isize) { q += 1; } q &= 0x7fffffff; - let 
quo = if sx ^ sy { -(q as isize) } else { q as isize }; + let quo = if sx ^ sy { -(q as i32) } else { q as i32 }; if sx { (-x, quo) } else { From a662bf8995453a772d42e4f252cf8616bb679235 Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Thu, 9 May 2019 07:36:52 +0300 Subject: [PATCH 0229/1459] fix traits --- libm/build.rs | 20 +++++++++---------- libm/src/lib.rs | 52 +++++++++++++++++-------------------------------- 2 files changed, 28 insertions(+), 44 deletions(-) diff --git a/libm/build.rs b/libm/build.rs index 4d739a121..4da50a885 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -41,18 +41,18 @@ mod musl_reference_tests { "rem_pio2.rs", "rem_pio2_large.rs", "rem_pio2f.rs", - "remquo.rs", // more than 1 result - "remquof.rs", // more than 1 result + "remquo.rs", // more than 1 result + "remquof.rs", // more than 1 result "lgamma_r.rs", // more than 1 result "lgammaf_r.rs", // more than 1 result - "frexp.rs", // more than 1 result - "frexpf.rs", // more than 1 result - "sincos.rs", // more than 1 result - "sincosf.rs", // more than 1 result - "modf.rs", // more than 1 result - "modff.rs", // more than 1 result - "jn.rs", // passed, but very slow - "jnf.rs", // passed, but very slow + "frexp.rs", // more than 1 result + "frexpf.rs", // more than 1 result + "sincos.rs", // more than 1 result + "sincosf.rs", // more than 1 result + "modf.rs", // more than 1 result + "modff.rs", // more than 1 result + "jn.rs", // passed, but very slow + "jnf.rs", // passed, but very slow ]; struct Function { diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 5e94541ab..0d0f6155a 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -119,13 +119,7 @@ pub trait F32Ext: private::Sealed + Sized { fn atan2(self, other: Self) -> Self; - #[inline] - fn sin_cos(self) -> (Self, Self) - where - Self: Copy, - { - (self.sin(), self.cos()) - } + fn sin_cos(self) -> (Self, Self); fn exp_m1(self) -> Self; @@ -289,6 +283,11 @@ impl F32Ext for f32 { atan2f(self, other) } + #[inline] + fn 
sin_cos(self) -> (Self, Self) { + sincosf(self) + } + #[inline] fn exp_m1(self) -> Self { expm1f(self) @@ -316,24 +315,17 @@ impl F32Ext for f32 { #[inline] fn asinh(self) -> Self { - if self == f32::NEG_INFINITY { - f32::NEG_INFINITY - } else { - (self + ((self * self) + 1.0).sqrt()).ln() - } + asinhf(self) } #[inline] fn acosh(self) -> Self { - match self { - x if x < 1.0 => f32::NAN, - x => (x + ((x * x) - 1.0).sqrt()).ln(), - } + acoshf(self) } #[inline] fn atanh(self) -> Self { - 0.5 * ((2.0 * self) / (1.0 - self)).ln_1p() + atanhf(self) } } @@ -401,13 +393,7 @@ pub trait F64Ext: private::Sealed + Sized { fn atan2(self, other: Self) -> Self; - #[inline] - fn sin_cos(self) -> (Self, Self) - where - Self: Copy, - { - (self.sin(), self.cos()) - } + fn sin_cos(self) -> (Self, Self); fn exp_m1(self) -> Self; @@ -571,6 +557,11 @@ impl F64Ext for f64 { atan2(self, other) } + #[inline] + fn sin_cos(self) -> (Self, Self) { + sincos(self) + } + #[inline] fn exp_m1(self) -> Self { expm1(self) @@ -598,24 +589,17 @@ impl F64Ext for f64 { #[inline] fn asinh(self) -> Self { - if self == f64::NEG_INFINITY { - f64::NEG_INFINITY - } else { - (self + ((self * self) + 1.0).sqrt()).ln() - } + asinh(self) } #[inline] fn acosh(self) -> Self { - match self { - x if x < 1.0 => f64::NAN, - x => (x + ((x * x) - 1.0).sqrt()).ln(), - } + acosh(self) } #[inline] fn atanh(self) -> Self { - 0.5 * ((2.0 * self) / (1.0 - self)).ln_1p() + atanh(self) } } From 23c346ea04612306eda41808f86bf69d354df36a Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Thu, 9 May 2019 12:10:11 +0300 Subject: [PATCH 0230/1459] test several outputs --- libm/build.rs | 170 ++++++++++++++++++++++++++++----------- libm/src/math/remquo.rs | 4 +- libm/src/math/remquof.rs | 4 +- 3 files changed, 125 insertions(+), 53 deletions(-) diff --git a/libm/build.rs b/libm/build.rs index 4da50a885..642e929ce 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -27,38 +27,30 @@ mod musl_reference_tests { // These files are all 
internal functions or otherwise miscellaneous, not // defining a function we want to test. const IGNORED_FILES: &[&str] = &[ - "expo2.rs", + "expo2.rs", // kernel, private "fenv.rs", - "k_cos.rs", - "k_cosf.rs", - "k_expo2.rs", - "k_expo2f.rs", - "k_sin.rs", - "k_sinf.rs", - "k_tan.rs", - "k_tanf.rs", + "k_cos.rs", // kernel, private + "k_cosf.rs", // kernel, private + "k_expo2.rs", // kernel, private + "k_expo2f.rs", // kernel, private + "k_sin.rs", // kernel, private + "k_sinf.rs", // kernel, private + "k_tan.rs", // kernel, private + "k_tanf.rs", // kernel, private "mod.rs", - "rem_pio2.rs", - "rem_pio2_large.rs", - "rem_pio2f.rs", - "remquo.rs", // more than 1 result - "remquof.rs", // more than 1 result - "lgamma_r.rs", // more than 1 result - "lgammaf_r.rs", // more than 1 result - "frexp.rs", // more than 1 result - "frexpf.rs", // more than 1 result - "sincos.rs", // more than 1 result - "sincosf.rs", // more than 1 result - "modf.rs", // more than 1 result - "modff.rs", // more than 1 result - "jn.rs", // passed, but very slow - "jnf.rs", // passed, but very slow + "rem_pio2.rs", // kernel, private + "rem_pio2_large.rs", // kernel, private + "rem_pio2f.rs", // kernel, private + "sincos.rs", // more than 1 result + "sincosf.rs", // more than 1 result + "jn.rs", // passed, but very slow + "jnf.rs", // passed, but very slow ]; struct Function { name: String, args: Vec, - ret: Ty, + ret: Vec, tests: Vec, } @@ -71,7 +63,7 @@ mod musl_reference_tests { struct Test { inputs: Vec, - output: i64, + outputs: Vec, } pub fn generate() { @@ -103,7 +95,7 @@ mod musl_reference_tests { // After we have all our inputs, use the x86_64-unknown-linux-musl // target to generate the expected output. generate_test_outputs(&mut math); - + //panic!("Boo"); // ... and now that we have both inputs and expected outputs, do a bunch // of codegen to create the unit tests which we'll actually execute. 
generate_unit_tests(&math); @@ -125,7 +117,7 @@ mod musl_reference_tests { .collect::>(); let tail = &s[end + 1..]; let tail = eat(tail, " -> "); - let ret = parse_ty(tail.trim().split(' ').next().unwrap()); + let ret = parse_retty(tail.replace("{", "").trim()); return Function { name: name.to_string(), @@ -144,6 +136,16 @@ mod musl_reference_tests { } } + fn parse_retty(s: &str) -> Vec { + match s { + "(f32, f32)" => vec![Ty::F32, Ty::F32], + "(f32, i32)" => vec![Ty::F32, Ty::I32], + "(f64, f64)" => vec![Ty::F64, Ty::F64], + "(f64, i32)" => vec![Ty::F64, Ty::I32], + other => vec![parse_ty(other)], + } + } + fn eat<'a>(s: &'a str, prefix: &str) -> &'a str { if s.starts_with(prefix) { &s[prefix.len()..] @@ -163,7 +165,10 @@ mod musl_reference_tests { fn generate_test(args: &[Ty], rng: &mut R) -> Test { let inputs = args.iter().map(|ty| ty.gen_i64(rng)).collect(); // zero output for now since we'll generate it later - Test { inputs, output: 0 } + Test { + inputs, + outputs: vec![], + } } } @@ -192,6 +197,33 @@ mod musl_reference_tests { Ty::Bool => "i32", } } + + fn libc_pty(&self) -> &'static str { + match self { + Ty::F32 => "*mut f32", + Ty::F64 => "*mut f64", + Ty::I32 => "*mut i32", + Ty::Bool => "*mut i32", + } + } + + fn default(&self) -> &'static str { + match self { + Ty::F32 => "0_f32", + Ty::F64 => "0_f64", + Ty::I32 => "0_i32", + Ty::Bool => "false", + } + } + + fn to_i64(&self) -> &'static str { + match self { + Ty::F32 => ".to_bits() as i64", + Ty::F64 => ".to_bits() as i64", + Ty::I32 => " as i64", + Ty::Bool => " as i64", + } + } } fn generate_test_outputs(functions: &mut [Function]) { @@ -212,8 +244,11 @@ mod musl_reference_tests { for (i, arg) in function.args.iter().enumerate() { src.push_str(&format!("arg{}: {},", i, arg.libc_ty())); } + for (i, ret) in function.ret.iter().skip(1).enumerate() { + src.push_str(&format!("argret{}: {},", i, ret.libc_pty())); + } src.push_str(") -> "); - src.push_str(function.ret.libc_ty()); + 
src.push_str(function.ret[0].libc_ty()); src.push_str("; }"); src.push_str(&format!("static TESTS: &[[i64; {}]]", function.args.len())); @@ -229,6 +264,14 @@ mod musl_reference_tests { src.push_str("];"); src.push_str("for test in TESTS {"); + for (i, arg) in function.ret.iter().skip(1).enumerate() { + src.push_str(&format!("let mut argret{} = {};", i, arg.default())); + src.push_str(&format!( + "let argret_ptr{0} = &mut argret{0} as *mut {1};", + i, + arg.libc_ty() + )); + } src.push_str("let output = "); src.push_str(&function.name); src.push_str("("); @@ -241,17 +284,20 @@ mod musl_reference_tests { }); src.push_str(","); } + for (i, _) in function.ret.iter().skip(1).enumerate() { + src.push_str(&format!("argret_ptr{},", i)); + } src.push_str(");"); - src.push_str("let output = "); - src.push_str(match function.ret { - Ty::F32 => "output.to_bits() as i64", - Ty::F64 => "output.to_bits() as i64", - Ty::I32 => "output as i64", - Ty::Bool => "output as i64", - }); - src.push_str(";"); + src.push_str(&format!("let output = output{};", function.ret[0].to_i64())); src.push_str("result.extend_from_slice(&output.to_le_bytes());"); + for (i, ret) in function.ret.iter().skip(1).enumerate() { + src.push_str(&format!("let output{0} = argret{0}{1};", i, ret.to_i64())); + src.push_str(&format!( + "result.extend_from_slice(&output{}.to_le_bytes());", + i + )); + } src.push_str("}"); src.push_str("}"); @@ -288,8 +334,13 @@ mod musl_reference_tests { i64::from_le_bytes(exact) }); - for test in functions.iter_mut().flat_map(|f| f.tests.iter_mut()) { - test.output = results.next().unwrap(); + for f in functions.iter_mut() { + for test in f.tests.iter_mut() { + test.outputs = vec![results.next().unwrap()]; + for _ in f.ret.iter().skip(1) { + test.outputs.push(results.next().unwrap()); + } + } } assert!(results.next().is_none()); } @@ -306,8 +357,9 @@ mod musl_reference_tests { src.push_str(&function.name); src.push_str("_matches_musl() {"); src.push_str(&format!( - "static TESTS: 
&[([i64; {}], i64)]", - function.args.len() + "static TESTS: &[([i64; {}], [i64; {}])]", + function.args.len(), + function.ret.len(), )); src.push_str(" = &["); for test in function.tests.iter() { @@ -317,7 +369,12 @@ mod musl_reference_tests { src.push_str(","); } src.push_str("],"); - src.push_str(&test.output.to_string()); + src.push_str("["); + for val in test.outputs.iter() { + src.push_str(&val.to_string()); + src.push_str(","); + } + src.push_str("],"); src.push_str("),"); } src.push_str("];"); @@ -336,12 +393,27 @@ mod musl_reference_tests { src.push_str(","); } src.push_str(");"); - src.push_str(match function.ret { - Ty::F32 => "if _eqf(output, f32::from_bits(*expected as u32)).is_ok() { continue }", - Ty::F64 => "if _eq(output, f64::from_bits(*expected as u64)).is_ok() { continue }", - Ty::I32 => "if output as i64 == *expected { continue }", - Ty::Bool => unreachable!(), - }); + if function.ret.len() > 1 { + for (i, ret) in function.ret.iter().enumerate() { + src.push_str(&(match ret { + Ty::F32 => format!("if _eqf(output.{0}, f32::from_bits(expected[{0}] as u32)).is_ok() {{ continue }}", i), + Ty::F64 => format!("if _eq(output.{0}, f64::from_bits(expected[{0}] as u64)).is_ok() {{ continue }}", i), + Ty::I32 => format!("if output.{0} as i64 == expected[{0}] {{ continue }}", i), + Ty::Bool => unreachable!(), + })); + } + } else { + src.push_str(match function.ret[0] { + Ty::F32 => { + "if _eqf(output, f32::from_bits(expected[0] as u32)).is_ok() { continue }" + } + Ty::F64 => { + "if _eq(output, f64::from_bits(expected[0] as u64)).is_ok() { continue }" + } + Ty::I32 => "if output as i64 == expected[0] { continue }", + Ty::Bool => unreachable!(), + }); + } src.push_str( r#" diff --git a/libm/src/math/remquo.rs b/libm/src/math/remquo.rs index 507f8db34..1c2ba8918 100644 --- a/libm/src/math/remquo.rs +++ b/libm/src/math/remquo.rs @@ -48,7 +48,7 @@ pub fn remquo(mut x: f64, mut y: f64) -> (f64, i32) { } /* x mod y */ while ex > ey { - i = uxi - uy; + i = 
uxi.wrapping_sub(uy); if (i >> 63) == 0 { uxi = i; q += 1; @@ -57,7 +57,7 @@ pub fn remquo(mut x: f64, mut y: f64) -> (f64, i32) { q <<= 1; ex -= 1; } - i = uxi - uy; + i = uxi.wrapping_sub(uy); if (i >> 63) == 0 { uxi = i; q += 1; diff --git a/libm/src/math/remquof.rs b/libm/src/math/remquof.rs index 6aa4974ed..871d0c7d6 100644 --- a/libm/src/math/remquof.rs +++ b/libm/src/math/remquof.rs @@ -47,7 +47,7 @@ pub fn remquof(mut x: f32, mut y: f32) -> (f32, i32) { } /* x mod y */ while ex > ey { - i = uxi - uy; + i = uxi.wrapping_sub(uy); if (i >> 31) == 0 { uxi = i; q += 1; @@ -56,7 +56,7 @@ pub fn remquof(mut x: f32, mut y: f32) -> (f32, i32) { q <<= 1; ex -= 1; } - i = uxi - uy; + i = uxi.wrapping_sub(uy); if (i >> 31) == 0 { uxi = i; q += 1; From 89033ccf4bc281e481e592bb584b734d5e31ec46 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 9 May 2019 07:27:10 -0700 Subject: [PATCH 0231/1459] Move non-public functions to `pub(crate)` Remove exceptions from the test list after doing so --- libm/build.rs | 13 ------------- libm/src/math/expo2.rs | 2 +- libm/src/math/k_cos.rs | 2 +- libm/src/math/k_cosf.rs | 2 +- libm/src/math/k_expo2f.rs | 2 +- libm/src/math/k_sin.rs | 2 +- libm/src/math/k_sinf.rs | 2 +- libm/src/math/k_tan.rs | 2 +- libm/src/math/k_tanf.rs | 2 +- libm/src/math/rem_pio2.rs | 2 +- libm/src/math/rem_pio2_large.rs | 4 ++-- libm/src/math/rem_pio2f.rs | 2 +- 12 files changed, 12 insertions(+), 25 deletions(-) diff --git a/libm/build.rs b/libm/build.rs index 642e929ce..896b41326 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -27,20 +27,7 @@ mod musl_reference_tests { // These files are all internal functions or otherwise miscellaneous, not // defining a function we want to test. 
const IGNORED_FILES: &[&str] = &[ - "expo2.rs", // kernel, private "fenv.rs", - "k_cos.rs", // kernel, private - "k_cosf.rs", // kernel, private - "k_expo2.rs", // kernel, private - "k_expo2f.rs", // kernel, private - "k_sin.rs", // kernel, private - "k_sinf.rs", // kernel, private - "k_tan.rs", // kernel, private - "k_tanf.rs", // kernel, private - "mod.rs", - "rem_pio2.rs", // kernel, private - "rem_pio2_large.rs", // kernel, private - "rem_pio2f.rs", // kernel, private "sincos.rs", // more than 1 result "sincosf.rs", // more than 1 result "jn.rs", // passed, but very slow diff --git a/libm/src/math/expo2.rs b/libm/src/math/expo2.rs index 9e60ca994..ae6cc8121 100644 --- a/libm/src/math/expo2.rs +++ b/libm/src/math/expo2.rs @@ -3,7 +3,7 @@ use super::{combine_words, exp}; /* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */ #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn expo2(x: f64) -> f64 { +pub(crate) fn expo2(x: f64) -> f64 { /* k is such that k*ln2 has minimal relative error and x - kln2 > log(DBL_MIN) */ const K: i32 = 2043; let kln2 = f64::from_bits(0x40962066151add8b); diff --git a/libm/src/math/k_cos.rs b/libm/src/math/k_cos.rs index 8876fac21..4687b369a 100644 --- a/libm/src/math/k_cos.rs +++ b/libm/src/math/k_cos.rs @@ -53,7 +53,7 @@ const C6: f64 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ // any extra precision in w. 
#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn k_cos(x: f64, y: f64) -> f64 { +pub(crate) fn k_cos(x: f64, y: f64) -> f64 { let z = x * x; let w = z * z; let r = z * (C1 + z * (C2 + z * C3)) + w * w * (C4 + z * (C5 + z * C6)); diff --git a/libm/src/math/k_cosf.rs b/libm/src/math/k_cosf.rs index 9b48e190d..79d0f238f 100644 --- a/libm/src/math/k_cosf.rs +++ b/libm/src/math/k_cosf.rs @@ -22,7 +22,7 @@ const C3: f64 = 0.0000243904487962774090654; /* 0x199342e0ee5069.0p-68 */ #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn k_cosf(x: f64) -> f32 { +pub(crate) fn k_cosf(x: f64) -> f32 { let z = x * x; let w = z * z; let r = C2 + z * C3; diff --git a/libm/src/math/k_expo2f.rs b/libm/src/math/k_expo2f.rs index 68a7a5032..de8507772 100644 --- a/libm/src/math/k_expo2f.rs +++ b/libm/src/math/k_expo2f.rs @@ -6,7 +6,7 @@ const K: i32 = 235; /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn k_expo2f(x: f32) -> f32 { +pub(crate) fn k_expo2f(x: f32) -> f32 { let k_ln2 = f32::from_bits(0x4322e3bc); /* note that k is odd and scale*scale overflows */ let scale = f32::from_bits(((0x7f + K / 2) as u32) << 23); diff --git a/libm/src/math/k_sin.rs b/libm/src/math/k_sin.rs index 15718c4c9..5d2bd68aa 100644 --- a/libm/src/math/k_sin.rs +++ b/libm/src/math/k_sin.rs @@ -45,7 +45,7 @@ const S6: f64 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ // sin(x) = x + (S1*x + (x *(r-y/2)+y)) #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn k_sin(x: f64, y: f64, iy: i32) -> f64 { +pub(crate) fn k_sin(x: f64, y: f64, iy: i32) -> f64 { let z = x * x; let w = z * z; let r = S2 + z * (S3 + z * S4) + z * w * (S5 + z * S6); diff --git a/libm/src/math/k_sinf.rs b/libm/src/math/k_sinf.rs index 157fc104c..68fe926c2 100644 --- a/libm/src/math/k_sinf.rs +++ b/libm/src/math/k_sinf.rs @@ -22,7 +22,7 @@ 
const S4: f64 = 0.0000027183114939898219064; /* 0x16cd878c3b46a7.0p-71 */ #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn k_sinf(x: f64) -> f32 { +pub(crate) fn k_sinf(x: f64) -> f32 { let z = x * x; let w = z * z; let r = S3 + z * S4; diff --git a/libm/src/math/k_tan.rs b/libm/src/math/k_tan.rs index 684e937b9..ea3c386b0 100644 --- a/libm/src/math/k_tan.rs +++ b/libm/src/math/k_tan.rs @@ -60,7 +60,7 @@ const PIO4_LO: f64 = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */ #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn k_tan(mut x: f64, mut y: f64, odd: i32) -> f64 { +pub(crate) fn k_tan(mut x: f64, mut y: f64, odd: i32) -> f64 { let hx = (f64::to_bits(x) >> 32) as u32; let big = (hx & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */ if big { diff --git a/libm/src/math/k_tanf.rs b/libm/src/math/k_tanf.rs index 96a591007..52651378d 100644 --- a/libm/src/math/k_tanf.rs +++ b/libm/src/math/k_tanf.rs @@ -21,7 +21,7 @@ const T: [f64; 6] = [ #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn k_tanf(x: f64, odd: bool) -> f32 { +pub(crate) fn k_tanf(x: f64, odd: bool) -> f32 { let z = x * x; /* * Split up the polynomial into small independent terms to give diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 951dd08b4..285663ea2 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -43,7 +43,7 @@ const PIO2_3T: f64 = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ // caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn rem_pio2(x: f64) -> (i32, f64, f64) { +pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { let x1p24 = f64::from_bits(0x4170000000000000); let sign = (f64::to_bits(x) >> 63) as i32; diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index 8bab48569..006d3e153 100644 --- 
a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -224,11 +224,11 @@ const PIO2: [f64; 8] = [ /// independent of the exponent of the input. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { +pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24 let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24) - #[cfg(target_pointer_width = "64")] + #[cfg(all(target_pointer_width = "64", feature = "checked"))] assert!(e0 <= 16360); let nx = x.len(); diff --git a/libm/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs index 054c31184..af2745d1b 100644 --- a/libm/src/math/rem_pio2f.rs +++ b/libm/src/math/rem_pio2f.rs @@ -33,7 +33,7 @@ const PIO2_1T: f64 = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ /// use __rem_pio2_large() for large x #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn rem_pio2f(x: f32) -> (i32, f64) { +pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) { let x64 = x as f64; let mut tx: [f64; 1] = [0.]; From cb8a1bdabb1c28e04b599e84e6a1b0e74410a802 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 9 May 2019 07:52:52 -0700 Subject: [PATCH 0232/1459] Test sincos and sincosf --- libm/build.rs | 81 +++++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 44 deletions(-) diff --git a/libm/build.rs b/libm/build.rs index 896b41326..b913a3e31 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -28,10 +28,8 @@ mod musl_reference_tests { // defining a function we want to test. 
const IGNORED_FILES: &[&str] = &[ "fenv.rs", - "sincos.rs", // more than 1 result - "sincosf.rs", // more than 1 result - "jn.rs", // passed, but very slow - "jnf.rs", // passed, but very slow + "jn.rs", // passed, but very slow + "jnf.rs", // passed, but very slow ]; struct Function { @@ -228,14 +226,22 @@ mod musl_reference_tests { src.push_str("extern { fn "); src.push_str(&function.name); src.push_str("("); + + let (ret, retptr) = match function.name.as_str() { + "sincos" | "sincosf" => (None, &function.ret[..]), + _ => (Some(&function.ret[0]), &function.ret[1..]), + }; for (i, arg) in function.args.iter().enumerate() { src.push_str(&format!("arg{}: {},", i, arg.libc_ty())); } - for (i, ret) in function.ret.iter().skip(1).enumerate() { + for (i, ret) in retptr.iter().enumerate() { src.push_str(&format!("argret{}: {},", i, ret.libc_pty())); } - src.push_str(") -> "); - src.push_str(function.ret[0].libc_ty()); + src.push_str(")"); + if let Some(ty) = ret { + src.push_str(" -> "); + src.push_str(ty.libc_ty()); + } src.push_str("; }"); src.push_str(&format!("static TESTS: &[[i64; {}]]", function.args.len())); @@ -251,13 +257,8 @@ mod musl_reference_tests { src.push_str("];"); src.push_str("for test in TESTS {"); - for (i, arg) in function.ret.iter().skip(1).enumerate() { + for (i, arg) in retptr.iter().enumerate() { src.push_str(&format!("let mut argret{} = {};", i, arg.default())); - src.push_str(&format!( - "let argret_ptr{0} = &mut argret{0} as *mut {1};", - i, - arg.libc_ty() - )); } src.push_str("let output = "); src.push_str(&function.name); @@ -271,18 +272,20 @@ mod musl_reference_tests { }); src.push_str(","); } - for (i, _) in function.ret.iter().skip(1).enumerate() { - src.push_str(&format!("argret_ptr{},", i)); + for (i, _) in retptr.iter().enumerate() { + src.push_str(&format!("&mut argret{},", i)); } src.push_str(");"); - src.push_str(&format!("let output = output{};", function.ret[0].to_i64())); - 
src.push_str("result.extend_from_slice(&output.to_le_bytes());"); + if let Some(ty) = &ret { + src.push_str(&format!("let output = output{};", ty.to_i64())); + src.push_str("result.extend_from_slice(&output.to_le_bytes());"); + } - for (i, ret) in function.ret.iter().skip(1).enumerate() { - src.push_str(&format!("let output{0} = argret{0}{1};", i, ret.to_i64())); + for (i, ret) in retptr.iter().enumerate() { src.push_str(&format!( - "result.extend_from_slice(&output{}.to_le_bytes());", - i + "result.extend_from_slice(&(argret{}{}).to_le_bytes());", + i, + ret.to_i64(), )); } src.push_str("}"); @@ -323,10 +326,7 @@ mod musl_reference_tests { for f in functions.iter_mut() { for test in f.tests.iter_mut() { - test.outputs = vec![results.next().unwrap()]; - for _ in f.ret.iter().skip(1) { - test.outputs.push(results.next().unwrap()); - } + test.outputs = (0..f.ret.len()).map(|_| results.next().unwrap()).collect(); } } assert!(results.next().is_none()); @@ -380,26 +380,19 @@ mod musl_reference_tests { src.push_str(","); } src.push_str(");"); - if function.ret.len() > 1 { - for (i, ret) in function.ret.iter().enumerate() { - src.push_str(&(match ret { - Ty::F32 => format!("if _eqf(output.{0}, f32::from_bits(expected[{0}] as u32)).is_ok() {{ continue }}", i), - Ty::F64 => format!("if _eq(output.{0}, f64::from_bits(expected[{0}] as u64)).is_ok() {{ continue }}", i), - Ty::I32 => format!("if output.{0} as i64 == expected[{0}] {{ continue }}", i), - Ty::Bool => unreachable!(), - })); - } - } else { - src.push_str(match function.ret[0] { - Ty::F32 => { - "if _eqf(output, f32::from_bits(expected[0] as u32)).is_ok() { continue }" - } - Ty::F64 => { - "if _eq(output, f64::from_bits(expected[0] as u64)).is_ok() { continue }" - } - Ty::I32 => "if output as i64 == expected[0] { continue }", + + for (i, ret) in function.ret.iter().enumerate() { + let get = if function.ret.len() == 1 { + String::new() + } else { + format!(".{}", i) + }; + src.push_str(&(match ret { + Ty::F32 => 
format!("if _eqf(output{}, f32::from_bits(expected[{}] as u32)).is_ok() {{ continue }}", get, i), + Ty::F64 => format!("if _eq(output{}, f64::from_bits(expected[{}] as u64)).is_ok() {{ continue }}", get, i), + Ty::I32 => format!("if output{} as i64 == expected[{}] {{ continue }}", get, i), Ty::Bool => unreachable!(), - }); + })); } src.push_str( From 41ea6c3149732d4bf118484836cd57eea3f2af49 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 9 May 2019 07:53:21 -0700 Subject: [PATCH 0233/1459] Generate NaN and Infinity more often Make sure they come up in RNG generation of floats --- libm/build.rs | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/libm/build.rs b/libm/build.rs index b913a3e31..decb91692 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -159,9 +159,26 @@ mod musl_reference_tests { impl Ty { fn gen_i64(&self, r: &mut R) -> i64 { - match self { - Ty::F32 => r.gen::().to_bits().into(), - Ty::F64 => r.gen::().to_bits() as i64, + use std::f32; + use std::f64; + + return match self { + Ty::F32 => { + if r.gen_range(0, 20) < 1 { + let i = *[f32::NAN, f32::INFINITY, f32::NEG_INFINITY].choose(r).unwrap(); + i.to_bits().into() + } else { + r.gen::().to_bits().into() + } + } + Ty::F64 => { + if r.gen_range(0, 20) < 1 { + let i = *[f64::NAN, f64::INFINITY, f64::NEG_INFINITY].choose(r).unwrap(); + i.to_bits() as i64 + } else { + r.gen::().to_bits() as i64 + } + } Ty::I32 => { if r.gen_range(0, 10) < 1 { let i = *[i32::max_value(), 0, i32::min_value()].choose(r).unwrap(); @@ -171,7 +188,7 @@ mod musl_reference_tests { } } Ty::Bool => r.gen::() as i64, - } + }; } fn libc_ty(&self) -> &'static str { From 2c69479d8e057e3258ed42ec7ce4b1aa2c1f0747 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 9 May 2019 07:58:57 -0700 Subject: [PATCH 0234/1459] Test jn and jnf --- libm/build.rs | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/libm/build.rs b/libm/build.rs index 
decb91692..bf28fe23c 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -28,8 +28,6 @@ mod musl_reference_tests { // defining a function we want to test. const IGNORED_FILES: &[&str] = &[ "fenv.rs", - "jn.rs", // passed, but very slow - "jnf.rs", // passed, but very slow ]; struct Function { @@ -143,15 +141,28 @@ mod musl_reference_tests { fn generate_random_tests(functions: &mut [Function], rng: &mut R) { for function in functions { for _ in 0..NTESTS { - function.tests.push(generate_test(&function.args, rng)); + function.tests.push(generate_test(function, rng)); } } - fn generate_test(args: &[Ty], rng: &mut R) -> Test { - let inputs = args.iter().map(|ty| ty.gen_i64(rng)).collect(); - // zero output for now since we'll generate it later + fn generate_test(function: &Function, rng: &mut R) -> Test { + let mut inputs = function + .args + .iter() + .map(|ty| ty.gen_i64(rng)) + .collect::>(); + + // First argument to this function appears to be a number of + // iterations, so passing in massive random numbers causes it to + // take forever to execute, so make sure we're not running random + // math code until the heat death of the universe. 
+ if function.name == "jn" || function.name == "jnf" { + inputs[0] &= 0xffff; + } + Test { inputs, + // zero output for now since we'll generate it later outputs: vec![], } } @@ -165,7 +176,9 @@ mod musl_reference_tests { return match self { Ty::F32 => { if r.gen_range(0, 20) < 1 { - let i = *[f32::NAN, f32::INFINITY, f32::NEG_INFINITY].choose(r).unwrap(); + let i = *[f32::NAN, f32::INFINITY, f32::NEG_INFINITY] + .choose(r) + .unwrap(); i.to_bits().into() } else { r.gen::().to_bits().into() @@ -173,7 +186,9 @@ mod musl_reference_tests { } Ty::F64 => { if r.gen_range(0, 20) < 1 { - let i = *[f64::NAN, f64::INFINITY, f64::NEG_INFINITY].choose(r).unwrap(); + let i = *[f64::NAN, f64::INFINITY, f64::NEG_INFINITY] + .choose(r) + .unwrap(); i.to_bits() as i64 } else { r.gen::().to_bits() as i64 From 15298362076536bf91e48d5784f2e04c1973ba42 Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Thu, 9 May 2019 18:40:19 +0300 Subject: [PATCH 0235/1459] fix jn, ilogb --- libm/build.rs | 4 +--- libm/src/math/ilogb.rs | 14 +++++++------- libm/src/math/ilogbf.rs | 14 +++++++------- libm/src/math/jn.rs | 4 ++-- 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/libm/build.rs b/libm/build.rs index bf28fe23c..9af6dec93 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -26,9 +26,7 @@ mod musl_reference_tests { // These files are all internal functions or otherwise miscellaneous, not // defining a function we want to test. 
- const IGNORED_FILES: &[&str] = &[ - "fenv.rs", - ]; + const IGNORED_FILES: &[&str] = &["fenv.rs"]; struct Function { name: String, diff --git a/libm/src/math/ilogb.rs b/libm/src/math/ilogb.rs index 8a1289ca4..0a380b7ef 100644 --- a/libm/src/math/ilogb.rs +++ b/libm/src/math/ilogb.rs @@ -1,4 +1,4 @@ -const FP_ILOGBNAN: i32 = -1 - ((!0) >> 1); +const FP_ILOGBNAN: i32 = -1 - 0x7fffffff; const FP_ILOGB0: i32 = FP_ILOGBNAN; pub fn ilogb(x: f64) -> i32 { @@ -17,15 +17,15 @@ pub fn ilogb(x: f64) -> i32 { e -= 1; i <<= 1; } - return e; - } - if e == 0x7ff { + e + } else if e == 0x7ff { force_eval!(0.0 / 0.0); if (i << 12) != 0 { - return FP_ILOGBNAN; + FP_ILOGBNAN } else { - return i32::max_value(); + i32::max_value() } + } else { + e - 0x3ff } - return e - 0x3ff; } diff --git a/libm/src/math/ilogbf.rs b/libm/src/math/ilogbf.rs index 1bf4670a8..b384fa4b2 100644 --- a/libm/src/math/ilogbf.rs +++ b/libm/src/math/ilogbf.rs @@ -1,4 +1,4 @@ -const FP_ILOGBNAN: i32 = -1 - ((!0) >> 1); +const FP_ILOGBNAN: i32 = -1 - 0x7fffffff; const FP_ILOGB0: i32 = FP_ILOGBNAN; pub fn ilogbf(x: f32) -> i32 { @@ -17,15 +17,15 @@ pub fn ilogbf(x: f32) -> i32 { e -= 1; i <<= 1; } - return e; - } - if e == 0xff { + e + } else if e == 0xff { force_eval!(0.0 / 0.0); if (i << 9) != 0 { - return FP_ILOGBNAN; + FP_ILOGBNAN } else { - return i32::max_value(); + i32::max_value() } + } else { + e - 0x7f } - return e - 0x7f; } diff --git a/libm/src/math/jn.rs b/libm/src/math/jn.rs index 70c980266..1be167f84 100644 --- a/libm/src/math/jn.rs +++ b/libm/src/math/jn.rs @@ -54,7 +54,7 @@ pub fn jn(n: i32, mut x: f64) -> f64 { ix &= 0x7fffffff; // -lx == !lx + 1 - if (ix | (lx | (!lx + 1)) >> 31) > 0x7ff00000 { + if (ix | (lx | ((!lx).wrapping_add(1))) >> 31) > 0x7ff00000 { /* nan */ return x; } @@ -268,7 +268,7 @@ pub fn yn(n: i32, x: f64) -> f64 { ix &= 0x7fffffff; // -lx == !lx + 1 - if (ix | (lx | (!lx + 1)) >> 31) > 0x7ff00000 { + if (ix | (lx | ((!lx).wrapping_add(1))) >> 31) > 0x7ff00000 { /* nan */ 
return x; } From c5d190b04aaf723fb2fc9ff658814b46e6ef3325 Mon Sep 17 00:00:00 2001 From: Igor null Date: Mon, 13 May 2019 12:14:03 +0300 Subject: [PATCH 0236/1459] rem_pio2: actually return medium value for x ~<= 5pi/4 --- libm/src/math/rem_pio2.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 285663ea2..186333e57 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -85,7 +85,7 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { /* |x| ~<= 5pi/4 */ if (ix & 0xfffff) == 0x921fb { /* |x| ~= pi/2 or 2pi/2 */ - medium(x, ix); /* cancellation -- use medium case */ + return medium(x, ix); /* cancellation -- use medium case */ } if ix <= 0x4002d97c { /* |x| ~<= 3pi/4 */ From a8b95f58a8e640b2a5e725650b60bee1e76b9541 Mon Sep 17 00:00:00 2001 From: Igor null Date: Mon, 13 May 2019 17:42:18 +0300 Subject: [PATCH 0237/1459] added tests near pi for rem_pio2 --- libm/src/math/rem_pio2.rs | 8 ++++++++ libm/src/math/sin.rs | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 186333e57..e46e6623e 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -185,3 +185,11 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { } (n, ty[0], ty[1]) } + +#[test] +fn test_near_pi() { + assert_eq!(rem_pio2(3.141592025756836), (2, -6.278329573009626e-7, -2.1125998133974653e-23)); + assert_eq!(rem_pio2(3.141592033207416), (2, -6.20382377148128e-7, -2.1125998133974653e-23)); + assert_eq!(rem_pio2(3.141592144966125), (2, -5.086236681942706e-7, -2.1125998133974653e-23)); + assert_eq!(rem_pio2(3.141592979431152), (2, 3.2584135866119817e-7, -2.1125998133974653e-23)); +} diff --git a/libm/src/math/sin.rs b/libm/src/math/sin.rs index b73074416..51aed88a8 100644 --- a/libm/src/math/sin.rs +++ b/libm/src/math/sin.rs @@ -77,3 +77,10 @@ pub fn sin(x: f64) -> f64 { _ => -k_cos(y0, y1), } } + +#[test] +fn test_near_pi() { + 
let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707 + let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7 + assert_eq!(sin(x), sx); +} From 7cad437f8eb69eac55bd47103178e3719b1ad8fb Mon Sep 17 00:00:00 2001 From: Igor null Date: Mon, 13 May 2019 18:05:38 +0300 Subject: [PATCH 0238/1459] formatted rem_pio2 tests --- libm/src/math/rem_pio2.rs | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index e46e6623e..536dfac3c 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -188,8 +188,20 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { #[test] fn test_near_pi() { - assert_eq!(rem_pio2(3.141592025756836), (2, -6.278329573009626e-7, -2.1125998133974653e-23)); - assert_eq!(rem_pio2(3.141592033207416), (2, -6.20382377148128e-7, -2.1125998133974653e-23)); - assert_eq!(rem_pio2(3.141592144966125), (2, -5.086236681942706e-7, -2.1125998133974653e-23)); - assert_eq!(rem_pio2(3.141592979431152), (2, 3.2584135866119817e-7, -2.1125998133974653e-23)); + assert_eq!( + rem_pio2(3.141592025756836), + (2, -6.278329573009626e-7, -2.1125998133974653e-23) + ); + assert_eq!( + rem_pio2(3.141592033207416), + (2, -6.20382377148128e-7, -2.1125998133974653e-23) + ); + assert_eq!( + rem_pio2(3.141592144966125), + (2, -5.086236681942706e-7, -2.1125998133974653e-23) + ); + assert_eq!( + rem_pio2(3.141592979431152), + (2, 3.2584135866119817e-7, -2.1125998133974653e-23) + ); } From 3c51ffdadcd35a6eefab99786e22f7de9212dcaa Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 14 May 2019 17:57:42 +0200 Subject: [PATCH 0239/1459] don't force-on c feature when working in rustc workspace --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a05825a61..ec9fc13f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,7 +58,7 @@ mangled-names = [] no-lang-items = [] # Only used in the compiler's build system 
-rustc-dep-of-std = ['c', 'compiler-builtins', 'core'] +rustc-dep-of-std = ['compiler-builtins', 'core'] [[example]] name = "intrinsics" From 20281e4997c8d6010974b0d4f7ccdfe4611949b6 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 14 May 2019 09:06:43 -0700 Subject: [PATCH 0240/1459] Bump to 0.1.3 --- libm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 45fad8230..a61db5eba 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -7,7 +7,7 @@ keywords = ["libm", "math"] license = "MIT OR Apache-2.0" name = "libm" repository = "https://github.com/rust-lang-nursery/libm" -version = "0.1.2" +version = "0.1.3" edition = "2018" [features] From 8521530f49383f5d014062a450b19a3b87b43b49 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 14 May 2019 12:26:09 -0700 Subject: [PATCH 0241/1459] Fix __divsi3 and __udivsi3 on thumbv6m targets This commit fixes a bug accidentally introduced in #285 where some lingering references remained to `#[cfg(thumbv6m)]` but this, since the historical revert, was renamed to `#[cfg(thumb_1)]`. This caused on the thumbv6m platform for the intrinsics to be accidentally omitted because the build script didn't actually compile them but the Rust code thought the C code was in use. After correcting the `#[cfg]` statements the CI configuration for the `thumb*` family of targets was all updated. The support for xargo testing was removed from `run.sh` since it had long since bitrotted, and the script was updated to simply build the intrinsics example to attempt to link for each of these targets. This in turn exposed the bug locally and allowed to confirm a fix once the `#[cfg]` statements were corrected. 
cc rust-lang/rust#60782 --- Cargo.toml | 2 +- azure-pipelines.yml | 20 ++++----- ci/docker/thumbv6m-linux-eabi/Dockerfile | 10 ----- ci/docker/thumbv6m-none-eabi/Dockerfile | 7 +++ ci/docker/thumbv7em-linux-eabi/Dockerfile | 10 ----- ci/docker/thumbv7em-linux-eabihf/Dockerfile | 10 ----- ci/docker/thumbv7em-none-eabi/Dockerfile | 7 +++ ci/docker/thumbv7em-none-eabihf/Dockerfile | 7 +++ ci/docker/thumbv7m-linux-eabi/Dockerfile | 10 ----- ci/docker/thumbv7m-none-eabi/Dockerfile | 7 +++ ci/run-docker.sh | 1 - ci/run.sh | 50 +++++---------------- examples/intrinsics.rs | 14 +++++- src/int/sdiv.rs | 2 +- src/int/udiv.rs | 2 +- 15 files changed, 64 insertions(+), 95 deletions(-) delete mode 100644 ci/docker/thumbv6m-linux-eabi/Dockerfile create mode 100644 ci/docker/thumbv6m-none-eabi/Dockerfile delete mode 100644 ci/docker/thumbv7em-linux-eabi/Dockerfile delete mode 100644 ci/docker/thumbv7em-linux-eabihf/Dockerfile create mode 100644 ci/docker/thumbv7em-none-eabi/Dockerfile create mode 100644 ci/docker/thumbv7em-none-eabihf/Dockerfile delete mode 100644 ci/docker/thumbv7m-linux-eabi/Dockerfile create mode 100644 ci/docker/thumbv7m-none-eabi/Dockerfile diff --git a/Cargo.toml b/Cargo.toml index a05825a61..e7995915b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -62,7 +62,7 @@ rustc-dep-of-std = ['c', 'compiler-builtins', 'core'] [[example]] name = "intrinsics" -required-features = ["c", "compiler-builtins"] +required-features = ["compiler-builtins"] [workspace] members = ["testcrate"] diff --git a/azure-pipelines.yml b/azure-pipelines.yml index da3a1bb1d..eac3fb5e4 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -33,18 +33,14 @@ jobs: TARGET: powerpc64-unknown-linux-gnu powerpc64le: TARGET: powerpc64le-unknown-linux-gnu - # thumbv6m: - # TARGET: thumbv6m-linux-eabi - # XARGO: 1 - # thumbv7em: - # TARGET: thumbv7em-linux-eabi - # XARGO: 1 - # thumbv7emhf: - # TARGET: thumbv7em-linux-eabihf - # XARGO: 1 - # thumbv7m: - # TARGET: thumbv7m-linux-eabi - # XARGO: 1 
+ thumbv6m: + TARGET: thumbv6m-none-eabi + thumbv7em: + TARGET: thumbv7em-none-eabi + thumbv7emhf: + TARGET: thumbv7em-none-eabihf + thumbv7m: + TARGET: thumbv7m-none-eabi wasm32: TARGET: wasm32-unknown-unknown ONLY_BUILD: 1 diff --git a/ci/docker/thumbv6m-linux-eabi/Dockerfile b/ci/docker/thumbv6m-linux-eabi/Dockerfile deleted file mode 100644 index 789bdf4e4..000000000 --- a/ci/docker/thumbv6m-linux-eabi/Dockerfile +++ /dev/null @@ -1,10 +0,0 @@ -FROM ubuntu:18.04 -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - ca-certificates curl gcc gcc-arm-none-eabi libc6-dev libcurl4-openssl-dev libssh2-1 libnewlib-dev qemu-user-static -RUN curl -LSfs https://japaric.github.io/trust/install.sh | \ - sh -s -- --git japaric/xargo --tag v0.3.1 --target x86_64-unknown-linux-gnu --to /usr/bin -ENV AR_thumbv6m_linux_eabi=arm-none-eabi-ar \ - CARGO_TARGET_THUMBV6M_LINUX_EABI_LINKER=arm-none-eabi-gcc \ - CARGO_TARGET_THUMBV6M_LINUX_EABI_RUNNER=qemu-arm-static \ - CC_thumbv6m_linux_eabi=arm-none-eabi-gcc \ diff --git a/ci/docker/thumbv6m-none-eabi/Dockerfile b/ci/docker/thumbv6m-none-eabi/Dockerfile new file mode 100644 index 000000000..04d4f4429 --- /dev/null +++ b/ci/docker/thumbv6m-none-eabi/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:18.04 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-none-eabi \ + libnewlib-arm-none-eabi +ENV XARGO=1 diff --git a/ci/docker/thumbv7em-linux-eabi/Dockerfile b/ci/docker/thumbv7em-linux-eabi/Dockerfile deleted file mode 100644 index c6ce273c8..000000000 --- a/ci/docker/thumbv7em-linux-eabi/Dockerfile +++ /dev/null @@ -1,10 +0,0 @@ -FROM ubuntu:18.04 -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - ca-certificates curl gcc gcc-arm-none-eabi libc6-dev libcurl4-openssl-dev libssh2-1 libnewlib-dev qemu-user-static -RUN curl -LSfs https://japaric.github.io/trust/install.sh | \ - sh -s -- --git japaric/xargo --tag v0.3.1 --target 
x86_64-unknown-linux-gnu --to /usr/bin -ENV AR_thumbv7em_linux_eabi=arm-none-eabi-ar \ - CARGO_TARGET_THUMBV7EM_LINUX_EABI_LINKER=arm-none-eabi-gcc \ - CARGO_TARGET_THUMBV7EM_LINUX_EABI_RUNNER=qemu-arm-static \ - CC_thumbv7em_linux_eabi=arm-none-eabi-gcc \ diff --git a/ci/docker/thumbv7em-linux-eabihf/Dockerfile b/ci/docker/thumbv7em-linux-eabihf/Dockerfile deleted file mode 100644 index c7518aaca..000000000 --- a/ci/docker/thumbv7em-linux-eabihf/Dockerfile +++ /dev/null @@ -1,10 +0,0 @@ -FROM ubuntu:18.04 -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - ca-certificates curl gcc gcc-arm-none-eabi libc6-dev libcurl4-openssl-dev libssh2-1 libnewlib-dev qemu-user-static -RUN curl -LSfs https://japaric.github.io/trust/install.sh | \ - sh -s -- --git japaric/xargo --tag v0.3.1 --target x86_64-unknown-linux-gnu --to /usr/bin -ENV AR_thumbv7em_linux_eabihf=arm-none-eabi-ar \ - CARGO_TARGET_THUMBV7EM_LINUX_EABIHF_LINKER=arm-none-eabi-gcc \ - CARGO_TARGET_THUMBV7EM_LINUX_EABIHF_RUNNER=qemu-arm-static \ - CC_thumbv7em_linux_eabihf=arm-none-eabi-gcc \ diff --git a/ci/docker/thumbv7em-none-eabi/Dockerfile b/ci/docker/thumbv7em-none-eabi/Dockerfile new file mode 100644 index 000000000..04d4f4429 --- /dev/null +++ b/ci/docker/thumbv7em-none-eabi/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:18.04 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-none-eabi \ + libnewlib-arm-none-eabi +ENV XARGO=1 diff --git a/ci/docker/thumbv7em-none-eabihf/Dockerfile b/ci/docker/thumbv7em-none-eabihf/Dockerfile new file mode 100644 index 000000000..04d4f4429 --- /dev/null +++ b/ci/docker/thumbv7em-none-eabihf/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:18.04 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-none-eabi \ + libnewlib-arm-none-eabi +ENV XARGO=1 diff --git a/ci/docker/thumbv7m-linux-eabi/Dockerfile 
b/ci/docker/thumbv7m-linux-eabi/Dockerfile deleted file mode 100644 index c90710941..000000000 --- a/ci/docker/thumbv7m-linux-eabi/Dockerfile +++ /dev/null @@ -1,10 +0,0 @@ -FROM ubuntu:18.04 -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - ca-certificates curl gcc gcc-arm-none-eabi libc6-dev libcurl4-openssl-dev libssh2-1 libnewlib-dev qemu-user-static -RUN curl -LSfs https://japaric.github.io/trust/install.sh | \ - sh -s -- --git japaric/xargo --tag v0.3.1 --target x86_64-unknown-linux-gnu --to /usr/bin -ENV AR_thumbv7m_linux_eabi=arm-none-eabi-ar \ - CARGO_TARGET_THUMBV7M_LINUX_EABI_LINKER=arm-none-eabi-gcc \ - CARGO_TARGET_THUMBV7M_LINUX_EABI_RUNNER=qemu-arm-static \ - CC_thumbv7m_linux_eabi=arm-none-eabi-gcc \ diff --git a/ci/docker/thumbv7m-none-eabi/Dockerfile b/ci/docker/thumbv7m-none-eabi/Dockerfile new file mode 100644 index 000000000..04d4f4429 --- /dev/null +++ b/ci/docker/thumbv7m-none-eabi/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:18.04 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-none-eabi \ + libnewlib-arm-none-eabi +ENV XARGO=1 diff --git a/ci/run-docker.sh b/ci/run-docker.sh index bed84ed81..ddb970c1b 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -18,7 +18,6 @@ run() { --user $(id -u):$(id -g) \ -e CARGO_HOME=/cargo \ -e CARGO_TARGET_DIR=/target \ - -e XARGO \ -v $HOME/.cargo:/cargo \ -v `pwd`/target:/target \ -v `pwd`:/checkout:ro \ diff --git a/ci/run.sh b/ci/run.sh index 27e2de22f..ae32806ec 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -1,53 +1,24 @@ set -ex -# FIXME(japarix/xargo#186) this shouldn't be necessary -export RUST_TARGET_PATH=`pwd` - cargo=cargo -if [ "$XARGO" = "1" ]; then - cargo=xargo -fi - -INTRINSICS_FEATURES="c" - -# Some architectures like ARM apparently seem to require the `mem` feature -# enabled to successfully compile the `intrinsics` example, and... we're not -# sure why! 
-if [ -z "$INTRINSICS_FAILS_WITH_MEM_FEATURE" ]; then - INTRINSICS_FEATURES="$INTRINSICS_FEATURES mem" -fi # Test our implementation if [ "$XARGO" = "1" ]; then - run="xargo test --manifest-path testcrate/Cargo.toml --target $1" - for t in $(ls testcrate/tests); do - t=${t%.rs} - - RUSTFLAGS="-C debug-assertions=no -C lto" \ - CARGO_INCREMENTAL=0 \ - $run --test $t --no-default-features --features 'mem c' --no-run - qemu-arm-static target/${1}/debug/$t-* - done - - for t in $(ls testcrate/tests); do - t=${t%.rs} - RUSTFLAGS="-C lto" \ - CARGO_INCREMENTAL=0 \ - $run --test $t --no-default-features --features 'mem c' --no-run --release - qemu-arm-static target/${1}/release/$t-* - done + # FIXME: currently these tests don't work... + echo nothing to do else run="cargo test --manifest-path testcrate/Cargo.toml --target $1" $run $run --release $run --features c $run --features c --release - cargo build --target $1 - cargo build --target $1 --release - cargo build --target $1 --features c - cargo build --target $1 --release --features c fi +cargo build --target $1 +cargo build --target $1 --release +cargo build --target $1 --features c +cargo build --target $1 --release --features c + PREFIX=$(echo $1 | sed -e 's/unknown-//')- case $1 in armv7-*) @@ -101,8 +72,11 @@ done rm -f $path # Verify that we haven't drop any intrinsic/symbol -RUSTFLAGS="-C debug-assertions=no" \ - $cargo build --features "$INTRINSICS_FEATURES" --target $1 --example intrinsics -v +build_intrinsics="$cargo build --target $1 -v --example intrinsics" +RUSTFLAGS="-C debug-assertions=no" $build_intrinsics +RUSTFLAGS="-C debug-assertions=no" $build_intrinsics --release +RUSTFLAGS="-C debug-assertions=no" $build_intrinsics --features c +RUSTFLAGS="-C debug-assertions=no" $build_intrinsics --features c --release # Verify that there are no undefined symbols to `panic` within our # implementations diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index ccd701569..8de108d99 100644 --- 
a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -27,7 +27,14 @@ extern {} mod intrinsics { // trunccdfsf2 pub fn aeabi_d2f(x: f64) -> f32 { - x as f32 + // This is only implemented in C currently, so only test it there. + #[cfg(feature = "c")] + return x as f32; + #[cfg(not(feature = "c"))] + { + drop(x); + 0.0 + } } // fixdfsi @@ -263,6 +270,10 @@ mod intrinsics { pub fn modti3(a: i128, b: i128) -> i128 { a % b } + + pub fn udivsi3(a: u32, b: u32) -> u32 { + a / b + } } fn run() { @@ -325,6 +336,7 @@ fn run() { bb(umodti3(bb(2), bb(2))); bb(divti3(bb(2), bb(2))); bb(modti3(bb(2), bb(2))); + bb(udivsi3(bb(2), bb(2))); something_with_a_dtor(&|| assert_eq!(bb(1), 1)); diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index d7ae71ab2..82262a441 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -57,7 +57,7 @@ impl Divmod for i32 {} impl Divmod for i64 {} intrinsics! { - #[use_c_shim_if(all(target_arch = "arm", not(target_os = "ios"), not(thumbv6m)))] + #[use_c_shim_if(all(target_arch = "arm", not(target_os = "ios"), not(thumb_1)))] #[arm_aeabi_alias = __aeabi_idiv] pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 { a.div(b) diff --git a/src/int/udiv.rs b/src/int/udiv.rs index 7b7f5b44d..8837126de 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -154,7 +154,7 @@ macro_rules! udivmod_inner { intrinsics! { #[use_c_shim_if(all(target_arch = "arm", not(target_os = "ios"), - not(thumbv6m)))] + not(thumb_1)))] #[arm_aeabi_alias = __aeabi_uidiv] /// Returns `n / d` pub extern "C" fn __udivsi3(n: u32, d: u32) -> u32 { From e4f46b91ca843297fc065e20f1591e4971ae608c Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 14 May 2019 14:29:29 -0700 Subject: [PATCH 0242/1459] Don't compile C code on riscv targets This fixes a longstanding bug in compiler-builtins where C code was compiled for the riscv targets but when distributed in rust-lang/rust all the C code was actually compiled for x86_64 since there is no configured C compiler for riscv. 
This was exposed by #286 by accident but the underlying cause was somewhat unrelated. For now we forcibly disable C code for riscv targets, and when the C compiler story is sorted out in rust-lang/rust and with `cc-rs` we can reenable. For now just use all the Rust definitions. cc rust-lang/rust#60747 --- build.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/build.rs b/build.rs index 27dac6026..4fc0adbed 100644 --- a/build.rs +++ b/build.rs @@ -37,8 +37,15 @@ fn main() { // build anything and we rely on the upstream implementation of compiler-rt // functions if !cfg!(feature = "mangled-names") && cfg!(feature = "c") { - // Don't use C compiler for bitcode-only wasm and nvptx - if !target.contains("wasm32") && !target.contains("nvptx") { + // Don't use a C compiler for these targets: + // + // * wasm32 - clang 8 for wasm is somewhat hard to come by and it's + // unlikely that the C is really that much better than our own Rust. + // * nvptx - everything is bitcode, not compatible with mixed C/Rust + // * riscv - the rust-lang/rust distribution container doesn't have a C + // compiler nor is cc-rs ready for compilation to riscv (at this + // time). 
This can probably be removed in the future + if !target.contains("wasm32") && !target.contains("nvptx") && !target.starts_with("riscv") { #[cfg(feature = "c")] c::compile(&llvm_target); println!("cargo:rustc-cfg=use_c"); From c88c9502b7e3b273accf94225fc912ae9173b2cc Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 14 May 2019 14:33:08 -0700 Subject: [PATCH 0243/1459] Run rustfmt over everything --- build.rs | 413 ++++++------- examples/intrinsics.rs | 10 +- src/arm_linux.rs | 98 ++- src/float/add.rs | 50 +- src/float/cmp.rs | 67 +- src/float/conv.rs | 74 ++- src/float/div.rs | 6 +- src/float/extend.rs | 7 +- src/float/mod.rs | 46 +- src/float/mul.rs | 2 +- src/float/pow.rs | 2 +- src/float/sub.rs | 4 +- src/int/addsub.rs | 23 +- src/int/mod.rs | 50 +- src/int/mul.rs | 9 +- src/int/sdiv.rs | 3 +- src/int/shift.rs | 35 +- src/lib.rs | 34 +- src/macros.rs | 2 +- src/mem.rs | 10 +- src/probestack.rs | 4 +- testcrate/build.rs | 814 ++++++++++++++++--------- testcrate/src/lib.rs | 2 +- testcrate/tests/aeabi_memclr.rs | 10 +- testcrate/tests/aeabi_memcpy.rs | 10 +- testcrate/tests/aeabi_memset.rs | 82 +-- testcrate/tests/count_leading_zeros.rs | 30 +- testcrate/tests/generated.rs | 30 +- 28 files changed, 1105 insertions(+), 822 deletions(-) diff --git a/build.rs b/build.rs index 4fc0adbed..a8301afa9 100644 --- a/build.rs +++ b/build.rs @@ -22,8 +22,9 @@ fn main() { // Forcibly enable memory intrinsics on wasm32 & SGX as we don't have a libc to // provide them. 
- if (target.contains("wasm32") && !target.contains("wasi")) || - (target.contains("sgx") && target.contains("fortanix")) { + if (target.contains("wasm32") && !target.contains("wasi")) + || (target.contains("sgx") && target.contains("fortanix")) + { println!("cargo:rustc-cfg=feature=\"mem\""); } @@ -85,7 +86,9 @@ mod c { impl Sources { fn new() -> Sources { - Sources { map: BTreeMap::new() } + Sources { + map: BTreeMap::new(), + } } fn extend(&mut self, sources: &[&'static str]) { @@ -151,163 +154,144 @@ mod c { } let mut sources = Sources::new(); - sources.extend( - &[ - "absvdi2.c", - "absvsi2.c", - "addvdi3.c", - "addvsi3.c", - "apple_versioning.c", - "clzdi2.c", - "clzsi2.c", - "cmpdi2.c", - "ctzdi2.c", - "ctzsi2.c", - "divdc3.c", - "divsc3.c", - "divxc3.c", - "extendhfsf2.c", - "int_util.c", - "muldc3.c", - "mulsc3.c", - "mulvdi3.c", - "mulvsi3.c", - "mulxc3.c", - "negdf2.c", - "negdi2.c", - "negsf2.c", - "negvdi2.c", - "negvsi2.c", - "paritydi2.c", - "paritysi2.c", - "popcountdi2.c", - "popcountsi2.c", - "powixf2.c", - "subvdi3.c", - "subvsi3.c", - "truncdfhf2.c", - "truncdfsf2.c", - "truncsfhf2.c", - "ucmpdi2.c", - ], - ); + sources.extend(&[ + "absvdi2.c", + "absvsi2.c", + "addvdi3.c", + "addvsi3.c", + "apple_versioning.c", + "clzdi2.c", + "clzsi2.c", + "cmpdi2.c", + "ctzdi2.c", + "ctzsi2.c", + "divdc3.c", + "divsc3.c", + "divxc3.c", + "extendhfsf2.c", + "int_util.c", + "muldc3.c", + "mulsc3.c", + "mulvdi3.c", + "mulvsi3.c", + "mulxc3.c", + "negdf2.c", + "negdi2.c", + "negsf2.c", + "negvdi2.c", + "negvsi2.c", + "paritydi2.c", + "paritysi2.c", + "popcountdi2.c", + "popcountsi2.c", + "powixf2.c", + "subvdi3.c", + "subvsi3.c", + "truncdfhf2.c", + "truncdfsf2.c", + "truncsfhf2.c", + "ucmpdi2.c", + ]); // When compiling in rustbuild (the rust-lang/rust repo) this library // also needs to satisfy intrinsics that jemalloc or C in general may // need, so include a few more that aren't typically needed by // LLVM/Rust. 
if cfg!(feature = "rustbuild") { - sources.extend(&[ - "ffsdi2.c", - ]); + sources.extend(&["ffsdi2.c"]); } // On iOS and 32-bit OSX these are all just empty intrinsics, no need to // include them. if target_os != "ios" && (target_vendor != "apple" || target_arch != "x86") { - sources.extend( - &[ - "absvti2.c", - "addvti3.c", - "clzti2.c", - "cmpti2.c", - "ctzti2.c", - "ffsti2.c", - "mulvti3.c", - "negti2.c", - "negvti2.c", - "parityti2.c", - "popcountti2.c", - "subvti3.c", - "ucmpti2.c", - ], - ); + sources.extend(&[ + "absvti2.c", + "addvti3.c", + "clzti2.c", + "cmpti2.c", + "ctzti2.c", + "ffsti2.c", + "mulvti3.c", + "negti2.c", + "negvti2.c", + "parityti2.c", + "popcountti2.c", + "subvti3.c", + "ucmpti2.c", + ]); } if target_vendor == "apple" { - sources.extend( - &[ - "atomic_flag_clear.c", - "atomic_flag_clear_explicit.c", - "atomic_flag_test_and_set.c", - "atomic_flag_test_and_set_explicit.c", - "atomic_signal_fence.c", - "atomic_thread_fence.c", - ], - ); + sources.extend(&[ + "atomic_flag_clear.c", + "atomic_flag_clear_explicit.c", + "atomic_flag_test_and_set.c", + "atomic_flag_test_and_set_explicit.c", + "atomic_signal_fence.c", + "atomic_thread_fence.c", + ]); } if target_env == "msvc" { if target_arch == "x86_64" { - sources.extend( - &[ - "x86_64/floatdisf.c", - "x86_64/floatdixf.c", - ], - ); + sources.extend(&["x86_64/floatdisf.c", "x86_64/floatdixf.c"]); } } else { // None of these seem to be used on x86_64 windows, and they've all // got the wrong ABI anyway, so we want to avoid them. 
if target_os != "windows" { if target_arch == "x86_64" { - sources.extend( - &[ - "x86_64/floatdisf.c", - "x86_64/floatdixf.c", - "x86_64/floatundidf.S", - "x86_64/floatundisf.S", - "x86_64/floatundixf.S", - ], - ); + sources.extend(&[ + "x86_64/floatdisf.c", + "x86_64/floatdixf.c", + "x86_64/floatundidf.S", + "x86_64/floatundisf.S", + "x86_64/floatundixf.S", + ]); } } if target_arch == "x86" { - sources.extend( - &[ - "i386/ashldi3.S", - "i386/ashrdi3.S", - "i386/divdi3.S", - "i386/floatdidf.S", - "i386/floatdisf.S", - "i386/floatdixf.S", - "i386/floatundidf.S", - "i386/floatundisf.S", - "i386/floatundixf.S", - "i386/lshrdi3.S", - "i386/moddi3.S", - "i386/muldi3.S", - "i386/udivdi3.S", - "i386/umoddi3.S", - ], - ); + sources.extend(&[ + "i386/ashldi3.S", + "i386/ashrdi3.S", + "i386/divdi3.S", + "i386/floatdidf.S", + "i386/floatdisf.S", + "i386/floatdixf.S", + "i386/floatundidf.S", + "i386/floatundisf.S", + "i386/floatundixf.S", + "i386/lshrdi3.S", + "i386/moddi3.S", + "i386/muldi3.S", + "i386/udivdi3.S", + "i386/umoddi3.S", + ]); } } if target_arch == "arm" && target_os != "ios" && target_env != "msvc" { - sources.extend( - &[ - "arm/aeabi_div0.c", - "arm/aeabi_drsub.c", - "arm/aeabi_frsub.c", - "arm/bswapdi2.S", - "arm/bswapsi2.S", - "arm/clzdi2.S", - "arm/clzsi2.S", - "arm/divmodsi4.S", - "arm/divsi3.S", - "arm/modsi3.S", - "arm/switch16.S", - "arm/switch32.S", - "arm/switch8.S", - "arm/switchu8.S", - "arm/sync_synchronize.S", - "arm/udivmodsi4.S", - "arm/udivsi3.S", - "arm/umodsi3.S", - ], - ); + sources.extend(&[ + "arm/aeabi_div0.c", + "arm/aeabi_drsub.c", + "arm/aeabi_frsub.c", + "arm/bswapdi2.S", + "arm/bswapsi2.S", + "arm/clzdi2.S", + "arm/clzsi2.S", + "arm/divmodsi4.S", + "arm/divsi3.S", + "arm/modsi3.S", + "arm/switch16.S", + "arm/switch32.S", + "arm/switch8.S", + "arm/switchu8.S", + "arm/sync_synchronize.S", + "arm/udivmodsi4.S", + "arm/udivsi3.S", + "arm/umodsi3.S", + ]); if target_os == "freebsd" { sources.extend(&["clear_cache.c"]); @@ -316,100 
+300,89 @@ mod c { // First of all aeabi_cdcmp and aeabi_cfcmp are never called by LLVM. // Second are little-endian only, so build fail on big-endian targets. // Temporally workaround: exclude these files for big-endian targets. - if !llvm_target[0].starts_with("thumbeb") && - !llvm_target[0].starts_with("armeb") { - sources.extend( - &[ - "arm/aeabi_cdcmp.S", - "arm/aeabi_cdcmpeq_check_nan.c", - "arm/aeabi_cfcmp.S", - "arm/aeabi_cfcmpeq_check_nan.c", - ], - ); + if !llvm_target[0].starts_with("thumbeb") && !llvm_target[0].starts_with("armeb") { + sources.extend(&[ + "arm/aeabi_cdcmp.S", + "arm/aeabi_cdcmpeq_check_nan.c", + "arm/aeabi_cfcmp.S", + "arm/aeabi_cfcmpeq_check_nan.c", + ]); } } if llvm_target[0] == "armv7" { - sources.extend( - &[ - "arm/sync_fetch_and_add_4.S", - "arm/sync_fetch_and_add_8.S", - "arm/sync_fetch_and_and_4.S", - "arm/sync_fetch_and_and_8.S", - "arm/sync_fetch_and_max_4.S", - "arm/sync_fetch_and_max_8.S", - "arm/sync_fetch_and_min_4.S", - "arm/sync_fetch_and_min_8.S", - "arm/sync_fetch_and_nand_4.S", - "arm/sync_fetch_and_nand_8.S", - "arm/sync_fetch_and_or_4.S", - "arm/sync_fetch_and_or_8.S", - "arm/sync_fetch_and_sub_4.S", - "arm/sync_fetch_and_sub_8.S", - "arm/sync_fetch_and_umax_4.S", - "arm/sync_fetch_and_umax_8.S", - "arm/sync_fetch_and_umin_4.S", - "arm/sync_fetch_and_umin_8.S", - "arm/sync_fetch_and_xor_4.S", - "arm/sync_fetch_and_xor_8.S", - ], - ); + sources.extend(&[ + "arm/sync_fetch_and_add_4.S", + "arm/sync_fetch_and_add_8.S", + "arm/sync_fetch_and_and_4.S", + "arm/sync_fetch_and_and_8.S", + "arm/sync_fetch_and_max_4.S", + "arm/sync_fetch_and_max_8.S", + "arm/sync_fetch_and_min_4.S", + "arm/sync_fetch_and_min_8.S", + "arm/sync_fetch_and_nand_4.S", + "arm/sync_fetch_and_nand_8.S", + "arm/sync_fetch_and_or_4.S", + "arm/sync_fetch_and_or_8.S", + "arm/sync_fetch_and_sub_4.S", + "arm/sync_fetch_and_sub_8.S", + "arm/sync_fetch_and_umax_4.S", + "arm/sync_fetch_and_umax_8.S", + "arm/sync_fetch_and_umin_4.S", + 
"arm/sync_fetch_and_umin_8.S", + "arm/sync_fetch_and_xor_4.S", + "arm/sync_fetch_and_xor_8.S", + ]); } if llvm_target.last().unwrap().ends_with("eabihf") { - if !llvm_target[0].starts_with("thumbv7em") && - !llvm_target[0].starts_with("thumbv8m.main") { + if !llvm_target[0].starts_with("thumbv7em") + && !llvm_target[0].starts_with("thumbv8m.main") + { // The FPU option chosen for these architectures in cc-rs, ie: // -mfpu=fpv4-sp-d16 for thumbv7em // -mfpu=fpv5-sp-d16 for thumbv8m.main // do not support double precision floating points conversions so the files // that include such instructions are not included for these targets. - sources.extend( - &[ - "arm/fixdfsivfp.S", - "arm/fixunsdfsivfp.S", - "arm/floatsidfvfp.S", - "arm/floatunssidfvfp.S", - ], - ); + sources.extend(&[ + "arm/fixdfsivfp.S", + "arm/fixunsdfsivfp.S", + "arm/floatsidfvfp.S", + "arm/floatunssidfvfp.S", + ]); } - sources.extend( - &[ - "arm/fixsfsivfp.S", - "arm/fixunssfsivfp.S", - "arm/floatsisfvfp.S", - "arm/floatunssisfvfp.S", - "arm/floatunssisfvfp.S", - "arm/restore_vfp_d8_d15_regs.S", - "arm/save_vfp_d8_d15_regs.S", - "arm/negdf2vfp.S", - "arm/negsf2vfp.S", - ] - ); - + sources.extend(&[ + "arm/fixsfsivfp.S", + "arm/fixunssfsivfp.S", + "arm/floatsisfvfp.S", + "arm/floatunssisfvfp.S", + "arm/floatunssisfvfp.S", + "arm/restore_vfp_d8_d15_regs.S", + "arm/save_vfp_d8_d15_regs.S", + "arm/negdf2vfp.S", + "arm/negsf2vfp.S", + ]); } if target_arch == "aarch64" { - sources.extend( - &[ - "comparetf2.c", - "extenddftf2.c", - "extendsftf2.c", - "fixtfdi.c", - "fixtfsi.c", - "fixtfti.c", - "fixunstfdi.c", - "fixunstfsi.c", - "fixunstfti.c", - "floatditf.c", - "floatsitf.c", - "floatunditf.c", - "floatunsitf.c", - "trunctfdf2.c", - "trunctfsf2.c", - ], - ); + sources.extend(&[ + "comparetf2.c", + "extenddftf2.c", + "extendsftf2.c", + "fixtfdi.c", + "fixtfsi.c", + "fixtfti.c", + "fixunstfdi.c", + "fixunstfsi.c", + "fixunstfti.c", + "floatditf.c", + "floatsitf.c", + "floatunditf.c", + "floatunsitf.c", + 
"trunctfdf2.c", + "trunctfsf2.c", + ]); if target_os != "windows" { sources.extend(&["multc3.c"]); @@ -418,22 +391,20 @@ mod c { // Remove the assembly implementations that won't compile for the target if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" { - sources.remove( - &[ - "clzdi2", - "clzsi2", - "divmodsi4", - "divsi3", - "modsi3", - "switch16", - "switch32", - "switch8", - "switchu8", - "udivmodsi4", - "udivsi3", - "umodsi3", - ], - ); + sources.remove(&[ + "clzdi2", + "clzsi2", + "divmodsi4", + "divsi3", + "modsi3", + "switch16", + "switch32", + "switch8", + "switchu8", + "udivmodsi4", + "udivsi3", + "umodsi3", + ]); // But use some generic implementations where possible sources.extend(&["clzdi2.c", "clzsi2.c"]) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 8de108d99..3debffa45 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -17,7 +17,7 @@ extern crate panic_handler; #[cfg(all(not(thumb), not(windows)))] #[link(name = "c")] -extern {} +extern "C" {} // Every function in this module maps will be lowered to an intrinsic by LLVM, if the platform // doesn't have native support for the operation used in the function. ARM has a naming convention @@ -340,11 +340,13 @@ fn run() { something_with_a_dtor(&|| assert_eq!(bb(1), 1)); - extern { + extern "C" { fn rust_begin_unwind(); } // if bb(false) { - unsafe { rust_begin_unwind(); } + unsafe { + rust_begin_unwind(); + } // } } @@ -377,7 +379,7 @@ pub fn _start() -> ! 
{ #[cfg(windows)] #[link(name = "kernel32")] #[link(name = "msvcrt")] -extern {} +extern "C" {} // ARM targets need these symbols #[no_mangle] diff --git a/src/arm_linux.rs b/src/arm_linux.rs index 5ed379fa1..e710c1ab9 100644 --- a/src/arm_linux.rs +++ b/src/arm_linux.rs @@ -4,11 +4,11 @@ use core::mem; // Kernel-provided user-mode helper functions: // https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt unsafe fn __kuser_cmpxchg(oldval: u32, newval: u32, ptr: *mut u32) -> bool { - let f: extern "C" fn (u32, u32, *mut u32) -> u32 = mem::transmute(0xffff0fc0u32); + let f: extern "C" fn(u32, u32, *mut u32) -> u32 = mem::transmute(0xffff0fc0u32); f(oldval, newval, ptr) == 0 } unsafe fn __kuser_memory_barrier() { - let f: extern "C" fn () = mem::transmute(0xffff0fa0u32); + let f: extern "C" fn() = mem::transmute(0xffff0fa0u32); f(); } @@ -94,7 +94,7 @@ macro_rules! atomic_rmw { pub unsafe extern "C" fn $name(ptr: *mut $ty, val: $ty) -> $ty { atomic_rmw(ptr, |x| $op(x as $ty, val) as u32) as $ty } - } + }; } macro_rules! atomic_cmpxchg { ($name:ident, $ty:ty) => { @@ -102,16 +102,20 @@ macro_rules! 
atomic_cmpxchg { pub unsafe extern "C" fn $name(ptr: *mut $ty, oldval: $ty, newval: $ty) -> $ty { atomic_cmpxchg(ptr, oldval as u32, newval as u32) as $ty } - } + }; } atomic_rmw!(__sync_fetch_and_add_1, u8, |a: u8, b: u8| a.wrapping_add(b)); -atomic_rmw!(__sync_fetch_and_add_2, u16, |a: u16, b: u16| a.wrapping_add(b)); -atomic_rmw!(__sync_fetch_and_add_4, u32, |a: u32, b: u32| a.wrapping_add(b)); +atomic_rmw!(__sync_fetch_and_add_2, u16, |a: u16, b: u16| a + .wrapping_add(b)); +atomic_rmw!(__sync_fetch_and_add_4, u32, |a: u32, b: u32| a + .wrapping_add(b)); atomic_rmw!(__sync_fetch_and_sub_1, u8, |a: u8, b: u8| a.wrapping_sub(b)); -atomic_rmw!(__sync_fetch_and_sub_2, u16, |a: u16, b: u16| a.wrapping_sub(b)); -atomic_rmw!(__sync_fetch_and_sub_4, u32, |a: u32, b: u32| a.wrapping_sub(b)); +atomic_rmw!(__sync_fetch_and_sub_2, u16, |a: u16, b: u16| a + .wrapping_sub(b)); +atomic_rmw!(__sync_fetch_and_sub_4, u32, |a: u32, b: u32| a + .wrapping_sub(b)); atomic_rmw!(__sync_fetch_and_and_1, u8, |a: u8, b: u8| a & b); atomic_rmw!(__sync_fetch_and_and_2, u16, |a: u16, b: u16| a & b); @@ -129,21 +133,69 @@ atomic_rmw!(__sync_fetch_and_nand_1, u8, |a: u8, b: u8| !(a & b)); atomic_rmw!(__sync_fetch_and_nand_2, u16, |a: u16, b: u16| !(a & b)); atomic_rmw!(__sync_fetch_and_nand_4, u32, |a: u32, b: u32| !(a & b)); -atomic_rmw!(__sync_fetch_and_max_1, i8, |a: i8, b: i8| if a > b { a } else { b }); -atomic_rmw!(__sync_fetch_and_max_2, i16, |a: i16, b: i16| if a > b { a } else { b }); -atomic_rmw!(__sync_fetch_and_max_4, i32, |a: i32, b: i32| if a > b { a } else { b }); - -atomic_rmw!(__sync_fetch_and_umax_1, u8, |a: u8, b: u8| if a > b { a } else { b }); -atomic_rmw!(__sync_fetch_and_umax_2, u16, |a: u16, b: u16| if a > b { a } else { b }); -atomic_rmw!(__sync_fetch_and_umax_4, u32, |a: u32, b: u32| if a > b { a } else { b }); - -atomic_rmw!(__sync_fetch_and_min_1, i8, |a: i8, b: i8| if a < b { a } else { b }); -atomic_rmw!(__sync_fetch_and_min_2, i16, |a: i16, b: i16| if a < b { a 
} else { b }); -atomic_rmw!(__sync_fetch_and_min_4, i32, |a: i32, b: i32| if a < b { a } else { b }); - -atomic_rmw!(__sync_fetch_and_umin_1, u8, |a: u8, b: u8| if a < b { a } else { b }); -atomic_rmw!(__sync_fetch_and_umin_2, u16, |a: u16, b: u16| if a < b { a } else { b }); -atomic_rmw!(__sync_fetch_and_umin_4, u32, |a: u32, b: u32| if a < b { a } else { b }); +atomic_rmw!(__sync_fetch_and_max_1, i8, |a: i8, b: i8| if a > b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_max_2, i16, |a: i16, b: i16| if a > b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_max_4, i32, |a: i32, b: i32| if a > b { + a +} else { + b +}); + +atomic_rmw!(__sync_fetch_and_umax_1, u8, |a: u8, b: u8| if a > b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_umax_2, u16, |a: u16, b: u16| if a > b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_umax_4, u32, |a: u32, b: u32| if a > b { + a +} else { + b +}); + +atomic_rmw!(__sync_fetch_and_min_1, i8, |a: i8, b: i8| if a < b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_min_2, i16, |a: i16, b: i16| if a < b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_min_4, i32, |a: i32, b: i32| if a < b { + a +} else { + b +}); + +atomic_rmw!(__sync_fetch_and_umin_1, u8, |a: u8, b: u8| if a < b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_umin_2, u16, |a: u16, b: u16| if a < b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_umin_4, u32, |a: u32, b: u32| if a < b { + a +} else { + b +}); atomic_rmw!(__sync_lock_test_and_set_1, u8, |_: u8, b: u8| b); atomic_rmw!(__sync_lock_test_and_set_2, u16, |_: u16, b: u16| b); diff --git a/src/float/add.rs b/src/float/add.rs index 2b6ada81d..e8b9f9e77 100644 --- a/src/float/add.rs +++ b/src/float/add.rs @@ -1,8 +1,9 @@ -use int::{Int, CastInto}; use float::Float; +use int::{CastInto, Int}; /// Returns `a + b` -fn add(a: F, b: F) -> F where +fn add(a: F, b: F) -> F +where u32: CastInto, F::Int: CastInto, i32: CastInto, @@ -11,18 +12,18 @@ fn add(a: F, b: F) -> 
F where let one = F::Int::ONE; let zero = F::Int::ZERO; - let bits = F::BITS.cast(); + let bits = F::BITS.cast(); let significand_bits = F::SIGNIFICAND_BITS; - let max_exponent = F::EXPONENT_MAX; + let max_exponent = F::EXPONENT_MAX; - let implicit_bit = F::IMPLICIT_BIT; + let implicit_bit = F::IMPLICIT_BIT; let significand_mask = F::SIGNIFICAND_MASK; - let sign_bit = F::SIGN_MASK as F::Int; - let abs_mask = sign_bit - one; - let exponent_mask = F::EXPONENT_MASK; - let inf_rep = exponent_mask; - let quiet_bit = implicit_bit >> 1; - let qnan_rep = exponent_mask | quiet_bit; + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXPONENT_MASK; + let inf_rep = exponent_mask; + let quiet_bit = implicit_bit >> 1; + let qnan_rep = exponent_mask | quiet_bit; let mut a_rep = a.repr(); let mut b_rep = b.repr(); @@ -30,8 +31,7 @@ fn add(a: F, b: F) -> F where let b_abs = b_rep & abs_mask; // Detect if a or b is zero, infinity, or NaN. - if a_abs.wrapping_sub(one) >= inf_rep - one || - b_abs.wrapping_sub(one) >= inf_rep - one { + if a_abs.wrapping_sub(one) >= inf_rep - one || b_abs.wrapping_sub(one) >= inf_rep - one { // NaN + anything = qNaN if a_abs > inf_rep { return F::from_repr(a_abs | quiet_bit); @@ -68,7 +68,7 @@ fn add(a: F, b: F) -> F where // anything + zero = anything if b_abs == Int::ZERO { - return a; + return a; } } @@ -115,7 +115,8 @@ fn add(a: F, b: F) -> F where let align = a_exponent.wrapping_sub(b_exponent).cast(); if align != Int::ZERO { if align < bits { - let sticky = F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != Int::ZERO); + let sticky = + F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != Int::ZERO); b_significand = (b_significand >> align.cast()) | sticky; } else { b_significand = one; // sticky; b is known to be non-zero. 
@@ -131,12 +132,14 @@ fn add(a: F, b: F) -> F where // If partial cancellation occured, we need to left-shift the result // and adjust the exponent: if a_significand < implicit_bit << 3 { - let shift = a_significand.leading_zeros() as i32 - - (implicit_bit << 3).leading_zeros() as i32; + let shift = + a_significand.leading_zeros() as i32 - (implicit_bit << 3).leading_zeros() as i32; a_significand <<= shift; a_exponent -= shift; } - } else /* addition */ { + } else + /* addition */ + { a_significand += b_significand; // If the addition carried up, we need to right-shift the result and @@ -157,7 +160,8 @@ fn add(a: F, b: F) -> F where // Result is denormal before rounding; the exponent is zero and we // need to shift the significand. let shift = (1 - a_exponent).cast(); - let sticky = F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != Int::ZERO); + let sticky = + F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != Int::ZERO); a_significand = a_significand >> shift.cast() | sticky; a_exponent = 0; } @@ -175,8 +179,12 @@ fn add(a: F, b: F) -> F where // Final rounding. The result may overflow to infinity, but that is the // correct result in that case. 
- if round_guard_sticky > 0x4 { result += one; } - if round_guard_sticky == 0x4 { result += result & one; } + if round_guard_sticky > 0x4 { + result += one; + } + if round_guard_sticky == 0x4 { + result += result & one; + } F::from_repr(result) } diff --git a/src/float/cmp.rs b/src/float/cmp.rs index 01dd89055..20ab92e4b 100644 --- a/src/float/cmp.rs +++ b/src/float/cmp.rs @@ -1,64 +1,65 @@ #![allow(unreachable_code)] -use int::{Int, CastInto}; use float::Float; +use int::{CastInto, Int}; #[derive(Clone, Copy)] enum Result { Less, Equal, Greater, - Unordered + Unordered, } impl Result { fn to_le_abi(self) -> i32 { match self { - Result::Less => -1, - Result::Equal => 0, - Result::Greater => 1, - Result::Unordered => 1 + Result::Less => -1, + Result::Equal => 0, + Result::Greater => 1, + Result::Unordered => 1, } } fn to_ge_abi(self) -> i32 { match self { - Result::Less => -1, - Result::Equal => 0, - Result::Greater => 1, - Result::Unordered => -1 + Result::Less => -1, + Result::Equal => 0, + Result::Greater => 1, + Result::Unordered => -1, } } } -fn cmp(a: F, b: F) -> Result where +fn cmp(a: F, b: F) -> Result +where u32: CastInto, F::Int: CastInto, i32: CastInto, F::Int: CastInto, { - let one = F::Int::ONE; - let zero = F::Int::ZERO; + let one = F::Int::ONE; + let zero = F::Int::ZERO; let szero = F::SignedInt::ZERO; - let sign_bit = F::SIGN_MASK as F::Int; - let abs_mask = sign_bit - one; + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; let exponent_mask = F::EXPONENT_MASK; - let inf_rep = exponent_mask; + let inf_rep = exponent_mask; - let a_rep = a.repr(); - let b_rep = b.repr(); - let a_abs = a_rep & abs_mask; - let b_abs = b_rep & abs_mask; + let a_rep = a.repr(); + let b_rep = b.repr(); + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; // If either a or b is NaN, they are unordered. 
if a_abs > inf_rep || b_abs > inf_rep { - return Result::Unordered + return Result::Unordered; } // If a and b are both zeros, they are equal. if a_abs | b_abs == zero { - return Result::Equal + return Result::Equal; } let a_srep = a.signed_repr(); @@ -68,29 +69,29 @@ fn cmp(a: F, b: F) -> Result where // a and b as signed integers as we would with a fp_ting-point compare. if a_srep & b_srep >= szero { if a_srep < b_srep { - return Result::Less + return Result::Less; } else if a_srep == b_srep { - return Result::Equal + return Result::Equal; } else { - return Result::Greater + return Result::Greater; } } - // Otherwise, both are negative, so we need to flip the sense of the // comparison to get the correct result. (This assumes a twos- or ones- // complement integer representation; if integers are represented in a // sign-magnitude representation, then this flip is incorrect). else { if a_srep > b_srep { - return Result::Less + return Result::Less; } else if a_srep == b_srep { - return Result::Equal + return Result::Equal; } else { - return Result::Greater + return Result::Greater; } } } -fn unord(a: F, b: F) -> bool where +fn unord(a: F, b: F) -> bool +where u32: CastInto, F::Int: CastInto, i32: CastInto, @@ -98,10 +99,10 @@ fn unord(a: F, b: F) -> bool where { let one = F::Int::ONE; - let sign_bit = F::SIGN_MASK as F::Int; - let abs_mask = sign_bit - one; + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; let exponent_mask = F::EXPONENT_MASK; - let inf_rep = exponent_mask; + let inf_rep = exponent_mask; let a_rep = a.repr(); let b_rep = b.repr(); diff --git a/src/float/conv.rs b/src/float/conv.rs index 8d3e5fc6d..21aac15c1 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -2,10 +2,10 @@ use float::Float; use int::Int; macro_rules! 
int_to_float { - ($i:expr, $ity:ty, $fty:ty) => ({ + ($i:expr, $ity:ty, $fty:ty) => {{ let i = $i; if i == 0 { - return 0.0 + return 0.0; } let mant_dig = <$fty>::SIGNIFICAND_BITS + 1; @@ -22,20 +22,22 @@ macro_rules! int_to_float { let mut e = sd - 1; if <$ity>::BITS < mant_dig { - return <$fty>::from_parts(s, + return <$fty>::from_parts( + s, (e + exponent_bias) as <$fty as Float>::Int, - (a as <$fty as Float>::Int) << (mant_dig - e - 1)) + (a as <$fty as Float>::Int) << (mant_dig - e - 1), + ); } a = if sd > mant_dig { /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit MANT_DIG-1 bits to the right of 1 - * Q = bit MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit MANT_DIG-1 bits to the right of 1 + * Q = bit MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ let mant_dig_plus_one = mant_dig + 1; let mant_dig_plus_two = mant_dig + 2; a = if sd == mant_dig_plus_one { @@ -43,8 +45,10 @@ macro_rules! int_to_float { } else if sd == mant_dig_plus_two { a } else { - (a >> (sd - mant_dig_plus_two)) as <$ity as Int>::UnsignedInt | - ((a & <$ity as Int>::UnsignedInt::max_value()).wrapping_shl((n + mant_dig_plus_two) - sd) != 0) as <$ity as Int>::UnsignedInt + (a >> (sd - mant_dig_plus_two)) as <$ity as Int>::UnsignedInt + | ((a & <$ity as Int>::UnsignedInt::max_value()) + .wrapping_shl((n + mant_dig_plus_two) - sd) + != 0) as <$ity as Int>::UnsignedInt }; /* finish: */ @@ -54,19 +58,22 @@ macro_rules! 
int_to_float { /* a is now rounded to mant_dig or mant_dig+1 bits */ if (a & (1 << mant_dig)) != 0 { - a >>= 1; e += 1; + a >>= 1; + e += 1; } a - /* a is now rounded to mant_dig bits */ + /* a is now rounded to mant_dig bits */ } else { a.wrapping_shl(mant_dig - sd) /* a is now rounded to mant_dig bits */ }; - <$fty>::from_parts(s, + <$fty>::from_parts( + s, (e + exponent_bias) as <$fty as Float>::Int, - a as <$fty as Float>::Int) - }) + a as <$fty as Float>::Int, + ) + }}; } intrinsics! { @@ -160,11 +167,11 @@ intrinsics! { #[derive(PartialEq)] enum Sign { Positive, - Negative + Negative, } macro_rules! float_to_int { - ($f:expr, $fty:ty, $ity:ty) => ({ + ($f:expr, $fty:ty, $ity:ty) => {{ let f = $f; let fixint_min = <$ity>::min_value(); let fixint_max = <$ity>::max_value(); @@ -181,21 +188,34 @@ macro_rules! float_to_int { let a_abs = a_rep & !sign_bit; // this is used to work around -1 not being available for unsigned - let sign = if (a_rep & sign_bit) == 0 { Sign::Positive } else { Sign::Negative }; + let sign = if (a_rep & sign_bit) == 0 { + Sign::Positive + } else { + Sign::Negative + }; let mut exponent = (a_abs >> significand_bits) as usize; let significand = (a_abs & <$fty>::SIGNIFICAND_MASK) | <$fty>::IMPLICIT_BIT; // if < 1 or unsigned & negative - if exponent < exponent_bias || - fixint_unsigned && sign == Sign::Negative { - return 0 + if exponent < exponent_bias || fixint_unsigned && sign == Sign::Negative { + return 0; } exponent -= exponent_bias; // If the value is infinity, saturate. // If the value is too large for the integer type, 0. - if exponent >= (if fixint_unsigned {fixint_bits} else {fixint_bits -1}) { - return if sign == Sign::Positive {fixint_max} else {fixint_min} + if exponent + >= (if fixint_unsigned { + fixint_bits + } else { + fixint_bits - 1 + }) + { + return if sign == Sign::Positive { + fixint_max + } else { + fixint_min + }; } // If 0 <= exponent < significand_bits, right shift to get the result. // Otherwise, shift left. 
@@ -211,7 +231,7 @@ macro_rules! float_to_int { } else { r } - }) + }}; } intrinsics! { diff --git a/src/float/div.rs b/src/float/div.rs index ae12bb321..7c582a440 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -1,7 +1,5 @@ -use int::{CastInto, Int, WideInt}; use float::Float; - - +use int::{CastInto, Int, WideInt}; fn div32(a: F, b: F) -> F where @@ -398,7 +396,6 @@ where // operation in C, so we need to be a little bit fussy. let (mut quotient, _) = ::wide_mul(a_significand << 2, reciprocal.cast()); - // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). // In either case, we are going to compute a residual of the form // @@ -442,7 +439,6 @@ where } } - intrinsics! { #[arm_aeabi_alias = __aeabi_fdiv] pub extern "C" fn __divsf3(a: f32, b: f32) -> f32 { diff --git a/src/float/extend.rs b/src/float/extend.rs index 0ca9cf57a..39633773b 100644 --- a/src/float/extend.rs +++ b/src/float/extend.rs @@ -1,8 +1,9 @@ -use int::{CastInto, Int}; use float::Float; +use int::{CastInto, Int}; /// Generic conversion from a narrower to a wider IEEE-754 floating-point type -fn extend(a: F) -> R where +fn extend(a: F) -> R +where F::Int: CastInto, u64: CastInto, u32: CastInto, @@ -79,4 +80,4 @@ intrinsics! 
{ pub extern "C" fn __extendsfdf2vfp(a: f32) -> f64 { a as f64 // LLVM generate 'fcvtds' } -} \ No newline at end of file +} diff --git a/src/float/mod.rs b/src/float/mod.rs index 3bb13abbc..8b8039452 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -3,26 +3,26 @@ use core::ops; use super::int::Int; -pub mod conv; -pub mod cmp; pub mod add; -pub mod pow; -pub mod sub; -pub mod mul; +pub mod cmp; +pub mod conv; pub mod div; pub mod extend; +pub mod mul; +pub mod pow; +pub mod sub; /// Trait for some basic operations on floats pub trait Float: - Copy + - PartialEq + - PartialOrd + - ops::AddAssign + - ops::MulAssign + - ops::Add + - ops::Sub + - ops::Div + - ops::Rem + + Copy + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::MulAssign + + ops::Add + + ops::Sub + + ops::Div + + ops::Rem { /// A uint of the same with as the float type Int: Int; @@ -118,17 +118,23 @@ macro_rules! float_impl { unsafe { mem::transmute(a) } } fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self { - Self::from_repr(((sign as Self::Int) << (Self::BITS - 1)) | - ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK) | - (significand & Self::SIGNIFICAND_MASK)) + Self::from_repr( + ((sign as Self::Int) << (Self::BITS - 1)) + | ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK) + | (significand & Self::SIGNIFICAND_MASK), + ) } fn normalize(significand: Self::Int) -> (i32, Self::Int) { - let shift = significand.leading_zeros() + let shift = significand + .leading_zeros() .wrapping_sub((Self::Int::ONE << Self::SIGNIFICAND_BITS).leading_zeros()); - (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int) + ( + 1i32.wrapping_sub(shift as i32), + significand << shift as Self::Int, + ) } } - } + }; } float_impl!(f32, u32, i32, 32, 23); diff --git a/src/float/mul.rs b/src/float/mul.rs index d014bbc90..7b28793c8 100644 --- a/src/float/mul.rs +++ b/src/float/mul.rs @@ -1,5 +1,5 @@ -use int::{CastInto, Int, WideInt}; use float::Float; 
+use int::{CastInto, Int, WideInt}; fn mul(a: F, b: F) -> F where diff --git a/src/float/pow.rs b/src/float/pow.rs index f879c1a1f..2eedf6758 100644 --- a/src/float/pow.rs +++ b/src/float/pow.rs @@ -1,5 +1,5 @@ -use int::Int; use float::Float; +use int::Int; trait Pow: Float { /// Returns `a` raised to the power `b` diff --git a/src/float/sub.rs b/src/float/sub.rs index 2afb1409b..8d300e9d2 100644 --- a/src/float/sub.rs +++ b/src/float/sub.rs @@ -1,6 +1,6 @@ -use float::Float; -use float::add::__addsf3; use float::add::__adddf3; +use float::add::__addsf3; +use float::Float; intrinsics! { #[arm_aeabi_alias = __aeabi_fsub] diff --git a/src/int/addsub.rs b/src/int/addsub.rs index 50b5d10d2..e2d5bcbd4 100644 --- a/src/int/addsub.rs +++ b/src/int/addsub.rs @@ -1,16 +1,24 @@ -use int::LargeInt; use int::Int; +use int::LargeInt; trait UAddSub: LargeInt { fn uadd(self, other: Self) -> Self { let (low, carry) = self.low().overflowing_add(other.low()); let high = self.high().wrapping_add(other.high()); - let carry = if carry { Self::HighHalf::ONE } else { Self::HighHalf::ZERO }; + let carry = if carry { + Self::HighHalf::ONE + } else { + Self::HighHalf::ZERO + }; Self::from_parts(low, high.wrapping_add(carry)) } fn uadd_one(self) -> Self { let (low, carry) = self.low().overflowing_add(Self::LowHalf::ONE); - let carry = if carry { Self::HighHalf::ONE } else { Self::HighHalf::ZERO }; + let carry = if carry { + Self::HighHalf::ONE + } else { + Self::HighHalf::ZERO + }; Self::from_parts(low, self.high().wrapping_add(carry)) } fn usub(self, other: Self) -> Self { @@ -22,7 +30,8 @@ trait UAddSub: LargeInt { impl UAddSub for u128 {} trait AddSub: Int - where ::UnsignedInt: UAddSub +where + ::UnsignedInt: UAddSub, { fn add(self, other: Self) -> Self { Self::from_unsigned(self.unsigned().uadd(other.unsigned())) @@ -36,7 +45,8 @@ impl AddSub for u128 {} impl AddSub for i128 {} trait Addo: AddSub - where ::UnsignedInt: UAddSub +where + ::UnsignedInt: UAddSub, { fn addo(self, other: 
Self, overflow: &mut i32) -> Self { *overflow = 0; @@ -58,7 +68,8 @@ impl Addo for i128 {} impl Addo for u128 {} trait Subo: AddSub - where ::UnsignedInt: UAddSub +where + ::UnsignedInt: UAddSub, { fn subo(self, other: Self, overflow: &mut i32) -> Self { *overflow = 0; diff --git a/src/int/mod.rs b/src/int/mod.rs index 07f72f84e..fd1f0c3c8 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -3,13 +3,13 @@ use core::ops; macro_rules! hty { ($ty:ty) => { <$ty as LargeInt>::HighHalf - } + }; } macro_rules! os_ty { ($ty:ty) => { <$ty as Int>::OtherSign - } + }; } pub mod addsub; @@ -20,23 +20,23 @@ pub mod udiv; /// Trait for some basic operations on integers pub trait Int: - Copy + - PartialEq + - PartialOrd + - ops::AddAssign + - ops::BitAndAssign + - ops::BitOrAssign + - ops::ShlAssign + - ops::ShrAssign + - ops::Add + - ops::Sub + - ops::Div + - ops::Shl + - ops::Shr + - ops::BitOr + - ops::BitXor + - ops::BitAnd + - ops::Not + + Copy + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::BitAndAssign + + ops::BitOrAssign + + ops::ShlAssign + + ops::ShrAssign + + ops::Add + + ops::Sub + + ops::Div + + ops::Shl + + ops::Shr + + ops::BitOr + + ops::BitXor + + ops::BitAnd + + ops::Not { /// Type with the same width but other signedness type OtherSign: Int; @@ -182,7 +182,7 @@ macro_rules! int_impl { int_impl_common!($ity, $bits); } - } + }; } int_impl!(i32, u32, 32); @@ -223,7 +223,7 @@ macro_rules! large_int { low as $ty | ((high as $ty) << $halfbits) } } - } + }; } large_int!(u64, u32, u32, 32); @@ -284,9 +284,9 @@ macro_rules! 
impl_wide_int { let sticky = *low << ($bits - count); *low = *self << ($bits - count) | *low >> count | sticky; *self = *self >> count; - } else if count < 2*$bits { - let sticky = *self << (2*$bits - count) | *low; - *low = *self >> (count - $bits ) | sticky; + } else if count < 2 * $bits { + let sticky = *self << (2 * $bits - count) | *low; + *low = *self >> (count - $bits) | sticky; *self = 0; } else { let sticky = *self | *low; @@ -295,7 +295,7 @@ macro_rules! impl_wide_int { } } } - } + }; } impl_wide_int!(u32, u64, 32); diff --git a/src/int/mul.rs b/src/int/mul.rs index 58ca461b0..376395ac0 100644 --- a/src/int/mul.rs +++ b/src/int/mul.rs @@ -1,7 +1,7 @@ use core::ops; -use int::LargeInt; use int::Int; +use int::LargeInt; trait Mul: LargeInt { fn mul(self, other: Self) -> Self { @@ -19,8 +19,9 @@ trait Mul: LargeInt { low += (t & lower_mask) << half_bits; high += Self::low_as_high(t >> half_bits); high += Self::low_as_high((self.low() >> half_bits).wrapping_mul(other.low() >> half_bits)); - high = high.wrapping_add(self.high().wrapping_mul(Self::low_as_high(other.low()))) - .wrapping_add(Self::low_as_high(self.low()).wrapping_mul(other.high())); + high = high + .wrapping_add(self.high().wrapping_mul(Self::low_as_high(other.low()))) + .wrapping_add(Self::low_as_high(self.low()).wrapping_mul(other.high())); Self::from_parts(low, high) } } @@ -70,7 +71,7 @@ impl Mulo for i32 {} impl Mulo for i64 {} impl Mulo for i128 {} -trait UMulo : Int { +trait UMulo: Int { fn mulo(self, other: Self, overflow: &mut i32) -> Self { *overflow = 0; let result = self.wrapping_mul(other); diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index 82262a441..4b63697b4 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -43,7 +43,8 @@ impl Mod for i128 {} trait Divmod: Int { /// Returns `a / b` and sets `*rem = n % d` fn divmod(self, other: Self, rem: &mut Self, div: F) -> Self - where F: Fn(Self, Self) -> Self, + where + F: Fn(Self, Self) -> Self, { let r = div(self, other); // NOTE 
won't overflow because it's using the result from the diff --git a/src/int/shift.rs b/src/int/shift.rs index 959fcb088..4be588f16 100644 --- a/src/int/shift.rs +++ b/src/int/shift.rs @@ -3,7 +3,8 @@ use int::{Int, LargeInt}; trait Ashl: Int + LargeInt { /// Returns `a << b`, requires `b < Self::BITS` fn ashl(self, offset: u32) -> Self - where Self: LargeInt::LowHalf>, + where + Self: LargeInt::LowHalf>, { let half_bits = Self::BITS / 2; if offset & half_bits != 0 { @@ -11,9 +12,10 @@ trait Ashl: Int + LargeInt { } else if offset == 0 { self } else { - Self::from_parts(self.low() << offset, - (self.high() << offset) | - (self.low() >> (half_bits - offset))) + Self::from_parts( + self.low() << offset, + (self.high() << offset) | (self.low() >> (half_bits - offset)), + ) } } } @@ -24,18 +26,23 @@ impl Ashl for u128 {} trait Ashr: Int + LargeInt { /// Returns arithmetic `a >> b`, requires `b < Self::BITS` fn ashr(self, offset: u32) -> Self - where Self: LargeInt::HighHalf as Int>::UnsignedInt>, + where + Self: LargeInt::HighHalf as Int>::UnsignedInt>, { let half_bits = Self::BITS / 2; if offset & half_bits != 0 { - Self::from_parts((self.high() >> (offset - half_bits)).unsigned(), - self.high() >> (half_bits - 1)) + Self::from_parts( + (self.high() >> (offset - half_bits)).unsigned(), + self.high() >> (half_bits - 1), + ) } else if offset == 0 { self } else { let high_unsigned = self.high().unsigned(); - Self::from_parts((high_unsigned << (half_bits - offset)) | (self.low() >> offset), - self.high() >> offset) + Self::from_parts( + (high_unsigned << (half_bits - offset)) | (self.low() >> offset), + self.high() >> offset, + ) } } } @@ -46,7 +53,8 @@ impl Ashr for i128 {} trait Lshr: Int + LargeInt { /// Returns logical `a >> b`, requires `b < Self::BITS` fn lshr(self, offset: u32) -> Self - where Self: LargeInt::LowHalf>, + where + Self: LargeInt::LowHalf>, { let half_bits = Self::BITS / 2; if offset & half_bits != 0 { @@ -54,9 +62,10 @@ trait Lshr: Int + LargeInt { } 
else if offset == 0 { self } else { - Self::from_parts((self.high() << (half_bits - offset)) | - (self.low() >> offset), - self.high() >> offset) + Self::from_parts( + (self.high() << (half_bits - offset)) | (self.low() >> offset), + self.high() >> offset, + ) } } } diff --git a/src/lib.rs b/src/lib.rs index fddfa67aa..ef5353a70 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,11 +3,13 @@ #![cfg_attr(feature = "compiler-builtins", compiler_builtins)] #![crate_name = "compiler_builtins"] #![crate_type = "rlib"] -#![doc(html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk.png", - html_favicon_url = "https://doc.rust-lang.org/favicon.ico", - html_root_url = "https://doc.rust-lang.org/nightly/", - html_playground_url = "https://play.rust-lang.org/", - test(attr(deny(warnings))))] +#![doc( + html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk.png", + html_favicon_url = "https://doc.rust-lang.org/favicon.ico", + html_root_url = "https://doc.rust-lang.org/nightly/", + html_playground_url = "https://play.rust-lang.org/", + test(attr(deny(warnings))) +)] #![feature(asm)] #![feature(compiler_builtins)] #![feature(core_intrinsics)] @@ -19,10 +21,14 @@ #![allow(unused_features)] #![no_builtins] #![cfg_attr(feature = "compiler-builtins", feature(staged_api))] -#![cfg_attr(feature = "compiler-builtins", - unstable(feature = "compiler_builtins_lib", - reason = "Compiler builtins. Will never become stable.", - issue = "0"))] +#![cfg_attr( + feature = "compiler-builtins", + unstable( + feature = "compiler_builtins_lib", + reason = "Compiler builtins. Will never become stable.", + issue = "0" + ) +)] // We disable #[no_mangle] for tests so that we can verify the test results // against the native compiler-rt implementations of the builtins. @@ -44,12 +50,14 @@ fn abort() -> ! 
{ #[macro_use] mod macros; -pub mod int; pub mod float; +pub mod int; -#[cfg(any(all(target_arch = "wasm32", target_os = "unknown"), - all(target_arch = "arm", target_os = "none"), - all(target_vendor = "fortanix", target_env = "sgx")))] +#[cfg(any( + all(target_arch = "wasm32", target_os = "unknown"), + all(target_arch = "arm", target_os = "none"), + all(target_vendor = "fortanix", target_env = "sgx") +))] pub mod math; pub mod mem; diff --git a/src/macros.rs b/src/macros.rs index 5f1ab469a..e84338fae 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -261,7 +261,7 @@ macro_rules! intrinsics { // Hack for LLVM expectations for ABI on windows. This is used by the // `#[win64_128bit_abi_hack]` attribute recognized above -#[cfg(all(windows, target_pointer_width="64"))] +#[cfg(all(windows, target_pointer_width = "64"))] pub mod win64_128bit_abi_hack { #[repr(simd)] pub struct U64x2(u64, u64); diff --git a/src/mem.rs b/src/mem.rs index c56391c4d..c863bb729 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -6,10 +6,7 @@ type c_int = i16; type c_int = i32; #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] -pub unsafe extern "C" fn memcpy(dest: *mut u8, - src: *const u8, - n: usize) - -> *mut u8 { +pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { let mut i = 0; while i < n { *dest.offset(i as isize) = *src.offset(i as isize); @@ -19,10 +16,7 @@ pub unsafe extern "C" fn memcpy(dest: *mut u8, } #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] -pub unsafe extern "C" fn memmove(dest: *mut u8, - src: *const u8, - n: usize) - -> *mut u8 { +pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { if src < dest as *const u8 { // copy from end let mut i = n; diff --git a/src/probestack.rs b/src/probestack.rs index 52a0414a1..f9284e814 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -46,7 +46,7 @@ #[naked] #[no_mangle] #[cfg(all(target_arch = "x86_64", 
not(feature = "mangled-names")))] -pub unsafe extern fn __rust_probestack() { +pub unsafe extern "C" fn __rust_probestack() { // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, // ensuring that if any pages are unmapped we'll make a page fault. // @@ -97,7 +97,7 @@ pub unsafe extern fn __rust_probestack() { #[naked] #[no_mangle] #[cfg(all(target_arch = "x86", not(feature = "mangled-names")))] -pub unsafe extern fn __rust_probestack() { +pub unsafe extern "C" fn __rust_probestack() { // This is the same as x86_64 above, only translated for 32-bit sizes. Note // that on Unix we're expected to restore everything as it was, this // function basically can't tamper with anything. diff --git a/testcrate/build.rs b/testcrate/build.rs index 17ae3ef18..e43fdb77b 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -2,15 +2,15 @@ extern crate cast; extern crate rand; use std::collections::HashMap; +use std::fmt; use std::fmt::Write as FmtWrite; use std::fs::{self, OpenOptions}; -use std::io::Write; use std::hash::{Hash, Hasher}; +use std::io::Write; use std::path::PathBuf; use std::{env, mem}; -use std::fmt; -use self::cast::{f32, f64, u32, u64, u128, i32, i64, i128}; +use self::cast::{f32, f64, i128, i32, i64, u128, u32, u64}; use self::rand::Rng; const NTESTS: usize = 1_000; @@ -21,16 +21,15 @@ fn main() { drop(fs::remove_file(&out_file)); let target = env::var("TARGET").unwrap(); - let target_arch_arm = - target.contains("arm") || - target.contains("thumb"); + let target_arch_arm = target.contains("arm") || target.contains("thumb"); let target_arch_mips = target.contains("mips"); // TODO accept NaNs. 
We don't do that right now because we can't check // for NaN-ness on the thumb targets (due to missing intrinsics) // float/add.rs - gen(|(a, b): (MyF64, MyF64)| { + gen( + |(a, b): (MyF64, MyF64)| { let c = a.0 + b.0; if a.0.is_nan() || b.0.is_nan() || c.is_nan() { None @@ -38,8 +37,10 @@ fn main() { Some(c) } }, - "builtins::float::add::__adddf3(a, b)"); - gen(|(a, b): (MyF32, MyF32)| { + "builtins::float::add::__adddf3(a, b)", + ); + gen( + |(a, b): (MyF32, MyF32)| { let c = a.0 + b.0; if a.0.is_nan() || b.0.is_nan() || c.is_nan() { None @@ -47,10 +48,12 @@ fn main() { Some(c) } }, - "builtins::float::add::__addsf3(a, b)"); + "builtins::float::add::__addsf3(a, b)", + ); if target_arch_arm { - gen(|(a, b): (MyF64, MyF64)| { + gen( + |(a, b): (MyF64, MyF64)| { let c = a.0 + b.0; if a.0.is_nan() || b.0.is_nan() || c.is_nan() { None @@ -58,8 +61,10 @@ fn main() { Some(c) } }, - "builtins::float::add::__adddf3vfp(a, b)"); - gen(|(a, b): (LargeF32, LargeF32)| { + "builtins::float::add::__adddf3vfp(a, b)", + ); + gen( + |(a, b): (LargeF32, LargeF32)| { let c = a.0 + b.0; if a.0.is_nan() || b.0.is_nan() || c.is_nan() { None @@ -67,12 +72,13 @@ fn main() { Some(c) } }, - "builtins::float::add::__addsf3vfp(a, b)"); + "builtins::float::add::__addsf3vfp(a, b)", + ); } - // float/cmp.rs - gen(|(a, b): (MyF64, MyF64)| { + gen( + |(a, b): (MyF64, MyF64)| { let (a, b) = (a.0, b.0); if a.is_nan() || b.is_nan() { return None; @@ -88,8 +94,10 @@ fn main() { Some(0) } }, - "builtins::float::cmp::__gedf2(a, b)"); - gen(|(a, b): (MyF32, MyF32)| { + "builtins::float::cmp::__gedf2(a, b)", + ); + gen( + |(a, b): (MyF32, MyF32)| { let (a, b) = (a.0, b.0); if a.is_nan() || b.is_nan() { return None; @@ -105,8 +113,10 @@ fn main() { Some(0) } }, - "builtins::float::cmp::__gesf2(a, b)"); - gen(|(a, b): (MyF64, MyF64)| { + "builtins::float::cmp::__gesf2(a, b)", + ); + gen( + |(a, b): (MyF64, MyF64)| { let (a, b) = (a.0, b.0); if a.is_nan() || b.is_nan() { return None; @@ -122,8 +132,10 @@ fn 
main() { Some(0) } }, - "builtins::float::cmp::__ledf2(a, b)"); - gen(|(a, b): (MyF32, MyF32)| { + "builtins::float::cmp::__ledf2(a, b)", + ); + gen( + |(a, b): (MyF32, MyF32)| { let (a, b) = (a.0, b.0); if a.is_nan() || b.is_nan() { return None; @@ -139,285 +151,387 @@ fn main() { Some(0) } }, - "builtins::float::cmp::__lesf2(a, b)"); + "builtins::float::cmp::__lesf2(a, b)", + ); - gen(|(a, b): (MyF32, MyF32)| { + gen( + |(a, b): (MyF32, MyF32)| { let c = a.0.is_nan() || b.0.is_nan(); Some(c as i32) }, - "builtins::float::cmp::__unordsf2(a, b)"); + "builtins::float::cmp::__unordsf2(a, b)", + ); - gen(|(a, b): (MyF64, MyF64)| { + gen( + |(a, b): (MyF64, MyF64)| { let c = a.0.is_nan() || b.0.is_nan(); Some(c as i32) }, - "builtins::float::cmp::__unorddf2(a, b)"); + "builtins::float::cmp::__unorddf2(a, b)", + ); if target_arch_arm { - gen(|(a, b): (MyF32, MyF32)| { + gen( + |(a, b): (MyF32, MyF32)| { if a.0.is_nan() || b.0.is_nan() { return None; } let c = (a.0 <= b.0) as i32; Some(c) }, - "builtins::float::cmp::__aeabi_fcmple(a, b)"); + "builtins::float::cmp::__aeabi_fcmple(a, b)", + ); - gen(|(a, b): (MyF32, MyF32)| { + gen( + |(a, b): (MyF32, MyF32)| { if a.0.is_nan() || b.0.is_nan() { return None; } let c = (a.0 >= b.0) as i32; Some(c) }, - "builtins::float::cmp::__aeabi_fcmpge(a, b)"); + "builtins::float::cmp::__aeabi_fcmpge(a, b)", + ); - gen(|(a, b): (MyF32, MyF32)| { + gen( + |(a, b): (MyF32, MyF32)| { if a.0.is_nan() || b.0.is_nan() { return None; } let c = (a.0 == b.0) as i32; Some(c) }, - "builtins::float::cmp::__aeabi_fcmpeq(a, b)"); + "builtins::float::cmp::__aeabi_fcmpeq(a, b)", + ); - gen(|(a, b): (MyF32, MyF32)| { + gen( + |(a, b): (MyF32, MyF32)| { if a.0.is_nan() || b.0.is_nan() { return None; } let c = (a.0 < b.0) as i32; Some(c) }, - "builtins::float::cmp::__aeabi_fcmplt(a, b)"); + "builtins::float::cmp::__aeabi_fcmplt(a, b)", + ); - gen(|(a, b): (MyF32, MyF32)| { + gen( + |(a, b): (MyF32, MyF32)| { if a.0.is_nan() || b.0.is_nan() { return None; } 
let c = (a.0 > b.0) as i32; Some(c) }, - "builtins::float::cmp::__aeabi_fcmpgt(a, b)"); + "builtins::float::cmp::__aeabi_fcmpgt(a, b)", + ); - gen(|(a, b): (MyF64, MyF64)| { + gen( + |(a, b): (MyF64, MyF64)| { if a.0.is_nan() || b.0.is_nan() { return None; } let c = (a.0 <= b.0) as i32; Some(c) }, - "builtins::float::cmp::__aeabi_dcmple(a, b)"); + "builtins::float::cmp::__aeabi_dcmple(a, b)", + ); - gen(|(a, b): (MyF64, MyF64)| { + gen( + |(a, b): (MyF64, MyF64)| { if a.0.is_nan() || b.0.is_nan() { return None; } let c = (a.0 >= b.0) as i32; Some(c) }, - "builtins::float::cmp::__aeabi_dcmpge(a, b)"); + "builtins::float::cmp::__aeabi_dcmpge(a, b)", + ); - gen(|(a, b): (MyF64, MyF64)| { + gen( + |(a, b): (MyF64, MyF64)| { if a.0.is_nan() || b.0.is_nan() { return None; } let c = (a.0 == b.0) as i32; Some(c) }, - "builtins::float::cmp::__aeabi_dcmpeq(a, b)"); + "builtins::float::cmp::__aeabi_dcmpeq(a, b)", + ); - gen(|(a, b): (MyF64, MyF64)| { + gen( + |(a, b): (MyF64, MyF64)| { if a.0.is_nan() || b.0.is_nan() { return None; } let c = (a.0 < b.0) as i32; Some(c) }, - "builtins::float::cmp::__aeabi_dcmplt(a, b)"); + "builtins::float::cmp::__aeabi_dcmplt(a, b)", + ); - gen(|(a, b): (MyF64, MyF64)| { + gen( + |(a, b): (MyF64, MyF64)| { if a.0.is_nan() || b.0.is_nan() { return None; } let c = (a.0 > b.0) as i32; Some(c) }, - "builtins::float::cmp::__aeabi_dcmpgt(a, b)"); + "builtins::float::cmp::__aeabi_dcmpgt(a, b)", + ); - gen(|(a, b): (LargeF32, LargeF32)| { + gen( + |(a, b): (LargeF32, LargeF32)| { if a.0.is_nan() || b.0.is_nan() { return None; } Some((a.0 >= b.0) as i32) }, - "builtins::float::cmp::__gesf2vfp(a, b)"); - gen(|(a, b): (MyF64, MyF64)| { + "builtins::float::cmp::__gesf2vfp(a, b)", + ); + gen( + |(a, b): (MyF64, MyF64)| { if a.0.is_nan() || b.0.is_nan() { return None; } Some((a.0 >= b.0) as i32) }, - "builtins::float::cmp::__gedf2vfp(a, b)"); - gen(|(a, b): (LargeF32, LargeF32)| { + "builtins::float::cmp::__gedf2vfp(a, b)", + ); + gen( + |(a, b): 
(LargeF32, LargeF32)| { if a.0.is_nan() || b.0.is_nan() { return None; } Some((a.0 > b.0) as i32) }, - "builtins::float::cmp::__gtsf2vfp(a, b)"); - gen(|(a, b): (MyF64, MyF64)| { + "builtins::float::cmp::__gtsf2vfp(a, b)", + ); + gen( + |(a, b): (MyF64, MyF64)| { if a.0.is_nan() || b.0.is_nan() { return None; } Some((a.0 > b.0) as i32) }, - "builtins::float::cmp::__gtdf2vfp(a, b)"); - gen(|(a, b): (LargeF32, LargeF32)| { + "builtins::float::cmp::__gtdf2vfp(a, b)", + ); + gen( + |(a, b): (LargeF32, LargeF32)| { if a.0.is_nan() || b.0.is_nan() { return None; } Some((a.0 < b.0) as i32) }, - "builtins::float::cmp::__ltsf2vfp(a, b)"); - gen(|(a, b): (MyF64, MyF64)| { + "builtins::float::cmp::__ltsf2vfp(a, b)", + ); + gen( + |(a, b): (MyF64, MyF64)| { if a.0.is_nan() || b.0.is_nan() { return None; } Some((a.0 < b.0) as i32) }, - "builtins::float::cmp::__ltdf2vfp(a, b)"); - gen(|(a, b): (LargeF32, LargeF32)| { + "builtins::float::cmp::__ltdf2vfp(a, b)", + ); + gen( + |(a, b): (LargeF32, LargeF32)| { if a.0.is_nan() || b.0.is_nan() { return None; } Some((a.0 <= b.0) as i32) }, - "builtins::float::cmp::__lesf2vfp(a, b)"); - gen(|(a, b): (MyF64, MyF64)| { + "builtins::float::cmp::__lesf2vfp(a, b)", + ); + gen( + |(a, b): (MyF64, MyF64)| { if a.0.is_nan() || b.0.is_nan() { return None; } Some((a.0 <= b.0) as i32) }, - "builtins::float::cmp::__ledf2vfp(a, b)"); - gen(|(a, b): (LargeF32, LargeF32)| { + "builtins::float::cmp::__ledf2vfp(a, b)", + ); + gen( + |(a, b): (LargeF32, LargeF32)| { if a.0.is_nan() || b.0.is_nan() { return None; } Some((a.0 != b.0) as i32) }, - "builtins::float::cmp::__nesf2vfp(a, b)"); - gen(|(a, b): (MyF64, MyF64)| { + "builtins::float::cmp::__nesf2vfp(a, b)", + ); + gen( + |(a, b): (MyF64, MyF64)| { if a.0.is_nan() || b.0.is_nan() { return None; } Some((a.0 != b.0) as i32) }, - "builtins::float::cmp::__nedf2vfp(a, b)"); - gen(|(a, b): (LargeF32, LargeF32)| { + "builtins::float::cmp::__nedf2vfp(a, b)", + ); + gen( + |(a, b): (LargeF32, LargeF32)| { if 
a.0.is_nan() || b.0.is_nan() { return None; } Some((a.0 == b.0) as i32) }, - "builtins::float::cmp::__eqsf2vfp(a, b)"); - gen(|(a, b): (MyF64, MyF64)| { + "builtins::float::cmp::__eqsf2vfp(a, b)", + ); + gen( + |(a, b): (MyF64, MyF64)| { if a.0.is_nan() || b.0.is_nan() { return None; } Some((a.0 == b.0) as i32) }, - "builtins::float::cmp::__eqdf2vfp(a, b)"); + "builtins::float::cmp::__eqdf2vfp(a, b)", + ); } // float/extend.rs - gen(|a: MyF32| { + gen( + |a: MyF32| { if a.0.is_nan() { return None; } Some(f64(a.0)) }, - "builtins::float::extend::__extendsfdf2(a)"); + "builtins::float::extend::__extendsfdf2(a)", + ); if target_arch_arm { - gen(|a: LargeF32| { - if a.0.is_nan() { - return None; - } - Some(f64(a.0)) - }, - "builtins::float::extend::__extendsfdf2vfp(a)"); + gen( + |a: LargeF32| { + if a.0.is_nan() { + return None; + } + Some(f64(a.0)) + }, + "builtins::float::extend::__extendsfdf2vfp(a)", + ); } // float/conv.rs - gen(|a: MyF64| i64(a.0).ok(), - "builtins::float::conv::__fixdfdi(a)"); - gen(|a: MyF64| i32(a.0).ok(), - "builtins::float::conv::__fixdfsi(a)"); - gen(|a: MyF32| i64(a.0).ok(), - "builtins::float::conv::__fixsfdi(a)"); - gen(|a: MyF32| i32(a.0).ok(), - "builtins::float::conv::__fixsfsi(a)"); - gen(|a: MyF32| i128(a.0).ok(), - "builtins::float::conv::__fixsfti(a)"); - gen(|a: MyF64| i128(a.0).ok(), - "builtins::float::conv::__fixdfti(a)"); - gen(|a: MyF64| u64(a.0).ok(), - "builtins::float::conv::__fixunsdfdi(a)"); - gen(|a: MyF64| u32(a.0).ok(), - "builtins::float::conv::__fixunsdfsi(a)"); - gen(|a: MyF32| u64(a.0).ok(), - "builtins::float::conv::__fixunssfdi(a)"); - gen(|a: MyF32| u32(a.0).ok(), - "builtins::float::conv::__fixunssfsi(a)"); - gen(|a: MyF32| u128(a.0).ok(), - "builtins::float::conv::__fixunssfti(a)"); - gen(|a: MyF64| u128(a.0).ok(), - "builtins::float::conv::__fixunsdfti(a)"); - gen(|a: MyI64| Some(f64(a.0)), - "builtins::float::conv::__floatdidf(a)"); - gen(|a: MyI32| Some(f64(a.0)), - 
"builtins::float::conv::__floatsidf(a)"); - gen(|a: MyI32| Some(f32(a.0)), - "builtins::float::conv::__floatsisf(a)"); - gen(|a: MyU64| Some(f64(a.0)), - "builtins::float::conv::__floatundidf(a)"); - gen(|a: MyU32| Some(f64(a.0)), - "builtins::float::conv::__floatunsidf(a)"); - gen(|a: MyU32| Some(f32(a.0)), - "builtins::float::conv::__floatunsisf(a)"); - gen(|a: MyU128| f32(a.0).ok(), - "builtins::float::conv::__floatuntisf(a)"); + gen( + |a: MyF64| i64(a.0).ok(), + "builtins::float::conv::__fixdfdi(a)", + ); + gen( + |a: MyF64| i32(a.0).ok(), + "builtins::float::conv::__fixdfsi(a)", + ); + gen( + |a: MyF32| i64(a.0).ok(), + "builtins::float::conv::__fixsfdi(a)", + ); + gen( + |a: MyF32| i32(a.0).ok(), + "builtins::float::conv::__fixsfsi(a)", + ); + gen( + |a: MyF32| i128(a.0).ok(), + "builtins::float::conv::__fixsfti(a)", + ); + gen( + |a: MyF64| i128(a.0).ok(), + "builtins::float::conv::__fixdfti(a)", + ); + gen( + |a: MyF64| u64(a.0).ok(), + "builtins::float::conv::__fixunsdfdi(a)", + ); + gen( + |a: MyF64| u32(a.0).ok(), + "builtins::float::conv::__fixunsdfsi(a)", + ); + gen( + |a: MyF32| u64(a.0).ok(), + "builtins::float::conv::__fixunssfdi(a)", + ); + gen( + |a: MyF32| u32(a.0).ok(), + "builtins::float::conv::__fixunssfsi(a)", + ); + gen( + |a: MyF32| u128(a.0).ok(), + "builtins::float::conv::__fixunssfti(a)", + ); + gen( + |a: MyF64| u128(a.0).ok(), + "builtins::float::conv::__fixunsdfti(a)", + ); + gen( + |a: MyI64| Some(f64(a.0)), + "builtins::float::conv::__floatdidf(a)", + ); + gen( + |a: MyI32| Some(f64(a.0)), + "builtins::float::conv::__floatsidf(a)", + ); + gen( + |a: MyI32| Some(f32(a.0)), + "builtins::float::conv::__floatsisf(a)", + ); + gen( + |a: MyU64| Some(f64(a.0)), + "builtins::float::conv::__floatundidf(a)", + ); + gen( + |a: MyU32| Some(f64(a.0)), + "builtins::float::conv::__floatunsidf(a)", + ); + gen( + |a: MyU32| Some(f32(a.0)), + "builtins::float::conv::__floatunsisf(a)", + ); + gen( + |a: MyU128| f32(a.0).ok(), + 
"builtins::float::conv::__floatuntisf(a)", + ); if !target_arch_mips { - gen(|a: MyI128| Some(f32(a.0)), - "builtins::float::conv::__floattisf(a)"); - gen(|a: MyI128| Some(f64(a.0)), - "builtins::float::conv::__floattidf(a)"); - gen(|a: MyU128| Some(f64(a.0)), - "builtins::float::conv::__floatuntidf(a)"); + gen( + |a: MyI128| Some(f32(a.0)), + "builtins::float::conv::__floattisf(a)", + ); + gen( + |a: MyI128| Some(f64(a.0)), + "builtins::float::conv::__floattidf(a)", + ); + gen( + |a: MyU128| Some(f64(a.0)), + "builtins::float::conv::__floatuntidf(a)", + ); } // float/pow.rs - gen(|(a, b): (MyF64, MyI32)| { - let c = a.0.powi(b.0); - if a.0.is_nan() || c.is_nan() { - None - } else { + gen( + |(a, b): (MyF64, MyI32)| { + let c = a.0.powi(b.0); + if a.0.is_nan() || c.is_nan() { + None + } else { Some(c) } }, - "builtins::float::pow::__powidf2(a, b)"); - gen(|(a, b): (MyF32, MyI32)| { - let c = a.0.powi(b.0); - if a.0.is_nan() || c.is_nan() { - None - } else { + "builtins::float::pow::__powidf2(a, b)", + ); + gen( + |(a, b): (MyF32, MyI32)| { + let c = a.0.powi(b.0); + if a.0.is_nan() || c.is_nan() { + None + } else { Some(c) } }, - "builtins::float::pow::__powisf2(a, b)"); + "builtins::float::pow::__powisf2(a, b)", + ); // float/sub.rs - gen(|(a, b): (MyF64, MyF64)| { + gen( + |(a, b): (MyF64, MyF64)| { let c = a.0 - b.0; if a.0.is_nan() || b.0.is_nan() || c.is_nan() { None @@ -425,8 +539,10 @@ fn main() { Some(c) } }, - "builtins::float::sub::__subdf3(a, b)"); - gen(|(a, b): (MyF32, MyF32)| { + "builtins::float::sub::__subdf3(a, b)", + ); + gen( + |(a, b): (MyF32, MyF32)| { let c = a.0 - b.0; if a.0.is_nan() || b.0.is_nan() || c.is_nan() { None @@ -434,10 +550,12 @@ fn main() { Some(c) } }, - "builtins::float::sub::__subsf3(a, b)"); + "builtins::float::sub::__subsf3(a, b)", + ); if target_arch_arm { - gen(|(a, b): (MyF64, MyF64)| { + gen( + |(a, b): (MyF64, MyF64)| { let c = a.0 - b.0; if a.0.is_nan() || b.0.is_nan() || c.is_nan() { None @@ -445,8 +563,10 @@ fn 
main() { Some(c) } }, - "builtins::float::sub::__subdf3vfp(a, b)"); - gen(|(a, b): (LargeF32, LargeF32)| { + "builtins::float::sub::__subdf3vfp(a, b)", + ); + gen( + |(a, b): (LargeF32, LargeF32)| { let c = a.0 - b.0; if a.0.is_nan() || b.0.is_nan() || c.is_nan() { None @@ -454,11 +574,13 @@ fn main() { Some(c) } }, - "builtins::float::sub::__subsf3vfp(a, b)"); + "builtins::float::sub::__subsf3vfp(a, b)", + ); } // float/mul.rs - gen(|(a, b): (MyF64, MyF64)| { + gen( + |(a, b): (MyF64, MyF64)| { let c = a.0 * b.0; if a.0.is_nan() || b.0.is_nan() || c.is_nan() { None @@ -466,8 +588,10 @@ fn main() { Some(c) } }, - "builtins::float::mul::__muldf3(a, b)"); - gen(|(a, b): (LargeF32, LargeF32)| { + "builtins::float::mul::__muldf3(a, b)", + ); + gen( + |(a, b): (LargeF32, LargeF32)| { let c = a.0 * b.0; if a.0.is_nan() || b.0.is_nan() || c.is_nan() { None @@ -475,10 +599,12 @@ fn main() { Some(c) } }, - "builtins::float::mul::__mulsf3(a, b)"); + "builtins::float::mul::__mulsf3(a, b)", + ); if target_arch_arm { - gen(|(a, b): (MyF64, MyF64)| { + gen( + |(a, b): (MyF64, MyF64)| { let c = a.0 * b.0; if a.0.is_nan() || b.0.is_nan() || c.is_nan() { None @@ -486,8 +612,10 @@ fn main() { Some(c) } }, - "builtins::float::mul::__muldf3vfp(a, b)"); - gen(|(a, b): (LargeF32, LargeF32)| { + "builtins::float::mul::__muldf3vfp(a, b)", + ); + gen( + |(a, b): (LargeF32, LargeF32)| { let c = a.0 * b.0; if a.0.is_nan() || b.0.is_nan() || c.is_nan() { None @@ -495,122 +623,168 @@ fn main() { Some(c) } }, - "builtins::float::mul::__mulsf3vfp(a, b)"); + "builtins::float::mul::__mulsf3vfp(a, b)", + ); } // float/div.rs - gen(|(a, b): (MyF64, MyF64)| { + gen( + |(a, b): (MyF64, MyF64)| { if b.0 == 0.0 { - return None + return None; } let c = a.0 / b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() || - c.abs() <= unsafe { mem::transmute(4503599627370495u64) } + if a.0.is_nan() + || b.0.is_nan() + || c.is_nan() + || c.abs() <= unsafe { mem::transmute(4503599627370495u64) } { None } else { 
Some(c) } }, - "builtins::float::div::__divdf3(a, b)"); - gen(|(a, b): (LargeF32, LargeF32)| { + "builtins::float::div::__divdf3(a, b)", + ); + gen( + |(a, b): (LargeF32, LargeF32)| { if b.0 == 0.0 { - return None + return None; } let c = a.0 / b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() || - c.abs() <= unsafe { mem::transmute(16777215u32) } + if a.0.is_nan() + || b.0.is_nan() + || c.is_nan() + || c.abs() <= unsafe { mem::transmute(16777215u32) } { None } else { Some(c) } }, - "builtins::float::div::__divsf3(a, b)"); + "builtins::float::div::__divsf3(a, b)", + ); if target_arch_arm { - gen(|(a, b): (MyF64, MyF64)| { + gen( + |(a, b): (MyF64, MyF64)| { if b.0 == 0.0 { - return None + return None; } let c = a.0 / b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() || - c.abs() <= unsafe { mem::transmute(4503599627370495u64) } + if a.0.is_nan() + || b.0.is_nan() + || c.is_nan() + || c.abs() <= unsafe { mem::transmute(4503599627370495u64) } { None } else { Some(c) } }, - "builtins::float::div::__divdf3vfp(a, b)"); - gen(|(a, b): (LargeF32, LargeF32)| { + "builtins::float::div::__divdf3vfp(a, b)", + ); + gen( + |(a, b): (LargeF32, LargeF32)| { if b.0 == 0.0 { - return None + return None; } let c = a.0 / b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() || - c.abs() <= unsafe { mem::transmute(16777215u32) } + if a.0.is_nan() + || b.0.is_nan() + || c.is_nan() + || c.abs() <= unsafe { mem::transmute(16777215u32) } { None } else { Some(c) } }, - "builtins::float::div::__divsf3vfp(a, b)"); + "builtins::float::div::__divsf3vfp(a, b)", + ); } // int/addsub.rs - gen(|(a, b): (MyU128, MyU128)| Some(a.0.wrapping_add(b.0)), - "builtins::int::addsub::rust_u128_add(a, b)"); - gen(|(a, b): (MyI128, MyI128)| Some(a.0.wrapping_add(b.0)), - "builtins::int::addsub::rust_i128_add(a, b)"); - gen(|(a, b): (MyU128, MyU128)| Some(a.0.overflowing_add(b.0)), - "builtins::int::addsub::rust_u128_addo(a, b)"); - gen(|(a, b): (MyI128, MyI128)| Some(a.0.overflowing_add(b.0)), - 
"builtins::int::addsub::rust_i128_addo(a, b)"); - gen(|(a, b): (MyU128, MyU128)| Some(a.0.wrapping_sub(b.0)), - "builtins::int::addsub::rust_u128_sub(a, b)"); - gen(|(a, b): (MyI128, MyI128)| Some(a.0.wrapping_sub(b.0)), - "builtins::int::addsub::rust_i128_sub(a, b)"); - gen(|(a, b): (MyU128, MyU128)| Some(a.0.overflowing_sub(b.0)), - "builtins::int::addsub::rust_u128_subo(a, b)"); - gen(|(a, b): (MyI128, MyI128)| Some(a.0.overflowing_sub(b.0)), - "builtins::int::addsub::rust_i128_subo(a, b)"); + gen( + |(a, b): (MyU128, MyU128)| Some(a.0.wrapping_add(b.0)), + "builtins::int::addsub::rust_u128_add(a, b)", + ); + gen( + |(a, b): (MyI128, MyI128)| Some(a.0.wrapping_add(b.0)), + "builtins::int::addsub::rust_i128_add(a, b)", + ); + gen( + |(a, b): (MyU128, MyU128)| Some(a.0.overflowing_add(b.0)), + "builtins::int::addsub::rust_u128_addo(a, b)", + ); + gen( + |(a, b): (MyI128, MyI128)| Some(a.0.overflowing_add(b.0)), + "builtins::int::addsub::rust_i128_addo(a, b)", + ); + gen( + |(a, b): (MyU128, MyU128)| Some(a.0.wrapping_sub(b.0)), + "builtins::int::addsub::rust_u128_sub(a, b)", + ); + gen( + |(a, b): (MyI128, MyI128)| Some(a.0.wrapping_sub(b.0)), + "builtins::int::addsub::rust_i128_sub(a, b)", + ); + gen( + |(a, b): (MyU128, MyU128)| Some(a.0.overflowing_sub(b.0)), + "builtins::int::addsub::rust_u128_subo(a, b)", + ); + gen( + |(a, b): (MyI128, MyI128)| Some(a.0.overflowing_sub(b.0)), + "builtins::int::addsub::rust_i128_subo(a, b)", + ); // int/mul.rs - gen(|(a, b): (MyU64, MyU64)| Some(a.0.wrapping_mul(b.0)), - "builtins::int::mul::__muldi3(a, b)"); - gen(|(a, b): (MyI64, MyI64)| Some(a.0.overflowing_mul(b.0)), + gen( + |(a, b): (MyU64, MyU64)| Some(a.0.wrapping_mul(b.0)), + "builtins::int::mul::__muldi3(a, b)", + ); + gen( + |(a, b): (MyI64, MyI64)| Some(a.0.overflowing_mul(b.0)), "{ let mut o = 2; let c = builtins::int::mul::__mulodi4(a, b, &mut o); (c, match o { 0 => false, 1 => true, _ => panic!() }) - }"); - gen(|(a, b): (MyI32, MyI32)| 
Some(a.0.overflowing_mul(b.0)), + }", + ); + gen( + |(a, b): (MyI32, MyI32)| Some(a.0.overflowing_mul(b.0)), "{ let mut o = 2; let c = builtins::int::mul::__mulosi4(a, b, &mut o); (c, match o { 0 => false, 1 => true, _ => panic!() }) - }"); - gen(|(a, b): (MyI128, MyI128)| Some(a.0.wrapping_mul(b.0)), - "builtins::int::mul::__multi3(a, b)"); - gen(|(a, b): (MyI128, MyI128)| Some(a.0.overflowing_mul(b.0)), + }", + ); + gen( + |(a, b): (MyI128, MyI128)| Some(a.0.wrapping_mul(b.0)), + "builtins::int::mul::__multi3(a, b)", + ); + gen( + |(a, b): (MyI128, MyI128)| Some(a.0.overflowing_mul(b.0)), "{ let mut o = 2; let c = builtins::int::mul::__muloti4(a, b, &mut o); (c, match o { 0 => false, 1 => true, _ => panic!() }) - }"); + }", + ); // int/sdiv.rs - gen(|(a, b): (MyI64, MyI64)| { + gen( + |(a, b): (MyI64, MyI64)| { if b.0 == 0 { None } else { Some(a.0 / b.0) } }, - "builtins::int::sdiv::__divdi3(a, b)"); - gen(|(a, b): (MyI64, MyI64)| { + "builtins::int::sdiv::__divdi3(a, b)", + ); + gen( + |(a, b): (MyI64, MyI64)| { if b.0 == 0 { None } else { @@ -620,8 +794,10 @@ fn main() { "{ let mut r = 0; (builtins::int::sdiv::__divmoddi4(a, b, &mut r), r) - }"); - gen(|(a, b): (MyI32, MyI32)| { + }", + ); + gen( + |(a, b): (MyI32, MyI32)| { if b.0 == 0 { None } else { @@ -631,72 +807,98 @@ fn main() { "{ let mut r = 0; (builtins::int::sdiv::__divmodsi4(a, b, &mut r), r) - }"); - gen(|(a, b): (MyI32, MyI32)| { + }", + ); + gen( + |(a, b): (MyI32, MyI32)| { if b.0 == 0 { None } else { Some(a.0 / b.0) } }, - "builtins::int::sdiv::__divsi3(a, b)"); - gen(|(a, b): (MyI32, MyI32)| { + "builtins::int::sdiv::__divsi3(a, b)", + ); + gen( + |(a, b): (MyI32, MyI32)| { if b.0 == 0 { None } else { Some(a.0 % b.0) } }, - "builtins::int::sdiv::__modsi3(a, b)"); - gen(|(a, b): (MyI64, MyI64)| { + "builtins::int::sdiv::__modsi3(a, b)", + ); + gen( + |(a, b): (MyI64, MyI64)| { if b.0 == 0 { None } else { Some(a.0 % b.0) } }, - "builtins::int::sdiv::__moddi3(a, b)"); - gen(|(a, b): (MyI128, 
MyI128)| { + "builtins::int::sdiv::__moddi3(a, b)", + ); + gen( + |(a, b): (MyI128, MyI128)| { if b.0 == 0 { None } else { Some(a.0 / b.0) } }, - "builtins::int::sdiv::__divti3(a, b)"); - gen(|(a, b): (MyI128, MyI128)| { + "builtins::int::sdiv::__divti3(a, b)", + ); + gen( + |(a, b): (MyI128, MyI128)| { if b.0 == 0 { None } else { Some(a.0 % b.0) } }, - "builtins::int::sdiv::__modti3(a, b)"); + "builtins::int::sdiv::__modti3(a, b)", + ); // int/shift.rs - gen(|(a, b): (MyU64, MyU32)| Some(a.0 << (b.0 % 64)), - "builtins::int::shift::__ashldi3(a, b % 64)"); - gen(|(a, b): (MyU128, MyU32)| Some(a.0 << (b.0 % 128)), - "builtins::int::shift::__ashlti3(a, b % 128)"); - gen(|(a, b): (MyI64, MyU32)| Some(a.0 >> (b.0 % 64)), - "builtins::int::shift::__ashrdi3(a, b % 64)"); - gen(|(a, b): (MyI128, MyU32)| Some(a.0 >> (b.0 % 128)), - "builtins::int::shift::__ashrti3(a, b % 128)"); - gen(|(a, b): (MyU64, MyU32)| Some(a.0 >> (b.0 % 64)), - "builtins::int::shift::__lshrdi3(a, b % 64)"); - gen(|(a, b): (MyU128, MyU32)| Some(a.0 >> (b.0 % 128)), - "builtins::int::shift::__lshrti3(a, b % 128)"); + gen( + |(a, b): (MyU64, MyU32)| Some(a.0 << (b.0 % 64)), + "builtins::int::shift::__ashldi3(a, b % 64)", + ); + gen( + |(a, b): (MyU128, MyU32)| Some(a.0 << (b.0 % 128)), + "builtins::int::shift::__ashlti3(a, b % 128)", + ); + gen( + |(a, b): (MyI64, MyU32)| Some(a.0 >> (b.0 % 64)), + "builtins::int::shift::__ashrdi3(a, b % 64)", + ); + gen( + |(a, b): (MyI128, MyU32)| Some(a.0 >> (b.0 % 128)), + "builtins::int::shift::__ashrti3(a, b % 128)", + ); + gen( + |(a, b): (MyU64, MyU32)| Some(a.0 >> (b.0 % 64)), + "builtins::int::shift::__lshrdi3(a, b % 64)", + ); + gen( + |(a, b): (MyU128, MyU32)| Some(a.0 >> (b.0 % 128)), + "builtins::int::shift::__lshrti3(a, b % 128)", + ); // int/udiv.rs - gen(|(a, b): (MyU64, MyU64)| { + gen( + |(a, b): (MyU64, MyU64)| { if b.0 == 0 { None } else { Some(a.0 / b.0) } }, - "builtins::int::udiv::__udivdi3(a, b)"); - gen(|(a, b): (MyU64, MyU64)| { + 
"builtins::int::udiv::__udivdi3(a, b)", + ); + gen( + |(a, b): (MyU64, MyU64)| { if b.0 == 0 { None } else { @@ -706,8 +908,10 @@ fn main() { "{ let mut r = 0; (builtins::int::udiv::__udivmoddi4(a, b, Some(&mut r)), r) - }"); - gen(|(a, b): (MyU32, MyU32)| { + }", + ); + gen( + |(a, b): (MyU32, MyU32)| { if b.0 == 0 { None } else { @@ -717,48 +921,60 @@ fn main() { "{ let mut r = 0; (builtins::int::udiv::__udivmodsi4(a, b, Some(&mut r)), r) - }"); - gen(|(a, b): (MyU32, MyU32)| { + }", + ); + gen( + |(a, b): (MyU32, MyU32)| { if b.0 == 0 { None } else { Some(a.0 / b.0) } }, - "builtins::int::udiv::__udivsi3(a, b)"); - gen(|(a, b): (MyU32, MyU32)| { + "builtins::int::udiv::__udivsi3(a, b)", + ); + gen( + |(a, b): (MyU32, MyU32)| { if b.0 == 0 { None } else { Some(a.0 % b.0) } }, - "builtins::int::udiv::__umodsi3(a, b)"); - gen(|(a, b): (MyU64, MyU64)| { + "builtins::int::udiv::__umodsi3(a, b)", + ); + gen( + |(a, b): (MyU64, MyU64)| { if b.0 == 0 { None } else { Some(a.0 % b.0) } }, - "builtins::int::udiv::__umoddi3(a, b)"); - gen(|(a, b): (MyU128, MyU128)| { + "builtins::int::udiv::__umoddi3(a, b)", + ); + gen( + |(a, b): (MyU128, MyU128)| { if b.0 == 0 { None } else { Some(a.0 / b.0) } }, - "builtins::int::udiv::__udivti3(a, b)"); - gen(|(a, b): (MyU128, MyU128)| { + "builtins::int::udiv::__udivti3(a, b)", + ); + gen( + |(a, b): (MyU128, MyU128)| { if b.0 == 0 { None } else { Some(a.0 % b.0) } }, - "builtins::int::udiv::__umodti3(a, b)"); - gen(|(a, b): (MyU128, MyU128)| { + "builtins::int::udiv::__umodti3(a, b)", + ); + gen( + |(a, b): (MyU128, MyU128)| { if b.0 == 0 { None } else { @@ -768,7 +984,8 @@ fn main() { "{ let mut r = 0; (builtins::int::udiv::__udivmodti4(a, b, Some(&mut r)), r) - }"); + }", + ); } macro_rules! gen_float { @@ -790,24 +1007,27 @@ macro_rules! 
gen_float { fn mk_f32(sign: bool, exponent: $uty, significand: $uty) -> $fty { unsafe { - mem::transmute(((sign as $uty) << (BITS - 1)) | - ((exponent & EXPONENT_MASK) << - SIGNIFICAND_BITS) | - (significand & SIGNIFICAND_MASK)) + mem::transmute( + ((sign as $uty) << (BITS - 1)) + | ((exponent & EXPONENT_MASK) << SIGNIFICAND_BITS) + | (significand & SIGNIFICAND_MASK), + ) } } if rng.gen_weighted_bool(10) { // Special values - *rng.choose(&[-0.0, - 0.0, - ::std::$fty::MIN, - ::std::$fty::MIN_POSITIVE, - ::std::$fty::MAX, - ::std::$fty::NAN, - ::std::$fty::INFINITY, - -::std::$fty::INFINITY]) - .unwrap() + *rng.choose(&[ + -0.0, + 0.0, + ::std::$fty::MIN, + ::std::$fty::MIN_POSITIVE, + ::std::$fty::MAX, + ::std::$fty::NAN, + ::std::$fty::INFINITY, + -::std::$fty::INFINITY, + ]) + .unwrap() } else if rng.gen_weighted_bool(10) { // NaN patterns mk_f32(rng.gen(), rng.gen(), 0) @@ -819,7 +1039,7 @@ macro_rules! gen_float { mk_f32(rng.gen(), rng.gen(), rng.gen()) } } - } + }; } gen_float!(gen_f32, f32, u32, 32, 23); @@ -844,24 +1064,27 @@ macro_rules! gen_large_float { fn mk_f32(sign: bool, exponent: $uty, significand: $uty) -> $fty { unsafe { - mem::transmute(((sign as $uty) << (BITS - 1)) | - ((exponent & EXPONENT_MASK) << - SIGNIFICAND_BITS) | - (significand & SIGNIFICAND_MASK)) + mem::transmute( + ((sign as $uty) << (BITS - 1)) + | ((exponent & EXPONENT_MASK) << SIGNIFICAND_BITS) + | (significand & SIGNIFICAND_MASK), + ) } } if rng.gen_weighted_bool(10) { // Special values - *rng.choose(&[-0.0, - 0.0, - ::std::$fty::MIN, - ::std::$fty::MIN_POSITIVE, - ::std::$fty::MAX, - ::std::$fty::NAN, - ::std::$fty::INFINITY, - -::std::$fty::INFINITY]) - .unwrap() + *rng.choose(&[ + -0.0, + 0.0, + ::std::$fty::MIN, + ::std::$fty::MIN_POSITIVE, + ::std::$fty::MAX, + ::std::$fty::NAN, + ::std::$fty::INFINITY, + -::std::$fty::INFINITY, + ]) + .unwrap() } else if rng.gen_weighted_bool(10) { // NaN patterns mk_f32(rng.gen(), rng.gen(), 0) @@ -873,7 +1096,7 @@ macro_rules! 
gen_large_float { rng.gen::<$fty>() } } - } + }; } gen_large_float!(gen_large_f32, f32, u32, 32, 23); @@ -892,17 +1115,13 @@ trait TestOutput { } fn gen(mut generate: F, test: &str) - where F: FnMut(A) -> Option, - A: TestInput + Copy, - R: TestOutput, +where + F: FnMut(A) -> Option, + A: TestInput + Copy, + R: TestOutput, { let rng = &mut rand::thread_rng(); - let testname = test.split("::") - .last() - .unwrap() - .split("(") - .next() - .unwrap(); + let testname = test.split("::").last().unwrap().split("(").next().unwrap(); let out_dir = PathBuf::from(env::var_os("OUT_DIR").unwrap()); let out_file = out_dir.join("generated.rs"); @@ -911,7 +1130,7 @@ fn gen(mut generate: F, test: &str) while n > 0 { let input: A = rng.gen(); if testcases.contains_key(&input) { - continue + continue; } let output = match generate(input) { Some(o) => o, @@ -925,10 +1144,12 @@ fn gen(mut generate: F, test: &str) contents.push_str(&format!("mod {} {{\nuse super::*;\n", testname)); contents.push_str("#[test]\n"); contents.push_str("fn test() {\n"); - contents.push_str(&format!("static TESTS: [({}, {}); {}] = [\n", - A::ty_name(), - R::ty_name(), - NTESTS)); + contents.push_str(&format!( + "static TESTS: [({}, {}); {}] = [\n", + A::ty_name(), + R::ty_name(), + NTESTS + )); for (input, output) in testcases { contents.push_str(" ("); input.generate_static(&mut contents); @@ -938,7 +1159,8 @@ fn gen(mut generate: F, test: &str) } contents.push_str("];\n"); - contents.push_str(&format!(r#" + contents.push_str(&format!( + r#" for &(inputs, output) in TESTS.iter() {{ {} assert_eq!({}, {}, "inputs {{:?}}", inputs) @@ -1070,17 +1292,20 @@ my_integer! 
{ } impl TestInput for (A, B) - where A: TestInput, - B: TestInput, +where + A: TestInput, + B: TestInput, { fn ty_name() -> String { format!("({}, {})", A::ty_name(), B::ty_name()) } fn generate_lets(container: &str, cnt: &mut u8) -> String { - format!("{}{}", - A::generate_lets(&format!("{}.0", container), cnt), - B::generate_lets(&format!("{}.1", container), cnt)) + format!( + "{}{}", + A::generate_lets(&format!("{}.0", container), cnt), + B::generate_lets(&format!("{}.1", container), cnt) + ) } fn generate_static(&self, dst: &mut String) { @@ -1141,8 +1366,9 @@ macro_rules! plain_test_output { plain_test_output!(i32 i64 i128 u32 u64 u128 bool); impl TestOutput for (A, B) - where A: TestOutput, - B: TestOutput, +where + A: TestOutput, + B: TestOutput, { fn ty_name() -> String { format!("({}, {})", A::ty_name(), B::ty_name()) diff --git a/testcrate/src/lib.rs b/testcrate/src/lib.rs index 2e7f0d4d7..0c9ac1ac8 100644 --- a/testcrate/src/lib.rs +++ b/testcrate/src/lib.rs @@ -1 +1 @@ -#![no_std] \ No newline at end of file +#![no_std] diff --git a/testcrate/tests/aeabi_memclr.rs b/testcrate/tests/aeabi_memclr.rs index 08fbd4fa9..326435c29 100644 --- a/testcrate/tests/aeabi_memclr.rs +++ b/testcrate/tests/aeabi_memclr.rs @@ -1,7 +1,9 @@ -#![cfg(all(target_arch = "arm", - not(any(target_env = "gnu", target_env = "musl")), - target_os = "linux", - feature = "mem"))] +#![cfg(all( + target_arch = "arm", + not(any(target_env = "gnu", target_env = "musl")), + target_os = "linux", + feature = "mem" +))] #![feature(compiler_builtins_lib)] #![feature(lang_items)] #![no_std] diff --git a/testcrate/tests/aeabi_memcpy.rs b/testcrate/tests/aeabi_memcpy.rs index 76dad8992..2d72dfbba 100644 --- a/testcrate/tests/aeabi_memcpy.rs +++ b/testcrate/tests/aeabi_memcpy.rs @@ -1,7 +1,9 @@ -#![cfg(all(target_arch = "arm", - not(any(target_env = "gnu", target_env = "musl")), - target_os = "linux", - feature = "mem"))] +#![cfg(all( + target_arch = "arm", + not(any(target_env = "gnu", 
target_env = "musl")), + target_os = "linux", + feature = "mem" +))] #![feature(compiler_builtins_lib)] #![feature(lang_items)] #![no_std] diff --git a/testcrate/tests/aeabi_memset.rs b/testcrate/tests/aeabi_memset.rs index 71fe37e35..3cfbfe5b0 100644 --- a/testcrate/tests/aeabi_memset.rs +++ b/testcrate/tests/aeabi_memset.rs @@ -1,7 +1,9 @@ -#![cfg(all(target_arch = "arm", - not(any(target_env = "gnu", target_env = "musl")), - target_os = "linux", - feature = "mem"))] +#![cfg(all( + target_arch = "arm", + not(any(target_env = "gnu", target_env = "musl")), + target_os = "linux", + feature = "mem" +))] #![feature(compiler_builtins_lib)] #![feature(lang_items)] #![no_std] @@ -48,9 +50,7 @@ fn zero() { let xs = &mut aligned.array; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), 0, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), 0, c) } assert_eq!(*xs, [0; 8]); @@ -59,9 +59,7 @@ fn zero() { let xs = &mut aligned.array; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), 0, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), 0, c) } assert_eq!(*xs, [1; 8]); } @@ -74,9 +72,7 @@ fn one() { let n = 1; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), n, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } assert_eq!(*xs, [0xef, 0, 0, 0, 0, 0, 0, 0]); @@ -85,9 +81,7 @@ fn one() { let xs = &mut aligned.array; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), n, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } assert_eq!(*xs, [0xef, 1, 1, 1, 1, 1, 1, 1]); } @@ -100,9 +94,7 @@ fn two() { let n = 2; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), n, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } assert_eq!(*xs, [0xef, 0xef, 0, 0, 0, 0, 0, 0]); @@ -111,9 +103,7 @@ fn two() { let xs = &mut aligned.array; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), n, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } assert_eq!(*xs, [0xef, 0xef, 1, 1, 1, 1, 
1, 1]); } @@ -126,9 +116,7 @@ fn three() { let n = 3; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), n, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } assert_eq!(*xs, [0xef, 0xef, 0xef, 0, 0, 0, 0, 0]); @@ -137,9 +125,7 @@ fn three() { let xs = &mut aligned.array; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), n, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } assert_eq!(*xs, [0xef, 0xef, 0xef, 1, 1, 1, 1, 1]); } @@ -152,9 +138,7 @@ fn four() { let n = 4; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), n, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0, 0, 0, 0]); @@ -163,9 +147,7 @@ fn four() { let xs = &mut aligned.array; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), n, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 1, 1, 1, 1]); } @@ -178,9 +160,7 @@ fn five() { let n = 5; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), n, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0, 0, 0]); @@ -189,9 +169,7 @@ fn five() { let xs = &mut aligned.array; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), n, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 1, 1, 1]); } @@ -204,9 +182,7 @@ fn six() { let n = 6; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), n, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0, 0]); @@ -215,9 +191,7 @@ fn six() { let xs = &mut aligned.array; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), n, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 1, 1]); } @@ -230,9 +204,7 @@ fn seven() { let n = 7; let c = 0xdeadbeef; - unsafe { - 
__aeabi_memset4(xs.as_mut_ptr(), n, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0]); @@ -241,9 +213,7 @@ fn seven() { let xs = &mut aligned.array; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), n, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 1]); } @@ -256,9 +226,7 @@ fn eight() { let n = 8; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), n, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef]); @@ -267,9 +235,7 @@ fn eight() { let xs = &mut aligned.array; let c = 0xdeadbeef; - unsafe { - __aeabi_memset4(xs.as_mut_ptr(), n, c) - } + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef]); } diff --git a/testcrate/tests/count_leading_zeros.rs b/testcrate/tests/count_leading_zeros.rs index 559650174..b50a7ce84 100644 --- a/testcrate/tests/count_leading_zeros.rs +++ b/testcrate/tests/count_leading_zeros.rs @@ -6,20 +6,20 @@ use compiler_builtins::int::__clzsi2; #[test] fn __clzsi2_test() { - let mut i: usize = core::usize::MAX; - // Check all values above 0 - while i > 0 { + let mut i: usize = core::usize::MAX; + // Check all values above 0 + while i > 0 { + assert_eq!(__clzsi2(i) as u32, i.leading_zeros()); + i >>= 1; + } + // check 0 also + i = 0; assert_eq!(__clzsi2(i) as u32, i.leading_zeros()); - i >>= 1; - } - // check 0 also - i = 0; - assert_eq!(__clzsi2(i) as u32, i.leading_zeros()); - // double check for bit patterns that aren't just solid 1s - i = 1; - for _ in 0..63 { - assert_eq!(__clzsi2(i) as u32, i.leading_zeros()); - i <<= 2; - i += 1; - } + // double check for bit patterns that aren't just solid 1s + i = 1; + for _ in 0..63 { + assert_eq!(__clzsi2(i) as u32, i.leading_zeros()); + i <<= 2; + i += 1; + } } diff --git 
a/testcrate/tests/generated.rs b/testcrate/tests/generated.rs index 28fe4be59..ee575cba8 100644 --- a/testcrate/tests/generated.rs +++ b/testcrate/tests/generated.rs @@ -6,23 +6,29 @@ extern crate compiler_builtins as builtins; -#[cfg(all(target_arch = "arm", - not(any(target_env = "gnu", target_env = "musl")), - target_os = "linux", - test))] +#[cfg(all( + target_arch = "arm", + not(any(target_env = "gnu", target_env = "musl")), + target_os = "linux", + test +))] extern crate utest_cortex_m_qemu; -#[cfg(all(target_arch = "arm", - not(any(target_env = "gnu", target_env = "musl")), - target_os = "linux", - test))] +#[cfg(all( + target_arch = "arm", + not(any(target_env = "gnu", target_env = "musl")), + target_os = "linux", + test +))] #[macro_use] extern crate utest_macros; -#[cfg(all(target_arch = "arm", - not(any(target_env = "gnu", target_env = "musl")), - target_os = "linux", - test))] +#[cfg(all( + target_arch = "arm", + not(any(target_env = "gnu", target_env = "musl")), + target_os = "linux", + test +))] macro_rules! 
panic { // overrides `panic!` ($($tt:tt)*) => { upanic!($($tt)*); From 6178e2c61105a9ff7fa1c4fc974b142b0c07ae3d Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 14 May 2019 14:41:07 -0700 Subject: [PATCH 0244/1459] Bump to 0.1.13 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4d0350de5..f08037432 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.12" +version = "0.1.13" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From cf98161da7ed5217b6031796f0f60b4dd07148a4 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 15 May 2019 12:57:36 -0700 Subject: [PATCH 0245/1459] Remove the need for #[cfg] in #[use_c_shim_if] This commit tweaks the implementation of the synthetic `#[use_c_shim_if]` attribute, renaming it to `#[maybe_use_optimized_c_shim]` in the process. This no longer requires specifying a `#[cfg]` clause indicating when the optimized intrinsic should be used, but rather this is inferred and printed from the build script. The build script will now print out appropriate `#[cfg]` directives for rustc to indicate what intrinsics it's compiling. This should remove the need for us to keep the build script and the source in sync, but rather the build script can simply take care of everything. 
--- build.rs | 336 +++++++++++++++++++++++----------------------- ci/run.sh | 1 + src/float/conv.rs | 18 +-- src/int/mod.rs | 2 +- src/int/mul.rs | 2 +- src/int/sdiv.rs | 14 +- src/int/shift.rs | 6 +- src/int/udiv.rs | 18 +-- src/macros.rs | 23 ++-- 9 files changed, 198 insertions(+), 222 deletions(-) diff --git a/build.rs b/build.rs index a8301afa9..be5d42bb7 100644 --- a/build.rs +++ b/build.rs @@ -49,7 +49,6 @@ fn main() { if !target.contains("wasm32") && !target.contains("nvptx") && !target.starts_with("riscv") { #[cfg(feature = "c")] c::compile(&llvm_target); - println!("cargo:rustc-cfg=use_c"); } } @@ -91,15 +90,14 @@ mod c { } } - fn extend(&mut self, sources: &[&'static str]) { + fn extend(&mut self, sources: &[(&'static str, &'static str)]) { // NOTE Some intrinsics have both a generic implementation (e.g. // `floatdidf.c`) and an arch optimized implementation // (`x86_64/floatdidf.c`). In those cases, we keep the arch optimized // implementation and discard the generic implementation. If we don't // and keep both implementations, the linker will yell at us about // duplicate symbols! 
- for &src in sources { - let symbol = Path::new(src).file_stem().unwrap().to_str().unwrap(); + for (symbol, src) in sources { if src.contains("/") { // Arch-optimized implementation (preferred) self.map.insert(symbol, src); @@ -155,42 +153,42 @@ mod c { let mut sources = Sources::new(); sources.extend(&[ - "absvdi2.c", - "absvsi2.c", - "addvdi3.c", - "addvsi3.c", - "apple_versioning.c", - "clzdi2.c", - "clzsi2.c", - "cmpdi2.c", - "ctzdi2.c", - "ctzsi2.c", - "divdc3.c", - "divsc3.c", - "divxc3.c", - "extendhfsf2.c", - "int_util.c", - "muldc3.c", - "mulsc3.c", - "mulvdi3.c", - "mulvsi3.c", - "mulxc3.c", - "negdf2.c", - "negdi2.c", - "negsf2.c", - "negvdi2.c", - "negvsi2.c", - "paritydi2.c", - "paritysi2.c", - "popcountdi2.c", - "popcountsi2.c", - "powixf2.c", - "subvdi3.c", - "subvsi3.c", - "truncdfhf2.c", - "truncdfsf2.c", - "truncsfhf2.c", - "ucmpdi2.c", + ("__absvdi2", "absvdi2.c"), + ("__absvsi2", "absvsi2.c"), + ("__addvdi3", "addvdi3.c"), + ("__addvsi3", "addvsi3.c"), + ("apple_versioning", "apple_versioning.c"), + ("__clzdi2", "clzdi2.c"), + ("__clzsi2", "clzsi2.c"), + ("__cmpdi2", "cmpdi2.c"), + ("__ctzdi2", "ctzdi2.c"), + ("__ctzsi2", "ctzsi2.c"), + ("__divdc3", "divdc3.c"), + ("__divsc3", "divsc3.c"), + ("__divxc3", "divxc3.c"), + ("__extendhfsf2", "extendhfsf2.c"), + ("__int_util", "int_util.c"), + ("__muldc3", "muldc3.c"), + ("__mulsc3", "mulsc3.c"), + ("__mulvdi3", "mulvdi3.c"), + ("__mulvsi3", "mulvsi3.c"), + ("__mulxc3", "mulxc3.c"), + ("__negdf2", "negdf2.c"), + ("__negdi2", "negdi2.c"), + ("__negsf2", "negsf2.c"), + ("__negvdi2", "negvdi2.c"), + ("__negvsi2", "negvsi2.c"), + ("__paritydi2", "paritydi2.c"), + ("__paritysi2", "paritysi2.c"), + ("__popcountdi2", "popcountdi2.c"), + ("__popcountsi2", "popcountsi2.c"), + ("__powixf2", "powixf2.c"), + ("__subvdi3", "subvdi3.c"), + ("__subvsi3", "subvsi3.c"), + ("__truncdfhf2", "truncdfhf2.c"), + ("__truncdfsf2", "truncdfsf2.c"), + ("__truncsfhf2", "truncsfhf2.c"), + ("__ucmpdi2", "ucmpdi2.c"), ]); // When 
compiling in rustbuild (the rust-lang/rust repo) this library @@ -198,43 +196,49 @@ mod c { // need, so include a few more that aren't typically needed by // LLVM/Rust. if cfg!(feature = "rustbuild") { - sources.extend(&["ffsdi2.c"]); + sources.extend(&[("__ffsdi2", "ffsdi2.c")]); } // On iOS and 32-bit OSX these are all just empty intrinsics, no need to // include them. if target_os != "ios" && (target_vendor != "apple" || target_arch != "x86") { sources.extend(&[ - "absvti2.c", - "addvti3.c", - "clzti2.c", - "cmpti2.c", - "ctzti2.c", - "ffsti2.c", - "mulvti3.c", - "negti2.c", - "negvti2.c", - "parityti2.c", - "popcountti2.c", - "subvti3.c", - "ucmpti2.c", + ("__absvti2", "absvti2.c"), + ("__addvti3", "addvti3.c"), + ("__clzti2", "clzti2.c"), + ("__cmpti2", "cmpti2.c"), + ("__ctzti2", "ctzti2.c"), + ("__ffsti2", "ffsti2.c"), + ("__mulvti3", "mulvti3.c"), + ("__negti2", "negti2.c"), + ("__negvti2", "negvti2.c"), + ("__parityti2", "parityti2.c"), + ("__popcountti2", "popcountti2.c"), + ("__subvti3", "subvti3.c"), + ("__ucmpti2", "ucmpti2.c"), ]); } if target_vendor == "apple" { sources.extend(&[ - "atomic_flag_clear.c", - "atomic_flag_clear_explicit.c", - "atomic_flag_test_and_set.c", - "atomic_flag_test_and_set_explicit.c", - "atomic_signal_fence.c", - "atomic_thread_fence.c", + ("atomic_flag_clear", "atomic_flag_clear.c"), + ("atomic_flag_clear_explicit", "atomic_flag_clear_explicit.c"), + ("atomic_flag_test_and_set", "atomic_flag_test_and_set.c"), + ( + "atomic_flag_test_and_set_explicit", + "atomic_flag_test_and_set_explicit.c", + ), + ("atomic_signal_fence", "atomic_signal_fence.c"), + ("atomic_thread_fence", "atomic_thread_fence.c"), ]); } if target_env == "msvc" { if target_arch == "x86_64" { - sources.extend(&["x86_64/floatdisf.c", "x86_64/floatdixf.c"]); + sources.extend(&[ + ("__floatdisf", "x86_64/floatdisf.c"), + ("__floatdixf", "x86_64/floatdixf.c"), + ]); } } else { // None of these seem to be used on x86_64 windows, and they've all @@ -242,59 +246,59 
@@ mod c { if target_os != "windows" { if target_arch == "x86_64" { sources.extend(&[ - "x86_64/floatdisf.c", - "x86_64/floatdixf.c", - "x86_64/floatundidf.S", - "x86_64/floatundisf.S", - "x86_64/floatundixf.S", + ("__floatdisf", "x86_64/floatdisf.c"), + ("__floatdixf", "x86_64/floatdixf.c"), + ("__floatundidf", "x86_64/floatundidf.S"), + ("__floatundisf", "x86_64/floatundisf.S"), + ("__floatundixf", "x86_64/floatundixf.S"), ]); } } if target_arch == "x86" { sources.extend(&[ - "i386/ashldi3.S", - "i386/ashrdi3.S", - "i386/divdi3.S", - "i386/floatdidf.S", - "i386/floatdisf.S", - "i386/floatdixf.S", - "i386/floatundidf.S", - "i386/floatundisf.S", - "i386/floatundixf.S", - "i386/lshrdi3.S", - "i386/moddi3.S", - "i386/muldi3.S", - "i386/udivdi3.S", - "i386/umoddi3.S", + ("__ashldi3", "i386/ashldi3.S"), + ("__ashrdi3", "i386/ashrdi3.S"), + ("__divdi3", "i386/divdi3.S"), + ("__floatdidf", "i386/floatdidf.S"), + ("__floatdisf", "i386/floatdisf.S"), + ("__floatdixf", "i386/floatdixf.S"), + ("__floatundidf", "i386/floatundidf.S"), + ("__floatundisf", "i386/floatundisf.S"), + ("__floatundixf", "i386/floatundixf.S"), + ("__lshrdi3", "i386/lshrdi3.S"), + ("__moddi3", "i386/moddi3.S"), + ("__muldi3", "i386/muldi3.S"), + ("__udivdi3", "i386/udivdi3.S"), + ("__umoddi3", "i386/umoddi3.S"), ]); } } if target_arch == "arm" && target_os != "ios" && target_env != "msvc" { sources.extend(&[ - "arm/aeabi_div0.c", - "arm/aeabi_drsub.c", - "arm/aeabi_frsub.c", - "arm/bswapdi2.S", - "arm/bswapsi2.S", - "arm/clzdi2.S", - "arm/clzsi2.S", - "arm/divmodsi4.S", - "arm/divsi3.S", - "arm/modsi3.S", - "arm/switch16.S", - "arm/switch32.S", - "arm/switch8.S", - "arm/switchu8.S", - "arm/sync_synchronize.S", - "arm/udivmodsi4.S", - "arm/udivsi3.S", - "arm/umodsi3.S", + ("__aeabi_div0", "arm/aeabi_div0.c"), + ("__aeabi_drsub", "arm/aeabi_drsub.c"), + ("__aeabi_frsub", "arm/aeabi_frsub.c"), + ("__bswapdi2", "arm/bswapdi2.S"), + ("__bswapsi2", "arm/bswapsi2.S"), + ("__clzdi2", "arm/clzdi2.S"), + 
("__clzsi2", "arm/clzsi2.S"), + ("__divmodsi4", "arm/divmodsi4.S"), + ("__divsi3", "arm/divsi3.S"), + ("__modsi3", "arm/modsi3.S"), + ("__switch16", "arm/switch16.S"), + ("__switch32", "arm/switch32.S"), + ("__switch8", "arm/switch8.S"), + ("__switchu8", "arm/switchu8.S"), + ("__sync_synchronize", "arm/sync_synchronize.S"), + ("__udivmodsi4", "arm/udivmodsi4.S"), + ("__udivsi3", "arm/udivsi3.S"), + ("__umodsi3", "arm/umodsi3.S"), ]); if target_os == "freebsd" { - sources.extend(&["clear_cache.c"]); + sources.extend(&[("__clear_cache", "clear_cache.c")]); } // First of all aeabi_cdcmp and aeabi_cfcmp are never called by LLVM. @@ -302,36 +306,36 @@ mod c { // Temporally workaround: exclude these files for big-endian targets. if !llvm_target[0].starts_with("thumbeb") && !llvm_target[0].starts_with("armeb") { sources.extend(&[ - "arm/aeabi_cdcmp.S", - "arm/aeabi_cdcmpeq_check_nan.c", - "arm/aeabi_cfcmp.S", - "arm/aeabi_cfcmpeq_check_nan.c", + ("__aeabi_cdcmp", "arm/aeabi_cdcmp.S"), + ("__aeabi_cdcmpeq_check_nan", "arm/aeabi_cdcmpeq_check_nan.c"), + ("__aeabi_cfcmp", "arm/aeabi_cfcmp.S"), + ("__aeabi_cfcmpeq_check_nan", "arm/aeabi_cfcmpeq_check_nan.c"), ]); } } if llvm_target[0] == "armv7" { sources.extend(&[ - "arm/sync_fetch_and_add_4.S", - "arm/sync_fetch_and_add_8.S", - "arm/sync_fetch_and_and_4.S", - "arm/sync_fetch_and_and_8.S", - "arm/sync_fetch_and_max_4.S", - "arm/sync_fetch_and_max_8.S", - "arm/sync_fetch_and_min_4.S", - "arm/sync_fetch_and_min_8.S", - "arm/sync_fetch_and_nand_4.S", - "arm/sync_fetch_and_nand_8.S", - "arm/sync_fetch_and_or_4.S", - "arm/sync_fetch_and_or_8.S", - "arm/sync_fetch_and_sub_4.S", - "arm/sync_fetch_and_sub_8.S", - "arm/sync_fetch_and_umax_4.S", - "arm/sync_fetch_and_umax_8.S", - "arm/sync_fetch_and_umin_4.S", - "arm/sync_fetch_and_umin_8.S", - "arm/sync_fetch_and_xor_4.S", - "arm/sync_fetch_and_xor_8.S", + ("__sync_fetch_and_add_4", "arm/sync_fetch_and_add_4.S"), + ("__sync_fetch_and_add_8", "arm/sync_fetch_and_add_8.S"), + 
("__sync_fetch_and_and_4", "arm/sync_fetch_and_and_4.S"), + ("__sync_fetch_and_and_8", "arm/sync_fetch_and_and_8.S"), + ("__sync_fetch_and_max_4", "arm/sync_fetch_and_max_4.S"), + ("__sync_fetch_and_max_8", "arm/sync_fetch_and_max_8.S"), + ("__sync_fetch_and_min_4", "arm/sync_fetch_and_min_4.S"), + ("__sync_fetch_and_min_8", "arm/sync_fetch_and_min_8.S"), + ("__sync_fetch_and_nand_4", "arm/sync_fetch_and_nand_4.S"), + ("__sync_fetch_and_nand_8", "arm/sync_fetch_and_nand_8.S"), + ("__sync_fetch_and_or_4", "arm/sync_fetch_and_or_4.S"), + ("__sync_fetch_and_or_8", "arm/sync_fetch_and_or_8.S"), + ("__sync_fetch_and_sub_4", "arm/sync_fetch_and_sub_4.S"), + ("__sync_fetch_and_sub_8", "arm/sync_fetch_and_sub_8.S"), + ("__sync_fetch_and_umax_4", "arm/sync_fetch_and_umax_4.S"), + ("__sync_fetch_and_umax_8", "arm/sync_fetch_and_umax_8.S"), + ("__sync_fetch_and_umin_4", "arm/sync_fetch_and_umin_4.S"), + ("__sync_fetch_and_umin_8", "arm/sync_fetch_and_umin_8.S"), + ("__sync_fetch_and_xor_4", "arm/sync_fetch_and_xor_4.S"), + ("__sync_fetch_and_xor_8", "arm/sync_fetch_and_xor_8.S"), ]); } @@ -345,73 +349,66 @@ mod c { // do not support double precision floating points conversions so the files // that include such instructions are not included for these targets. 
sources.extend(&[ - "arm/fixdfsivfp.S", - "arm/fixunsdfsivfp.S", - "arm/floatsidfvfp.S", - "arm/floatunssidfvfp.S", + ("__fixdfsivfp", "arm/fixdfsivfp.S"), + ("__fixunsdfsivfp", "arm/fixunsdfsivfp.S"), + ("__floatsidfvfp", "arm/floatsidfvfp.S"), + ("__floatunssidfvfp", "arm/floatunssidfvfp.S"), ]); } sources.extend(&[ - "arm/fixsfsivfp.S", - "arm/fixunssfsivfp.S", - "arm/floatsisfvfp.S", - "arm/floatunssisfvfp.S", - "arm/floatunssisfvfp.S", - "arm/restore_vfp_d8_d15_regs.S", - "arm/save_vfp_d8_d15_regs.S", - "arm/negdf2vfp.S", - "arm/negsf2vfp.S", + ("__fixsfsivfp", "arm/fixsfsivfp.S"), + ("__fixunssfsivfp", "arm/fixunssfsivfp.S"), + ("__floatsisfvfp", "arm/floatsisfvfp.S"), + ("__floatunssisfvfp", "arm/floatunssisfvfp.S"), + ("__floatunssisfvfp", "arm/floatunssisfvfp.S"), + ("__restore_vfp_d8_d15_regs", "arm/restore_vfp_d8_d15_regs.S"), + ("__save_vfp_d8_d15_regs", "arm/save_vfp_d8_d15_regs.S"), + ("__negdf2vfp", "arm/negdf2vfp.S"), + ("__negsf2vfp", "arm/negsf2vfp.S"), ]); } if target_arch == "aarch64" { sources.extend(&[ - "comparetf2.c", - "extenddftf2.c", - "extendsftf2.c", - "fixtfdi.c", - "fixtfsi.c", - "fixtfti.c", - "fixunstfdi.c", - "fixunstfsi.c", - "fixunstfti.c", - "floatditf.c", - "floatsitf.c", - "floatunditf.c", - "floatunsitf.c", - "trunctfdf2.c", - "trunctfsf2.c", + ("__comparetf2", "comparetf2.c"), + ("__extenddftf2", "extenddftf2.c"), + ("__extendsftf2", "extendsftf2.c"), + ("__fixtfdi", "fixtfdi.c"), + ("__fixtfsi", "fixtfsi.c"), + ("__fixtfti", "fixtfti.c"), + ("__fixunstfdi", "fixunstfdi.c"), + ("__fixunstfsi", "fixunstfsi.c"), + ("__fixunstfti", "fixunstfti.c"), + ("__floatditf", "floatditf.c"), + ("__floatsitf", "floatsitf.c"), + ("__floatunditf", "floatunditf.c"), + ("__floatunsitf", "floatunsitf.c"), + ("__trunctfdf2", "trunctfdf2.c"), + ("__trunctfsf2", "trunctfsf2.c"), ]); if target_os != "windows" { - sources.extend(&["multc3.c"]); + sources.extend(&[("__multc3", "multc3.c")]); } } // Remove the assembly implementations that won't 
compile for the target if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" { - sources.remove(&[ - "clzdi2", - "clzsi2", - "divmodsi4", - "divsi3", - "modsi3", - "switch16", - "switch32", - "switch8", - "switchu8", - "udivmodsi4", - "udivsi3", - "umodsi3", - ]); + let mut to_remove = Vec::new(); + for (k, v) in sources.map.iter() { + if v.ends_with(".S") { + to_remove.push(*k); + } + } + sources.remove(&to_remove); // But use some generic implementations where possible - sources.extend(&["clzdi2.c", "clzsi2.c"]) + sources.extend(&[("__clzdi2", "clzdi2.c"), ("__clzsi2", "clzsi2.c")]) } if llvm_target[0] == "thumbv7m" || llvm_target[0] == "thumbv7em" { - sources.remove(&["aeabi_cdcmp", "aeabi_cfcmp"]); + sources.remove(&["__aeabi_cdcmp", "__aeabi_cfcmp"]); } // When compiling in rustbuild (the rust-lang/rust repo) this build @@ -423,10 +420,11 @@ mod c { }; let src_dir = root.join("compiler-rt/lib/builtins"); - for src in sources.map.values() { + for (sym, src) in sources.map.iter() { let src = src_dir.join(src); cfg.file(&src); println!("cargo:rerun-if-changed={}", src.display()); + println!("cargo:rustc-cfg={}=\"optimized-c\"", sym); } cfg.compile("libcompiler-rt.a"); diff --git a/ci/run.sh b/ci/run.sh index ae32806ec..4d6d6c68f 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -1,5 +1,6 @@ set -ex +export CARGO_INCREMENTAL=0 cargo=cargo # Test our implementation diff --git a/src/float/conv.rs b/src/float/conv.rs index 21aac15c1..8a0fc6cb4 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -87,11 +87,7 @@ intrinsics! { int_to_float!(i, i32, f64) } - #[use_c_shim_if(any( - all(target_arch = "x86", not(target_env = "msvc")), - all(target_arch = "x86_64", not(windows)), - all(target_arch = "x86_64", target_env = "msvc"), - ))] + #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_l2f] pub extern "C" fn __floatdisf(i: i64) -> f32 { // On x86_64 LLVM will use native instructions for this conversion, we @@ -103,7 +99,7 @@ intrinsics! 
{ } } - #[use_c_shim_if(all(target_arch = "x86", not(target_env = "msvc")))] + #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_l2d] pub extern "C" fn __floatdidf(i: i64) -> f64 { // On x86_64 LLVM will use native instructions for this conversion, we @@ -135,19 +131,13 @@ intrinsics! { int_to_float!(i, u32, f64) } - #[use_c_shim_if(any( - all(target_arch = "x86", not(target_env = "msvc")), - all(target_arch = "x86_64", not(windows)), - ))] + #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_ul2f] pub extern "C" fn __floatundisf(i: u64) -> f32 { int_to_float!(i, u64, f32) } - #[use_c_shim_if(any( - all(target_arch = "x86", not(target_env = "msvc")), - all(target_arch = "x86_64", not(windows)), - ))] + #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_ul2d] pub extern "C" fn __floatundidf(i: u64) -> f64 { int_to_float!(i, u64, f64) diff --git a/src/int/mod.rs b/src/int/mod.rs index fd1f0c3c8..7587bc69e 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -302,7 +302,7 @@ impl_wide_int!(u32, u64, 32); impl_wide_int!(u64, u128, 64); intrinsics! { - #[use_c_shim_if(/* always if C compilation is enabled */)] + #[maybe_use_optimized_c_shim] #[cfg(any( target_pointer_width = "16", target_pointer_width = "32", diff --git a/src/int/mul.rs b/src/int/mul.rs index 376395ac0..8df58a27b 100644 --- a/src/int/mul.rs +++ b/src/int/mul.rs @@ -84,7 +84,7 @@ trait UMulo: Int { impl UMulo for u128 {} intrinsics! { - #[use_c_shim_if(all(target_arch = "x86", not(target_env = "msvc")))] + #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_lmul] pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 { a.mul(b) diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index 4b63697b4..ad7f67b1b 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -58,13 +58,13 @@ impl Divmod for i32 {} impl Divmod for i64 {} intrinsics! 
{ - #[use_c_shim_if(all(target_arch = "arm", not(target_os = "ios"), not(thumb_1)))] + #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_idiv] pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 { a.div(b) } - #[use_c_shim_if(all(target_arch = "x86", not(target_env = "msvc")))] + #[maybe_use_optimized_c_shim] pub extern "C" fn __divdi3(a: i64, b: i64) -> i64 { a.div(b) } @@ -74,15 +74,12 @@ intrinsics! { a.div(b) } - #[use_c_shim_if(all(target_arch = "arm", - not(target_os = "ios"), - not(target_env = "msvc"), - not(thumb_1)))] + #[maybe_use_optimized_c_shim] pub extern "C" fn __modsi3(a: i32, b: i32) -> i32 { a.mod_(b) } - #[use_c_shim_if(all(target_arch = "x86", not(target_env = "msvc")))] + #[maybe_use_optimized_c_shim] pub extern "C" fn __moddi3(a: i64, b: i64) -> i64 { a.mod_(b) } @@ -92,8 +89,7 @@ intrinsics! { a.mod_(b) } - #[use_c_shim_if(all(target_arch = "arm", not(target_env = "msvc"), - not(target_os = "ios"), not(thumb_1)))] + #[maybe_use_optimized_c_shim] pub extern "C" fn __divmodsi4(a: i32, b: i32, rem: &mut i32) -> i32 { a.divmod(b, rem, |a, b| __divsi3(a, b)) } diff --git a/src/int/shift.rs b/src/int/shift.rs index 4be588f16..d98622279 100644 --- a/src/int/shift.rs +++ b/src/int/shift.rs @@ -74,7 +74,7 @@ impl Lshr for u64 {} impl Lshr for u128 {} intrinsics! { - #[use_c_shim_if(all(target_arch = "x86", not(target_env = "msvc")))] + #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_llsl] pub extern "C" fn __ashldi3(a: u64, b: u32) -> u64 { a.ashl(b) @@ -84,7 +84,7 @@ intrinsics! { a.ashl(b) } - #[use_c_shim_if(all(target_arch = "x86", not(target_env = "msvc")))] + #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_lasr] pub extern "C" fn __ashrdi3(a: i64, b: u32) -> i64 { a.ashr(b) @@ -94,7 +94,7 @@ intrinsics! 
{ a.ashr(b) } - #[use_c_shim_if(all(target_arch = "x86", not(target_env = "msvc")))] + #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_llsr] pub extern "C" fn __lshrdi3(a: u64, b: u32) -> u64 { a.lshr(b) diff --git a/src/int/udiv.rs b/src/int/udiv.rs index 8837126de..cdec11d2f 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -152,9 +152,7 @@ macro_rules! udivmod_inner { } intrinsics! { - #[use_c_shim_if(all(target_arch = "arm", - not(target_os = "ios"), - not(thumb_1)))] + #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_uidiv] /// Returns `n / d` pub extern "C" fn __udivsi3(n: u32, d: u32) -> u32 { @@ -212,20 +210,14 @@ intrinsics! { (q << 1) | carry } - #[use_c_shim_if(all(target_arch = "arm", - not(target_os = "ios"), - not(target_env = "msvc"), - not(thumb_1)))] + #[maybe_use_optimized_c_shim] /// Returns `n % d` pub extern "C" fn __umodsi3(n: u32, d: u32) -> u32 { let q = __udivsi3(n, d); n - q * d } - #[use_c_shim_if(all(target_arch = "arm", - not(target_os = "ios"), - not(target_env = "msvc"), - not(thumb_1)))] + #[maybe_use_optimized_c_shim] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 { let q = __udivsi3(n, d); @@ -235,13 +227,13 @@ intrinsics! { q } - #[use_c_shim_if(all(target_arch = "x86", not(target_env = "msvc")))] + #[maybe_use_optimized_c_shim] /// Returns `n / d` pub extern "C" fn __udivdi3(n: u64, d: u64) -> u64 { __udivmoddi4(n, d, None) } - #[use_c_shim_if(all(target_arch = "x86", not(target_env = "msvc")))] + #[maybe_use_optimized_c_shim] /// Returns `n % d` pub extern "C" fn __umoddi3(n: u64, d: u64) -> u64 { let mut rem = 0; diff --git a/src/macros.rs b/src/macros.rs index e84338fae..4abdae6ee 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -30,8 +30,8 @@ /// /// A quick overview of attributes supported right now are: /// -/// * `use_c_shim_if` - takes a #[cfg] directive and falls back to the -/// C-compiled version if `use_c` is specified. 
+/// * `maybe_use_optimized_c_shim` - indicates that the Rust implementation is +/// ignored if an optimized C version was compiled. /// * `aapcs_on_arm` - forces the ABI of the function to be `"aapcs"` on ARM and /// the specified ABI everywhere else. /// * `unadjusted_on_win64` - like `aapcs_on_arm` this switches to the @@ -51,15 +51,14 @@ macro_rules! intrinsics { // to the architecture-specific versions which should be more optimized. The // purpose of this macro is to easily allow specifying this. // - // The argument to `use_c_shim_if` is a `#[cfg]` directive which, when true, - // will cause this crate's exported version of `$name` to just redirect to - // the C implementation. No symbol named `$name` will be in the object file - // for this crate itself. - // - // When the `#[cfg]` directive is false, or when the `c` feature is - // disabled, the provided implementation is used instead. + // The `#[maybe_use_optimized_c_shim]` attribute indicates that this + // intrinsic may have an optimized C version. In these situations the build + // script, if the C code is enabled and compiled, will emit a cfg directive + // to get passed to rustc for our compilation. If that cfg is set we skip + // the Rust implementation, but if the attribute is not enabled then we + // compile in the Rust implementation. ( - #[use_c_shim_if($($cfg_clause:tt)*)] + #[maybe_use_optimized_c_shim] $(#[$($attr:tt)*])* pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { $($body:tt)* @@ -68,7 +67,7 @@ macro_rules! intrinsics { $($rest:tt)* ) => ( - #[cfg(all(use_c, $($cfg_clause)*))] + #[cfg($name = "optimized-c")] pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { extern $abi { fn $name($($argname: $ty),*) -> $ret; @@ -78,7 +77,7 @@ macro_rules! intrinsics { } } - #[cfg(not(all(use_c, $($cfg_clause)*)))] + #[cfg(not($name = "optimized-c"))] intrinsics! 
{ $(#[$($attr)*])* pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { From 6566ad9e926e7d6bf54163d82f7c830e35b95b32 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 15 May 2019 14:47:58 -0700 Subject: [PATCH 0246/1459] Bump to 0.1.14 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f08037432..5cea06e35 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.13" +version = "0.1.14" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From aa41e0d25fcb2b11b9b5c269846dd70547f2a787 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 16 May 2019 07:30:36 -0700 Subject: [PATCH 0247/1459] Remove `compiler-rt` submodule from this repository This commit removes the `compiler-rt` submodule from this repository. The goal here is to align the `compiler-rt` used for compiling C intrinsics with the upstream rust-lang/rust's usage of `llvm-project`. Currently we have both an `llvm-project` repository as well as `compiler-rt`, but they can naturally get out of sync and it's just one more submodule to manage. The thinking here is that the feature `c` for this crate, when activated, will require the user to configure where the source code for `compiler-rt` is present. This places the onus on the builder of `compiler-builtins` to check-out and arrange for the appropriate `compiler-rt` source code to be placed somewhere. For rust-lang/rust this is already done with the `llvm-project` submodule, and we can arrange for it to happen on this crate's CI anyway. For users of this crate this is a bit of a bummer, but `c` is disabled by default anyway and it seems unlikely that `c` is explicitly opted in to all that much. 
(given the purpose of this crate) This should allow us to archive the `compiler-rt` repository and simply use `llvm-project` in the rust-lang/rust repository. --- .gitmodules | 3 --- build.rs | 20 ++++++++++++-------- ci/azure-steps.yml | 12 +++++++----- ci/run-docker.sh | 1 + ci/run.sh | 1 - compiler-rt | 1 - 6 files changed, 20 insertions(+), 18 deletions(-) delete mode 160000 compiler-rt diff --git a/.gitmodules b/.gitmodules index 50ed51e93..a71118ff9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ -[submodule "compiler-rt"] - path = compiler-rt - url = https://github.com/rust-lang/compiler-rt [submodule "libm"] path = libm url = https://github.com/rust-lang-nursery/libm diff --git a/build.rs b/build.rs index be5d42bb7..c714bc15d 100644 --- a/build.rs +++ b/build.rs @@ -76,7 +76,7 @@ mod c { use std::collections::BTreeMap; use std::env; - use std::path::Path; + use std::path::PathBuf; struct Sources { // SYMBOL -> PATH TO SOURCE @@ -411,15 +411,19 @@ mod c { sources.remove(&["__aeabi_cdcmp", "__aeabi_cfcmp"]); } - // When compiling in rustbuild (the rust-lang/rust repo) this build - // script runs from a directory other than this root directory. - let root = if cfg!(feature = "rustbuild") { - Path::new("../../libcompiler_builtins") - } else { - Path::new(".") + // When compiling the C code we require the user to tell us where the + // source code is, and this is largely done so when we're compiling as + // part of rust-lang/rust we can use the same llvm-project repository as + // rust-lang/rust. 
+ let root = match env::var_os("RUST_COMPILER_RT_ROOT") { + Some(s) => PathBuf::from(s), + None => panic!("RUST_COMPILER_RT_ROOT is not set"), }; + if !root.exists() { + panic!("RUST_COMPILER_RT_ROOT={} does not exist", root.display()); + } - let src_dir = root.join("compiler-rt/lib/builtins"); + let src_dir = root.join("lib/builtins"); for (sym, src) in sources.map.iter() { let src = src_dir.join(src); cfg.file(&src); diff --git a/ci/azure-steps.yml b/ci/azure-steps.yml index c8a954024..f08beca05 100644 --- a/ci/azure-steps.yml +++ b/ci/azure-steps.yml @@ -4,13 +4,15 @@ steps: - template: azure-install-rust.yml - - script: rustup component add rust-src - displayName: Install Rust sources - condition: eq( variables['XARGO'], '1' ) - - bash: rustup target add $TARGET displayName: Install Rust target - condition: ne( variables['XARGO'], '1' ) + + - bash: | + set -e + curl -L https://github.com/rust-lang/llvm-project/archive/rustc/8.0-2019-03-18.tar.gz | \ + tar xzf - --strip-components 1 llvm-project-rustc-8.0-2019-03-18/compiler-rt + echo '##vso[task.setvariable variable=RUST_COMPILER_RT_ROOT]./compiler-rt' + displayName: "Download compiler-rt reference sources" - bash: ./ci/run.sh $TARGET condition: ne( variables['Agent.OS'], 'Linux' ) diff --git a/ci/run-docker.sh b/ci/run-docker.sh index ddb970c1b..dbc86ca54 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -18,6 +18,7 @@ run() { --user $(id -u):$(id -g) \ -e CARGO_HOME=/cargo \ -e CARGO_TARGET_DIR=/target \ + -e RUST_COMPILER_RT_ROOT \ -v $HOME/.cargo:/cargo \ -v `pwd`/target:/target \ -v `pwd`:/checkout:ro \ diff --git a/ci/run.sh b/ci/run.sh index 4d6d6c68f..ae32806ec 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -1,6 +1,5 @@ set -ex -export CARGO_INCREMENTAL=0 cargo=cargo # Test our implementation diff --git a/compiler-rt b/compiler-rt deleted file mode 160000 index 03fc28f92..000000000 --- a/compiler-rt +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 03fc28f9273eeab16f1005f982dfde5900bddb29 From 
4cd9d1cc5349e2759e81100096b77c0f83b2029d Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 16 May 2019 08:12:14 -0700 Subject: [PATCH 0248/1459] Bump to 0.1.15 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 5cea06e35..755dcf5e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.14" +version = "0.1.15" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From fc6d7b015fa2d5cab42c4cb487f4969631bcecfd Mon Sep 17 00:00:00 2001 From: Benjamin Schultzer Date: Thu, 16 May 2019 17:09:33 -0700 Subject: [PATCH 0249/1459] Run musl test in debug mode --- libm/ci/run.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libm/ci/run.sh b/libm/ci/run.sh index d28811300..42c241645 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -6,7 +6,6 @@ TARGET=$1 cargo test --target $TARGET cargo test --target $TARGET --release -# FIXME(#4) overflow checks in non-release currently cause issues -#cargo test --features 'checked musl-reference-tests' --target $TARGET +cargo test --features 'checked musl-reference-tests' --target $TARGET cargo test --features 'checked musl-reference-tests' --target $TARGET --release From b36809829480c7142e37ef5cf2ed5654c4b7bf9f Mon Sep 17 00:00:00 2001 From: Benjamin Schultzer Date: Thu, 16 May 2019 23:06:43 -0700 Subject: [PATCH 0250/1459] Add docs --- libm/src/math/acos.rs | 5 +++++ libm/src/math/acosf.rs | 5 +++++ libm/src/math/acosh.rs | 6 +++++- libm/src/math/acoshf.rs | 6 +++++- libm/src/math/asin.rs | 5 +++++ libm/src/math/asinf.rs | 5 +++++ libm/src/math/asinh.rs | 4 ++++ libm/src/math/asinhf.rs | 4 ++++ libm/src/math/atan.rs | 4 ++++ libm/src/math/atan2.rs | 5 +++++ libm/src/math/atan2f.rs | 5 +++++ libm/src/math/atanf.rs | 4 ++++ libm/src/math/atanh.rs | 4 ++++ libm/src/math/atanhf.rs | 4 ++++ 
libm/src/math/cbrt.rs | 3 +++ libm/src/math/cbrtf.rs | 3 +++ libm/src/math/ceil.rs | 3 +++ libm/src/math/ceilf.rs | 3 +++ libm/src/math/copysign.rs | 4 ++++ libm/src/math/copysignf.rs | 4 ++++ libm/src/math/cosh.rs | 5 +++++ libm/src/math/coshf.rs | 5 +++++ libm/src/math/erf.rs | 11 +++++++++++ libm/src/math/erff.rs | 11 +++++++++++ libm/src/math/exp.rs | 4 ++++ libm/src/math/exp2.rs | 4 ++++ libm/src/math/exp2f.rs | 4 ++++ libm/src/math/expf.rs | 4 ++++ libm/src/math/expm1.rs | 7 +++++++ libm/src/math/expm1f.rs | 7 +++++++ libm/src/math/fabs.rs | 3 +++ libm/src/math/fabsf.rs | 3 +++ libm/src/math/fdim.rs | 8 ++++++++ libm/src/math/fdimf.rs | 8 ++++++++ libm/src/math/floor.rs | 3 +++ libm/src/math/floorf.rs | 3 +++ libm/src/math/fma.rs | 5 +++++ libm/src/math/fmaf.rs | 6 ++++++ 38 files changed, 185 insertions(+), 2 deletions(-) diff --git a/libm/src/math/acos.rs b/libm/src/math/acos.rs index 055888ffa..d5e1f6865 100644 --- a/libm/src/math/acos.rs +++ b/libm/src/math/acos.rs @@ -55,6 +55,11 @@ fn r(z: f64) -> f64 { p / q } +/// Arccosine (f64) +/// +/// Computes the inverse cosine (arc cosine) of the input value. +/// Arguments must be in the range -1 to 1. +/// Returns values in radians, in the range of 0 to pi. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn acos(x: f64) -> f64 { diff --git a/libm/src/math/acosf.rs b/libm/src/math/acosf.rs index a6061ae80..d0598e811 100644 --- a/libm/src/math/acosf.rs +++ b/libm/src/math/acosf.rs @@ -29,6 +29,11 @@ fn r(z: f32) -> f32 { p / q } +/// Arccosine (f32) +/// +/// Computes the inverse cosine (arc cosine) of the input value. +/// Arguments must be in the range -1 to 1. +/// Returns values in radians, in the range of 0 to pi. 
#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn acosf(x: f32) -> f32 { diff --git a/libm/src/math/acosh.rs b/libm/src/math/acosh.rs index 95dc57d81..ac7a5f1c6 100644 --- a/libm/src/math/acosh.rs +++ b/libm/src/math/acosh.rs @@ -2,7 +2,11 @@ use super::{log, log1p, sqrt}; const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa39ef*/ -/* acosh(x) = log(x + sqrt(x*x-1)) */ +/// Inverse hyperbolic cosine (f64) +/// +/// Calculates the inverse hyperbolic cosine of `x`. +/// Is defined as `log(x + sqrt(x*x-1))`. +/// `x` must be a number greater than or equal to 1. pub fn acosh(x: f64) -> f64 { let u = x.to_bits(); let e = ((u >> 52) as usize) & 0x7ff; diff --git a/libm/src/math/acoshf.rs b/libm/src/math/acoshf.rs index f50a00324..0879e1edb 100644 --- a/libm/src/math/acoshf.rs +++ b/libm/src/math/acoshf.rs @@ -2,7 +2,11 @@ use super::{log1pf, logf, sqrtf}; const LN2: f32 = 0.693147180559945309417232121458176568; -/* acosh(x) = log(x + sqrt(x*x-1)) */ +/// Inverse hyperbolic cosine (f32) +/// +/// Calculates the inverse hyperbolic cosine of `x`. +/// Is defined as `log(x + sqrt(x*x-1))`. +/// `x` must be a number greater than or equal to 1. pub fn acoshf(x: f32) -> f32 { let u = x.to_bits(); let a = u & 0x7fffffff; diff --git a/libm/src/math/asin.rs b/libm/src/math/asin.rs index 2aee72b28..774475e51 100644 --- a/libm/src/math/asin.rs +++ b/libm/src/math/asin.rs @@ -62,6 +62,11 @@ fn comp_r(z: f64) -> f64 { p / q } +/// Arcsine (f64) +/// +/// Computes the inverse sine (arc sine) of the argument `x`. +/// Arguments to asin must be in the range -1 to 1. +/// Returns values in radians, in the range of -pi/2 to pi/2. 
#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn asin(mut x: f64) -> f64 { diff --git a/libm/src/math/asinf.rs b/libm/src/math/asinf.rs index 979f1a654..ce0f4a997 100644 --- a/libm/src/math/asinf.rs +++ b/libm/src/math/asinf.rs @@ -31,6 +31,11 @@ fn r(z: f32) -> f32 { p / q } +/// Arcsine (f32) +/// +/// Computes the inverse sine (arc sine) of the argument `x`. +/// Arguments to asin must be in the range -1 to 1. +/// Returns values in radians, in the range of -pi/2 to pi/2. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn asinf(mut x: f32) -> f32 { diff --git a/libm/src/math/asinh.rs b/libm/src/math/asinh.rs index b29093b23..14295357a 100644 --- a/libm/src/math/asinh.rs +++ b/libm/src/math/asinh.rs @@ -3,6 +3,10 @@ use super::{log, log1p, sqrt}; const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa39ef*/ /* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ +/// Inverse hyperbolic sine (f64) +/// +/// Calculates the inverse hyperbolic sine of `x`. +/// Is defined as `sgn(x)*log(|x|+sqrt(x*x+1))`. pub fn asinh(mut x: f64) -> f64 { let mut u = x.to_bits(); let e = ((u >> 52) as usize) & 0x7ff; diff --git a/libm/src/math/asinhf.rs b/libm/src/math/asinhf.rs index 981243303..e22a29132 100644 --- a/libm/src/math/asinhf.rs +++ b/libm/src/math/asinhf.rs @@ -3,6 +3,10 @@ use super::{log1pf, logf, sqrtf}; const LN2: f32 = 0.693147180559945309417232121458176568; /* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ +/// Inverse hyperbolic sine (f32) +/// +/// Calculates the inverse hyperbolic sine of `x`. +/// Is defined as `sgn(x)*log(|x|+sqrt(x*x+1))`. 
pub fn asinhf(mut x: f32) -> f32 { let u = x.to_bits(); let i = u & 0x7fffffff; diff --git a/libm/src/math/atan.rs b/libm/src/math/atan.rs index 94594080c..d2684ece8 100644 --- a/libm/src/math/atan.rs +++ b/libm/src/math/atan.rs @@ -60,6 +60,10 @@ const AT: [f64; 11] = [ 1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */ ]; +/// Arctangent (f64) +/// +/// Computes the inverse tangent (arc tangent) of the input value. +/// Returns a value in radians, in the range of -pi/2 to pi/2. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atan(x: f64) -> f64 { diff --git a/libm/src/math/atan2.rs b/libm/src/math/atan2.rs index 313bec4b9..08385cd10 100644 --- a/libm/src/math/atan2.rs +++ b/libm/src/math/atan2.rs @@ -43,6 +43,11 @@ use super::fabs; const PI: f64 = 3.1415926535897931160E+00; /* 0x400921FB, 0x54442D18 */ const PI_LO: f64 = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */ +/// Arctangent of y/x (f64) +/// +/// Computes the inverse tangent (arc tangent) of `y/x`. +/// Produces the correct result even for angles near pi/2 or -pi/2 (that is, when `x` is near 0). +/// Returns a value in radians, in the range of -pi to pi. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atan2(y: f64, x: f64) -> f64 { diff --git a/libm/src/math/atan2f.rs b/libm/src/math/atan2f.rs index 94e3c7718..7bbe5f1d4 100644 --- a/libm/src/math/atan2f.rs +++ b/libm/src/math/atan2f.rs @@ -19,6 +19,11 @@ use super::fabsf; const PI: f32 = 3.1415927410e+00; /* 0x40490fdb */ const PI_LO: f32 = -8.7422776573e-08; /* 0xb3bbbd2e */ +/// Arctangent of y/x (f32) +/// +/// Computes the inverse tangent (arc tangent) of `y/x`. +/// Produces the correct result even for angles near pi/2 or -pi/2 (that is, when `x` is near 0). +/// Returns a value in radians, in the range of -pi to pi. 
#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atan2f(y: f32, x: f32) -> f32 { diff --git a/libm/src/math/atanf.rs b/libm/src/math/atanf.rs index 89b0afd6f..363e11d64 100644 --- a/libm/src/math/atanf.rs +++ b/libm/src/math/atanf.rs @@ -37,6 +37,10 @@ const A_T: [f32; 5] = [ 6.1687607318e-02, ]; +/// Arctangent (f32) +/// +/// Computes the inverse tangent (arc tangent) of the input value. +/// Returns a value in radians, in the range of -pi/2 to pi/2. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atanf(mut x: f32) -> f32 { diff --git a/libm/src/math/atanh.rs b/libm/src/math/atanh.rs index 2833715ab..79a989c42 100644 --- a/libm/src/math/atanh.rs +++ b/libm/src/math/atanh.rs @@ -1,6 +1,10 @@ use super::log1p; /* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ +/// Inverse hyperbolic tangent (f64) +/// +/// Calculates the inverse hyperbolic tangent of `x`. +/// Is defined as `log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2`. pub fn atanh(x: f64) -> f64 { let u = x.to_bits(); let e = ((u >> 52) as usize) & 0x7ff; diff --git a/libm/src/math/atanhf.rs b/libm/src/math/atanhf.rs index 709a95551..7b2f34d97 100644 --- a/libm/src/math/atanhf.rs +++ b/libm/src/math/atanhf.rs @@ -1,6 +1,10 @@ use super::log1pf; /* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ +/// Inverse hyperbolic tangent (f32) +/// +/// Calculates the inverse hyperbolic tangent of `x`. +/// Is defined as `log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2`. 
pub fn atanhf(mut x: f32) -> f32 { let mut u = x.to_bits(); let sign = (u >> 31) != 0; diff --git a/libm/src/math/cbrt.rs b/libm/src/math/cbrt.rs index ab11c497e..04469b159 100644 --- a/libm/src/math/cbrt.rs +++ b/libm/src/math/cbrt.rs @@ -27,6 +27,9 @@ const P2: f64 = 1.621429720105354466140; /* 0x3ff9f160, 0x4a49d6c2 */ const P3: f64 = -0.758397934778766047437; /* 0xbfe844cb, 0xbee751d9 */ const P4: f64 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */ +/// Cube root (f64) +/// +/// Computes the cube root of the argument. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cbrt(x: f64) -> f64 { diff --git a/libm/src/math/cbrtf.rs b/libm/src/math/cbrtf.rs index 19215b858..6e589c099 100644 --- a/libm/src/math/cbrtf.rs +++ b/libm/src/math/cbrtf.rs @@ -22,6 +22,9 @@ use core::f32; const B1: u32 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */ const B2: u32 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */ +/// Cube root (f32) +/// +/// Computes the cube root of the argument. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cbrtf(x: f32) -> f32 { diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index d337db200..59883a8a7 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -2,6 +2,9 @@ use core::f64; const TOINT: f64 = 1. / f64::EPSILON; +/// Ceil (f64) +/// +/// Finds the nearest integer greater than or equal to `x`. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ceil(x: f64) -> f64 { diff --git a/libm/src/math/ceilf.rs b/libm/src/math/ceilf.rs index 0be53c5cf..151a4f210 100644 --- a/libm/src/math/ceilf.rs +++ b/libm/src/math/ceilf.rs @@ -1,5 +1,8 @@ use core::f32; +/// Ceil (f32) +/// +/// Finds the nearest integer greater than or equal to `x`.
#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ceilf(x: f32) -> f32 { diff --git a/libm/src/math/copysign.rs b/libm/src/math/copysign.rs index 9c5362a5a..1527fb6ea 100644 --- a/libm/src/math/copysign.rs +++ b/libm/src/math/copysign.rs @@ -1,3 +1,7 @@ +/// Sign of Y, magnitude of X (f64) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. pub fn copysign(x: f64, y: f64) -> f64 { let mut ux = x.to_bits(); let uy = y.to_bits(); diff --git a/libm/src/math/copysignf.rs b/libm/src/math/copysignf.rs index b42fd39ad..35148561a 100644 --- a/libm/src/math/copysignf.rs +++ b/libm/src/math/copysignf.rs @@ -1,3 +1,7 @@ +/// Sign of Y, magnitude of X (f32) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. pub fn copysignf(x: f32, y: f32) -> f32 { let mut ux = x.to_bits(); let uy = y.to_bits(); diff --git a/libm/src/math/cosh.rs b/libm/src/math/cosh.rs index b6ba338b5..bac875566 100644 --- a/libm/src/math/cosh.rs +++ b/libm/src/math/cosh.rs @@ -2,6 +2,11 @@ use super::exp; use super::expm1; use super::k_expo2; +/// Hyperbolic cosine (f64) +/// +/// Computes the hyperbolic cosine of the argument x. +/// Is defined as `(exp(x) + exp(-x))/2` +/// Angles are specified in radians. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cosh(mut x: f64) -> f64 { diff --git a/libm/src/math/coshf.rs b/libm/src/math/coshf.rs index b37ee1f32..bf99e42f0 100644 --- a/libm/src/math/coshf.rs +++ b/libm/src/math/coshf.rs @@ -2,6 +2,11 @@ use super::expf; use super::expm1f; use super::k_expo2f; +/// Hyperbolic cosine (f32) +/// +/// Computes the hyperbolic cosine of the argument x. +/// Is defined as `(exp(x) + exp(-x))/2` +/// Angles are specified in radians.
#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn coshf(mut x: f32) -> f32 { diff --git a/libm/src/math/erf.rs b/libm/src/math/erf.rs index d53a4c83c..a2c617d34 100644 --- a/libm/src/math/erf.rs +++ b/libm/src/math/erf.rs @@ -214,6 +214,11 @@ fn erfc2(ix: u32, mut x: f64) -> f64 { exp(-z * z - 0.5625) * exp((z - x) * (z + x) + r / big_s) / x } +/// Error function (f64) +/// +/// Calculates an approximation to the “error function”, which estimates +/// the probability that an observation will fall within x standard +/// deviations of the mean (assuming a normal distribution). pub fn erf(x: f64) -> f64 { let r: f64; let s: f64; @@ -257,6 +262,12 @@ pub fn erf(x: f64) -> f64 { } } +/// Complementary error function (f64) +/// +/// Calculates the complementary probability. +/// Is `1 - erf(x)`. Is computed directly, so that you can use it to avoid +/// the loss of precision that would result from subtracting +/// large probabilities (on large `x`) from 1. pub fn erfc(x: f64) -> f64 { let r: f64; let s: f64; diff --git a/libm/src/math/erff.rs b/libm/src/math/erff.rs index ef67c335b..384052293 100644 --- a/libm/src/math/erff.rs +++ b/libm/src/math/erff.rs @@ -125,6 +125,11 @@ fn erfc2(mut ix: u32, mut x: f32) -> f32 { expf(-z * z - 0.5625) * expf((z - x) * (z + x) + r / big_s) / x } +/// Error function (f32) +/// +/// Calculates an approximation to the “error function”, which estimates +/// the probability that an observation will fall within x standard +/// deviations of the mean (assuming a normal distribution). pub fn erff(x: f32) -> f32 { let r: f32; let s: f32; @@ -168,6 +173,12 @@ pub fn erff(x: f32) -> f32 { } } +/// Complementary error function (f32) +/// +/// Calculates the complementary probability. +/// Is `1 - erf(x)`. Is computed directly, so that you can use it to avoid +/// the loss of precision that would result from subtracting +/// large probabilities (on large `x`) from 1.
pub fn erfcf(x: f32) -> f32 { let r: f32; let s: f32; diff --git a/libm/src/math/exp.rs b/libm/src/math/exp.rs index c32773186..5465b5693 100644 --- a/libm/src/math/exp.rs +++ b/libm/src/math/exp.rs @@ -77,6 +77,10 @@ const P3: f64 = 6.61375632143793436117e-05; /* 0x3F11566A, 0xAF25DE2C */ const P4: f64 = -1.65339022054652515390e-06; /* 0xBEBBBD41, 0xC5D26BF1 */ const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ +/// Exponential, base *e* (f64) +/// +/// Calculate the exponential of `x`, that is, *e* raised to the power `x` +/// (where *e* is the base of the natural system of logarithms, approximately 2.71828). #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp(mut x: f64) -> f64 { diff --git a/libm/src/math/exp2.rs b/libm/src/math/exp2.rs index be6a003c6..570ca315b 100644 --- a/libm/src/math/exp2.rs +++ b/libm/src/math/exp2.rs @@ -318,6 +318,10 @@ static TBL: [u64; TBLSIZE * 2] = [ // // Gal, S. and Bachelis, B. An Accurate Elementary Mathematical Library // for the IEEE Floating Point Standard. TOMS 17(1), 26-46 (1991). + +/// Exponential, base 2 (f64) +/// +/// Calculate `2^x`, that is, 2 raised to the power `x`. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp2(mut x: f64) -> f64 { diff --git a/libm/src/math/exp2f.rs b/libm/src/math/exp2f.rs index 32816104b..12c9e76a4 100644 --- a/libm/src/math/exp2f.rs +++ b/libm/src/math/exp2f.rs @@ -69,6 +69,10 @@ static EXP2FT: [u64; TBLSIZE] = [ // // Tang, P. Table-driven Implementation of the Exponential Function // in IEEE Floating-Point Arithmetic. TOMS 15(2), 144-157 (1989). + +/// Exponential, base 2 (f32) +/// +/// Calculate `2^x`, that is, 2 raised to the power `x`. 
#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp2f(mut x: f32) -> f32 { diff --git a/libm/src/math/expf.rs b/libm/src/math/expf.rs index baade2552..09323ec8d 100644 --- a/libm/src/math/expf.rs +++ b/libm/src/math/expf.rs @@ -26,6 +26,10 @@ const INV_LN2: f32 = 1.4426950216e+00; /* 0x3fb8aa3b */ const P1: f32 = 1.6666625440e-1; /* 0xaaaa8f.0p-26 */ const P2: f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */ +/// Exponential, base *e* (f32) +/// +/// Calculate the exponential of `x`, that is, *e* raised to the power `x` +/// (where *e* is the base of the natural system of logarithms, approximately 2.71828). #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn expf(mut x: f32) -> f32 { diff --git a/libm/src/math/expm1.rs b/libm/src/math/expm1.rs index 426163990..0d43b4e10 100644 --- a/libm/src/math/expm1.rs +++ b/libm/src/math/expm1.rs @@ -23,6 +23,13 @@ const Q3: f64 = -7.93650757867487942473e-05; /* BF14CE19 9EAADBB7 */ const Q4: f64 = 4.00821782732936239552e-06; /* 3ED0CFCA 86E65239 */ const Q5: f64 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ +/// Exponential, base *e*, minus 1 (f64) +/// +/// Calculates the exponential of `x` and subtracts 1, that is, *e* raised +/// to the power `x` minus 1 (where *e* is the base of the natural +/// system of logarithms, approximately 2.71828). +/// The result is accurate even for small values of `x`, +/// where using `exp(x)-1` would lose many significant digits.
#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn expm1(mut x: f64) -> f64 { diff --git a/libm/src/math/expm1f.rs b/libm/src/math/expm1f.rs index 4daa83c85..9bb223448 100644 --- a/libm/src/math/expm1f.rs +++ b/libm/src/math/expm1f.rs @@ -25,6 +25,13 @@ const INV_LN2: f32 = 1.4426950216e+00; /* 0x3fb8aa3b */ const Q1: f32 = -3.3333212137e-2; /* -0x888868.0p-28 */ const Q2: f32 = 1.5807170421e-3; /* 0xcf3010.0p-33 */ +/// Exponential, base *e*, minus 1 (f32) +/// +/// Calculates the exponential of `x` and subtracts 1, that is, *e* raised +/// to the power `x` minus 1 (where *e* is the base of the natural +/// system of logarithms, approximately 2.71828). +/// The result is accurate even for small values of `x`, +/// where using `exp(x)-1` would lose many significant digits. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn expm1f(mut x: f32) -> f32 { diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs index 0824bd593..52a9adcbf 100644 --- a/libm/src/math/fabs.rs +++ b/libm/src/math/fabs.rs @@ -1,5 +1,8 @@ use core::u64; +/// Absolute value (magnitude) (f64) +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fabs(x: f64) -> f64 { diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs index 859508f9b..5942d983a 100644 --- a/libm/src/math/fabsf.rs +++ b/libm/src/math/fabsf.rs @@ -1,3 +1,6 @@ +/// Absolute value (magnitude) (f32) +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`.
#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fabsf(x: f32) -> f32 { diff --git a/libm/src/math/fdim.rs b/libm/src/math/fdim.rs index d9aca8611..06edc9960 100644 --- a/libm/src/math/fdim.rs +++ b/libm/src/math/fdim.rs @@ -1,5 +1,13 @@ use core::f64; +/// Positive difference (f64) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fdim(x: f64, y: f64) -> f64 { diff --git a/libm/src/math/fdimf.rs b/libm/src/math/fdimf.rs index bcda8ee94..f1ad5896b 100644 --- a/libm/src/math/fdimf.rs +++ b/libm/src/math/fdimf.rs @@ -1,5 +1,13 @@ use core::f32; +/// Positive difference (f32) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fdimf(x: f32, y: f32) -> f32 { diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index c705ae501..f6068c697 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -2,6 +2,9 @@ use core::f64; const TOINT: f64 = 1. / f64::EPSILON; +/// Floor (f64) +/// +/// Finds the nearest integer less than or equal to `x`. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn floor(x: f64) -> f64 { diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs index 899dcf5a5..ae605e191 100644 --- a/libm/src/math/floorf.rs +++ b/libm/src/math/floorf.rs @@ -1,5 +1,8 @@ use core::f32; +/// Floor (f32) +/// +/// Finds the nearest integer less than or equal to `x`.
#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn floorf(x: f32) -> f32 { diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index 6b062481d..07d90f8b7 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -48,6 +48,11 @@ fn mul(x: u64, y: u64) -> (u64, u64) { (hi, lo) } +/// Floating multiply add (f64) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation: +/// Computes the value (as if) to infinite precision and rounds once to the result format, +/// according to the rounding mode characterized by the value of FLT_ROUNDS. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fma(x: f64, y: f64, z: f64) -> f64 { diff --git a/libm/src/math/fmaf.rs b/libm/src/math/fmaf.rs index 9e5a55f44..e77e0fa4a 100644 --- a/libm/src/math/fmaf.rs +++ b/libm/src/math/fmaf.rs @@ -40,6 +40,12 @@ use super::fenv::{ * direct double-precision arithmetic suffices, except where double * rounding occurs. */ + +/// Floating multiply add (f32) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation: +/// Computes the value (as if) to infinite precision and rounds once to the result format, +/// according to the rounding mode characterized by the value of FLT_ROUNDS. 
#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 { From 82f898fd48ed7a225a25082501ad8e37bd945cf8 Mon Sep 17 00:00:00 2001 From: Sean Leather Date: Tue, 21 May 2019 08:43:50 +0200 Subject: [PATCH 0251/1459] Fix typo: mingw_unwinding --- examples/intrinsics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 3debffa45..f6980fcb5 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -399,7 +399,7 @@ pub fn _Unwind_Resume() {} pub extern "C" fn eh_personality() {} #[cfg(all(windows, target_env = "gnu"))] -mod mingw_unwidning { +mod mingw_unwinding { #[no_mangle] pub fn rust_eh_personality() {} #[no_mangle] From 144624e262a892d290bc0b69765e4e41278f4fc3 Mon Sep 17 00:00:00 2001 From: Igor null Date: Mon, 3 Jun 2019 13:16:03 +0300 Subject: [PATCH 0252/1459] fixed add overflow in exp2 --- libm/src/math/exp2.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libm/src/math/exp2.rs b/libm/src/math/exp2.rs index 570ca315b..c2192fde5 100644 --- a/libm/src/math/exp2.rs +++ b/libm/src/math/exp2.rs @@ -373,7 +373,7 @@ pub fn exp2(mut x: f64) -> f64 { /* Reduce x, computing z, i0, and k. 
*/ let ui = f64::to_bits(x + redux); let mut i0 = ui as u32; - i0 += TBLSIZE as u32 / 2; + i0 = i0.wrapping_add(TBLSIZE as u32 / 2); let ku = i0 / TBLSIZE as u32 * TBLSIZE as u32; let ki = ku as i32 / TBLSIZE as i32; i0 %= TBLSIZE as u32; @@ -387,3 +387,9 @@ pub fn exp2(mut x: f64) -> f64 { scalbn(r, ki) } + +#[test] +fn i0_wrap_test() { + let x = -3.0 / 256.0; + assert_eq!(exp2(x), f64::from_bits(0x3fefbdba3692d514)); +} From 9094170192087b89b08a029b860956bf2c67ee0f Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 5 Jun 2019 12:14:53 -0700 Subject: [PATCH 0253/1459] Attempt to fix CI --- libm/ci/azure-install-rust.yml | 14 ++++++++------ libm/ci/run-docker.sh | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/libm/ci/azure-install-rust.yml b/libm/ci/azure-install-rust.yml index fa7eae459..c5a53122f 100644 --- a/libm/ci/azure-install-rust.yml +++ b/libm/ci/azure-install-rust.yml @@ -5,15 +5,17 @@ steps: if [ "$toolchain" = "" ]; then toolchain=stable fi - curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain $toolchain - echo "##vso[task.prependpath]$HOME/.cargo/bin" + if command -v rustup; then + rustup update $toolchain + rustup default $toolchain + else + curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain $toolchain + echo "##vso[task.setvariable variable=PATH;]$PATH:$HOME/.cargo/bin" + fi displayName: Install rust (unix) condition: ne( variables['Agent.OS'], 'Windows_NT' ) - - script: | - curl -sSf -o rustup-init.exe https://win.rustup.rs - rustup-init.exe -y --default-toolchain stable-%TARGET% - echo ##vso[task.prependpath]%USERPROFILE%\.cargo\bin + - bash: rustup update stable-$TOOLCHAIN && rustup default stable-$TOOLCHAIN displayName: Install rust (windows) condition: eq( variables['Agent.OS'], 'Windows_NT' ) diff --git a/libm/ci/run-docker.sh b/libm/ci/run-docker.sh index 95bd3db48..e7b80c719 100755 --- a/libm/ci/run-docker.sh +++ b/libm/ci/run-docker.sh @@ -18,7 +18,7 @@ run() { --user $(id -u):$(id -g) \ 
-e CARGO_HOME=/cargo \ -e CARGO_TARGET_DIR=/target \ - -v $HOME/.cargo:/cargo \ + -v $(dirname $(dirname `which cargo`)):/cargo \ -v `pwd`/target:/target \ -v `pwd`:/checkout:ro \ -v `rustc --print sysroot`:/rust:ro \ From cc132254f4e5169475b72938767074bac42349d8 Mon Sep 17 00:00:00 2001 From: varkor Date: Wed, 5 Jun 2019 17:43:46 +0100 Subject: [PATCH 0254/1459] Add minf --- libm/CHANGELOG.md | 4 ++++ libm/src/lib.rs | 7 +++++++ libm/src/math/minf.rs | 13 +++++++++++++ libm/src/math/mod.rs | 2 ++ 4 files changed, 26 insertions(+) create mode 100644 libm/src/math/minf.rs diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index 1cc396455..f8d4e98f0 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -5,6 +5,10 @@ This project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] +### Added + +- minf + ## [v0.1.2] - 2018-07-18 ### Added diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 0d0f6155a..763920cc5 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -136,6 +136,8 @@ pub trait F32Ext: private::Sealed + Sized { fn acosh(self) -> Self; fn atanh(self) -> Self; + + fn min(self, other: Self) -> Self; } impl F32Ext for f32 { @@ -327,6 +329,11 @@ impl F32Ext for f32 { fn atanh(self) -> Self { atanhf(self) } + + #[inline] + fn min(self, other: Self) -> Self { + minf(self, other) + } } /// Math support for `f64` diff --git a/libm/src/math/minf.rs b/libm/src/math/minf.rs new file mode 100644 index 000000000..2098a2d75 --- /dev/null +++ b/libm/src/math/minf.rs @@ -0,0 +1,13 @@ +#[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn minf(x: f32, y: f32) -> f32 { + // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the + // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it + // is either x or y, canonicalized (this means results might differ among implementations). + // When either x or y is a signalingNaN, then the result is according to 6.2. 
+ // + // Since we do not support sNaN in Rust yet, we do not need to handle them. + // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by + // multiplying by 1.0. Should switch to the `canonicalize` when it works. + (if y.is_nan() || x < y { x } else { y }) * 1.0 +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index c4d247414..90c5fd311 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -166,6 +166,7 @@ mod tgamma; mod tgammaf; mod trunc; mod truncf; +mod minf; // Use separated imports instead of {}-grouped imports for easier merging. pub use self::acos::acos; @@ -272,6 +273,7 @@ pub use self::tgamma::tgamma; pub use self::tgammaf::tgammaf; pub use self::trunc::trunc; pub use self::truncf::truncf; +pub use self::minf::minf; // Private modules mod expo2; From 3797ede89914257f4407a6d84fb4983cba1aab1b Mon Sep 17 00:00:00 2001 From: varkor Date: Wed, 5 Jun 2019 17:44:06 +0100 Subject: [PATCH 0255/1459] Add min --- libm/CHANGELOG.md | 1 + libm/src/lib.rs | 7 +++++++ libm/src/math/min.rs | 13 +++++++++++++ libm/src/math/mod.rs | 2 ++ 4 files changed, 23 insertions(+) create mode 100644 libm/src/math/min.rs diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index f8d4e98f0..4ecd8c8ab 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -8,6 +8,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
### Added - minf +- min ## [v0.1.2] - 2018-07-18 diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 763920cc5..b72b58c1f 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -417,6 +417,8 @@ pub trait F64Ext: private::Sealed + Sized { fn acosh(self) -> Self; fn atanh(self) -> Self; + + fn min(self, other: Self) -> Self; } impl F64Ext for f64 { @@ -608,6 +610,11 @@ impl F64Ext for f64 { fn atanh(self) -> Self { atanh(self) } + + #[inline] + fn min(self, other: Self) -> Self { + min(self, other) + } } mod private { diff --git a/libm/src/math/min.rs b/libm/src/math/min.rs new file mode 100644 index 000000000..d0345cf8d --- /dev/null +++ b/libm/src/math/min.rs @@ -0,0 +1,13 @@ +#[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn min(x: f64, y: f64) -> f64 { + // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the + // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it + // is either x or y, canonicalized (this means results might differ among implementations). + // When either x or y is a signalingNaN, then the result is according to 6.2. + // + // Since we do not support sNaN in Rust yet, we do not need to handle them. + // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by + // multiplying by 1.0. Should switch to the `canonicalize` when it works. + (if y.is_nan() || x < y { x } else { y }) * 1.0 +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 90c5fd311..e2d706201 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -166,6 +166,7 @@ mod tgamma; mod tgammaf; mod trunc; mod truncf; +mod min; mod minf; // Use separated imports instead of {}-grouped imports for easier merging. 
@@ -273,6 +274,7 @@ pub use self::tgamma::tgamma; pub use self::tgammaf::tgammaf; pub use self::trunc::trunc; pub use self::truncf::truncf; +pub use self::min::min; pub use self::minf::minf; // Private modules From fa55a654ce464cca30b6b6e8064b980cfdee8e89 Mon Sep 17 00:00:00 2001 From: varkor Date: Wed, 5 Jun 2019 17:44:20 +0100 Subject: [PATCH 0256/1459] Add maxf --- libm/CHANGELOG.md | 1 + libm/src/lib.rs | 7 +++++++ libm/src/math/maxf.rs | 13 +++++++++++++ libm/src/math/mod.rs | 2 ++ 4 files changed, 23 insertions(+) create mode 100644 libm/src/math/maxf.rs diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index 4ecd8c8ab..7fb17cc9f 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -9,6 +9,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). - minf - min +- maxf ## [v0.1.2] - 2018-07-18 diff --git a/libm/src/lib.rs b/libm/src/lib.rs index b72b58c1f..368e25c8c 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -138,6 +138,8 @@ pub trait F32Ext: private::Sealed + Sized { fn atanh(self) -> Self; fn min(self, other: Self) -> Self; + + fn max(self, other: Self) -> Self; } impl F32Ext for f32 { @@ -334,6 +336,11 @@ impl F32Ext for f32 { fn min(self, other: Self) -> Self { minf(self, other) } + + #[inline] + fn max(self, other: Self) -> Self { + maxf(self, other) + } } /// Math support for `f64` diff --git a/libm/src/math/maxf.rs b/libm/src/math/maxf.rs new file mode 100644 index 000000000..ac0d22aa5 --- /dev/null +++ b/libm/src/math/maxf.rs @@ -0,0 +1,13 @@ +#[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn maxf(x: f32, y: f32) -> f32 { + // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the + // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it + // is either x or y, canonicalized (this means results might differ among implementations). + // When either x or y is a signalingNaN, then the result is according to 6.2. 
+ // + // Since we do not support sNaN in Rust yet, we do not need to handle them. + // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by + // multiplying by 1.0. Should switch to the `canonicalize` when it works. + (if x.is_nan() || x < y { y } else { x }) * 1.0 +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index e2d706201..9feadce7d 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -168,6 +168,7 @@ mod trunc; mod truncf; mod min; mod minf; +mod maxf; // Use separated imports instead of {}-grouped imports for easier merging. pub use self::acos::acos; @@ -276,6 +277,7 @@ pub use self::trunc::trunc; pub use self::truncf::truncf; pub use self::min::min; pub use self::minf::minf; +pub use self::maxf::maxf; // Private modules mod expo2; From a30214093489e25c45a199f579cbbe2afe7b0ece Mon Sep 17 00:00:00 2001 From: varkor Date: Wed, 5 Jun 2019 17:44:24 +0100 Subject: [PATCH 0257/1459] Add max --- libm/CHANGELOG.md | 1 + libm/src/lib.rs | 7 +++++++ libm/src/math/max.rs | 13 +++++++++++++ libm/src/math/mod.rs | 2 ++ 4 files changed, 23 insertions(+) create mode 100644 libm/src/math/max.rs diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index 7fb17cc9f..d7667c0a1 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -10,6 +10,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
- minf - min - maxf +- max ## [v0.1.2] - 2018-07-18 diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 368e25c8c..f77cda94b 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -426,6 +426,8 @@ pub trait F64Ext: private::Sealed + Sized { fn atanh(self) -> Self; fn min(self, other: Self) -> Self; + + fn max(self, other: Self) -> Self; } impl F64Ext for f64 { @@ -622,6 +624,11 @@ impl F64Ext for f64 { fn min(self, other: Self) -> Self { min(self, other) } + + #[inline] + fn max(self, other: Self) -> Self { + max(self, other) + } } mod private { diff --git a/libm/src/math/max.rs b/libm/src/math/max.rs new file mode 100644 index 000000000..3fbb48024 --- /dev/null +++ b/libm/src/math/max.rs @@ -0,0 +1,13 @@ +#[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn max(x: f64, y: f64) -> f64 { + // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the + // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it + // is either x or y, canonicalized (this means results might differ among implementations). + // When either x or y is a signalingNaN, then the result is according to 6.2. + // + // Since we do not support sNaN in Rust yet, we do not need to handle them. + // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by + // multiplying by 1.0. Should switch to the `canonicalize` when it works. + (if x.is_nan() || x < y { y } else { x }) * 1.0 +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 9feadce7d..c2bc9a475 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -168,6 +168,7 @@ mod trunc; mod truncf; mod min; mod minf; +mod max; mod maxf; // Use separated imports instead of {}-grouped imports for easier merging. 
@@ -277,6 +278,7 @@ pub use self::trunc::trunc; pub use self::truncf::truncf; pub use self::min::min; pub use self::minf::minf; +pub use self::max::max; pub use self::maxf::maxf; // Private modules From 4c967418d9618f9f1db7eee0a86e1f7b5de5bcfc Mon Sep 17 00:00:00 2001 From: varkor Date: Wed, 5 Jun 2019 20:59:05 +0100 Subject: [PATCH 0258/1459] Alphabetise --- libm/src/math/mod.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index c2bc9a475..bedc3a69e 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -140,6 +140,10 @@ mod log1pf; mod log2; mod log2f; mod logf; +mod max; +mod maxf; +mod min; +mod minf; mod modf; mod modff; mod pow; @@ -166,10 +170,6 @@ mod tgamma; mod tgammaf; mod trunc; mod truncf; -mod min; -mod minf; -mod max; -mod maxf; // Use separated imports instead of {}-grouped imports for easier merging. pub use self::acos::acos; @@ -250,6 +250,10 @@ pub use self::log1pf::log1pf; pub use self::log2::log2; pub use self::log2f::log2f; pub use self::logf::logf; +pub use self::max::max; +pub use self::maxf::maxf; +pub use self::min::min; +pub use self::minf::minf; pub use self::modf::modf; pub use self::modff::modff; pub use self::pow::pow; @@ -276,10 +280,6 @@ pub use self::tgamma::tgamma; pub use self::tgammaf::tgammaf; pub use self::trunc::trunc; pub use self::truncf::truncf; -pub use self::min::min; -pub use self::minf::minf; -pub use self::max::max; -pub use self::maxf::maxf; // Private modules mod expo2; From 27f0409e64eb4d981cb8acffcbd3eb8f0826aa04 Mon Sep 17 00:00:00 2001 From: varkor Date: Wed, 5 Jun 2019 21:13:25 +0100 Subject: [PATCH 0259/1459] Correct libm names --- libm/CHANGELOG.md | 6 +++--- libm/src/lib.rs | 8 ++++---- libm/src/math/{max.rs => fmax.rs} | 2 +- libm/src/math/{maxf.rs => fmaxf.rs} | 2 +- libm/src/math/{min.rs => fmin.rs} | 2 +- libm/src/math/{minf.rs => fminf.rs} | 2 +- libm/src/math/mod.rs | 16 ++++++++-------- 7 files changed, 19 
insertions(+), 19 deletions(-) rename libm/src/math/{max.rs => fmax.rs} (95%) rename libm/src/math/{maxf.rs => fmaxf.rs} (95%) rename libm/src/math/{min.rs => fmin.rs} (95%) rename libm/src/math/{minf.rs => fminf.rs} (95%) diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index d7667c0a1..62bfd0d5b 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -8,9 +8,9 @@ This project adheres to [Semantic Versioning](http://semver.org/). ### Added - minf -- min -- maxf -- max +- fmin +- fmaxf +- fmax ## [v0.1.2] - 2018-07-18 diff --git a/libm/src/lib.rs b/libm/src/lib.rs index f77cda94b..df3c8cf60 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -334,12 +334,12 @@ impl F32Ext for f32 { #[inline] fn min(self, other: Self) -> Self { - minf(self, other) + fminf(self, other) } #[inline] fn max(self, other: Self) -> Self { - maxf(self, other) + fmaxf(self, other) } } @@ -622,12 +622,12 @@ impl F64Ext for f64 { #[inline] fn min(self, other: Self) -> Self { - min(self, other) + fmin(self, other) } #[inline] fn max(self, other: Self) -> Self { - max(self, other) + fmax(self, other) } } diff --git a/libm/src/math/max.rs b/libm/src/math/fmax.rs similarity index 95% rename from libm/src/math/max.rs rename to libm/src/math/fmax.rs index 3fbb48024..22016d11c 100644 --- a/libm/src/math/max.rs +++ b/libm/src/math/fmax.rs @@ -1,6 +1,6 @@ #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn max(x: f64, y: f64) -> f64 { +pub fn fmax(x: f64, y: f64) -> f64 { // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it // is either x or y, canonicalized (this means results might differ among implementations). 
diff --git a/libm/src/math/maxf.rs b/libm/src/math/fmaxf.rs similarity index 95% rename from libm/src/math/maxf.rs rename to libm/src/math/fmaxf.rs index ac0d22aa5..a883fdaef 100644 --- a/libm/src/math/maxf.rs +++ b/libm/src/math/fmaxf.rs @@ -1,6 +1,6 @@ #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn maxf(x: f32, y: f32) -> f32 { +pub fn fmaxf(x: f32, y: f32) -> f32 { // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it // is either x or y, canonicalized (this means results might differ among implementations). diff --git a/libm/src/math/min.rs b/libm/src/math/fmin.rs similarity index 95% rename from libm/src/math/min.rs rename to libm/src/math/fmin.rs index d0345cf8d..d1ccc3a46 100644 --- a/libm/src/math/min.rs +++ b/libm/src/math/fmin.rs @@ -1,6 +1,6 @@ #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn min(x: f64, y: f64) -> f64 { +pub fn fmin(x: f64, y: f64) -> f64 { // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it // is either x or y, canonicalized (this means results might differ among implementations). diff --git a/libm/src/math/minf.rs b/libm/src/math/fminf.rs similarity index 95% rename from libm/src/math/minf.rs rename to libm/src/math/fminf.rs index 2098a2d75..43ec97cb5 100644 --- a/libm/src/math/minf.rs +++ b/libm/src/math/fminf.rs @@ -1,6 +1,6 @@ #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn minf(x: f32, y: f32) -> f32 { +pub fn fminf(x: f32, y: f32) -> f32 { // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the // canonicalized number if one operand is a number and the other a quiet NaN. 
Otherwise it // is either x or y, canonicalized (this means results might differ among implementations). diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index bedc3a69e..35ffe1a2c 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -112,6 +112,10 @@ mod floor; mod floorf; mod fma; mod fmaf; +mod fmax; +mod fmaxf; +mod fmin; +mod fminf; mod fmod; mod fmodf; mod frexp; @@ -140,10 +144,6 @@ mod log1pf; mod log2; mod log2f; mod logf; -mod max; -mod maxf; -mod min; -mod minf; mod modf; mod modff; mod pow; @@ -216,6 +216,10 @@ pub use self::floor::floor; pub use self::floorf::floorf; pub use self::fma::fma; pub use self::fmaf::fmaf; +pub use self::fmax::fmax; +pub use self::fmaxf::fmaxf; +pub use self::fmin::fmin; +pub use self::fminf::fminf; pub use self::fmod::fmod; pub use self::fmodf::fmodf; pub use self::frexp::frexp; @@ -250,10 +254,6 @@ pub use self::log1pf::log1pf; pub use self::log2::log2; pub use self::log2f::log2f; pub use self::logf::logf; -pub use self::max::max; -pub use self::maxf::maxf; -pub use self::min::min; -pub use self::minf::minf; pub use self::modf::modf; pub use self::modff::modff; pub use self::pow::pow; From bf3a9e9d0b9c4693c8e49fcdc626117e0d5b74fc Mon Sep 17 00:00:00 2001 From: varkor Date: Wed, 5 Jun 2019 21:32:31 +0100 Subject: [PATCH 0260/1459] Update libm for fmin/fmax/fminf/fmaxf --- libm | 2 +- src/math.rs | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/libm b/libm index 0ae442888..01bee72a9 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 0ae442888c7af72c0a335edd43dbbd74c751f119 +Subproject commit 01bee72a93ebaeea2883d0f963174c2b00d4fe68 diff --git a/src/math.rs b/src/math.rs index 1893f1c16..4b27cb80f 100644 --- a/src/math.rs +++ b/src/math.rs @@ -45,6 +45,10 @@ no_mangle! 
{ fn log10f(x: f32) -> f32; fn log(x: f64) -> f64; fn logf(x: f32) -> f32; + fn fmin(x: f64, y: f64) -> f64; + fn fminf(x: f32, y: f32) -> f32; + fn fmax(x: f64, y: f64) -> f64; + fn fmaxf(x: f32, y: f32) -> f32; fn round(x: f64) -> f64; fn roundf(x: f32) -> f32; fn sin(x: f64) -> f64; From 20827ad38f18b4d4244c5afb3d6499921da15d13 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 5 Jun 2019 16:09:27 -0700 Subject: [PATCH 0261/1459] Attempt to fix CI --- ci/azure-install-rust.yml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/ci/azure-install-rust.yml b/ci/azure-install-rust.yml index f44f8c59d..d0255f85b 100644 --- a/ci/azure-install-rust.yml +++ b/ci/azure-install-rust.yml @@ -4,17 +4,21 @@ parameters: steps: - bash: | set -e - curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain $TOOLCHAIN - echo "##vso[task.prependpath]$HOME/.cargo/bin" + if command -v rustup; then + rustup update $TOOLCHAIN + rustup default $TOOLCHAIN + else + curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain $TOOLCHAIN + echo "##vso[task.prependpath]$HOME/.cargo/bin" + fi displayName: Install rust condition: ne( variables['Agent.OS'], 'Windows_NT' ) env: TOOLCHAIN: ${{ parameters.toolchain }} - script: | - curl -sSf -o rustup-init.exe https://win.rustup.rs - rustup-init.exe -y --default-toolchain %TOOLCHAIN%-%TARGET% - echo ##vso[task.prependpath]%USERPROFILE%\.cargo\bin + rustup update --no-self-update %TOOLCHAIN%-%TARGET% + rustup default %TOOLCHAIN%-%TARGET% displayName: Install rust condition: eq( variables['Agent.OS'], 'Windows_NT' ) env: From 5d5a85e6ea3d761ea9937fa4fec1707f2f9c10d6 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 5 Jun 2019 16:19:26 -0700 Subject: [PATCH 0262/1459] More fixes for CI --- ci/run-docker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run-docker.sh b/ci/run-docker.sh index dbc86ca54..4bb2a78d9 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -19,7 +19,7 
@@ run() { -e CARGO_HOME=/cargo \ -e CARGO_TARGET_DIR=/target \ -e RUST_COMPILER_RT_ROOT \ - -v $HOME/.cargo:/cargo \ + -v $(dirname $(dirname `which cargo`)):/cargo \ -v `pwd`/target:/target \ -v `pwd`:/checkout:ro \ -v `rustc --print sysroot`:/rust:ro \ From d2b44e93b067ace24fbfc62b982ef0d446480aee Mon Sep 17 00:00:00 2001 From: varkor Date: Thu, 6 Jun 2019 01:09:47 +0100 Subject: [PATCH 0263/1459] Bump to 0.1.16 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 755dcf5e3..e0bb22d21 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.15" +version = "0.1.16" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From 60bba524f5794d6e83aa8d81110af1551d57e968 Mon Sep 17 00:00:00 2001 From: Diego Barrios Romero Date: Fri, 7 Jun 2019 18:23:43 +0200 Subject: [PATCH 0264/1459] Make module path compatible with Rust 1.31.0 --- libm/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index df3c8cf60..a47883d81 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -20,7 +20,7 @@ mod math; use core::{f32, f64}; -pub use math::*; +pub use self::math::*; /// Approximate equality with 1 ULP of tolerance #[doc(hidden)] From 5e4d04281e06273dd2c675d2aab9f352064e2ae7 Mon Sep 17 00:00:00 2001 From: Diego Barrios Romero Date: Wed, 12 Jun 2019 18:43:12 +0200 Subject: [PATCH 0265/1459] Add 0.1.3 changelog entry --- libm/CHANGELOG.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index 62bfd0d5b..aef65cfb3 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -5,6 +5,10 @@ This project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] +... 
+ +## [v0.1.3] - 2019-05-14 + ### Added - minf @@ -81,6 +85,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). - Initial release -[Unreleased]: https://github.com/japaric/libm/compare/v0.1.2...HEAD +[Unreleased]: https://github.com/japaric/libm/compare/0.1.3...HEAD +[v0.1.3]: https://github.com/japaric/libm/compare/v0.1.2...0.1.3 [v0.1.2]: https://github.com/japaric/libm/compare/v0.1.1...v0.1.2 [v0.1.1]: https://github.com/japaric/libm/compare/v0.1.0...v0.1.1 From 33828bd3fac8d9c432ffc948052c51932c013c3a Mon Sep 17 00:00:00 2001 From: Diego Barrios Romero Date: Wed, 12 Jun 2019 18:45:42 +0200 Subject: [PATCH 0266/1459] Add changelog entry for v0.1.4 release --- libm/CHANGELOG.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index aef65cfb3..28e27055d 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -7,6 +7,11 @@ This project adheres to [Semantic Versioning](http://semver.org/). ... +## [v0.1.4] - 2019-06-12 + +### Fixed +- Restored compatibility with Rust 1.31.0 + ## [v0.1.3] - 2019-05-14 ### Added @@ -85,7 +90,8 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
- Initial release -[Unreleased]: https://github.com/japaric/libm/compare/0.1.3...HEAD +[Unreleased]: https://github.com/japaric/libm/compare/v0.1.4...HEAD +[v0.1.4]: https://github.com/japaric/libm/compare/0.1.3...v0.1.4 [v0.1.3]: https://github.com/japaric/libm/compare/v0.1.2...0.1.3 [v0.1.2]: https://github.com/japaric/libm/compare/v0.1.1...v0.1.2 [v0.1.1]: https://github.com/japaric/libm/compare/v0.1.0...v0.1.1 From edce0b37640451f0d210e0babf107ecf70e59dfd Mon Sep 17 00:00:00 2001 From: Diego Barrios Romero Date: Wed, 12 Jun 2019 18:46:01 +0200 Subject: [PATCH 0267/1459] Bump version --- libm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index a61db5eba..8b272d294 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -7,7 +7,7 @@ keywords = ["libm", "math"] license = "MIT OR Apache-2.0" name = "libm" repository = "https://github.com/rust-lang-nursery/libm" -version = "0.1.3" +version = "0.1.4" edition = "2018" [features] From 938d62f88e251e9eab68b1169e37e3b277f76e4c Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 13 Jun 2019 09:21:50 -0700 Subject: [PATCH 0268/1459] Update Rust install task --- libm/ci/azure-install-rust.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/ci/azure-install-rust.yml b/libm/ci/azure-install-rust.yml index c5a53122f..f1cd87bcc 100644 --- a/libm/ci/azure-install-rust.yml +++ b/libm/ci/azure-install-rust.yml @@ -10,7 +10,7 @@ steps: rustup default $toolchain else curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain $toolchain - echo "##vso[task.setvariable variable=PATH;]$PATH:$HOME/.cargo/bin" + echo "##vso[task.prependpath]$HOME/.cargo/bin" fi displayName: Install rust (unix) condition: ne( variables['Agent.OS'], 'Windows_NT' ) From 082e3194fd7a5cb0f7fdcf37ae90acd9149a4322 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Tue, 25 Jun 2019 20:39:09 +0200 Subject: [PATCH 0269/1459] Fix doc for floorf --- libm/src/math/floorf.rs | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs index ae605e191..c04f18aee 100644 --- a/libm/src/math/floorf.rs +++ b/libm/src/math/floorf.rs @@ -1,6 +1,6 @@ use core::f32; -/// Floor (f64) +/// Floor (f32) /// /// Finds the nearest integer less than or equal to `x`. #[inline] From f6aa1e9c6245b222d8e4ec2c73c9fa8cf7954203 Mon Sep 17 00:00:00 2001 From: Igor null Date: Mon, 1 Jul 2019 17:05:46 +0300 Subject: [PATCH 0270/1459] Fixed rounding to negative zero --- libm/src/math/round.rs | 15 ++++++++++----- libm/src/math/roundf.rs | 11 ++++++++--- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs index 9a9723cfb..efbe68ae4 100644 --- a/libm/src/math/round.rs +++ b/libm/src/math/round.rs @@ -5,20 +5,20 @@ const TOINT: f64 = 1.0 / f64::EPSILON; #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn round(mut x: f64) -> f64 { - let (f, i) = (x, x.to_bits()); + let i = x.to_bits(); let e: u64 = i >> 52 & 0x7ff; let mut y: f64; if e >= 0x3ff + 52 { return x; } - if i >> 63 != 0 { - x = -x; - } if e < 0x3ff - 1 { // raise inexact if x!=0 force_eval!(x + TOINT); - return 0.0 * f; + return 0.0 * x; + } + if i >> 63 != 0 { + x = -x; } y = x + TOINT - TOINT - x; if y > 0.5 { @@ -35,3 +35,8 @@ pub fn round(mut x: f64) -> f64 { y } } + +#[test] +fn negative_zero() { + assert_eq!(round(-0.0_f64).to_bits(), (-0.0_f64).to_bits()); +} diff --git a/libm/src/math/roundf.rs b/libm/src/math/roundf.rs index 839d9469a..559d3a926 100644 --- a/libm/src/math/roundf.rs +++ b/libm/src/math/roundf.rs @@ -12,13 +12,13 @@ pub fn roundf(mut x: f32) -> f32 { if e >= 0x7f + 23 { return x; } - if i >> 31 != 0 { - x = -x; - } if e < 0x7f - 1 { force_eval!(x + TOINT); return 0.0 * x; } + if i >> 31 != 0 { + x = -x; + } y = x + TOINT - TOINT - x; if y > 0.5f32 { y = y + x - 1.0; @@ -33,3 +33,8 @@ pub fn roundf(mut x: f32) -> f32 { y } } + +#[test] +fn 
negative_zero() { + assert_eq!(roundf(-0.0_f32).to_bits(), (-0.0_f32).to_bits()); +} From 0847a43c6de2f33995862079e84fe485ce0ea47e Mon Sep 17 00:00:00 2001 From: Igor null Date: Mon, 1 Jul 2019 17:10:44 +0300 Subject: [PATCH 0271/1459] Fix incorrect f32<->f64 casting in j1f/y1f --- libm/src/math/j1f.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs index 83ac1acff..14e4ef5bf 100644 --- a/libm/src/math/j1f.rs +++ b/libm/src/math/j1f.rs @@ -49,7 +49,7 @@ fn common(ix: u32, x: f32, y1: bool, sign: bool) -> f32 { if sign { cc = -cc; } - return INVSQRTPI * (cc as f32) / sqrtf(x); + return (((INVSQRTPI as f64) * cc) / (sqrtf(x) as f64)) as f32; } /* R0/S0 on [0,2] */ @@ -356,3 +356,17 @@ fn qonef(x: f32) -> f32 { s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); return (0.375 + r / s) / x; } + +#[test] +fn test_j1f_2488() { + // 0x401F3E49 + assert_eq!( + j1f(2.4881766_f32), + 0.49999475_f32); +} +#[test] +fn test_y1f_2002() { + assert_eq!( + y1f(2.0000002_f32), + -0.10703229_f32); +} From 4c42720d7b7c124f02ec2a21112b949f5cf4f7d5 Mon Sep 17 00:00:00 2001 From: Igor null Date: Mon, 1 Jul 2019 17:18:59 +0300 Subject: [PATCH 0272/1459] fixed formatting in tests --- libm/src/math/j1f.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs index 14e4ef5bf..d165ce369 100644 --- a/libm/src/math/j1f.rs +++ b/libm/src/math/j1f.rs @@ -360,13 +360,9 @@ fn qonef(x: f32) -> f32 { #[test] fn test_j1f_2488() { // 0x401F3E49 - assert_eq!( - j1f(2.4881766_f32), - 0.49999475_f32); + assert_eq!(j1f(2.4881766_f32), 0.49999475_f32); } #[test] fn test_y1f_2002() { - assert_eq!( - y1f(2.0000002_f32), - -0.10703229_f32); + assert_eq!(y1f(2.0000002_f32), -0.10703229_f32); } From 88e81091420e4e916f174043ecde7850099a325c Mon Sep 17 00:00:00 2001 From: Igor null Date: Mon, 1 Jul 2019 17:21:43 +0300 Subject: [PATCH 
0273/1459] separate tests into #[cfg(test)] mod --- libm/src/math/j1f.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs index d165ce369..5095894d7 100644 --- a/libm/src/math/j1f.rs +++ b/libm/src/math/j1f.rs @@ -357,12 +357,16 @@ fn qonef(x: f32) -> f32 { return (0.375 + r / s) / x; } -#[test] -fn test_j1f_2488() { - // 0x401F3E49 - assert_eq!(j1f(2.4881766_f32), 0.49999475_f32); -} -#[test] -fn test_y1f_2002() { - assert_eq!(y1f(2.0000002_f32), -0.10703229_f32); +#[cfg(test)] +mod tests { + use super::{j1f, y1f}; + #[test] + fn test_j1f_2488() { + // 0x401F3E49 + assert_eq!(j1f(2.4881766_f32), 0.49999475_f32); + } + #[test] + fn test_y1f_2002() { + assert_eq!(y1f(2.0000002_f32), -0.10703229_f32); + } } From b62f830463fe20d983644d0255cd5d8971193e97 Mon Sep 17 00:00:00 2001 From: Igor null Date: Mon, 1 Jul 2019 17:23:52 +0300 Subject: [PATCH 0274/1459] move tests to separate #[cfg(test)] mod --- libm/src/math/round.rs | 11 ++++++++--- libm/src/math/roundf.rs | 11 ++++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs index efbe68ae4..67590d2c1 100644 --- a/libm/src/math/round.rs +++ b/libm/src/math/round.rs @@ -36,7 +36,12 @@ pub fn round(mut x: f64) -> f64 { } } -#[test] -fn negative_zero() { - assert_eq!(round(-0.0_f64).to_bits(), (-0.0_f64).to_bits()); +#[cfg(test)] +mod tests { + use super::round; + + #[test] + fn negative_zero() { + assert_eq!(round(-0.0_f64).to_bits(), (-0.0_f64).to_bits()); + } } diff --git a/libm/src/math/roundf.rs b/libm/src/math/roundf.rs index 559d3a926..85114be4b 100644 --- a/libm/src/math/roundf.rs +++ b/libm/src/math/roundf.rs @@ -34,7 +34,12 @@ pub fn roundf(mut x: f32) -> f32 { } } -#[test] -fn negative_zero() { - assert_eq!(roundf(-0.0_f32).to_bits(), (-0.0_f32).to_bits()); +#[cfg(test)] +mod tests { + use super::roundf; + + #[test] + fn negative_zero() { + 
assert_eq!(roundf(-0.0_f32).to_bits(), (-0.0_f32).to_bits()); + } } From f9ff0e35f353d67ce180f16e99088c8e0661d660 Mon Sep 17 00:00:00 2001 From: Benjamin Schultzer Date: Tue, 18 Jun 2019 17:56:58 -0700 Subject: [PATCH 0275/1459] Add benchmark suite Signed-off-by: Benjamin Schultzer --- libm/Cargo.toml | 2 + libm/README.md | 6 ++ libm/azure-pipelines.yml | 11 ++++ libm/benches/bench.rs | 118 +++++++++++++++++++++++++++++++++++++++ libm/src/math/pow.rs | 2 +- 5 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 libm/benches/bench.rs diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 8b272d294..2f68ecc01 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -30,6 +30,8 @@ members = [ [dev-dependencies] no-panic = "0.1.8" +rand = "0.6.5" +paste = "0.1.5" [build-dependencies] rand = { version = "0.6.5", optional = true } diff --git a/libm/README.md b/libm/README.md index 3df5b65ea..edd54d418 100644 --- a/libm/README.md +++ b/libm/README.md @@ -37,6 +37,12 @@ fn foo(x: f32) { The API documentation can be found [here](https://docs.rs/libm). 
+## Benchmark +[benchmark]: #benchmark +Run `cargo +nightly bench` + +NOTE: remember to have nightly installed `rustup install nightly` + ## Contributing Please check [CONTRIBUTING.md](CONTRIBUTING.md) diff --git a/libm/azure-pipelines.yml b/libm/azure-pipelines.yml index d8068e023..e9cb916db 100644 --- a/libm/azure-pipelines.yml +++ b/libm/azure-pipelines.yml @@ -71,3 +71,14 @@ jobs: - template: ci/azure-install-rust.yml - bash: cargo build -p cb displayName: "Check compiler-builtins still probably builds" + + - job: benchmarks + pool: + vmImage: ubuntu-16.04 + steps: + - template: ci/azure-install-rust.yml + - bash: cargo bench + displayName: "Benchmarks" + variables: + TOOLCHAIN: nightly + diff --git a/libm/benches/bench.rs b/libm/benches/bench.rs new file mode 100644 index 000000000..522ac4e3b --- /dev/null +++ b/libm/benches/bench.rs @@ -0,0 +1,118 @@ +#![feature(test)] + +extern crate paste; +extern crate rand; +extern crate test; + +use rand::Rng; +use test::Bencher; + +macro_rules! unary { + ($($func:ident),*) => ($( + paste::item! { + #[bench] + pub fn [<$func>](bh: &mut Bencher) { + let mut rng = rand::thread_rng(); + let x = rng.gen::(); + bh.iter(|| test::black_box(libm::[<$func>](x))) + } + #[bench] + pub fn [<$func f>](bh: &mut Bencher) { + let mut rng = rand::thread_rng(); + let x = rng.gen::(); + bh.iter(|| test::black_box(libm::[<$func f>](x))) + } + } + )*); +} +macro_rules! binary { + ($($func:ident),*) => ($( + paste::item! { + #[bench] + pub fn [<$func>](bh: &mut Bencher) { + let mut rng = rand::thread_rng(); + let x = rng.gen::(); + let y = rng.gen::(); + bh.iter(|| test::black_box(libm::[<$func>](x, y))) + } + #[bench] + pub fn [<$func f>](bh: &mut Bencher) { + let mut rng = rand::thread_rng(); + let x = rng.gen::(); + let y = rng.gen::(); + bh.iter(|| test::black_box(libm::[<$func f>](x, y))) + } + } + )*); + ($($func:ident);*) => ($( + paste::item! 
{ + #[bench] + pub fn [<$func>](bh: &mut Bencher) { + let mut rng = rand::thread_rng(); + let x = rng.gen::(); + let n = rng.gen::(); + bh.iter(|| test::black_box(libm::[<$func>](x, n))) + } + #[bench] + pub fn [<$func f>](bh: &mut Bencher) { + let mut rng = rand::thread_rng(); + let x = rng.gen::(); + let n = rng.gen::(); + bh.iter(|| test::black_box(libm::[<$func f>](x, n))) + } + } + )*); +} +macro_rules! trinary { + ($($func:ident),*) => ($( + paste::item! { + #[bench] + pub fn [<$func>](bh: &mut Bencher) { + let mut rng = rand::thread_rng(); + let x = rng.gen::(); + let y = rng.gen::(); + let z = rng.gen::(); + bh.iter(|| test::black_box(libm::[<$func>](x, y, z))) + } + #[bench] + pub fn [<$func f>](bh: &mut Bencher) { + let mut rng = rand::thread_rng(); + let x = rng.gen::(); + let y = rng.gen::(); + let z = rng.gen::(); + bh.iter(|| test::black_box(libm::[<$func f>](x, y, z))) + } + } + )*); +} +macro_rules! bessel { + ($($func:ident),*) => ($( + paste::item! { + #[bench] + pub fn [<$func>](bh: &mut Bencher) { + let mut rng = rand::thread_rng(); + let mut n = rng.gen::(); + n &= 0xffff; + let x = rng.gen::(); + bh.iter(|| test::black_box(libm::[<$func>](n, x))) + } + #[bench] + pub fn [<$func f>](bh: &mut Bencher) { + let mut rng = rand::thread_rng(); + let mut n = rng.gen::(); + n &= 0xffff; + let x = rng.gen::(); + bh.iter(|| test::black_box(libm::[<$func f>](n, x))) + } + } + )*); +} + +unary!( + acos, acosh, asin, atan, cbrt, ceil, cos, cosh, erf, exp, exp2, exp10, expm1, fabs, floor, j0, + j1, lgamma, log, log1p, log2, log10, round, sin, sinh, sqrt, tan, tanh, tgamma, trunc, y0, y1 +); +binary!(atan2, copysign, fdim, fmax, fmin, fmod, hypot, pow); +trinary!(fma); +bessel!(jn, yn); +binary!(ldexp; scalbn); diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 111d712ff..068a4ec47 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -479,7 +479,7 @@ mod tests { .for_each(|s| s.iter().for_each(|val| pow_test(base, *val, expected))); } - 
fn test_sets(sets: &[&[f64]], computed: &Fn(f64) -> f64, expected: &Fn(f64) -> f64) { + fn test_sets(sets: &[&[f64]], computed: &dyn Fn(f64) -> f64, expected: &dyn Fn(f64) -> f64) { sets.iter().for_each(|s| { s.iter().for_each(|val| { let exp = expected(*val); From 4a9a5d6ec745237771039dee85fb017630ca2e68 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 2 Jul 2019 08:22:03 +0200 Subject: [PATCH 0276/1459] Move benchmarks into its own crate --- libm/Cargo.toml | 3 +-- libm/README.md | 6 ++++-- libm/ci/run.sh | 14 +++++++++----- libm/crates/libm-bench/Cargo.toml | 11 +++++++++++ libm/{ => crates/libm-bench}/benches/bench.rs | 3 --- 5 files changed, 25 insertions(+), 12 deletions(-) create mode 100644 libm/crates/libm-bench/Cargo.toml rename libm/{ => crates/libm-bench}/benches/bench.rs (98%) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 2f68ecc01..bd1bb80ce 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -26,12 +26,11 @@ checked = [] [workspace] members = [ "crates/compiler-builtins-smoke-test", + "crates/libm-bench", ] [dev-dependencies] no-panic = "0.1.8" -rand = "0.6.5" -paste = "0.1.5" [build-dependencies] rand = { version = "0.6.5", optional = true } diff --git a/libm/README.md b/libm/README.md index edd54d418..8b93f2603 100644 --- a/libm/README.md +++ b/libm/README.md @@ -39,9 +39,11 @@ The API documentation can be found [here](https://docs.rs/libm). ## Benchmark [benchmark]: #benchmark -Run `cargo +nightly bench` -NOTE: remember to have nightly installed `rustup install nightly` +The benchmarks are located in `crates/libm-bench` and require a nightly Rust toolchain. 
+To run all benchmarks: + +> cargo +nightly bench --all ## Contributing diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 42c241645..37ffb8793 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -1,11 +1,15 @@ -#!/bin/sh +#!/usr/bin/env sh set -ex TARGET=$1 -cargo test --target $TARGET -cargo test --target $TARGET --release +CMD="cargo test --all --no-default-features --target $TARGET" -cargo test --features 'checked musl-reference-tests' --target $TARGET +$CMD +$CMD --release -cargo test --features 'checked musl-reference-tests' --target $TARGET --release +$CMD --features 'stable' +$CMD --release --features 'stable' + +$CMD --features 'stable checked musl-reference-tests' +$CMD --release --features 'stable checked musl-reference-tests' diff --git a/libm/crates/libm-bench/Cargo.toml b/libm/crates/libm-bench/Cargo.toml new file mode 100644 index 000000000..8e06e0fb5 --- /dev/null +++ b/libm/crates/libm-bench/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "libm-bench" +version = "0.1.0" +authors = ["Gonzalo Brito Gadeschi "] +edition = "2018" +license = "MIT OR Apache-2.0" + +[dependencies] +libm = { path = "../.." 
} +rand = "0.6.5" +paste = "0.1.5" diff --git a/libm/benches/bench.rs b/libm/crates/libm-bench/benches/bench.rs similarity index 98% rename from libm/benches/bench.rs rename to libm/crates/libm-bench/benches/bench.rs index 522ac4e3b..b6d874153 100644 --- a/libm/benches/bench.rs +++ b/libm/crates/libm-bench/benches/bench.rs @@ -1,7 +1,4 @@ #![feature(test)] - -extern crate paste; -extern crate rand; extern crate test; use rand::Rng; From d41dd23f94346b6d5c57051789234184a1895ad2 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 2 Jul 2019 08:32:31 +0200 Subject: [PATCH 0277/1459] Do not enable default features in benchmarks --- libm/crates/libm-bench/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/crates/libm-bench/Cargo.toml b/libm/crates/libm-bench/Cargo.toml index 8e06e0fb5..d28dd861f 100644 --- a/libm/crates/libm-bench/Cargo.toml +++ b/libm/crates/libm-bench/Cargo.toml @@ -6,6 +6,6 @@ edition = "2018" license = "MIT OR Apache-2.0" [dependencies] -libm = { path = "../.." 
} +libm = { path = "../..", default-features = false } rand = "0.6.5" paste = "0.1.5" From 04f1de36f358fe5dcb8c778e5c0d4bb9f6b210ac Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 2 Jul 2019 08:33:11 +0200 Subject: [PATCH 0278/1459] Re-export the stable libm feature --- libm/crates/libm-bench/Cargo.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libm/crates/libm-bench/Cargo.toml b/libm/crates/libm-bench/Cargo.toml index d28dd861f..ba65dbd5f 100644 --- a/libm/crates/libm-bench/Cargo.toml +++ b/libm/crates/libm-bench/Cargo.toml @@ -9,3 +9,7 @@ license = "MIT OR Apache-2.0" libm = { path = "../..", default-features = false } rand = "0.6.5" paste = "0.1.5" + +[features] +default = [] +stable = [ "libm/stable" ] From 30559db9a26ea0a081d21e14b130518232ff460a Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 2 Jul 2019 09:07:10 +0200 Subject: [PATCH 0279/1459] Run benchmarks on CI --- libm/azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/azure-pipelines.yml b/libm/azure-pipelines.yml index e9cb916db..c89346c73 100644 --- a/libm/azure-pipelines.yml +++ b/libm/azure-pipelines.yml @@ -77,7 +77,7 @@ jobs: vmImage: ubuntu-16.04 steps: - template: ci/azure-install-rust.yml - - bash: cargo bench + - bash: cargo bench --all displayName: "Benchmarks" variables: TOOLCHAIN: nightly From fc735894d173c087e238ba069d46d9f909ddcbbd Mon Sep 17 00:00:00 2001 From: Igor null Date: Tue, 2 Jul 2019 16:48:59 +0300 Subject: [PATCH 0280/1459] Fixed a few int overflows rem_pio2 had incorrect u32/i32 operations remquo has a straight int overflow --- libm/src/math/rem_pio2.rs | 53 +++++++++++++++++++++------------ libm/src/math/rem_pio2_large.rs | 2 +- libm/src/math/remquo.rs | 14 ++++++++- 3 files changed, 48 insertions(+), 21 deletions(-) diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 536dfac3c..dc6b3297d 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -179,29 +179,44 @@ pub(crate) fn 
rem_pio2(x: f64) -> (i32, f64, f64) { i -= 1; } let mut ty = [0.0; 3]; - let n = rem_pio2_large(&tx[..=i], &mut ty, ((ix >> 20) - (0x3ff + 23)) as i32, 1); + let n = rem_pio2_large(&tx[..=i], &mut ty, ((ix as i32) >> 20) - (0x3ff + 23), 1); if sign != 0 { return (-n, -ty[0], -ty[1]); } (n, ty[0], ty[1]) } -#[test] -fn test_near_pi() { - assert_eq!( - rem_pio2(3.141592025756836), - (2, -6.278329573009626e-7, -2.1125998133974653e-23) - ); - assert_eq!( - rem_pio2(3.141592033207416), - (2, -6.20382377148128e-7, -2.1125998133974653e-23) - ); - assert_eq!( - rem_pio2(3.141592144966125), - (2, -5.086236681942706e-7, -2.1125998133974653e-23) - ); - assert_eq!( - rem_pio2(3.141592979431152), - (2, 3.2584135866119817e-7, -2.1125998133974653e-23) - ); +#[cfg(test)] +mod tests { + use super::rem_pio2; + + #[test] + fn test_near_pi() { + assert_eq!( + rem_pio2(3.141592025756836), + (2, -6.278329573009626e-7, -2.1125998133974653e-23) + ); + assert_eq!( + rem_pio2(3.141592033207416), + (2, -6.20382377148128e-7, -2.1125998133974653e-23) + ); + assert_eq!( + rem_pio2(3.141592144966125), + (2, -5.086236681942706e-7, -2.1125998133974653e-23) + ); + assert_eq!( + rem_pio2(3.141592979431152), + (2, 3.2584135866119817e-7, -2.1125998133974653e-23) + ); + } + + #[test] + fn test_overflow_b9b847() { + let _ = rem_pio2(-3054214.5490637687); + } + + #[test] + fn test_overflow_4747b9() { + let _ = rem_pio2(917340800458.2274); + } } diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index 006d3e153..5336a9767 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -256,7 +256,7 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> let jv = jv as usize; /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ - let mut j = (jv - jx) as i32; + let mut j = (jv as i32) - (jx as i32); let m = jx + jk; for i in 0..=m { i!(f, i, =, if j < 0 { diff --git a/libm/src/math/remquo.rs b/libm/src/math/remquo.rs index 
1c2ba8918..c72c8f187 100644 --- a/libm/src/math/remquo.rs +++ b/libm/src/math/remquo.rs @@ -85,7 +85,8 @@ pub fn remquo(mut x: f64, mut y: f64) -> (f64, i32) { } if ex == ey || (ex + 1 == ey && (2.0 * x > y || (2.0 * x == y && (q % 2) != 0))) { x -= y; - q += 1; + // TODO: this matches musl behavior, but it is incorrect + q = q.wrapping_add(1); } q &= 0x7fffffff; let quo = if sx ^ sy { -(q as i32) } else { q as i32 }; @@ -95,3 +96,14 @@ pub fn remquo(mut x: f64, mut y: f64) -> (f64, i32) { (x, quo) } } + +#[cfg(test)] +mod tests { + use super::remquo; + + #[test] + fn test_q_overflow() { + // 0xc000000000000001, 0x04c0000000000004 + let _ = remquo(-2.0000000000000004, 8.406091369059082e-286); + } +} From 68f92a7becb0f0a7a2dab2795923e3beba7087f7 Mon Sep 17 00:00:00 2001 From: Benjamin Schultzer Date: Tue, 2 Jul 2019 11:56:38 -0700 Subject: [PATCH 0281/1459] Add signum Signed-off-by: Benjamin Schultzer --- libm/src/lib.rs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index a47883d81..81b76f87b 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -73,8 +73,7 @@ pub trait F32Ext: private::Sealed + Sized { fn abs(self) -> Self; - // NOTE depends on unstable intrinsics::copysignf32 - // fn signum(self) -> Self; + fn signum(self) -> Self; fn mul_add(self, a: Self, b: Self) -> Self; @@ -178,6 +177,15 @@ impl F32Ext for f32 { fabsf(self) } + #[inline] + fn signum(self) -> Self { + if self.is_nan() { + f32::NAN + } else { + copysignf(1., self) + } + } + #[inline] fn mul_add(self, a: Self, b: Self) -> Self { fmaf(self, a, b) @@ -361,8 +369,7 @@ pub trait F64Ext: private::Sealed + Sized { fn abs(self) -> Self; - // NOTE depends on unstable intrinsics::copysignf64 - // fn signum(self) -> Self; + fn signum(self) -> Self; fn mul_add(self, a: Self, b: Self) -> Self; @@ -466,6 +473,15 @@ impl F64Ext for f64 { fabs(self) } + #[inline] + fn signum(self) -> Self { + if self.is_nan() { + f64::NAN + } else 
{ + copysign(1., self) + } + } + #[inline] fn mul_add(self, a: Self, b: Self) -> Self { fma(self, a, b) From 45427256d1ab37e3882e04f38b2c808ec18f7160 Mon Sep 17 00:00:00 2001 From: Benjamin Schultzer Date: Wed, 3 Jul 2019 11:57:54 -0700 Subject: [PATCH 0282/1459] Add remainder This PR adds the missing `remainder` and `remainderf` found in musl libm respectly https://git.musl-libc.org/cgit/musl/tree/src/math/remainder.c and https://git.musl-libc.org/cgit/musl/tree/src/math/remainderf.c Signed-off-by: Benjamin Schultzer --- libm/src/math/mod.rs | 4 ++++ libm/src/math/remainder.rs | 5 +++++ libm/src/math/remainderf.rs | 5 +++++ 3 files changed, 14 insertions(+) create mode 100644 libm/src/math/remainder.rs create mode 100644 libm/src/math/remainderf.rs diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 35ffe1a2c..48b400a92 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -148,6 +148,8 @@ mod modf; mod modff; mod pow; mod powf; +mod remainder; +mod remainderf; mod remquo; mod remquof; mod round; @@ -258,6 +260,8 @@ pub use self::modf::modf; pub use self::modff::modff; pub use self::pow::pow; pub use self::powf::powf; +pub use self::remainder::remainder; +pub use self::remainderf::remainderf; pub use self::remquo::remquo; pub use self::remquof::remquof; pub use self::round::round; diff --git a/libm/src/math/remainder.rs b/libm/src/math/remainder.rs new file mode 100644 index 000000000..e0f56da20 --- /dev/null +++ b/libm/src/math/remainder.rs @@ -0,0 +1,5 @@ +#[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn remainder(x: f64, y: f64) -> (f64, i32) { + super::remquo(x, y) +} diff --git a/libm/src/math/remainderf.rs b/libm/src/math/remainderf.rs new file mode 100644 index 000000000..72fd0e205 --- /dev/null +++ b/libm/src/math/remainderf.rs @@ -0,0 +1,5 @@ +#[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn remainderf(x: f32, y: f32) -> (f32, i32) { + super::remquof(x, y) +} From 
e9b462d54bfc306c868a86673dea7f1fd5e39b78 Mon Sep 17 00:00:00 2001 From: Benjamin Schultzer Date: Wed, 3 Jul 2019 14:15:29 -0700 Subject: [PATCH 0283/1459] Only return the fp value. Signed-off-by: Benjamin Schultzer --- libm/src/math/remainder.rs | 5 +++-- libm/src/math/remainderf.rs | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/libm/src/math/remainder.rs b/libm/src/math/remainder.rs index e0f56da20..7ce895004 100644 --- a/libm/src/math/remainder.rs +++ b/libm/src/math/remainder.rs @@ -1,5 +1,6 @@ #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn remainder(x: f64, y: f64) -> (f64, i32) { - super::remquo(x, y) +pub fn remainder(x: f64, y: f64) -> f64 { + let (result, _) = super::remquo(x, y); + result } diff --git a/libm/src/math/remainderf.rs b/libm/src/math/remainderf.rs index 72fd0e205..8b2aa5aab 100644 --- a/libm/src/math/remainderf.rs +++ b/libm/src/math/remainderf.rs @@ -1,5 +1,6 @@ #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn remainderf(x: f32, y: f32) -> (f32, i32) { - super::remquof(x, y) +pub fn remainderf(x: f32, y: f32) -> f32 { + let (result, _) = super::remquof(x, y); + result } From d79fe9161f45dc16112b2ac2448db5be9859ed64 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 3 Jul 2019 23:35:07 +0200 Subject: [PATCH 0284/1459] Remove F32Ext and F64Ext --- libm/src/lib.rs | 599 ------------------------------------------------ 1 file changed, 599 deletions(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 81b76f87b..b37248ffb 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -55,604 +55,5 @@ pub fn _eq(a: f64, b: f64) -> Result<(), u64> { } } -/// Math support for `f32` -/// -/// This trait is sealed and cannot be implemented outside of `libm`. 
-pub trait F32Ext: private::Sealed + Sized { - fn floor(self) -> Self; - - fn ceil(self) -> Self; - - fn round(self) -> Self; - - fn trunc(self) -> Self; - - fn fdim(self, rhs: Self) -> Self; - - fn fract(self) -> Self; - - fn abs(self) -> Self; - - fn signum(self) -> Self; - - fn mul_add(self, a: Self, b: Self) -> Self; - - fn div_euc(self, rhs: Self) -> Self; - - fn mod_euc(self, rhs: Self) -> Self; - - // NOTE depends on unstable intrinsics::powif32 - // fn powi(self, n: i32) -> Self; - - fn powf(self, n: Self) -> Self; - - fn sqrt(self) -> Self; - - fn exp(self) -> Self; - - fn exp2(self) -> Self; - - fn ln(self) -> Self; - - fn log(self, base: Self) -> Self; - - fn log2(self) -> Self; - - fn log10(self) -> Self; - - fn cbrt(self) -> Self; - - fn hypot(self, other: Self) -> Self; - - fn sin(self) -> Self; - - fn cos(self) -> Self; - - fn tan(self) -> Self; - - fn asin(self) -> Self; - - fn acos(self) -> Self; - - fn atan(self) -> Self; - - fn atan2(self, other: Self) -> Self; - - fn sin_cos(self) -> (Self, Self); - - fn exp_m1(self) -> Self; - - fn ln_1p(self) -> Self; - - fn sinh(self) -> Self; - - fn cosh(self) -> Self; - - fn tanh(self) -> Self; - - fn asinh(self) -> Self; - - fn acosh(self) -> Self; - - fn atanh(self) -> Self; - - fn min(self, other: Self) -> Self; - - fn max(self, other: Self) -> Self; -} - -impl F32Ext for f32 { - #[inline] - fn floor(self) -> Self { - floorf(self) - } - - #[inline] - fn ceil(self) -> Self { - ceilf(self) - } - - #[inline] - fn round(self) -> Self { - roundf(self) - } - - #[inline] - fn trunc(self) -> Self { - truncf(self) - } - - #[inline] - fn fdim(self, rhs: Self) -> Self { - fdimf(self, rhs) - } - - #[inline] - fn fract(self) -> Self { - self - self.trunc() - } - - #[inline] - fn abs(self) -> Self { - fabsf(self) - } - - #[inline] - fn signum(self) -> Self { - if self.is_nan() { - f32::NAN - } else { - copysignf(1., self) - } - } - - #[inline] - fn mul_add(self, a: Self, b: Self) -> Self { - fmaf(self, a, b) - } - - 
#[inline] - fn div_euc(self, rhs: Self) -> Self { - let q = (self / rhs).trunc(); - if self % rhs < 0.0 { - return if rhs > 0.0 { q - 1.0 } else { q + 1.0 }; - } - q - } - - #[inline] - fn mod_euc(self, rhs: f32) -> f32 { - let r = self % rhs; - if r < 0.0 { - r + rhs.abs() - } else { - r - } - } - - #[inline] - fn powf(self, n: Self) -> Self { - powf(self, n) - } - - #[inline] - fn sqrt(self) -> Self { - sqrtf(self) - } - - #[inline] - fn exp(self) -> Self { - expf(self) - } - - #[inline] - fn exp2(self) -> Self { - exp2f(self) - } - - #[inline] - fn ln(self) -> Self { - logf(self) - } - - #[inline] - fn log(self, base: Self) -> Self { - self.ln() / base.ln() - } - - #[inline] - fn log2(self) -> Self { - log2f(self) - } - - #[inline] - fn log10(self) -> Self { - log10f(self) - } - - #[inline] - fn cbrt(self) -> Self { - cbrtf(self) - } - - #[inline] - fn hypot(self, other: Self) -> Self { - hypotf(self, other) - } - - #[inline] - fn sin(self) -> Self { - sinf(self) - } - - #[inline] - fn cos(self) -> Self { - cosf(self) - } - - #[inline] - fn tan(self) -> Self { - tanf(self) - } - - #[inline] - fn asin(self) -> Self { - asinf(self) - } - - #[inline] - fn acos(self) -> Self { - acosf(self) - } - - #[inline] - fn atan(self) -> Self { - atanf(self) - } - - #[inline] - fn atan2(self, other: Self) -> Self { - atan2f(self, other) - } - - #[inline] - fn sin_cos(self) -> (Self, Self) { - sincosf(self) - } - - #[inline] - fn exp_m1(self) -> Self { - expm1f(self) - } - - #[inline] - fn ln_1p(self) -> Self { - log1pf(self) - } - - #[inline] - fn sinh(self) -> Self { - sinhf(self) - } - - #[inline] - fn cosh(self) -> Self { - coshf(self) - } - - #[inline] - fn tanh(self) -> Self { - tanhf(self) - } - - #[inline] - fn asinh(self) -> Self { - asinhf(self) - } - - #[inline] - fn acosh(self) -> Self { - acoshf(self) - } - - #[inline] - fn atanh(self) -> Self { - atanhf(self) - } - - #[inline] - fn min(self, other: Self) -> Self { - fminf(self, other) - } - - #[inline] - fn 
max(self, other: Self) -> Self { - fmaxf(self, other) - } -} - -/// Math support for `f64` -/// -/// This trait is sealed and cannot be implemented outside of `libm`. -pub trait F64Ext: private::Sealed + Sized { - fn floor(self) -> Self; - - fn ceil(self) -> Self; - - fn round(self) -> Self; - - fn trunc(self) -> Self; - - fn fdim(self, rhs: Self) -> Self; - - fn fract(self) -> Self; - - fn abs(self) -> Self; - - fn signum(self) -> Self; - - fn mul_add(self, a: Self, b: Self) -> Self; - - fn div_euc(self, rhs: Self) -> Self; - - fn mod_euc(self, rhs: Self) -> Self; - - // NOTE depends on unstable intrinsics::powif64 - // fn powi(self, n: i32) -> Self; - - fn powf(self, n: Self) -> Self; - - fn sqrt(self) -> Self; - - fn exp(self) -> Self; - - fn exp2(self) -> Self; - - fn ln(self) -> Self; - - fn log(self, base: Self) -> Self; - - fn log2(self) -> Self; - - fn log10(self) -> Self; - - fn cbrt(self) -> Self; - - fn hypot(self, other: Self) -> Self; - - fn sin(self) -> Self; - - fn cos(self) -> Self; - - fn tan(self) -> Self; - - fn asin(self) -> Self; - - fn acos(self) -> Self; - - fn atan(self) -> Self; - - fn atan2(self, other: Self) -> Self; - - fn sin_cos(self) -> (Self, Self); - - fn exp_m1(self) -> Self; - - fn ln_1p(self) -> Self; - - fn sinh(self) -> Self; - - fn cosh(self) -> Self; - - fn tanh(self) -> Self; - - fn asinh(self) -> Self; - - fn acosh(self) -> Self; - - fn atanh(self) -> Self; - - fn min(self, other: Self) -> Self; - - fn max(self, other: Self) -> Self; -} - -impl F64Ext for f64 { - #[inline] - fn floor(self) -> Self { - floor(self) - } - - #[inline] - fn ceil(self) -> Self { - ceil(self) - } - - #[inline] - fn round(self) -> Self { - round(self) - } - - #[inline] - fn trunc(self) -> Self { - trunc(self) - } - - #[inline] - fn fdim(self, rhs: Self) -> Self { - fdim(self, rhs) - } - - #[inline] - fn fract(self) -> Self { - self - self.trunc() - } - - #[inline] - fn abs(self) -> Self { - fabs(self) - } - - #[inline] - fn signum(self) -> Self { - 
if self.is_nan() { - f64::NAN - } else { - copysign(1., self) - } - } - - #[inline] - fn mul_add(self, a: Self, b: Self) -> Self { - fma(self, a, b) - } - - #[inline] - fn div_euc(self, rhs: Self) -> Self { - let q = (self / rhs).trunc(); - if self % rhs < 0.0 { - return if rhs > 0.0 { q - 1.0 } else { q + 1.0 }; - } - q - } - - #[inline] - fn mod_euc(self, rhs: f64) -> f64 { - let r = self % rhs; - if r < 0.0 { - r + rhs.abs() - } else { - r - } - } - - #[inline] - fn powf(self, n: Self) -> Self { - pow(self, n) - } - - #[inline] - fn sqrt(self) -> Self { - sqrt(self) - } - - #[inline] - fn exp(self) -> Self { - exp(self) - } - - #[inline] - fn exp2(self) -> Self { - exp2(self) - } - - #[inline] - fn ln(self) -> Self { - log(self) - } - - #[inline] - fn log(self, base: Self) -> Self { - self.ln() / base.ln() - } - - #[inline] - fn log2(self) -> Self { - log2(self) - } - - #[inline] - fn log10(self) -> Self { - log10(self) - } - - #[inline] - fn cbrt(self) -> Self { - cbrt(self) - } - - #[inline] - fn hypot(self, other: Self) -> Self { - hypot(self, other) - } - - #[inline] - fn sin(self) -> Self { - sin(self) - } - - #[inline] - fn cos(self) -> Self { - cos(self) - } - - #[inline] - fn tan(self) -> Self { - tan(self) - } - - #[inline] - fn asin(self) -> Self { - asin(self) - } - - #[inline] - fn acos(self) -> Self { - acos(self) - } - - #[inline] - fn atan(self) -> Self { - atan(self) - } - - #[inline] - fn atan2(self, other: Self) -> Self { - atan2(self, other) - } - - #[inline] - fn sin_cos(self) -> (Self, Self) { - sincos(self) - } - - #[inline] - fn exp_m1(self) -> Self { - expm1(self) - } - - #[inline] - fn ln_1p(self) -> Self { - log1p(self) - } - - #[inline] - fn sinh(self) -> Self { - sinh(self) - } - - #[inline] - fn cosh(self) -> Self { - cosh(self) - } - - #[inline] - fn tanh(self) -> Self { - tanh(self) - } - - #[inline] - fn asinh(self) -> Self { - asinh(self) - } - - #[inline] - fn acosh(self) -> Self { - acosh(self) - } - - #[inline] - fn 
atanh(self) -> Self { - atanh(self) - } - - #[inline] - fn min(self, other: Self) -> Self { - fmin(self, other) - } - - #[inline] - fn max(self, other: Self) -> Self { - fmax(self, other) - } -} - -mod private { - pub trait Sealed {} - - impl Sealed for f32 {} - impl Sealed for f64 {} -} - #[cfg(all(test, feature = "musl-reference-tests"))] include!(concat!(env!("OUT_DIR"), "/musl-tests.rs")); From 151a5c1501e3ff006b6a5ae384adb44212428b82 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 3 Jul 2019 23:35:24 +0200 Subject: [PATCH 0285/1459] Bump minor version to 0.2.0 --- libm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index bd1bb80ce..3e6817851 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -7,7 +7,7 @@ keywords = ["libm", "math"] license = "MIT OR Apache-2.0" name = "libm" repository = "https://github.com/rust-lang-nursery/libm" -version = "0.1.4" +version = "0.2.0" edition = "2018" [features] From c6558c9a088f0162b76d71dc322c9495aeff2f12 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 3 Jul 2019 23:53:35 +0200 Subject: [PATCH 0286/1459] Remove traits from README --- libm/README.md | 17 +---------------- libm/src/lib.rs | 10 ---------- 2 files changed, 1 insertion(+), 26 deletions(-) diff --git a/libm/README.md b/libm/README.md index 8b93f2603..e10904f8c 100644 --- a/libm/README.md +++ b/libm/README.md @@ -18,22 +18,7 @@ term goal is to enable [math support in the `core` crate][core]. ## Already usable -This crate is [on crates.io] and can be used today in stable `#![no_std]` programs like this: - -[on crates.io]: https://crates.io/crates/libm - -``` rust -#![no_std] - -extern crate libm; - -use libm::F32Ext; // adds methods to `f32` - -fn foo(x: f32) { - let y = x.sqrt(); - let z = libm::truncf(x); -} -``` +This crate is [on crates.io] and can be used today in stable `#![no_std]` programs. The API documentation can be found [here](https://docs.rs/libm). 
diff --git a/libm/src/lib.rs b/libm/src/lib.rs index b37248ffb..b15857dbe 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -1,14 +1,4 @@ //! libm in pure Rust -//! -//! # Usage -//! -//! You can use this crate in two ways: -//! -//! - By directly using its free functions, e.g. `libm::powf`. -//! -//! - By importing the `F32Ext` and / or `F64Ext` extension traits to add methods like `powf` to the -//! `f32` and `f64` types. Then you'll be able to invoke math functions as methods, e.g. `x.sqrt()`. - #![deny(warnings)] #![no_std] #![cfg_attr( From 03203713c3ad7c1ab650432b4c6a32cb70b6985f Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 9 Jul 2019 17:17:08 +0200 Subject: [PATCH 0287/1459] Add nextafter and nextafterf from musl --- libm/src/math/mod.rs | 4 ++++ libm/src/math/nextafter.rs | 38 +++++++++++++++++++++++++++++++++++++ libm/src/math/nextafterf.rs | 38 +++++++++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+) create mode 100644 libm/src/math/nextafter.rs create mode 100644 libm/src/math/nextafterf.rs diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 48b400a92..fcf4e649c 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -146,6 +146,8 @@ mod log2f; mod logf; mod modf; mod modff; +mod nextafter; +mod nextafterf; mod pow; mod powf; mod remainder; @@ -258,6 +260,8 @@ pub use self::log2f::log2f; pub use self::logf::logf; pub use self::modf::modf; pub use self::modff::modff; +pub use self::nextafter::nextafter; +pub use self::nextafterf::nextafterf; pub use self::pow::pow; pub use self::powf::powf; pub use self::remainder::remainder; diff --git a/libm/src/math/nextafter.rs b/libm/src/math/nextafter.rs new file mode 100644 index 000000000..4c1257e00 --- /dev/null +++ b/libm/src/math/nextafter.rs @@ -0,0 +1,38 @@ +#[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn nextafter(x: f64, y: f64) -> f64 { + if x.is_nan() || y.is_nan() { + return x + y; + } + + let mut ux_i = x.to_bits(); + let uy_i = 
y.to_bits(); + if ux_i == uy_i { + return y; + } + + let ax = ux_i & !1_u64 / 2; + let ay = uy_i & !1_u64 / 2; + if ax == 0 { + if ay == 0 { + return y; + } + ux_i = (uy_i & 1_u64 << 63) | 1; + } else if ax > ay || ((ux_i ^ uy_i) & 1_u64 << 63) != 0 { + ux_i -= 1; + } else { + ux_i += 1; + } + + let e = ux_i.wrapping_shr(52 & 0x7ff); + // raise overflow if ux.f is infinite and x is finite + if e == 0x7ff { + force_eval!(x + x); + } + let ux_f = f64::from_bits(ux_i); + // raise underflow if ux.f is subnormal or zero + if e == 0 { + force_eval!(x * x + ux_f * ux_f); + } + ux_f +} diff --git a/libm/src/math/nextafterf.rs b/libm/src/math/nextafterf.rs new file mode 100644 index 000000000..a48bce9f7 --- /dev/null +++ b/libm/src/math/nextafterf.rs @@ -0,0 +1,38 @@ +#[inline] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn nextafterf(x: f32, y: f32) -> f32 { + if x.is_nan() || y.is_nan() { + return x + y; + } + + let mut ux_i = x.to_bits(); + let uy_i = y.to_bits(); + if ux_i == uy_i { + return y; + } + + let ax = ux_i & 0x7fff_ffff_u32; + let ay = uy_i & 0x7fff_ffff_u32; + if ax == 0 { + if ay == 0 { + return y; + } + ux_i = (uy_i & 0x8000_0000_u32) | 1; + } else if ax > ay || ((ux_i ^ uy_i) & 0x8000_0000_u32) != 0 { + ux_i -= 1; + } else { + ux_i += 1; + } + + let e = ux_i.wrapping_shr(0x7f80_0000_u32); + // raise overflow if ux_f is infinite and x is finite + if e == 0x7f80_0000_u32 { + force_eval!(x + x); + } + let ux_f = f32::from_bits(ux_i); + // raise underflow if ux_f is subnormal or zero + if e == 0 { + force_eval!(x * x + ux_f * ux_f); + } + ux_f +} From 6b8f2b36d2f17ef88ea14de4dc85e35ac9801712 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 9 Jul 2019 17:33:01 +0200 Subject: [PATCH 0288/1459] Floating-point environment APIs are private --- libm/src/math/fenv.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/libm/src/math/fenv.rs b/libm/src/math/fenv.rs index 63bb20368..652e60324 100644 --- 
// src: musl/src/fenv/fenv.c
/* Dummy functions for archs lacking fenv implementation */

// ---- floating-point exception flags ----

/// Underflow exception flag (always `0` in this dummy implementation).
pub(crate) const FE_UNDERFLOW: i32 = 0;
/// Inexact-result exception flag (always `0` in this dummy implementation).
pub(crate) const FE_INEXACT: i32 = 0;

/// Dummy `feclearexcept`: ignores `_mask` and returns `0`.
#[inline]
pub(crate) fn feclearexcept(_mask: i32) -> i32 {
    0
}

/// Dummy `feraiseexcept`: ignores `_mask` and returns `0`.
#[inline]
pub(crate) fn feraiseexcept(_mask: i32) -> i32 {
    0
}

/// Dummy `fetestexcept`: ignores `_mask` and returns `0`, i.e. no flag is
/// ever reported as set.
#[inline]
pub(crate) fn fetestexcept(_mask: i32) -> i32 {
    0
}

// ---- rounding modes ----

/// Round-to-nearest mode identifier.
pub(crate) const FE_TONEAREST: i32 = 0;
/// Round-toward-zero mode identifier (same value as `FE_TONEAREST` here).
pub(crate) const FE_TOWARDZERO: i32 = 0;

/// Dummy `fegetround`: always reports `FE_TONEAREST`.
#[inline]
pub(crate) fn fegetround() -> i32 {
    FE_TONEAREST
}

/// Dummy `fesetround`: ignores `_r` and returns `0`.
#[inline]
pub(crate) fn fesetround(_r: i32) -> i32 {
    0
}
b/libm/tests/unit.rs new file mode 100644 index 000000000..4e7002817 --- /dev/null +++ b/libm/tests/unit.rs @@ -0,0 +1,19 @@ +use libm::*; + +#[test] +fn fma_segfault() { + // These two inputs cause fma to segfault on release due to overflow: + assert_eq!( + fma( + -0.0000000000000002220446049250313, + -0.0000000000000002220446049250313, + -0.0000000000000002220446049250313 + ), + -0.00000000000000022204460492503126, + ); + + assert_eq!( + fma(-0.992, -0.992, -0.992), + -0.00793599999988632, + ); +} From 8fab5f93f1f2ab2c73fd5452b1c2cb9f222d02fc Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 10 Jul 2019 08:42:28 -0700 Subject: [PATCH 0290/1459] Remove most `#[inline]` annotations These annotations fall into a few categories * Some simply aren't needed since functions will always be in the same CGU anyway and are already candidates for inlining. * Many are on massive functions which shouldn't be inlined across crates due to code size concerns. * Others aren't necessary since calls to this crate are rarely inlined anyway (since it's lowered through LLVM). If this crate is called directly and inlining is needed then LTO can always be turned on, otherwise this will benefit downstream consumers by avoiding re-codegen'ing so many functions. 
--- libm/src/math/acos.rs | 2 -- libm/src/math/acosf.rs | 2 -- libm/src/math/asin.rs | 2 -- libm/src/math/asinf.rs | 2 -- libm/src/math/atan.rs | 1 - libm/src/math/atan2.rs | 1 - libm/src/math/atan2f.rs | 1 - libm/src/math/atanf.rs | 1 - libm/src/math/cbrt.rs | 1 - libm/src/math/cbrtf.rs | 1 - libm/src/math/ceil.rs | 1 - libm/src/math/ceilf.rs | 1 - libm/src/math/cos.rs | 1 - libm/src/math/cosf.rs | 1 - libm/src/math/cosh.rs | 1 - libm/src/math/coshf.rs | 1 - libm/src/math/exp.rs | 1 - libm/src/math/exp2.rs | 1 - libm/src/math/exp2f.rs | 1 - libm/src/math/expf.rs | 1 - libm/src/math/expm1.rs | 1 - libm/src/math/expm1f.rs | 1 - libm/src/math/expo2.rs | 1 - libm/src/math/fabs.rs | 1 - libm/src/math/fabsf.rs | 1 - libm/src/math/fdim.rs | 1 - libm/src/math/fdimf.rs | 1 - libm/src/math/floor.rs | 1 - libm/src/math/floorf.rs | 1 - libm/src/math/fma.rs | 3 --- libm/src/math/fmaf.rs | 1 - libm/src/math/fmax.rs | 1 - libm/src/math/fmaxf.rs | 1 - libm/src/math/fmin.rs | 1 - libm/src/math/fminf.rs | 1 - libm/src/math/fmod.rs | 1 - libm/src/math/fmodf.rs | 1 - libm/src/math/hypot.rs | 2 -- libm/src/math/hypotf.rs | 1 - libm/src/math/k_cos.rs | 1 - libm/src/math/k_cosf.rs | 1 - libm/src/math/k_expo2.rs | 1 - libm/src/math/k_expo2f.rs | 1 - libm/src/math/k_sin.rs | 1 - libm/src/math/k_sinf.rs | 1 - libm/src/math/k_tan.rs | 2 -- libm/src/math/k_tanf.rs | 1 - libm/src/math/ldexp.rs | 1 - libm/src/math/ldexpf.rs | 1 - libm/src/math/log.rs | 1 - libm/src/math/log10.rs | 1 - libm/src/math/log10f.rs | 1 - libm/src/math/log1p.rs | 1 - libm/src/math/log1pf.rs | 1 - libm/src/math/log2.rs | 1 - libm/src/math/log2f.rs | 1 - libm/src/math/logf.rs | 1 - libm/src/math/nextafter.rs | 1 - libm/src/math/nextafterf.rs | 1 - libm/src/math/pow.rs | 1 - libm/src/math/powf.rs | 1 - libm/src/math/rem_pio2.rs | 2 -- libm/src/math/rem_pio2_large.rs | 1 - libm/src/math/rem_pio2f.rs | 1 - libm/src/math/remainder.rs | 1 - libm/src/math/remainderf.rs | 1 - libm/src/math/remquo.rs | 1 + 
libm/src/math/remquof.rs | 1 + libm/src/math/round.rs | 1 - libm/src/math/roundf.rs | 1 - libm/src/math/scalbn.rs | 1 - libm/src/math/scalbnf.rs | 1 - libm/src/math/sin.rs | 1 - libm/src/math/sinf.rs | 1 - libm/src/math/sinh.rs | 1 - libm/src/math/sinhf.rs | 1 - libm/src/math/sqrt.rs | 1 - libm/src/math/sqrtf.rs | 1 - libm/src/math/tan.rs | 1 - libm/src/math/tanf.rs | 1 - libm/src/math/tanh.rs | 1 - libm/src/math/tanhf.rs | 1 - libm/src/math/trunc.rs | 1 - libm/src/math/truncf.rs | 1 - 84 files changed, 2 insertions(+), 91 deletions(-) diff --git a/libm/src/math/acos.rs b/libm/src/math/acos.rs index d5e1f6865..23b13251e 100644 --- a/libm/src/math/acos.rs +++ b/libm/src/math/acos.rs @@ -48,7 +48,6 @@ const QS2: f64 = 2.02094576023350569471e+00; /* 0x40002AE5, 0x9C598AC8 */ const QS3: f64 = -6.88283971605453293030e-01; /* 0xBFE6066C, 0x1B8D0159 */ const QS4: f64 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ -#[inline] fn r(z: f64) -> f64 { let p: f64 = z * (PS0 + z * (PS1 + z * (PS2 + z * (PS3 + z * (PS4 + z * PS5))))); let q: f64 = 1.0 + z * (QS1 + z * (QS2 + z * (QS3 + z * QS4))); @@ -60,7 +59,6 @@ fn r(z: f64) -> f64 { /// Computes the inverse cosine (arc cosine) of the input value. /// Arguments must be in the range -1 to 1. /// Returns values in radians, in the range of 0 to pi. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn acos(x: f64) -> f64 { let x1p_120f = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ -120 diff --git a/libm/src/math/acosf.rs b/libm/src/math/acosf.rs index d0598e811..1a60479e3 100644 --- a/libm/src/math/acosf.rs +++ b/libm/src/math/acosf.rs @@ -22,7 +22,6 @@ const P_S1: f32 = -4.2743422091e-02; const P_S2: f32 = -8.6563630030e-03; const Q_S1: f32 = -7.0662963390e-01; -#[inline] fn r(z: f32) -> f32 { let p = z * (P_S0 + z * (P_S1 + z * P_S2)); let q = 1. + z * Q_S1; @@ -34,7 +33,6 @@ fn r(z: f32) -> f32 { /// Computes the inverse cosine (arc cosine) of the input value. 
/// Arguments must be in the range -1 to 1. /// Returns values in radians, in the range of 0 to pi. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn acosf(x: f32) -> f32 { let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120) diff --git a/libm/src/math/asin.rs b/libm/src/math/asin.rs index 774475e51..3e4b7c56e 100644 --- a/libm/src/math/asin.rs +++ b/libm/src/math/asin.rs @@ -55,7 +55,6 @@ const Q_S2: f64 = 2.02094576023350569471e+00; /* 0x40002AE5, 0x9C598AC8 */ const Q_S3: f64 = -6.88283971605453293030e-01; /* 0xBFE6066C, 0x1B8D0159 */ const Q_S4: f64 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ -#[inline] fn comp_r(z: f64) -> f64 { let p = z * (P_S0 + z * (P_S1 + z * (P_S2 + z * (P_S3 + z * (P_S4 + z * P_S5))))); let q = 1.0 + z * (Q_S1 + z * (Q_S2 + z * (Q_S3 + z * Q_S4))); @@ -67,7 +66,6 @@ fn comp_r(z: f64) -> f64 { /// Computes the inverse sine (arc sine) of the argument `x`. /// Arguments to asin must be in the range -1 to 1. /// Returns values in radians, in the range of -pi/2 to pi/2. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn asin(mut x: f64) -> f64 { let z: f64; diff --git a/libm/src/math/asinf.rs b/libm/src/math/asinf.rs index ce0f4a997..6ec61b629 100644 --- a/libm/src/math/asinf.rs +++ b/libm/src/math/asinf.rs @@ -24,7 +24,6 @@ const P_S1: f32 = -4.2743422091e-02; const P_S2: f32 = -8.6563630030e-03; const Q_S1: f32 = -7.0662963390e-01; -#[inline] fn r(z: f32) -> f32 { let p = z * (P_S0 + z * (P_S1 + z * P_S2)); let q = 1. + z * Q_S1; @@ -36,7 +35,6 @@ fn r(z: f32) -> f32 { /// Computes the inverse sine (arc sine) of the argument `x`. /// Arguments to asin must be in the range -1 to 1. /// Returns values in radians, in the range of -pi/2 to pi/2. 
-#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn asinf(mut x: f32) -> f32 { let x1p_120 = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ (-120) diff --git a/libm/src/math/atan.rs b/libm/src/math/atan.rs index d2684ece8..4259dc71a 100644 --- a/libm/src/math/atan.rs +++ b/libm/src/math/atan.rs @@ -64,7 +64,6 @@ const AT: [f64; 11] = [ /// /// Computes the inverse tangent (arc tangent) of the input value. /// Returns a value in radians, in the range of -pi/2 to pi/2. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atan(x: f64) -> f64 { let mut x = x; diff --git a/libm/src/math/atan2.rs b/libm/src/math/atan2.rs index 08385cd10..fb2ea4eda 100644 --- a/libm/src/math/atan2.rs +++ b/libm/src/math/atan2.rs @@ -48,7 +48,6 @@ const PI_LO: f64 = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */ /// Computes the inverse tangent (arc tangent) of `y/x`. /// Produces the correct result even for angles near pi/2 or -pi/2 (that is, when `x` is near 0). /// Returns a value in radians, in the range of -pi to pi. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atan2(y: f64, x: f64) -> f64 { if x.is_nan() || y.is_nan() { diff --git a/libm/src/math/atan2f.rs b/libm/src/math/atan2f.rs index 7bbe5f1d4..eae3b002d 100644 --- a/libm/src/math/atan2f.rs +++ b/libm/src/math/atan2f.rs @@ -24,7 +24,6 @@ const PI_LO: f32 = -8.7422776573e-08; /* 0xb3bbbd2e */ /// Computes the inverse tangent (arc tangent) of `y/x`. /// Produces the correct result even for angles near pi/2 or -pi/2 (that is, when `x` is near 0). /// Returns a value in radians, in the range of -pi to pi. 
-#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atan2f(y: f32, x: f32) -> f32 { if x.is_nan() || y.is_nan() { diff --git a/libm/src/math/atanf.rs b/libm/src/math/atanf.rs index 363e11d64..73f3352e9 100644 --- a/libm/src/math/atanf.rs +++ b/libm/src/math/atanf.rs @@ -41,7 +41,6 @@ const A_T: [f32; 5] = [ /// /// Computes the inverse tangent (arc tangent) of the input value. /// Returns a value in radians, in the range of -pi/2 to pi/2. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atanf(mut x: f32) -> f32 { let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120) diff --git a/libm/src/math/cbrt.rs b/libm/src/math/cbrt.rs index 04469b159..b4e77eaa2 100644 --- a/libm/src/math/cbrt.rs +++ b/libm/src/math/cbrt.rs @@ -30,7 +30,6 @@ const P4: f64 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */ // Cube root (f64) /// /// Computes the cube root of the argument. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cbrt(x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 diff --git a/libm/src/math/cbrtf.rs b/libm/src/math/cbrtf.rs index 6e589c099..9d70305c6 100644 --- a/libm/src/math/cbrtf.rs +++ b/libm/src/math/cbrtf.rs @@ -25,7 +25,6 @@ const B2: u32 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */ /// Cube root (f32) /// /// Computes the cube root of the argument. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cbrtf(x: f32) -> f32 { let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24 diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index 59883a8a7..63c1121c6 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -5,7 +5,6 @@ const TOINT: f64 = 1. / f64::EPSILON; /// Ceil (f64) /// /// Finds the nearest integer greater than or equal to `x`. 
-#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ceil(x: f64) -> f64 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized diff --git a/libm/src/math/ceilf.rs b/libm/src/math/ceilf.rs index 151a4f210..87d96982a 100644 --- a/libm/src/math/ceilf.rs +++ b/libm/src/math/ceilf.rs @@ -3,7 +3,6 @@ use core::f32; /// Ceil (f32) /// /// Finds the nearest integer greater than or equal to `x`. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ceilf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized diff --git a/libm/src/math/cos.rs b/libm/src/math/cos.rs index fe5a89919..db8bc4989 100644 --- a/libm/src/math/cos.rs +++ b/libm/src/math/cos.rs @@ -41,7 +41,6 @@ use super::{k_cos, k_sin, rem_pio2}; // Accuracy: // TRIG(x) returns trig(x) nearly rounded // -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cos(x: f64) -> f64 { let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; diff --git a/libm/src/math/cosf.rs b/libm/src/math/cosf.rs index 48d76c8ee..424fa42ed 100644 --- a/libm/src/math/cosf.rs +++ b/libm/src/math/cosf.rs @@ -24,7 +24,6 @@ const C2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ const C3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ const C4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cosf(x: f32) -> f32 { let x64 = x as f64; diff --git a/libm/src/math/cosh.rs b/libm/src/math/cosh.rs index bac875566..2fb568ab3 100644 --- a/libm/src/math/cosh.rs +++ b/libm/src/math/cosh.rs @@ -7,7 +7,6 @@ use super::k_expo2; /// Computes the hyperbolic cosine of the argument x. /// Is defined as `(exp(x) + exp(-x))/2` /// Angles are specified in radians. 
-#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cosh(mut x: f64) -> f64 { /* |x| */ diff --git a/libm/src/math/coshf.rs b/libm/src/math/coshf.rs index bf99e42f0..e7b684587 100644 --- a/libm/src/math/coshf.rs +++ b/libm/src/math/coshf.rs @@ -7,7 +7,6 @@ use super::k_expo2f; /// Computes the hyperbolic cosine of the argument x. /// Is defined as `(exp(x) + exp(-x))/2` /// Angles are specified in radians. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn coshf(mut x: f32) -> f32 { let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 diff --git a/libm/src/math/exp.rs b/libm/src/math/exp.rs index 5465b5693..5b163f954 100644 --- a/libm/src/math/exp.rs +++ b/libm/src/math/exp.rs @@ -81,7 +81,6 @@ const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ /// /// Calculate the exponential of `x`, that is, *e* raised to the power `x` /// (where *e* is the base of the natural system of logarithms, approximately 2.71828). -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp(mut x: f64) -> f64 { let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 diff --git a/libm/src/math/exp2.rs b/libm/src/math/exp2.rs index c2192fde5..8ea434dca 100644 --- a/libm/src/math/exp2.rs +++ b/libm/src/math/exp2.rs @@ -322,7 +322,6 @@ static TBL: [u64; TBLSIZE * 2] = [ /// Exponential, base 2 (f64) /// /// Calculate `2^x`, that is, 2 raised to the power `x`. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp2(mut x: f64) -> f64 { let redux = f64::from_bits(0x4338000000000000) / TBLSIZE as f64; diff --git a/libm/src/math/exp2f.rs b/libm/src/math/exp2f.rs index 12c9e76a4..8a890b832 100644 --- a/libm/src/math/exp2f.rs +++ b/libm/src/math/exp2f.rs @@ -73,7 +73,6 @@ static EXP2FT: [u64; TBLSIZE] = [ /// Exponential, base 2 (f32) /// /// Calculate `2^x`, that is, 2 raised to the power `x`. 
-#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp2f(mut x: f32) -> f32 { let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32; diff --git a/libm/src/math/expf.rs b/libm/src/math/expf.rs index 09323ec8d..47c1b2c46 100644 --- a/libm/src/math/expf.rs +++ b/libm/src/math/expf.rs @@ -30,7 +30,6 @@ const P2: f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */ /// /// Calculate the exponential of `x`, that is, *e* raised to the power `x` /// (where *e* is the base of the natural system of logarithms, approximately 2.71828). -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn expf(mut x: f32) -> f32 { let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 diff --git a/libm/src/math/expm1.rs b/libm/src/math/expm1.rs index 0d43b4e10..42608509a 100644 --- a/libm/src/math/expm1.rs +++ b/libm/src/math/expm1.rs @@ -30,7 +30,6 @@ const Q5: f64 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ /// system of logarithms, approximately 2.71828). /// The result is accurate even for small values of `x`, /// where using `exp(x)-1` would lose many significant digits. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn expm1(mut x: f64) -> f64 { let hi: f64; diff --git a/libm/src/math/expm1f.rs b/libm/src/math/expm1f.rs index 9bb223448..3fc2a247b 100644 --- a/libm/src/math/expm1f.rs +++ b/libm/src/math/expm1f.rs @@ -32,7 +32,6 @@ const Q2: f32 = 1.5807170421e-3; /* 0xcf3010.0p-33 */ /// system of logarithms, approximately 2.71828). /// The result is accurate even for small values of `x`, /// where using `exp(x)-1` would lose many significant digits. 
-#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn expm1f(mut x: f32) -> f32 { let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 diff --git a/libm/src/math/expo2.rs b/libm/src/math/expo2.rs index ae6cc8121..82e9b360a 100644 --- a/libm/src/math/expo2.rs +++ b/libm/src/math/expo2.rs @@ -1,7 +1,6 @@ use super::{combine_words, exp}; /* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn expo2(x: f64) -> f64 { /* k is such that k*ln2 has minimal relative error and x - kln2 > log(DBL_MIN) */ diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs index 52a9adcbf..5a7f795f6 100644 --- a/libm/src/math/fabs.rs +++ b/libm/src/math/fabs.rs @@ -3,7 +3,6 @@ use core::u64; /// Absolute value (magnitude) (f64) /// Calculates the absolute value (magnitude) of the argument `x`, /// by direct manipulation of the bit representation of `x`. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fabs(x: f64) -> f64 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs index 5942d983a..495512584 100644 --- a/libm/src/math/fabsf.rs +++ b/libm/src/math/fabsf.rs @@ -1,7 +1,6 @@ /// Absolute value (magnitude) (f32) /// Calculates the absolute value (magnitude) of the argument `x`, /// by direct manipulation of the bit representation of `x`. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fabsf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized diff --git a/libm/src/math/fdim.rs b/libm/src/math/fdim.rs index 06edc9960..014930097 100644 --- a/libm/src/math/fdim.rs +++ b/libm/src/math/fdim.rs @@ -8,7 +8,6 @@ use core::f64; /// * NAN if either argument is NAN. /// /// A range error may occur. 
-#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fdim(x: f64, y: f64) -> f64 { if x.is_nan() { diff --git a/libm/src/math/fdimf.rs b/libm/src/math/fdimf.rs index f1ad5896b..ea0b592d7 100644 --- a/libm/src/math/fdimf.rs +++ b/libm/src/math/fdimf.rs @@ -8,7 +8,6 @@ use core::f32; /// * NAN if either argument is NAN. /// /// A range error may occur. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fdimf(x: f32, y: f32) -> f32 { if x.is_nan() { diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index f6068c697..91825e3c8 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -5,7 +5,6 @@ const TOINT: f64 = 1. / f64::EPSILON; /// Floor (f64) /// /// Finds the nearest integer less than or equal to `x`. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn floor(x: f64) -> f64 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs index c04f18aee..6d751b077 100644 --- a/libm/src/math/floorf.rs +++ b/libm/src/math/floorf.rs @@ -3,7 +3,6 @@ use core::f32; /// Floor (f32) /// /// Finds the nearest integer less than or equal to `x`. 
-#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn floorf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index 07d90f8b7..5ea0d09ea 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -10,7 +10,6 @@ struct Num { sign: i32, } -#[inline] fn normalize(x: f64) -> Num { let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 @@ -30,7 +29,6 @@ fn normalize(x: f64) -> Num { Num { m: ix, e, sign } } -#[inline] fn mul(x: u64, y: u64) -> (u64, u64) { let t1: u64; let t2: u64; @@ -53,7 +51,6 @@ fn mul(x: u64, y: u64) -> (u64, u64) { /// Computes `(x*y)+z`, rounded as one ternary operation: /// Computes the value (as if) to infinite precision and rounds once to the result format, /// according to the rounding mode characterized by the value of FLT_ROUNDS. -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fma(x: f64, y: f64, z: f64) -> f64 { let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 diff --git a/libm/src/math/fmaf.rs b/libm/src/math/fmaf.rs index e77e0fa4a..03d371c55 100644 --- a/libm/src/math/fmaf.rs +++ b/libm/src/math/fmaf.rs @@ -46,7 +46,6 @@ use super::fenv::{ /// Computes `(x*y)+z`, rounded as one ternary operation: /// Computes the value (as if) to infinite precision and rounds once to the result format, /// according to the rounding mode characterized by the value of FLT_ROUNDS. 
-#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 { let xy: f64; diff --git a/libm/src/math/fmax.rs b/libm/src/math/fmax.rs index 22016d11c..93c97bc61 100644 --- a/libm/src/math/fmax.rs +++ b/libm/src/math/fmax.rs @@ -1,4 +1,3 @@ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmax(x: f64, y: f64) -> f64 { // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the diff --git a/libm/src/math/fmaxf.rs b/libm/src/math/fmaxf.rs index a883fdaef..607746647 100644 --- a/libm/src/math/fmaxf.rs +++ b/libm/src/math/fmaxf.rs @@ -1,4 +1,3 @@ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmaxf(x: f32, y: f32) -> f32 { // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the diff --git a/libm/src/math/fmin.rs b/libm/src/math/fmin.rs index d1ccc3a46..ab1509f34 100644 --- a/libm/src/math/fmin.rs +++ b/libm/src/math/fmin.rs @@ -1,4 +1,3 @@ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmin(x: f64, y: f64) -> f64 { // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the diff --git a/libm/src/math/fminf.rs b/libm/src/math/fminf.rs index 43ec97cb5..0049e7117 100644 --- a/libm/src/math/fminf.rs +++ b/libm/src/math/fminf.rs @@ -1,4 +1,3 @@ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fminf(x: f32, y: f32) -> f32 { // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the diff --git a/libm/src/math/fmod.rs b/libm/src/math/fmod.rs index 2cdd8a9ba..d892ffd8b 100644 --- a/libm/src/math/fmod.rs +++ b/libm/src/math/fmod.rs @@ -1,6 +1,5 @@ use core::u64; -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmod(x: f64, y: f64) -> f64 { let mut uxi = x.to_bits(); diff --git a/libm/src/math/fmodf.rs b/libm/src/math/fmodf.rs index 3e6779a93..c53dc186a 100644 --- 
a/libm/src/math/fmodf.rs +++ b/libm/src/math/fmodf.rs @@ -1,7 +1,6 @@ use core::f32; use core::u32; -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmodf(x: f32, y: f32) -> f32 { let mut uxi = x.to_bits(); diff --git a/libm/src/math/hypot.rs b/libm/src/math/hypot.rs index e53baf539..da458ea1d 100644 --- a/libm/src/math/hypot.rs +++ b/libm/src/math/hypot.rs @@ -4,7 +4,6 @@ use super::sqrt; const SPLIT: f64 = 134217728. + 1.; // 0x1p27 + 1 === (2 ^ 27) + 1 -#[inline] fn sq(x: f64) -> (f64, f64) { let xh: f64; let xl: f64; @@ -18,7 +17,6 @@ fn sq(x: f64) -> (f64, f64) { (hi, lo) } -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn hypot(mut x: f64, mut y: f64) -> f64 { let x1p700 = f64::from_bits(0x6bb0000000000000); // 0x1p700 === 2 ^ 700 diff --git a/libm/src/math/hypotf.rs b/libm/src/math/hypotf.rs index 4636b8f1d..576eebb33 100644 --- a/libm/src/math/hypotf.rs +++ b/libm/src/math/hypotf.rs @@ -2,7 +2,6 @@ use core::f32; use super::sqrtf; -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn hypotf(mut x: f32, mut y: f32) -> f32 { let x1p90 = f32::from_bits(0x6c800000); // 0x1p90f === 2 ^ 90 diff --git a/libm/src/math/k_cos.rs b/libm/src/math/k_cos.rs index 4687b369a..49b2fc64d 100644 --- a/libm/src/math/k_cos.rs +++ b/libm/src/math/k_cos.rs @@ -51,7 +51,6 @@ const C6: f64 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ // expression for cos(). Retention happens in all cases tested // under FreeBSD, so don't pessimize things by forcibly clipping // any extra precision in w. 
-#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_cos(x: f64, y: f64) -> f64 { let z = x * x; diff --git a/libm/src/math/k_cosf.rs b/libm/src/math/k_cosf.rs index 79d0f238f..e99f2348c 100644 --- a/libm/src/math/k_cosf.rs +++ b/libm/src/math/k_cosf.rs @@ -20,7 +20,6 @@ const C1: f64 = 0.0416666233237390631894; /* 0x155553e1053a42.0p-57 */ const C2: f64 = -0.00138867637746099294692; /* -0x16c087e80f1e27.0p-62 */ const C3: f64 = 0.0000243904487962774090654; /* 0x199342e0ee5069.0p-68 */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_cosf(x: f64) -> f32 { let z = x * x; diff --git a/libm/src/math/k_expo2.rs b/libm/src/math/k_expo2.rs index 0a9562eae..7345075f3 100644 --- a/libm/src/math/k_expo2.rs +++ b/libm/src/math/k_expo2.rs @@ -4,7 +4,6 @@ use super::exp; const K: i32 = 2043; /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_expo2(x: f64) -> f64 { let k_ln2 = f64::from_bits(0x40962066151add8b); diff --git a/libm/src/math/k_expo2f.rs b/libm/src/math/k_expo2f.rs index de8507772..fbd7b27d5 100644 --- a/libm/src/math/k_expo2f.rs +++ b/libm/src/math/k_expo2f.rs @@ -4,7 +4,6 @@ use super::expf; const K: i32 = 235; /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_expo2f(x: f32) -> f32 { let k_ln2 = f32::from_bits(0x4322e3bc); diff --git a/libm/src/math/k_sin.rs b/libm/src/math/k_sin.rs index 5d2bd68aa..9dd96c944 100644 --- a/libm/src/math/k_sin.rs +++ b/libm/src/math/k_sin.rs @@ -43,7 +43,6 @@ const S6: f64 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ // r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) // then 3 2 // sin(x) = x + (S1*x + (x *(r-y/2)+y)) -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_sin(x: f64, 
y: f64, iy: i32) -> f64 { let z = x * x; diff --git a/libm/src/math/k_sinf.rs b/libm/src/math/k_sinf.rs index 68fe926c2..88d10caba 100644 --- a/libm/src/math/k_sinf.rs +++ b/libm/src/math/k_sinf.rs @@ -20,7 +20,6 @@ const S2: f64 = 0.0083333293858894631756; /* 0x111110896efbb2.0p-59 */ const S3: f64 = -0.000198393348360966317347; /* -0x1a00f9e2cae774.0p-65 */ const S4: f64 = 0.0000027183114939898219064; /* 0x16cd878c3b46a7.0p-71 */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_sinf(x: f64) -> f32 { let z = x * x; diff --git a/libm/src/math/k_tan.rs b/libm/src/math/k_tan.rs index ea3c386b0..d177010bb 100644 --- a/libm/src/math/k_tan.rs +++ b/libm/src/math/k_tan.rs @@ -58,7 +58,6 @@ static T: [f64; 13] = [ const PIO4: f64 = 7.85398163397448278999e-01; /* 3FE921FB, 54442D18 */ const PIO4_LO: f64 = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_tan(mut x: f64, mut y: f64, odd: i32) -> f64 { let hx = (f64::to_bits(x) >> 32) as u32; @@ -101,7 +100,6 @@ pub(crate) fn k_tan(mut x: f64, mut y: f64, odd: i32) -> f64 { a0 + a * (1.0 + a0 * w0 + a0 * v) } -#[inline] fn zero_low_word(x: f64) -> f64 { f64::from_bits(f64::to_bits(x) & 0xFFFF_FFFF_0000_0000) } diff --git a/libm/src/math/k_tanf.rs b/libm/src/math/k_tanf.rs index 52651378d..af8db539d 100644 --- a/libm/src/math/k_tanf.rs +++ b/libm/src/math/k_tanf.rs @@ -19,7 +19,6 @@ const T: [f64; 6] = [ 0.00946564784943673166728, /* 0x1362b9bf971bcd.0p-59 */ ]; -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_tanf(x: f64, odd: bool) -> f32 { let z = x * x; diff --git a/libm/src/math/ldexp.rs b/libm/src/math/ldexp.rs index 780ddfc11..e46242e55 100644 --- a/libm/src/math/ldexp.rs +++ b/libm/src/math/ldexp.rs @@ -1,4 +1,3 @@ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ldexp(x: f64, n: i32) -> f64 { super::scalbn(x, n) diff --git 
a/libm/src/math/ldexpf.rs b/libm/src/math/ldexpf.rs index 70935a002..95b27fc49 100644 --- a/libm/src/math/ldexpf.rs +++ b/libm/src/math/ldexpf.rs @@ -1,4 +1,3 @@ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ldexpf(x: f32, n: i32) -> f32 { super::scalbnf(x, n) diff --git a/libm/src/math/log.rs b/libm/src/math/log.rs index 4126e413b..27a26da60 100644 --- a/libm/src/math/log.rs +++ b/libm/src/math/log.rs @@ -70,7 +70,6 @@ const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log(mut x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 diff --git a/libm/src/math/log10.rs b/libm/src/math/log10.rs index c99696040..40dacf2c9 100644 --- a/libm/src/math/log10.rs +++ b/libm/src/math/log10.rs @@ -31,7 +31,6 @@ const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log10(mut x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 diff --git a/libm/src/math/log10f.rs b/libm/src/math/log10f.rs index 9845cda5d..108dfa8b5 100644 --- a/libm/src/math/log10f.rs +++ b/libm/src/math/log10f.rs @@ -25,7 +25,6 @@ const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log10f(mut x: f32) -> f32 { let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 diff --git a/libm/src/math/log1p.rs b/libm/src/math/log1p.rs index cd7045ac9..4fd1c73eb 100644 --- 
a/libm/src/math/log1p.rs +++ b/libm/src/math/log1p.rs @@ -65,7 +65,6 @@ const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log1p(x: f64) -> f64 { let mut ui: u64 = x.to_bits(); diff --git a/libm/src/math/log1pf.rs b/libm/src/math/log1pf.rs index 8e9651357..500e8eeaa 100644 --- a/libm/src/math/log1pf.rs +++ b/libm/src/math/log1pf.rs @@ -20,7 +20,6 @@ const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log1pf(x: f32) -> f32 { let mut ui: u32 = x.to_bits(); diff --git a/libm/src/math/log2.rs b/libm/src/math/log2.rs index a3d43e55c..83da3a193 100644 --- a/libm/src/math/log2.rs +++ b/libm/src/math/log2.rs @@ -29,7 +29,6 @@ const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log2(mut x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 diff --git a/libm/src/math/log2f.rs b/libm/src/math/log2f.rs index 53a37e503..3a20fb15b 100644 --- a/libm/src/math/log2f.rs +++ b/libm/src/math/log2f.rs @@ -23,7 +23,6 @@ const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log2f(mut x: f32) -> f32 { let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 diff --git a/libm/src/math/logf.rs b/libm/src/math/logf.rs index 95195601c..2b57b934f 
100644 --- a/libm/src/math/logf.rs +++ b/libm/src/math/logf.rs @@ -21,7 +21,6 @@ const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn logf(mut x: f32) -> f32 { let x1p25 = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 diff --git a/libm/src/math/nextafter.rs b/libm/src/math/nextafter.rs index 4c1257e00..13094a17c 100644 --- a/libm/src/math/nextafter.rs +++ b/libm/src/math/nextafter.rs @@ -1,4 +1,3 @@ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn nextafter(x: f64, y: f64) -> f64 { if x.is_nan() || y.is_nan() { diff --git a/libm/src/math/nextafterf.rs b/libm/src/math/nextafterf.rs index a48bce9f7..df9b10829 100644 --- a/libm/src/math/nextafterf.rs +++ b/libm/src/math/nextafterf.rs @@ -1,4 +1,3 @@ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn nextafterf(x: f32, y: f32) -> f32 { if x.is_nan() || y.is_nan() { diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 068a4ec47..ce8e83ee6 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -89,7 +89,6 @@ const IVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547_652b82fe =1/ln2 */ const IVLN2_H: f64 = 1.44269502162933349609e+00; /* 0x3ff71547_60000000 =24b 1/ln2*/ const IVLN2_L: f64 = 1.92596299112661746887e-08; /* 0x3e54ae0b_f85ddf44 =1/ln2 tail*/ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn pow(x: f64, y: f64) -> f64 { let t1: f64; diff --git a/libm/src/math/powf.rs b/libm/src/math/powf.rs index 015bade86..f3cf76f9a 100644 --- a/libm/src/math/powf.rs +++ b/libm/src/math/powf.rs @@ -43,7 +43,6 @@ const IVLN2: f32 = 1.4426950216e+00; const IVLN2_H: f32 = 1.4426879883e+00; const IVLN2_L: f32 = 7.0526075433e-06; -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn powf(x: f32, y: f32) -> f32 { let mut 
z: f32; diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index dc6b3297d..6b7dbd348 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -41,7 +41,6 @@ const PIO2_3T: f64 = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ // use rem_pio2_large() for large x // // caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { let x1p24 = f64::from_bits(0x4170000000000000); @@ -49,7 +48,6 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { let sign = (f64::to_bits(x) >> 63) as i32; let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; - #[inline] fn medium(x: f64, ix: u32) -> (i32, f64, f64) { /* rint(x/(pi/2)), Assume round-to-nearest. */ let f_n = x as f64 * INV_PIO2 + TO_INT - TO_INT; diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index 5336a9767..8533dc289 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -222,7 +222,6 @@ const PIO2: [f64; 8] = [ /// skip the part of the product that are known to be a huge integer ( /// more accurately, = 0 mod 8 ). Thus the number of operations are /// independent of the exponent of the input. 
-#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24 diff --git a/libm/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs index af2745d1b..5d392ba2d 100644 --- a/libm/src/math/rem_pio2f.rs +++ b/libm/src/math/rem_pio2f.rs @@ -31,7 +31,6 @@ const PIO2_1T: f64 = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ /// /// use double precision for everything except passing x /// use __rem_pio2_large() for large x -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) { let x64 = x as f64; diff --git a/libm/src/math/remainder.rs b/libm/src/math/remainder.rs index 7ce895004..9e966c9ed 100644 --- a/libm/src/math/remainder.rs +++ b/libm/src/math/remainder.rs @@ -1,4 +1,3 @@ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn remainder(x: f64, y: f64) -> f64 { let (result, _) = super::remquo(x, y); diff --git a/libm/src/math/remainderf.rs b/libm/src/math/remainderf.rs index 8b2aa5aab..b1407cf2a 100644 --- a/libm/src/math/remainderf.rs +++ b/libm/src/math/remainderf.rs @@ -1,4 +1,3 @@ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn remainderf(x: f32, y: f32) -> f32 { let (result, _) = super::remquof(x, y); diff --git a/libm/src/math/remquo.rs b/libm/src/math/remquo.rs index c72c8f187..0afd1f7f5 100644 --- a/libm/src/math/remquo.rs +++ b/libm/src/math/remquo.rs @@ -1,3 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn remquo(mut x: f64, mut y: f64) -> (f64, i32) { let ux: u64 = x.to_bits(); let mut uy: u64 = y.to_bits(); diff --git a/libm/src/math/remquof.rs b/libm/src/math/remquof.rs index 871d0c7d6..d71bd38e3 100644 --- a/libm/src/math/remquof.rs +++ b/libm/src/math/remquof.rs @@ -1,3 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub 
fn remquof(mut x: f32, mut y: f32) -> (f32, i32) { let ux: u32 = x.to_bits(); let mut uy: u32 = y.to_bits(); diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs index 67590d2c1..bf72f5b94 100644 --- a/libm/src/math/round.rs +++ b/libm/src/math/round.rs @@ -2,7 +2,6 @@ use core::f64; const TOINT: f64 = 1.0 / f64::EPSILON; -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn round(mut x: f64) -> f64 { let i = x.to_bits(); diff --git a/libm/src/math/roundf.rs b/libm/src/math/roundf.rs index 85114be4b..497e88d62 100644 --- a/libm/src/math/roundf.rs +++ b/libm/src/math/roundf.rs @@ -2,7 +2,6 @@ use core::f32; const TOINT: f32 = 1.0 / f32::EPSILON; -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn roundf(mut x: f32) -> f32 { let i = x.to_bits(); diff --git a/libm/src/math/scalbn.rs b/libm/src/math/scalbn.rs index d8c8409ac..00c455a10 100644 --- a/libm/src/math/scalbn.rs +++ b/libm/src/math/scalbn.rs @@ -1,4 +1,3 @@ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn scalbn(x: f64, mut n: i32) -> f64 { let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 diff --git a/libm/src/math/scalbnf.rs b/libm/src/math/scalbnf.rs index 4e9771175..73f4bb57a 100644 --- a/libm/src/math/scalbnf.rs +++ b/libm/src/math/scalbnf.rs @@ -1,4 +1,3 @@ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn scalbnf(mut x: f32, mut n: i32) -> f32 { let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 diff --git a/libm/src/math/sin.rs b/libm/src/math/sin.rs index 51aed88a8..1329b41a9 100644 --- a/libm/src/math/sin.rs +++ b/libm/src/math/sin.rs @@ -40,7 +40,6 @@ use super::{k_cos, k_sin, rem_pio2}; // // Accuracy: // TRIG(x) returns trig(x) nearly rounded -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sin(x: f64) -> f64 { let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 diff --git a/libm/src/math/sinf.rs 
b/libm/src/math/sinf.rs index 0c31099ed..6e20be2ae 100644 --- a/libm/src/math/sinf.rs +++ b/libm/src/math/sinf.rs @@ -24,7 +24,6 @@ const S2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ const S3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ const S4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sinf(x: f32) -> f32 { let x64 = x as f64; diff --git a/libm/src/math/sinh.rs b/libm/src/math/sinh.rs index d36de66c1..fd24fd20c 100644 --- a/libm/src/math/sinh.rs +++ b/libm/src/math/sinh.rs @@ -4,7 +4,6 @@ use super::{expm1, expo2}; // = (exp(x)-1 + (exp(x)-1)/exp(x))/2 // = x + x^3/6 + o(x^5) // -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sinh(x: f64) -> f64 { // union {double f; uint64_t i;} u = {.f = x}; diff --git a/libm/src/math/sinhf.rs b/libm/src/math/sinhf.rs index fd0b2bfc8..24f863c44 100644 --- a/libm/src/math/sinhf.rs +++ b/libm/src/math/sinhf.rs @@ -1,7 +1,6 @@ use super::expm1f; use super::k_expo2f; -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sinhf(x: f32) -> f32 { let mut h = 0.5f32; diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 14404d4eb..58cf00ed8 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -81,7 +81,6 @@ use core::num::Wrapping; const TINY: f64 = 1.0e-300; -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrt(x: f64) -> f64 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index b9365c617..889b52581 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -15,7 +15,6 @@ const TINY: f32 = 1.0e-30; -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrtf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized diff --git a/libm/src/math/tan.rs 
b/libm/src/math/tan.rs index e5c94cbb1..5a72f6801 100644 --- a/libm/src/math/tan.rs +++ b/libm/src/math/tan.rs @@ -39,7 +39,6 @@ use super::{k_tan, rem_pio2}; // // Accuracy: // TRIG(x) returns trig(x) nearly rounded -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tan(x: f64) -> f64 { let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 diff --git a/libm/src/math/tanf.rs b/libm/src/math/tanf.rs index c286cdeb4..10de59c39 100644 --- a/libm/src/math/tanf.rs +++ b/libm/src/math/tanf.rs @@ -24,7 +24,6 @@ const T2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ const T3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ const T4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tanf(x: f32) -> f32 { let x64 = x as f64; diff --git a/libm/src/math/tanh.rs b/libm/src/math/tanh.rs index 75d695cf7..980c68554 100644 --- a/libm/src/math/tanh.rs +++ b/libm/src/math/tanh.rs @@ -4,7 +4,6 @@ use super::expm1; * = (exp(2*x) - 1)/(exp(2*x) - 1 + 2) * = (1 - exp(-2*x))/(exp(-2*x) - 1 + 2) */ -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tanh(mut x: f64) -> f64 { let mut uf: f64 = x; diff --git a/libm/src/math/tanhf.rs b/libm/src/math/tanhf.rs index ac4657b5a..fc94e3ddd 100644 --- a/libm/src/math/tanhf.rs +++ b/libm/src/math/tanhf.rs @@ -1,6 +1,5 @@ use super::expm1f; -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tanhf(mut x: f32) -> f32 { /* x = |x| */ diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs index 1ee46fc7d..f7892a2c5 100644 --- a/libm/src/math/trunc.rs +++ b/libm/src/math/trunc.rs @@ -1,6 +1,5 @@ use core::f64; -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn trunc(x: f64) -> f64 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs index 
f93383269..a4c001629 100644 --- a/libm/src/math/truncf.rs +++ b/libm/src/math/truncf.rs @@ -1,6 +1,5 @@ use core::f32; -#[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn truncf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized From 92ffb046c050cb90fc8134bf9c0f350dbfef7ad1 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 10 Jul 2019 13:01:02 +0200 Subject: [PATCH 0291/1459] Fix overflow bug in fma --- libm/src/math/fma.rs | 4 ++-- libm/tests/unit.rs | 19 ------------------- 2 files changed, 2 insertions(+), 21 deletions(-) delete mode 100644 libm/tests/unit.rs diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index 6d4471a9a..99f77bc79 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -126,8 +126,8 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { } else { /* r -= z */ let t = rlo; - rlo -= zlo; - rhi = rhi - zhi - (t < rlo) as u64; + rlo = rlo.wrapping_sub(zlo); + rhi = rhi.wrapping_sub(zhi.wrapping_sub((t < rlo) as u64)); if (rhi >> 63) != 0 { rlo = (-(rlo as i64)) as u64; rhi = (-(rhi as i64)) as u64 - (rlo != 0) as u64; diff --git a/libm/tests/unit.rs b/libm/tests/unit.rs deleted file mode 100644 index 4e7002817..000000000 --- a/libm/tests/unit.rs +++ /dev/null @@ -1,19 +0,0 @@ -use libm::*; - -#[test] -fn fma_segfault() { - // These two inputs cause fma to segfault on release due to overflow: - assert_eq!( - fma( - -0.0000000000000002220446049250313, - -0.0000000000000002220446049250313, - -0.0000000000000002220446049250313 - ), - -0.00000000000000022204460492503126, - ); - - assert_eq!( - fma(-0.992, -0.992, -0.992), - -0.00793599999988632, - ); -} From ebeace71ce4734b4382904db4b3aa619ac9fef16 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 13 Jul 2019 10:55:54 +0200 Subject: [PATCH 0292/1459] avoid ptr::write which might panic in debug mode --- src/arm.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/arm.rs b/src/arm.rs index 
9bfffb74f..ab35ec92b 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -137,11 +137,12 @@ pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: us pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, mut n: usize) { use core::ptr; + // We are guaranteed 4-alignment, so accessing at u32 is okay. let mut dest = dest as *mut u32; let mut src = src as *mut u32; while n >= 4 { - ptr::write(dest, ptr::read(src)); + *dest = *src; dest = dest.offset(1); src = src.offset(1); n -= 4; @@ -198,7 +199,7 @@ pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, mut n: usize, c: i32 let c = (byte << 24) | (byte << 16) | (byte << 8) | byte; while n >= 4 { - ptr::write(dest, c); + *dest = c; dest = dest.offset(1); n -= 4; } From 25edd204fa324b00836dcd2d2cea362eeb627096 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 13 Jul 2019 11:00:15 +0200 Subject: [PATCH 0293/1459] remove unused imports --- src/arm.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/arm.rs b/src/arm.rs index ab35ec92b..4cf73ef37 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -135,8 +135,6 @@ pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: us #[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(thumb, linkage = "weak")] pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, mut n: usize) { - use core::ptr; - // We are guaranteed 4-alignment, so accessing at u32 is okay. 
let mut dest = dest as *mut u32; let mut src = src as *mut u32; @@ -191,8 +189,6 @@ pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { #[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(thumb, linkage = "weak")] pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, mut n: usize, c: i32) { - use core::ptr; - let mut dest = dest as *mut u32; let byte = (c as u32) & 0xff; From 65dacd7150f7e510881773ce603bba5b9395ffeb Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 13 Jul 2019 11:07:43 +0200 Subject: [PATCH 0294/1459] avoid bare trait objects --- examples/intrinsics.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index f6980fcb5..8b0ffa3a8 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -350,8 +350,8 @@ fn run() { // } } -fn something_with_a_dtor(f: &Fn()) { - struct A<'a>(&'a (Fn() + 'a)); +fn something_with_a_dtor(f: &dyn Fn()) { + struct A<'a>(&'a (dyn Fn() + 'a)); impl<'a> Drop for A<'a> { fn drop(&mut self) { From 783430e6ae1ccb0bf6265e4ec81f7c1596dd3eb0 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 13 Jul 2019 11:38:35 +0200 Subject: [PATCH 0295/1459] Replace {u,i}128_* lang items with __rust_{u,i}128_* unmangled functions The -Zlower-128bit-ops feature is completely broken, as libcore needs those lang items to compile with this feature, but they are only provided by compiler_builtins, which itself depends on libcore. According to rust-lang/rust#58969 the feature never got finished. This commit removes the associated lang items and replaces them with normal unmangled functions, when there is no existing intrinsic. This makes it easier for alternative codegen backends to implement 128bit integer support. 
--- src/int/addsub.rs | 37 ++++++++++++++++++------------------- src/int/mul.rs | 17 +++-------------- src/int/sdiv.rs | 11 ----------- src/int/shift.rs | 40 ++++++++++------------------------------ src/int/udiv.rs | 11 ----------- src/macros.rs | 14 -------------- 6 files changed, 31 insertions(+), 99 deletions(-) diff --git a/src/int/addsub.rs b/src/int/addsub.rs index e2d5bcbd4..0a88e2fc8 100644 --- a/src/int/addsub.rs +++ b/src/int/addsub.rs @@ -90,44 +90,43 @@ where impl Subo for i128 {} impl Subo for u128 {} -u128_lang_items! { - #[lang = "i128_add"] - pub fn rust_i128_add(a: i128, b: i128) -> i128 { - rust_u128_add(a as _, b as _) as _ +intrinsics! { + pub extern "C" fn __rust_i128_add(a: i128, b: i128) -> i128 { + __rust_u128_add(a as _, b as _) as _ } - #[lang = "i128_addo"] - pub fn rust_i128_addo(a: i128, b: i128) -> (i128, bool) { + + pub extern "C" fn __rust_i128_addo(a: i128, b: i128) -> (i128, bool) { let mut oflow = 0; let r = a.addo(b, &mut oflow); (r, oflow != 0) } - #[lang = "u128_add"] - pub fn rust_u128_add(a: u128, b: u128) -> u128 { + + pub extern "C" fn __rust_u128_add(a: u128, b: u128) -> u128 { a.add(b) } - #[lang = "u128_addo"] - pub fn rust_u128_addo(a: u128, b: u128) -> (u128, bool) { + + pub extern "C" fn __rust_u128_addo(a: u128, b: u128) -> (u128, bool) { let mut oflow = 0; let r = a.addo(b, &mut oflow); (r, oflow != 0) } - #[lang = "i128_sub"] - pub fn rust_i128_sub(a: i128, b: i128) -> i128 { - rust_u128_sub(a as _, b as _) as _ + + pub extern "C" fn __rust_i128_sub(a: i128, b: i128) -> i128 { + __rust_u128_sub(a as _, b as _) as _ } - #[lang = "i128_subo"] - pub fn rust_i128_subo(a: i128, b: i128) -> (i128, bool) { + + pub extern "C" fn __rust_i128_subo(a: i128, b: i128) -> (i128, bool) { let mut oflow = 0; let r = a.subo(b, &mut oflow); (r, oflow != 0) } - #[lang = "u128_sub"] - pub fn rust_u128_sub(a: u128, b: u128) -> u128 { + + pub extern "C" fn __rust_u128_sub(a: u128, b: u128) -> u128 { a.sub(b) } - #[lang = "u128_subo"] - 
pub fn rust_u128_subo(a: u128, b: u128) -> (u128, bool) { + + pub extern "C" fn __rust_u128_subo(a: u128, b: u128) -> (u128, bool) { let mut oflow = 0; let r = a.subo(b, &mut oflow); (r, oflow != 0) diff --git a/src/int/mul.rs b/src/int/mul.rs index 8df58a27b..42f13913e 100644 --- a/src/int/mul.rs +++ b/src/int/mul.rs @@ -107,25 +107,14 @@ intrinsics! { pub extern "C" fn __muloti4(a: i128, b: i128, oflow: &mut i32) -> i128 { a.mulo(b, oflow) } -} -u128_lang_items! { - #[lang = "i128_mul"] - pub fn rust_i128_mul(a: i128, b: i128) -> i128 { - __multi3(a, b) - } - #[lang = "i128_mulo"] - pub fn rust_i128_mulo(a: i128, b: i128) -> (i128, bool) { + pub extern "C" fn __rust_i128_mulo(a: i128, b: i128) -> (i128, bool) { let mut oflow = 0; let r = __muloti4(a, b, &mut oflow); (r, oflow != 0) } - #[lang = "u128_mul"] - pub fn rust_u128_mul(a: u128, b: u128) -> u128 { - __multi3(a as _, b as _) as _ - } - #[lang = "u128_mulo"] - pub fn rust_u128_mulo(a: u128, b: u128) -> (u128, bool) { + + pub extern "C" fn __rust_u128_mulo(a: u128, b: u128) -> (u128, bool) { let mut oflow = 0; let r = a.mulo(b, &mut oflow); (r, oflow != 0) diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index ad7f67b1b..c9e252cc3 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -99,14 +99,3 @@ intrinsics! { a.divmod(b, rem, |a, b| __divdi3(a, b)) } } - -u128_lang_items! { - #[lang = "i128_div"] - pub fn rust_i128_div(a: i128, b: i128) -> i128 { - __divti3(a, b) - } - #[lang = "i128_rem"] - pub fn rust_i128_rem(a: i128, b: i128) -> i128 { - __modti3(a, b) - } -} diff --git a/src/int/shift.rs b/src/int/shift.rs index d98622279..408f8f3cc 100644 --- a/src/int/shift.rs +++ b/src/int/shift.rs @@ -103,40 +103,20 @@ intrinsics! { pub extern "C" fn __lshrti3(a: u128, b: u32) -> u128 { a.lshr(b) } -} -u128_lang_items! 
{ - #[lang = "i128_shl"] - pub fn rust_i128_shl(a: i128, b: u32) -> i128 { - __ashlti3(a as _, b) as _ - } - #[lang = "i128_shlo"] - pub fn rust_i128_shlo(a: i128, b: u128) -> (i128, bool) { - (rust_i128_shl(a, b as _), b >= 128) - } - #[lang = "u128_shl"] - pub fn rust_u128_shl(a: u128, b: u32) -> u128 { - __ashlti3(a, b) - } - #[lang = "u128_shlo"] - pub fn rust_u128_shlo(a: u128, b: u128) -> (u128, bool) { - (rust_u128_shl(a, b as _), b >= 128) + pub extern "C" fn __rust_i128_shlo(a: i128, b: u128) -> (i128, bool) { + (__ashlti3(a as _, b as _) as _, b >= 128) } - #[lang = "i128_shr"] - pub fn rust_i128_shr(a: i128, b: u32) -> i128 { - __ashrti3(a, b) + pub extern "C" fn __rust_u128_shlo(a: u128, b: u128) -> (u128, bool) { + (__ashlti3(a, b as _), b >= 128) } - #[lang = "i128_shro"] - pub fn rust_i128_shro(a: i128, b: u128) -> (i128, bool) { - (rust_i128_shr(a, b as _), b >= 128) - } - #[lang = "u128_shr"] - pub fn rust_u128_shr(a: u128, b: u32) -> u128 { - __lshrti3(a, b) + + pub extern "C" fn __rust_i128_shro(a: i128, b: u128) -> (i128, bool) { + (__ashrti3(a, b as _), b >= 128) } - #[lang = "u128_shro"] - pub fn rust_u128_shro(a: u128, b: u128) -> (u128, bool) { - (rust_u128_shr(a, b as _), b >= 128) + + pub extern "C" fn __rust_u128_shro(a: u128, b: u128) -> (u128, bool) { + (__lshrti3(a, b as _), b >= 128) } } diff --git a/src/int/udiv.rs b/src/int/udiv.rs index cdec11d2f..b393ac6db 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -268,14 +268,3 @@ intrinsics! { udivmod_inner!(n, d, rem, u128) } } - -u128_lang_items! { - #[lang = "u128_div"] - pub fn rust_u128_div(a: u128, b: u128) -> u128 { - __udivti3(a, b) - } - #[lang = "u128_rem"] - pub fn rust_u128_rem(a: u128, b: u128) -> u128 { - __umodti3(a, b) - } -} diff --git a/src/macros.rs b/src/macros.rs index 4abdae6ee..2d11ba622 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -280,17 +280,3 @@ pub mod win64_128bit_abi_hack { } } } - -macro_rules! 
u128_lang_items { - ($( - #[lang = $lang:tt] - pub fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { - $($body:tt)* - } - )*) => ($( - #[cfg_attr(not(any(stage0, feature = "no-lang-items")), lang = $lang)] - pub fn $name( $($argname: $ty),* ) -> $ret { - $($body)* - } - )*) -} From 7b716e628529ef33f731ef85a22477428299ca42 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 13 Jul 2019 12:27:58 +0200 Subject: [PATCH 0296/1459] Fix tests --- testcrate/build.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/testcrate/build.rs b/testcrate/build.rs index e43fdb77b..4bd4005b7 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -707,35 +707,35 @@ fn main() { // int/addsub.rs gen( |(a, b): (MyU128, MyU128)| Some(a.0.wrapping_add(b.0)), - "builtins::int::addsub::rust_u128_add(a, b)", + "builtins::int::addsub::__rust_u128_add(a, b)", ); gen( |(a, b): (MyI128, MyI128)| Some(a.0.wrapping_add(b.0)), - "builtins::int::addsub::rust_i128_add(a, b)", + "builtins::int::addsub::__rust_i128_add(a, b)", ); gen( |(a, b): (MyU128, MyU128)| Some(a.0.overflowing_add(b.0)), - "builtins::int::addsub::rust_u128_addo(a, b)", + "builtins::int::addsub::__rust_u128_addo(a, b)", ); gen( |(a, b): (MyI128, MyI128)| Some(a.0.overflowing_add(b.0)), - "builtins::int::addsub::rust_i128_addo(a, b)", + "builtins::int::addsub::__rust_i128_addo(a, b)", ); gen( |(a, b): (MyU128, MyU128)| Some(a.0.wrapping_sub(b.0)), - "builtins::int::addsub::rust_u128_sub(a, b)", + "builtins::int::addsub::__rust_u128_sub(a, b)", ); gen( |(a, b): (MyI128, MyI128)| Some(a.0.wrapping_sub(b.0)), - "builtins::int::addsub::rust_i128_sub(a, b)", + "builtins::int::addsub::__rust_i128_sub(a, b)", ); gen( |(a, b): (MyU128, MyU128)| Some(a.0.overflowing_sub(b.0)), - "builtins::int::addsub::rust_u128_subo(a, b)", + "builtins::int::addsub::__rust_u128_subo(a, b)", ); gen( |(a, b): (MyI128, MyI128)| Some(a.0.overflowing_sub(b.0)), - "builtins::int::addsub::rust_i128_subo(a, b)", + 
"builtins::int::addsub::__rust_i128_subo(a, b)", ); // int/mul.rs From 18f2d2d4ce84e24fd4d63c7cf6178fd9f360a0c8 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 15 Jul 2019 07:26:49 -0700 Subject: [PATCH 0297/1459] Bump to 0.1.17 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e0bb22d21..fed7beead 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.16" +version = "0.1.17" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From 79a6a1603d5672cbb9187ff41ff4d9b5048ac1cb Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 19 Jul 2019 07:44:17 -0700 Subject: [PATCH 0298/1459] Bump to 0.1.18 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index fed7beead..1436d9b4d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.17" +version = "0.1.18" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From 985a43095c82822e928e08ba3d12fad90ef8d59e Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Wed, 24 Jul 2019 08:59:54 +0300 Subject: [PATCH 0299/1459] probestack: add frame pointers for easier traceback This turns the following backtrace, ``` >> bt #0 0x0000555555576f73 in __rust_probestack () at /cargo/registry/src/github.com-1ecc6299db9ec823/compiler_builtins-0.1.14/src/probestack.rs:55 Backtrace stopped: Cannot access memory at address 0x7fffff7fedf0 ``` To this: ``` >>> bt #0 0x0000555555574e47 in __rust_probestack () #1 0x00005555555595ba in test::main () #2 0x00005555555594f3 in std::rt::lang_start::{{closure}} () #3 0x0000555555561ae3 in std::panicking::try::do_call () #4 0x000055555556595a in __rust_maybe_catch_panic () #5 0x000055555555af9b in 
std::rt::lang_start_internal () #6 0x00005555555594d5 in std::rt::lang_start () #7 0x000055555555977b in main () ``` --- src/probestack.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/probestack.rs b/src/probestack.rs index f9284e814..9bcaf4fd1 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -53,6 +53,9 @@ pub unsafe extern "C" fn __rust_probestack() { // The ABI here is that the stack frame size is located in `%eax`. Upon // return we're not supposed to modify `%esp` or `%eax`. asm!(" + pushq %rbp + movq %rsp, %rbp + mov %rax,%r11 // duplicate %rax as we're clobbering %r11 // Main loop, taken in one page increments. We're decrementing rsp by @@ -89,6 +92,7 @@ pub unsafe extern "C" fn __rust_probestack() { // return. add %rax,%rsp + leave ret " ::: "memory" : "volatile"); ::core::intrinsics::unreachable(); @@ -104,6 +108,8 @@ pub unsafe extern "C" fn __rust_probestack() { // // The ABI here is the same as x86_64, except everything is 32-bits large. asm!(" + push %ebp + mov %esp, %ebp push %ecx mov %eax,%ecx @@ -122,6 +128,7 @@ pub unsafe extern "C" fn __rust_probestack() { add %eax,%esp pop %ecx + leave ret " ::: "memory" : "volatile"); ::core::intrinsics::unreachable(); From 36da64f20e96206ac279f700586817c8abe3bdf8 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 24 Jul 2019 07:05:29 -0700 Subject: [PATCH 0300/1459] Bump to 0.1.19 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1436d9b4d..0705dbd9c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.18" +version = "0.1.19" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang-nursery/compiler-builtins" From 13d54b21726f9eba657ae0ce2861e6d105d3abe6 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Wed, 7 Aug 2019 14:06:12 -0600 Subject: [PATCH 0301/1459] Improve sqrt/sqrtf if stable intrinsics allow 
--- libm/src/math/sqrt.rs | 252 ++++++++++++++++++++++------------------- libm/src/math/sqrtf.rs | 160 ++++++++++++++------------ 2 files changed, 224 insertions(+), 188 deletions(-) diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 58cf00ed8..f01267da7 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -95,128 +95,146 @@ pub fn sqrt(x: f64) -> f64 { } } } - let mut z: f64; - let sign: Wrapping = Wrapping(0x80000000); - let mut ix0: i32; - let mut s0: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: Wrapping; - let mut t1: Wrapping; - let mut s1: Wrapping; - let mut ix1: Wrapping; - let mut q1: Wrapping; + #[cfg(target_feature="sse2")] + { + // Note(Lokathor): If compile time settings allow, we just use SSE2, since + // the sqrt in `std` on these platforms also compiles down to an SSE2 + // instruction. + #[cfg(target_arch="x86")] + use core::arch::x86::*; + #[cfg(target_arch="x86_64")] + use core::arch::x86_64::*; + unsafe { + let m = _mm_set_sd(x); + let m_sqrt = _mm_sqrt_pd(m); + _mm_cvtsd_f64(m_sqrt) + } + } + #[cfg(not(target_feature="sse2"))] + { + let mut z: f64; + let sign: Wrapping = Wrapping(0x80000000); + let mut ix0: i32; + let mut s0: i32; + let mut q: i32; + let mut m: i32; + let mut t: i32; + let mut i: i32; + let mut r: Wrapping; + let mut t1: Wrapping; + let mut s1: Wrapping; + let mut ix1: Wrapping; + let mut q1: Wrapping; - ix0 = (x.to_bits() >> 32) as i32; - ix1 = Wrapping(x.to_bits() as u32); + ix0 = (x.to_bits() >> 32) as i32; + ix1 = Wrapping(x.to_bits() as u32); - /* take care of Inf and NaN */ - if (ix0 & 0x7ff00000) == 0x7ff00000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ - } - /* take care of zero */ - if ix0 <= 0 { - if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix0 < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } - } - /* normalize x */ - m = ix0 >> 20; - if m == 0 { - /* 
subnormal x */ - while ix0 == 0 { - m -= 21; - ix0 |= (ix1 >> 11).0 as i32; - ix1 <<= 21; - } - i = 0; - while (ix0 & 0x00100000) == 0 { - i += 1; - ix0 <<= 1; - } - m -= i - 1; - ix0 |= (ix1 >> (32 - i) as usize).0 as i32; - ix1 = ix1 << i as usize; - } - m -= 1023; /* unbias exponent */ - ix0 = (ix0 & 0x000fffff) | 0x00100000; - if (m & 1) == 1 { - /* odd m, double x to make it even */ - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - } - m >>= 1; /* m = [m/2] */ + /* take care of Inf and NaN */ + if (ix0 & 0x7ff00000) == 0x7ff00000 { + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + } + /* take care of zero */ + if ix0 <= 0 { + if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { + return x; /* sqrt(+-0) = +-0 */ + } + if ix0 < 0 { + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ + } + } + /* normalize x */ + m = ix0 >> 20; + if m == 0 { + /* subnormal x */ + while ix0 == 0 { + m -= 21; + ix0 |= (ix1 >> 11).0 as i32; + ix1 <<= 21; + } + i = 0; + while (ix0 & 0x00100000) == 0 { + i += 1; + ix0 <<= 1; + } + m -= i - 1; + ix0 |= (ix1 >> (32 - i) as usize).0 as i32; + ix1 = ix1 << i as usize; + } + m -= 1023; /* unbias exponent */ + ix0 = (ix0 & 0x000fffff) | 0x00100000; + if (m & 1) == 1 { + /* odd m, double x to make it even */ + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + } + m >>= 1; /* m = [m/2] */ - /* generate sqrt(x) bit by bit */ - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - q = 0; /* [q,q1] = sqrt(x) */ - q1 = Wrapping(0); - s0 = 0; - s1 = Wrapping(0); - r = Wrapping(0x00200000); /* r = moving bit from right to left */ + /* generate sqrt(x) bit by bit */ + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + q = 0; /* [q,q1] = sqrt(x) */ + q1 = Wrapping(0); + s0 = 0; + s1 = Wrapping(0); + r = Wrapping(0x00200000); /* r = moving bit from right to left */ - while r != Wrapping(0) { - t = s0 + r.0 as i32; - if t <= ix0 { - s0 = t + r.0 as i32; - ix0 -= t; - q += r.0 as i32; - } - 
ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } + while r != Wrapping(0) { + t = s0 + r.0 as i32; + if t <= ix0 { + s0 = t + r.0 as i32; + ix0 -= t; + q += r.0 as i32; + } + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + r >>= 1; + } - r = sign; - while r != Wrapping(0) { - t1 = s1 + r; - t = s0; - if t < ix0 || (t == ix0 && t1 <= ix1) { - s1 = t1 + r; - if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { - s0 += 1; - } - ix0 -= t; - if ix1 < t1 { - ix0 -= 1; - } - ix1 -= t1; - q1 += r; - } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } + r = sign; + while r != Wrapping(0) { + t1 = s1 + r; + t = s0; + if t < ix0 || (t == ix0 && t1 <= ix1) { + s1 = t1 + r; + if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { + s0 += 1; + } + ix0 -= t; + if ix1 < t1 { + ix0 -= 1; + } + ix1 -= t1; + q1 += r; + } + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + r >>= 1; + } - /* use floating add to find out rounding direction */ - if (ix0 as u32 | ix1.0) != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if q1.0 == 0xffffffff { - q1 = Wrapping(0); - q += 1; - } else if z > 1.0 { - if q1.0 == 0xfffffffe { - q += 1; - } - q1 += Wrapping(2); - } else { - q1 += q1 & Wrapping(1); - } - } - } - ix0 = (q >> 1) + 0x3fe00000; - ix1 = q1 >> 1; - if (q & 1) == 1 { - ix1 |= sign; + /* use floating add to find out rounding direction */ + if (ix0 as u32 | ix1.0) != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if q1.0 == 0xffffffff { + q1 = Wrapping(0); + q += 1; + } else if z > 1.0 { + if q1.0 == 0xfffffffe { + q += 1; + } + q1 += Wrapping(2); + } else { + q1 += q1 & Wrapping(1); + } + } + } + ix0 = (q >> 1) + 0x3fe00000; + ix1 = q1 >> 1; + if (q & 1) == 1 { + ix1 |= sign; + } + ix0 += m << 20; + f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) } - ix0 += m << 20; - f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) } diff --git 
a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index 889b52581..a4d9ab53d 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -29,83 +29,101 @@ pub fn sqrtf(x: f32) -> f32 { } } } - let mut z: f32; - let sign: i32 = 0x80000000u32 as i32; - let mut ix: i32; - let mut s: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: u32; + #[cfg(target_feature="sse")] + { + // Note(Lokathor): If compile time settings allow, we just use SSE, since + // the sqrt in `std` on these platforms also compiles down to an SSE + // instruction. + #[cfg(target_arch="x86")] + use core::arch::x86::*; + #[cfg(target_arch="x86_64")] + use core::arch::x86_64::*; + unsafe { + let m = _mm_set_ss(x); + let m_sqrt = _mm_sqrt_ss(m); + _mm_cvtss_f32(m_sqrt) + } + } + #[cfg(not(target_feature="sse"))] + { + let mut z: f32; + let sign: i32 = 0x80000000u32 as i32; + let mut ix: i32; + let mut s: i32; + let mut q: i32; + let mut m: i32; + let mut t: i32; + let mut i: i32; + let mut r: u32; - ix = x.to_bits() as i32; + ix = x.to_bits() as i32; - /* take care of Inf and NaN */ - if (ix as u32 & 0x7f800000) == 0x7f800000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ - } + /* take care of Inf and NaN */ + if (ix as u32 & 0x7f800000) == 0x7f800000 { + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + } - /* take care of zero */ - if ix <= 0 { - if (ix & !sign) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } - } + /* take care of zero */ + if ix <= 0 { + if (ix & !sign) == 0 { + return x; /* sqrt(+-0) = +-0 */ + } + if ix < 0 { + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ + } + } - /* normalize x */ - m = ix >> 23; - if m == 0 { - /* subnormal x */ - i = 0; - while ix & 0x00800000 == 0 { - ix <<= 1; - i = i + 1; - } - m -= i - 1; - } - m -= 127; /* unbias exponent */ - ix = (ix & 0x007fffff) | 0x00800000; - if m & 1 == 1 { - /* 
odd m, double x to make it even */ - ix += ix; - } - m >>= 1; /* m = [m/2] */ + /* normalize x */ + m = ix >> 23; + if m == 0 { + /* subnormal x */ + i = 0; + while ix & 0x00800000 == 0 { + ix <<= 1; + i = i + 1; + } + m -= i - 1; + } + m -= 127; /* unbias exponent */ + ix = (ix & 0x007fffff) | 0x00800000; + if m & 1 == 1 { + /* odd m, double x to make it even */ + ix += ix; + } + m >>= 1; /* m = [m/2] */ - /* generate sqrt(x) bit by bit */ - ix += ix; - q = 0; - s = 0; - r = 0x01000000; /* r = moving bit from right to left */ + /* generate sqrt(x) bit by bit */ + ix += ix; + q = 0; + s = 0; + r = 0x01000000; /* r = moving bit from right to left */ - while r != 0 { - t = s + r as i32; - if t <= ix { - s = t + r as i32; - ix -= t; - q += r as i32; - } - ix += ix; - r >>= 1; - } + while r != 0 { + t = s + r as i32; + if t <= ix { + s = t + r as i32; + ix -= t; + q += r as i32; + } + ix += ix; + r >>= 1; + } - /* use floating add to find out rounding direction */ - if ix != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if z > 1.0 { - q += 2; - } else { - q += q & 1; - } - } - } + /* use floating add to find out rounding direction */ + if ix != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if z > 1.0 { + q += 2; + } else { + q += q & 1; + } + } + } - ix = (q >> 1) + 0x3f000000; - ix += m << 23; - f32::from_bits(ix as u32) + ix = (q >> 1) + 0x3f000000; + ix += m << 23; + f32::from_bits(ix as u32) + } } From 2626224eb68be077dae604c3ca5bf99924c2c804 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Wed, 7 Aug 2019 14:10:34 -0600 Subject: [PATCH 0302/1459] apply rustfmt --- libm/src/math/sqrt.rs | 264 ++++++++++++++++++++--------------------- libm/src/math/sqrtf.rs | 170 +++++++++++++------------- 2 files changed, 217 insertions(+), 217 deletions(-) diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index f01267da7..2fb1b24b7 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ 
-95,146 +95,146 @@ pub fn sqrt(x: f64) -> f64 { } } } - #[cfg(target_feature="sse2")] + #[cfg(target_feature = "sse2")] { - // Note(Lokathor): If compile time settings allow, we just use SSE2, since - // the sqrt in `std` on these platforms also compiles down to an SSE2 - // instruction. - #[cfg(target_arch="x86")] - use core::arch::x86::*; - #[cfg(target_arch="x86_64")] - use core::arch::x86_64::*; - unsafe { - let m = _mm_set_sd(x); - let m_sqrt = _mm_sqrt_pd(m); - _mm_cvtsd_f64(m_sqrt) - } + // Note(Lokathor): If compile time settings allow, we just use SSE2, since + // the sqrt in `std` on these platforms also compiles down to an SSE2 + // instruction. + #[cfg(target_arch = "x86")] + use core::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + unsafe { + let m = _mm_set_sd(x); + let m_sqrt = _mm_sqrt_pd(m); + _mm_cvtsd_f64(m_sqrt) + } } - #[cfg(not(target_feature="sse2"))] + #[cfg(not(target_feature = "sse2"))] { - let mut z: f64; - let sign: Wrapping = Wrapping(0x80000000); - let mut ix0: i32; - let mut s0: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: Wrapping; - let mut t1: Wrapping; - let mut s1: Wrapping; - let mut ix1: Wrapping; - let mut q1: Wrapping; + let mut z: f64; + let sign: Wrapping = Wrapping(0x80000000); + let mut ix0: i32; + let mut s0: i32; + let mut q: i32; + let mut m: i32; + let mut t: i32; + let mut i: i32; + let mut r: Wrapping; + let mut t1: Wrapping; + let mut s1: Wrapping; + let mut ix1: Wrapping; + let mut q1: Wrapping; - ix0 = (x.to_bits() >> 32) as i32; - ix1 = Wrapping(x.to_bits() as u32); + ix0 = (x.to_bits() >> 32) as i32; + ix1 = Wrapping(x.to_bits() as u32); - /* take care of Inf and NaN */ - if (ix0 & 0x7ff00000) == 0x7ff00000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ - } - /* take care of zero */ - if ix0 <= 0 { - if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix0 < 0 { - 
return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } - } - /* normalize x */ - m = ix0 >> 20; - if m == 0 { - /* subnormal x */ - while ix0 == 0 { - m -= 21; - ix0 |= (ix1 >> 11).0 as i32; - ix1 <<= 21; - } - i = 0; - while (ix0 & 0x00100000) == 0 { - i += 1; - ix0 <<= 1; - } - m -= i - 1; - ix0 |= (ix1 >> (32 - i) as usize).0 as i32; - ix1 = ix1 << i as usize; - } - m -= 1023; /* unbias exponent */ - ix0 = (ix0 & 0x000fffff) | 0x00100000; - if (m & 1) == 1 { - /* odd m, double x to make it even */ - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - } - m >>= 1; /* m = [m/2] */ + /* take care of Inf and NaN */ + if (ix0 & 0x7ff00000) == 0x7ff00000 { + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + } + /* take care of zero */ + if ix0 <= 0 { + if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { + return x; /* sqrt(+-0) = +-0 */ + } + if ix0 < 0 { + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ + } + } + /* normalize x */ + m = ix0 >> 20; + if m == 0 { + /* subnormal x */ + while ix0 == 0 { + m -= 21; + ix0 |= (ix1 >> 11).0 as i32; + ix1 <<= 21; + } + i = 0; + while (ix0 & 0x00100000) == 0 { + i += 1; + ix0 <<= 1; + } + m -= i - 1; + ix0 |= (ix1 >> (32 - i) as usize).0 as i32; + ix1 = ix1 << i as usize; + } + m -= 1023; /* unbias exponent */ + ix0 = (ix0 & 0x000fffff) | 0x00100000; + if (m & 1) == 1 { + /* odd m, double x to make it even */ + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + } + m >>= 1; /* m = [m/2] */ - /* generate sqrt(x) bit by bit */ - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - q = 0; /* [q,q1] = sqrt(x) */ - q1 = Wrapping(0); - s0 = 0; - s1 = Wrapping(0); - r = Wrapping(0x00200000); /* r = moving bit from right to left */ + /* generate sqrt(x) bit by bit */ + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + q = 0; /* [q,q1] = sqrt(x) */ + q1 = Wrapping(0); + s0 = 0; + s1 = Wrapping(0); + r = Wrapping(0x00200000); /* r = moving bit from right to left */ - while r != 
Wrapping(0) { - t = s0 + r.0 as i32; - if t <= ix0 { - s0 = t + r.0 as i32; - ix0 -= t; - q += r.0 as i32; - } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } + while r != Wrapping(0) { + t = s0 + r.0 as i32; + if t <= ix0 { + s0 = t + r.0 as i32; + ix0 -= t; + q += r.0 as i32; + } + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + r >>= 1; + } - r = sign; - while r != Wrapping(0) { - t1 = s1 + r; - t = s0; - if t < ix0 || (t == ix0 && t1 <= ix1) { - s1 = t1 + r; - if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { - s0 += 1; - } - ix0 -= t; - if ix1 < t1 { - ix0 -= 1; - } - ix1 -= t1; - q1 += r; - } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } + r = sign; + while r != Wrapping(0) { + t1 = s1 + r; + t = s0; + if t < ix0 || (t == ix0 && t1 <= ix1) { + s1 = t1 + r; + if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { + s0 += 1; + } + ix0 -= t; + if ix1 < t1 { + ix0 -= 1; + } + ix1 -= t1; + q1 += r; + } + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + r >>= 1; + } - /* use floating add to find out rounding direction */ - if (ix0 as u32 | ix1.0) != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if q1.0 == 0xffffffff { - q1 = Wrapping(0); - q += 1; - } else if z > 1.0 { - if q1.0 == 0xfffffffe { - q += 1; - } - q1 += Wrapping(2); - } else { - q1 += q1 & Wrapping(1); - } - } - } - ix0 = (q >> 1) + 0x3fe00000; - ix1 = q1 >> 1; - if (q & 1) == 1 { - ix1 |= sign; - } - ix0 += m << 20; - f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) + /* use floating add to find out rounding direction */ + if (ix0 as u32 | ix1.0) != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if q1.0 == 0xffffffff { + q1 = Wrapping(0); + q += 1; + } else if z > 1.0 { + if q1.0 == 0xfffffffe { + q += 1; + } + q1 += Wrapping(2); + } else { + q1 += q1 & Wrapping(1); + } + } + } + ix0 = (q >> 1) + 0x3fe00000; + ix1 = q1 >> 1; + if (q & 1) == 1 { + 
ix1 |= sign; + } + ix0 += m << 20; + f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) } } diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index a4d9ab53d..5fe0a7744 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -29,101 +29,101 @@ pub fn sqrtf(x: f32) -> f32 { } } } - #[cfg(target_feature="sse")] + #[cfg(target_feature = "sse")] { - // Note(Lokathor): If compile time settings allow, we just use SSE, since - // the sqrt in `std` on these platforms also compiles down to an SSE - // instruction. - #[cfg(target_arch="x86")] - use core::arch::x86::*; - #[cfg(target_arch="x86_64")] - use core::arch::x86_64::*; - unsafe { - let m = _mm_set_ss(x); - let m_sqrt = _mm_sqrt_ss(m); - _mm_cvtss_f32(m_sqrt) - } + // Note(Lokathor): If compile time settings allow, we just use SSE, since + // the sqrt in `std` on these platforms also compiles down to an SSE + // instruction. + #[cfg(target_arch = "x86")] + use core::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + unsafe { + let m = _mm_set_ss(x); + let m_sqrt = _mm_sqrt_ss(m); + _mm_cvtss_f32(m_sqrt) + } } - #[cfg(not(target_feature="sse"))] + #[cfg(not(target_feature = "sse"))] { - let mut z: f32; - let sign: i32 = 0x80000000u32 as i32; - let mut ix: i32; - let mut s: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: u32; + let mut z: f32; + let sign: i32 = 0x80000000u32 as i32; + let mut ix: i32; + let mut s: i32; + let mut q: i32; + let mut m: i32; + let mut t: i32; + let mut i: i32; + let mut r: u32; - ix = x.to_bits() as i32; + ix = x.to_bits() as i32; - /* take care of Inf and NaN */ - if (ix as u32 & 0x7f800000) == 0x7f800000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ - } + /* take care of Inf and NaN */ + if (ix as u32 & 0x7f800000) == 0x7f800000 { + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + } - /* take care of zero */ - if ix <= 0 { - if (ix & !sign) == 0 
{ - return x; /* sqrt(+-0) = +-0 */ - } - if ix < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } - } + /* take care of zero */ + if ix <= 0 { + if (ix & !sign) == 0 { + return x; /* sqrt(+-0) = +-0 */ + } + if ix < 0 { + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ + } + } - /* normalize x */ - m = ix >> 23; - if m == 0 { - /* subnormal x */ - i = 0; - while ix & 0x00800000 == 0 { - ix <<= 1; - i = i + 1; - } - m -= i - 1; - } - m -= 127; /* unbias exponent */ - ix = (ix & 0x007fffff) | 0x00800000; - if m & 1 == 1 { - /* odd m, double x to make it even */ - ix += ix; - } - m >>= 1; /* m = [m/2] */ + /* normalize x */ + m = ix >> 23; + if m == 0 { + /* subnormal x */ + i = 0; + while ix & 0x00800000 == 0 { + ix <<= 1; + i = i + 1; + } + m -= i - 1; + } + m -= 127; /* unbias exponent */ + ix = (ix & 0x007fffff) | 0x00800000; + if m & 1 == 1 { + /* odd m, double x to make it even */ + ix += ix; + } + m >>= 1; /* m = [m/2] */ - /* generate sqrt(x) bit by bit */ - ix += ix; - q = 0; - s = 0; - r = 0x01000000; /* r = moving bit from right to left */ + /* generate sqrt(x) bit by bit */ + ix += ix; + q = 0; + s = 0; + r = 0x01000000; /* r = moving bit from right to left */ - while r != 0 { - t = s + r as i32; - if t <= ix { - s = t + r as i32; - ix -= t; - q += r as i32; - } - ix += ix; - r >>= 1; - } + while r != 0 { + t = s + r as i32; + if t <= ix { + s = t + r as i32; + ix -= t; + q += r as i32; + } + ix += ix; + r >>= 1; + } - /* use floating add to find out rounding direction */ - if ix != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if z > 1.0 { - q += 2; - } else { - q += q & 1; - } - } - } + /* use floating add to find out rounding direction */ + if ix != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if z > 1.0 { + q += 2; + } else { + q += q & 1; + } + } + } - ix = (q >> 1) + 0x3f000000; - ix += m << 23; - f32::from_bits(ix as u32) + ix = (q >> 1) + 0x3f000000; + ix += m << 
23; + f32::from_bits(ix as u32) } } From 5c569a772cac4873fc95018d4f323ca0be5c0d28 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Wed, 7 Aug 2019 14:16:10 -0600 Subject: [PATCH 0303/1459] move use/const statements to a limited scope --- libm/src/math/sqrt.rs | 7 ++++--- libm/src/math/sqrtf.rs | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 2fb1b24b7..8a67cb18b 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -77,9 +77,6 @@ */ use core::f64; -use core::num::Wrapping; - -const TINY: f64 = 1.0e-300; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrt(x: f64) -> f64 { @@ -112,6 +109,10 @@ pub fn sqrt(x: f64) -> f64 { } #[cfg(not(target_feature = "sse2"))] { + use core::num::Wrapping; + + const TINY: f64 = 1.0e-300; + let mut z: f64; let sign: Wrapping = Wrapping(0x80000000); let mut ix0: i32; diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index 5fe0a7744..cb3c1672e 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -13,8 +13,6 @@ * ==================================================== */ -const TINY: f32 = 1.0e-30; - #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrtf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized @@ -46,6 +44,8 @@ pub fn sqrtf(x: f32) -> f32 { } #[cfg(not(target_feature = "sse"))] { + const TINY: f32 = 1.0e-30; + let mut z: f32; let sign: i32 = 0x80000000u32 as i32; let mut ix: i32; From 7249f78404bb5215ae57d78fdc9229ef7fbb2040 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Thu, 8 Aug 2019 18:21:10 -0600 Subject: [PATCH 0304/1459] update comments --- libm/src/math/sqrtf.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index cb3c1672e..1d5b78e84 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -29,9 +29,9 @@ pub fn sqrtf(x: f32) -> f32 { } #[cfg(target_feature = 
"sse")] { - // Note(Lokathor): If compile time settings allow, we just use SSE, since - // the sqrt in `std` on these platforms also compiles down to an SSE - // instruction. + // Note: This path is unlikely since LLVM will usually have already + // optimized sqrt calls into hardware instructions if sse is available, + // but if someone does end up here they'll apprected the speed increase. #[cfg(target_arch = "x86")] use core::arch::x86::*; #[cfg(target_arch = "x86_64")] From 9ec48059dc796c0962162754d51afb713e51bcad Mon Sep 17 00:00:00 2001 From: Lokathor Date: Thu, 8 Aug 2019 18:21:18 -0600 Subject: [PATCH 0305/1459] update comments --- libm/src/math/sqrt.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 8a67cb18b..31afe3356 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -94,9 +94,9 @@ pub fn sqrt(x: f64) -> f64 { } #[cfg(target_feature = "sse2")] { - // Note(Lokathor): If compile time settings allow, we just use SSE2, since - // the sqrt in `std` on these platforms also compiles down to an SSE2 - // instruction. + // Note: This path is unlikely since LLVM will usually have already + // optimized sqrt calls into hardware instructions if sse2 is available, + // but if someone does end up here they'll apprected the speed increase. #[cfg(target_arch = "x86")] use core::arch::x86::*; #[cfg(target_arch = "x86_64")] From bd4ecf6e3de60e10c06f7d605cf5fecac87ef2ec Mon Sep 17 00:00:00 2001 From: Roman Proskuryakov Date: Sat, 10 Aug 2019 15:30:43 +0300 Subject: [PATCH 0306/1459] Fix broken link in README --- libm/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libm/README.md b/libm/README.md index e10904f8c..e90b42d74 100644 --- a/libm/README.md +++ b/libm/README.md @@ -22,6 +22,8 @@ This crate is [on crates.io] and can be used today in stable `#![no_std]` progra The API documentation can be found [here](https://docs.rs/libm). 
+[on crates.io]: https://crates.io/crates/libm + ## Benchmark [benchmark]: #benchmark From b20be1334e8be6ae92253529592fbefc2840f871 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Tue, 13 Aug 2019 23:40:54 -0600 Subject: [PATCH 0307/1459] slightly improve spec and sanity check coverage --- libm/src/math/ceil.rs | 17 +++++++++++++++-- libm/src/math/ceilf.rs | 22 ++++++++++++++++++++++ libm/src/math/fabs.rs | 24 ++++++++++++++++++++++++ libm/src/math/fabsf.rs | 24 ++++++++++++++++++++++++ libm/src/math/floor.rs | 22 ++++++++++++++++++++++ libm/src/math/floorf.rs | 19 +++++++++++++++++-- libm/src/math/sqrt.rs | 25 ++++++++++++++++++++++++- libm/src/math/sqrtf.rs | 23 +++++++++++++++++++++++ 8 files changed, 171 insertions(+), 5 deletions(-) diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index 63c1121c6..eda28b9a0 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -42,9 +42,22 @@ pub fn ceil(x: f64) -> f64 { #[cfg(test)] mod tests { + use super::*; + use core::f64::*; + #[test] fn sanity_check() { - assert_eq!(super::ceil(1.1), 2.0); - assert_eq!(super::ceil(2.9), 3.0); + assert_eq!(ceil(1.1), 2.0); + assert_eq!(ceil(2.9), 3.0); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil + #[test] + fn spec_tests() { + // Not Asserted: that the current rounding mode has no effect. 
+ assert!(ceil(NAN).is_nan()); + for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { + assert_eq!(ceil(f), f); + } } } diff --git a/libm/src/math/ceilf.rs b/libm/src/math/ceilf.rs index 87d96982a..f1edbd061 100644 --- a/libm/src/math/ceilf.rs +++ b/libm/src/math/ceilf.rs @@ -39,3 +39,25 @@ pub fn ceilf(x: f32) -> f32 { } f32::from_bits(ui) } + +#[cfg(test)] +mod tests { + use super::*; + use core::f32::*; + + #[test] + fn sanity_check() { + assert_eq!(ceilf(1.1), 2.0); + assert_eq!(ceilf(2.9), 3.0); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil + #[test] + fn spec_tests() { + // Not Asserted: that the current rounding mode has no effect. + assert!(ceilf(NAN).is_nan()); + for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { + assert_eq!(ceilf(f), f); + } + } +} diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs index 5a7f795f6..4b292acc6 100644 --- a/libm/src/math/fabs.rs +++ b/libm/src/math/fabs.rs @@ -15,3 +15,27 @@ pub fn fabs(x: f64) -> f64 { } f64::from_bits(x.to_bits() & (u64::MAX / 2)) } + +#[cfg(test)] +mod tests { + use super::*; + use core::f64::*; + + #[test] + fn sanity_check() { + assert_eq!(fabs(-1.0), 1.0); + assert_eq!(fabs(2.8), 2.8); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs + #[test] + fn spec_tests() { + assert!(fabs(NAN).is_nan()); + for f in [0.0, -0.0].iter().copied() { + assert_eq!(fabs(f), 0.0); + } + for f in [INFINITY, NEG_INFINITY].iter().copied() { + assert_eq!(fabs(f), INFINITY); + } + } +} diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs index 495512584..21dda8d6a 100644 --- a/libm/src/math/fabsf.rs +++ b/libm/src/math/fabsf.rs @@ -13,3 +13,27 @@ pub fn fabsf(x: f32) -> f32 { } f32::from_bits(x.to_bits() & 0x7fffffff) } + +#[cfg(test)] +mod tests { + use super::*; + use core::f32::*; + + #[test] + fn sanity_check() { + assert_eq!(fabsf(-1.0), 1.0); + assert_eq!(fabsf(2.8), 2.8); + } + + /// The spec: 
https://en.cppreference.com/w/cpp/numeric/math/fabs + #[test] + fn spec_tests() { + assert!(fabsf(NAN).is_nan()); + for f in [0.0, -0.0].iter().copied() { + assert_eq!(fabsf(f), 0.0); + } + for f in [INFINITY, NEG_INFINITY].iter().copied() { + assert_eq!(fabsf(f), INFINITY); + } + } +} diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index 91825e3c8..b2b760570 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -38,3 +38,25 @@ pub fn floor(x: f64) -> f64 { x + y } } + +#[cfg(test)] +mod tests { + use super::*; + use core::f64::*; + + #[test] + fn sanity_check() { + assert_eq!(floor(1.1), 1.0); + assert_eq!(floor(2.9), 2.0); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor + #[test] + fn spec_tests() { + // Not Asserted: that the current rounding mode has no effect. + assert!(floor(NAN).is_nan()); + for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { + assert_eq!(floor(f), f); + } + } +} diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs index 6d751b077..287f08642 100644 --- a/libm/src/math/floorf.rs +++ b/libm/src/math/floorf.rs @@ -42,8 +42,23 @@ pub fn floorf(x: f32) -> f32 { #[cfg(test)] mod tests { + use super::*; + use core::f32::*; + #[test] - fn no_overflow() { - assert_eq!(super::floorf(0.5), 0.0); + fn sanity_check() { + assert_eq!(floorf(0.5), 0.0); + assert_eq!(floorf(1.1), 1.0); + assert_eq!(floorf(2.9), 2.0); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor + #[test] + fn spec_tests() { + // Not Asserted: that the current rounding mode has no effect. + assert!(floorf(NAN).is_nan()); + for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { + assert_eq!(floorf(f), f); + } } } diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 31afe3356..addecba23 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -37,7 +37,7 @@ * If (2) is false, then q = q ; otherwise q = q + 2 . 
* i+1 i i+1 i * - * With some algebric manipulation, it is not difficult to see + * With some algebraic manipulation, it is not difficult to see * that (2) is equivalent to * -(i+1) * s + 2 <= y (3) @@ -239,3 +239,26 @@ pub fn sqrt(x: f64) -> f64 { f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) } } + +#[cfg(test)] +mod tests { + use super::*; + use core::f64::*; + + #[test] + fn sanity_check() { + assert_eq!(sqrt(100.0), 10.0); + assert_eq!(sqrt(4.0), 2.0); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt + #[test] + fn spec_tests() { + // Not Asserted: FE_INVALID exception is raised if argument is negative. + assert!(sqrt(-1.0).is_nan()); + assert!(sqrt(NAN).is_nan()); + for f in [0.0, -0.0, INFINITY].iter().copied() { + assert_eq!(sqrt(f), f); + } + } +} diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index 1d5b78e84..0bee02869 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -127,3 +127,26 @@ pub fn sqrtf(x: f32) -> f32 { f32::from_bits(ix as u32) } } + +#[cfg(test)] +mod tests { + use super::*; + use core::f32::*; + + #[test] + fn sanity_check() { + assert_eq!(sqrtf(100.0), 10.0); + assert_eq!(sqrtf(4.0), 2.0); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt + #[test] + fn spec_tests() { + // Not Asserted: FE_INVALID exception is raised if argument is negative. 
+ assert!(sqrtf(-1.0).is_nan()); + assert!(sqrtf(NAN).is_nan()); + for f in [0.0, -0.0, INFINITY].iter().copied() { + assert_eq!(sqrtf(f), f); + } + } +} From b717fa656f29751b78beb092b4e6c7c1de38d85b Mon Sep 17 00:00:00 2001 From: Lokathor Date: Tue, 13 Aug 2019 23:45:50 -0600 Subject: [PATCH 0308/1459] rustfmt fixes --- libm/src/math/fabs.rs | 4 ++-- libm/src/math/fabsf.rs | 4 ++-- libm/src/math/sqrt.rs | 4 ++-- libm/src/math/sqrtf.rs | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs index 4b292acc6..b2255ad32 100644 --- a/libm/src/math/fabs.rs +++ b/libm/src/math/fabs.rs @@ -23,8 +23,8 @@ mod tests { #[test] fn sanity_check() { - assert_eq!(fabs(-1.0), 1.0); - assert_eq!(fabs(2.8), 2.8); + assert_eq!(fabs(-1.0), 1.0); + assert_eq!(fabs(2.8), 2.8); } /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs index 21dda8d6a..6655c4c3c 100644 --- a/libm/src/math/fabsf.rs +++ b/libm/src/math/fabsf.rs @@ -21,8 +21,8 @@ mod tests { #[test] fn sanity_check() { - assert_eq!(fabsf(-1.0), 1.0); - assert_eq!(fabsf(2.8), 2.8); + assert_eq!(fabsf(-1.0), 1.0); + assert_eq!(fabsf(2.8), 2.8); } /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index addecba23..f06b209a4 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -247,8 +247,8 @@ mod tests { #[test] fn sanity_check() { - assert_eq!(sqrt(100.0), 10.0); - assert_eq!(sqrt(4.0), 2.0); + assert_eq!(sqrt(100.0), 10.0); + assert_eq!(sqrt(4.0), 2.0); } /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index 0bee02869..ee868c8c8 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -135,8 +135,8 @@ mod tests { #[test] fn sanity_check() { - assert_eq!(sqrtf(100.0), 10.0); - assert_eq!(sqrtf(4.0), 2.0); + 
assert_eq!(sqrtf(100.0), 10.0); + assert_eq!(sqrtf(4.0), 2.0); } /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt From ca423fe49a9cb90baf5dd3cfefc13c11d6c1eb02 Mon Sep 17 00:00:00 2001 From: Joel Galenson Date: Mon, 19 Aug 2019 09:30:45 -0700 Subject: [PATCH 0309/1459] Support deterministic builds by remapping the __FILE__ prefix if the compiler supports it. --- build.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/build.rs b/build.rs index c714bc15d..feb7fc6ec 100644 --- a/build.rs +++ b/build.rs @@ -423,6 +423,10 @@ mod c { panic!("RUST_COMPILER_RT_ROOT={} does not exist", root.display()); } + // Support deterministic builds by remapping the __FILE__ prefix if the + // compiler supports it. + cfg.flag_if_supported(&format!("-ffile-prefix-map={}=.", root.display())); + let src_dir = root.join("lib/builtins"); for (sym, src) in sources.map.iter() { let src = src_dir.join(src); From 332220adea3177532af66e7757a2fbc46f1540ec Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 19 Aug 2019 14:02:08 -0700 Subject: [PATCH 0310/1459] Modernize the `testcrate` slighty * Update `rand` dependency * Drop `cast` in favor of explicit casting or crate-defined * Move build script to 2018 edition --- testcrate/Cargo.toml | 4 +- testcrate/build.rs | 130 +++++++++++++++++++++++++++---------------- 2 files changed, 83 insertions(+), 51 deletions(-) diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index 2a102660b..3b99b574e 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -2,14 +2,14 @@ name = "testcrate" version = "0.1.0" authors = ["Alex Crichton "] +edition = "2018" [lib] test = false doctest = false [build-dependencies] -cast = { version = "0.2.2", features = ["x128"] } -rand = { version = "0.4", features = ["i128_support"] } +rand = "0.7" [dependencies.compiler_builtins] path = ".." 
diff --git a/testcrate/build.rs b/testcrate/build.rs index 4bd4005b7..e1d4cf9e8 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -1,6 +1,5 @@ -extern crate cast; -extern crate rand; - +use rand::seq::SliceRandom; +use rand::Rng; use std::collections::HashMap; use std::fmt; use std::fmt::Write as FmtWrite; @@ -10,9 +9,6 @@ use std::io::Write; use std::path::PathBuf; use std::{env, mem}; -use self::cast::{f32, f64, i128, i32, i64, u128, u32, u64}; -use self::rand::Rng; - const NTESTS: usize = 1_000; fn main() { @@ -397,7 +393,7 @@ fn main() { if a.0.is_nan() { return None; } - Some(f64(a.0)) + Some(f64::from(a.0)) }, "builtins::float::extend::__extendsfdf2(a)", ); @@ -407,7 +403,7 @@ fn main() { if a.0.is_nan() { return None; } - Some(f64(a.0)) + Some(f64::from(a.0)) }, "builtins::float::extend::__extendsfdf2vfp(a)", ); @@ -415,92 +411,92 @@ fn main() { // float/conv.rs gen( - |a: MyF64| i64(a.0).ok(), + |a: MyF64| i64::cast(a.0), "builtins::float::conv::__fixdfdi(a)", ); gen( - |a: MyF64| i32(a.0).ok(), + |a: MyF64| i32::cast(a.0), "builtins::float::conv::__fixdfsi(a)", ); gen( - |a: MyF32| i64(a.0).ok(), + |a: MyF32| i64::cast(a.0), "builtins::float::conv::__fixsfdi(a)", ); gen( - |a: MyF32| i32(a.0).ok(), + |a: MyF32| i32::cast(a.0), "builtins::float::conv::__fixsfsi(a)", ); gen( - |a: MyF32| i128(a.0).ok(), + |a: MyF32| i128::cast(a.0), "builtins::float::conv::__fixsfti(a)", ); gen( - |a: MyF64| i128(a.0).ok(), + |a: MyF64| i128::cast(a.0), "builtins::float::conv::__fixdfti(a)", ); gen( - |a: MyF64| u64(a.0).ok(), + |a: MyF64| u64::cast(a.0), "builtins::float::conv::__fixunsdfdi(a)", ); gen( - |a: MyF64| u32(a.0).ok(), + |a: MyF64| u32::cast(a.0), "builtins::float::conv::__fixunsdfsi(a)", ); gen( - |a: MyF32| u64(a.0).ok(), + |a: MyF32| u64::cast(a.0), "builtins::float::conv::__fixunssfdi(a)", ); gen( - |a: MyF32| u32(a.0).ok(), + |a: MyF32| u32::cast(a.0), "builtins::float::conv::__fixunssfsi(a)", ); gen( - |a: MyF32| u128(a.0).ok(), + |a: MyF32| 
u128::cast(a.0), "builtins::float::conv::__fixunssfti(a)", ); gen( - |a: MyF64| u128(a.0).ok(), + |a: MyF64| u128::cast(a.0), "builtins::float::conv::__fixunsdfti(a)", ); gen( - |a: MyI64| Some(f64(a.0)), + |a: MyI64| Some(a.0 as f64), "builtins::float::conv::__floatdidf(a)", ); gen( - |a: MyI32| Some(f64(a.0)), + |a: MyI32| Some(a.0 as f64), "builtins::float::conv::__floatsidf(a)", ); gen( - |a: MyI32| Some(f32(a.0)), + |a: MyI32| Some(a.0 as f32), "builtins::float::conv::__floatsisf(a)", ); gen( - |a: MyU64| Some(f64(a.0)), + |a: MyU64| Some(a.0 as f64), "builtins::float::conv::__floatundidf(a)", ); gen( - |a: MyU32| Some(f64(a.0)), + |a: MyU32| Some(a.0 as f64), "builtins::float::conv::__floatunsidf(a)", ); gen( - |a: MyU32| Some(f32(a.0)), + |a: MyU32| Some(a.0 as f32), "builtins::float::conv::__floatunsisf(a)", ); gen( - |a: MyU128| f32(a.0).ok(), + |a: MyU128| Some(a.0 as f32), "builtins::float::conv::__floatuntisf(a)", ); if !target_arch_mips { gen( - |a: MyI128| Some(f32(a.0)), + |a: MyI128| Some(a.0 as f32), "builtins::float::conv::__floattisf(a)", ); gen( - |a: MyI128| Some(f64(a.0)), + |a: MyI128| Some(a.0 as f64), "builtins::float::conv::__floattidf(a)", ); gen( - |a: MyU128| Some(f64(a.0)), + |a: MyU128| Some(a.0 as f64), "builtins::float::conv::__floatuntidf(a)", ); } @@ -996,7 +992,7 @@ macro_rules! gen_float { $significand_bits:expr) => { pub fn $name(rng: &mut R) -> $fty where - R: Rng, + R: Rng + ?Sized, { const BITS: u8 = $bits; const SIGNIFICAND_BITS: u8 = $significand_bits; @@ -1015,9 +1011,9 @@ macro_rules! gen_float { } } - if rng.gen_weighted_bool(10) { + if rng.gen_range(0, 10) == 1 { // Special values - *rng.choose(&[ + *[ -0.0, 0.0, ::std::$fty::MIN, @@ -1026,9 +1022,10 @@ macro_rules! 
gen_float { ::std::$fty::NAN, ::std::$fty::INFINITY, -::std::$fty::INFINITY, - ]) + ] + .choose(rng) .unwrap() - } else if rng.gen_weighted_bool(10) { + } else if rng.gen_range(0, 10) == 1 { // NaN patterns mk_f32(rng.gen(), rng.gen(), 0) } else if rng.gen() { @@ -1053,7 +1050,7 @@ macro_rules! gen_large_float { $significand_bits:expr) => { pub fn $name(rng: &mut R) -> $fty where - R: Rng, + R: Rng + ?Sized, { const BITS: u8 = $bits; const SIGNIFICAND_BITS: u8 = $significand_bits; @@ -1072,9 +1069,9 @@ macro_rules! gen_large_float { } } - if rng.gen_weighted_bool(10) { + if rng.gen_range(0, 10) == 1 { // Special values - *rng.choose(&[ + *[ -0.0, 0.0, ::std::$fty::MIN, @@ -1083,9 +1080,10 @@ macro_rules! gen_large_float { ::std::$fty::NAN, ::std::$fty::INFINITY, -::std::$fty::INFINITY, - ]) + ] + .choose(rng) .unwrap() - } else if rng.gen_weighted_bool(10) { + } else if rng.gen_range(0, 10) == 1 { // NaN patterns mk_f32(rng.gen(), rng.gen(), 0) } else if rng.gen() { @@ -1102,7 +1100,7 @@ macro_rules! gen_large_float { gen_large_float!(gen_large_f32, f32, u32, 32, 23); gen_large_float!(gen_large_f64, f64, u64, 64, 52); -trait TestInput: rand::Rand + Hash + Eq + fmt::Debug { +trait TestInput: Hash + Eq + fmt::Debug { fn ty_name() -> String; fn generate_lets(container: &str, cnt: &mut u8) -> String; fn generate_static(&self, dst: &mut String); @@ -1119,6 +1117,7 @@ where F: FnMut(A) -> Option, A: TestInput + Copy, R: TestOutput, + rand::distributions::Standard: rand::distributions::Distribution, { let rng = &mut rand::thread_rng(); let testname = test.split("::").last().unwrap().split("(").next().unwrap(); @@ -1207,8 +1206,8 @@ macro_rules! my_float { } } - impl rand::Rand for $name { - fn rand(r: &mut R) -> $name { + impl rand::distributions::Distribution<$name> for rand::distributions::Standard { + fn sample(&self, r: &mut R) -> $name { $name($gen(r)) } } @@ -1260,18 +1259,18 @@ macro_rules! 
my_integer { } } - impl rand::Rand for $name { - fn rand(rng: &mut R) -> $name { + impl rand::distributions::Distribution<$name> for rand::distributions::Standard { + fn sample(&self, r: &mut R) -> $name { let bits = (0 as $inner).count_zeros(); let mut mk = || { - if rng.gen_weighted_bool(10) { - *rng.choose(&[ + if r.gen_range(0, 10) == 1 { + *[ ::std::$inner::MAX >> (bits / 2), 0, ::std::$inner::MIN >> (bits / 2), - ]).unwrap() + ].choose(r).unwrap() } else { - rng.gen::<$inner>() + r.gen::<$inner>() } }; let a = mk(); @@ -1386,3 +1385,36 @@ where container.to_string() } } + +trait FromFloat: Sized { + fn cast(src: T) -> Option; +} + +macro_rules! from_float { + ($($src:ident => $($dst:ident),+);+;) => { + $( + $( + impl FromFloat<$src> for $dst { + fn cast(src: $src) -> Option<$dst> { + use std::{$dst, $src}; + + if src.is_nan() || + src.is_infinite() || + src < std::$dst::MIN as $src || + src > std::$dst::MAX as $src + { + None + } else { + Some(src as $dst) + } + } + } + )+ + )+ + } +} + +from_float! { + f32 => i32, i64, i128, u32, u64, u128; + f64 => i32, i64, i128, u32, u64, u128; +} From c8f79289169e7cce857fc04c5ec02c820b9dbf48 Mon Sep 17 00:00:00 2001 From: Joel Galenson Date: Mon, 19 Aug 2019 15:12:07 -0700 Subject: [PATCH 0311/1459] Update comment to mention where the __FILE__ macro is used. --- build.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build.rs b/build.rs index feb7fc6ec..490b6391d 100644 --- a/build.rs +++ b/build.rs @@ -424,7 +424,8 @@ mod c { } // Support deterministic builds by remapping the __FILE__ prefix if the - // compiler supports it. + // compiler supports it. This fixes the nondeterminism caused by the + // use of that macro in lib/builtins/int_util.h in compiler-rt. 
cfg.flag_if_supported(&format!("-ffile-prefix-map={}=.", root.display())); let src_dir = root.join("lib/builtins"); From 997b86d76742843f6a209e98de003e3c5a378f47 Mon Sep 17 00:00:00 2001 From: Benjamin Saunders Date: Tue, 20 Aug 2019 14:10:38 -0700 Subject: [PATCH 0312/1459] Implement LLVM's elementwise unordered atomic memory intrinsics Allows uses of intrinsics of the form llvm.(memcpy|memmove|memset).element.unordered.atomic.* to be linked. --- src/mem.rs | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/src/mem.rs b/src/mem.rs index c863bb729..be7849ff3 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -5,6 +5,10 @@ type c_int = i16; #[cfg(not(target_pointer_width = "16"))] type c_int = i32; +use core::intrinsics::{atomic_load_unordered, atomic_store_unordered, unchecked_div}; +use core::mem; +use core::ops::{BitOr, Shl}; + #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { let mut i = 0; @@ -58,3 +62,105 @@ pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { } 0 } + +fn memcpy_element_unordered_atomic(dest: *mut T, src: *const T, bytes: usize) { + unsafe { + let n = unchecked_div(bytes, mem::size_of::()); + let mut i = 0; + while i < n { + atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); + i += 1; + } + } +} + +fn memmove_element_unordered_atomic(dest: *mut T, src: *const T, bytes: usize) { + unsafe { + let n = unchecked_div(bytes, mem::size_of::()); + if src < dest as *const T { + // copy from end + let mut i = n; + while i != 0 { + i -= 1; + atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); + } + } else { + // copy from beginning + let mut i = 0; + while i < n { + atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); + i += 1; + } + } + } +} + +fn memset_element_unordered_atomic(s: *mut T, c: u8, bytes: 
usize) +where + T: Copy + From + Shl + BitOr, +{ + unsafe { + let n = unchecked_div(bytes, mem::size_of::()); + let mut x = T::from(c); + let mut i = 1; + while i < mem::size_of::() { + x = x << 8 | T::from(c); + i += 1; + } + let mut i = 0; + while i < n { + atomic_store_unordered(s.add(i), x); + i += 1; + } + } +} + +intrinsics! { + pub extern "C" fn __llvm_memcpy_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () { + memcpy_element_unordered_atomic(dest, src, bytes); + } + pub extern "C" fn __llvm_memcpy_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () { + memcpy_element_unordered_atomic(dest, src, bytes); + } + pub extern "C" fn __llvm_memcpy_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () { + memcpy_element_unordered_atomic(dest, src, bytes); + } + pub extern "C" fn __llvm_memcpy_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () { + memcpy_element_unordered_atomic(dest, src, bytes); + } + pub extern "C" fn __llvm_memcpy_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () { + memcpy_element_unordered_atomic(dest, src, bytes); + } + + pub extern "C" fn __llvm_memmove_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () { + memmove_element_unordered_atomic(dest, src, bytes); + } + pub extern "C" fn __llvm_memmove_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () { + memmove_element_unordered_atomic(dest, src, bytes); + } + pub extern "C" fn __llvm_memmove_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () { + memmove_element_unordered_atomic(dest, src, bytes); + } + pub extern "C" fn __llvm_memmove_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () { + memmove_element_unordered_atomic(dest, src, bytes); + } + pub extern "C" fn __llvm_memmove_element_unordered_atomic_16(dest: *mut u128, src: *const u128, 
bytes: usize) -> () { + memmove_element_unordered_atomic(dest, src, bytes); + } + + pub extern "C" fn __llvm_memset_element_unordered_atomic_1(s: *mut u8, c: u8, bytes: usize) -> () { + memset_element_unordered_atomic(s, c, bytes); + } + pub extern "C" fn __llvm_memset_element_unordered_atomic_2(s: *mut u16, c: u8, bytes: usize) -> () { + memset_element_unordered_atomic(s, c, bytes); + } + pub extern "C" fn __llvm_memset_element_unordered_atomic_4(s: *mut u32, c: u8, bytes: usize) -> () { + memset_element_unordered_atomic(s, c, bytes); + } + pub extern "C" fn __llvm_memset_element_unordered_atomic_8(s: *mut u64, c: u8, bytes: usize) -> () { + memset_element_unordered_atomic(s, c, bytes); + } + pub extern "C" fn __llvm_memset_element_unordered_atomic_16(s: *mut u128, c: u8, bytes: usize) -> () { + memset_element_unordered_atomic(s, c, bytes); + } +} From 9775f08b1d831360035da9af813164b81b483548 Mon Sep 17 00:00:00 2001 From: Benjamin Saunders Date: Fri, 23 Aug 2019 10:18:36 -0700 Subject: [PATCH 0313/1459] Tidy up unordered elementwise atomic memory intrinsics --- src/mem.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/mem.rs b/src/mem.rs index be7849ff3..76372d9bc 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -5,7 +5,7 @@ type c_int = i16; #[cfg(not(target_pointer_width = "16"))] type c_int = i32; -use core::intrinsics::{atomic_load_unordered, atomic_store_unordered, unchecked_div}; +use core::intrinsics::{atomic_load_unordered, atomic_store_unordered, exact_div}; use core::mem; use core::ops::{BitOr, Shl}; @@ -63,9 +63,10 @@ pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { 0 } +// `bytes` must be a multiple of `mem::size_of::()` fn memcpy_element_unordered_atomic(dest: *mut T, src: *const T, bytes: usize) { unsafe { - let n = unchecked_div(bytes, mem::size_of::()); + let n = exact_div(bytes, mem::size_of::()); let mut i = 0; while i < n { atomic_store_unordered(dest.add(i), 
atomic_load_unordered(src.add(i))); @@ -74,9 +75,10 @@ fn memcpy_element_unordered_atomic(dest: *mut T, src: *const T, bytes: } } +// `bytes` must be a multiple of `mem::size_of::()` fn memmove_element_unordered_atomic(dest: *mut T, src: *const T, bytes: usize) { unsafe { - let n = unchecked_div(bytes, mem::size_of::()); + let n = exact_div(bytes, mem::size_of::()); if src < dest as *const T { // copy from end let mut i = n; @@ -95,18 +97,24 @@ fn memmove_element_unordered_atomic(dest: *mut T, src: *const T, bytes: } } +// `T` must be a primitive integer type, and `bytes` must be a multiple of `mem::size_of::()` fn memset_element_unordered_atomic(s: *mut T, c: u8, bytes: usize) where T: Copy + From + Shl + BitOr, { unsafe { - let n = unchecked_div(bytes, mem::size_of::()); + let n = exact_div(bytes, mem::size_of::()); + + // Construct a value of type `T` consisting of repeated `c` + // bytes, to let us ensure we write each `T` atomically. let mut x = T::from(c); let mut i = 1; while i < mem::size_of::() { x = x << 8 | T::from(c); i += 1; } + + // Write it to `s` let mut i = 0; while i < n { atomic_store_unordered(s.add(i), x); From 726c5b65f501597bee5f22ed6a63051e22225fc2 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Thu, 5 Sep 2019 08:32:05 -0600 Subject: [PATCH 0314/1459] there are no longer any default features --- libm/azure-pipelines.yml | 2 -- libm/ci/run.sh | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/libm/azure-pipelines.yml b/libm/azure-pipelines.yml index c89346c73..0d723c56d 100644 --- a/libm/azure-pipelines.yml +++ b/libm/azure-pipelines.yml @@ -49,8 +49,6 @@ jobs: displayName: "Install rust wasm target" - script: cargo build --target wasm32-unknown-unknown displayName: "Build for wasm" - - script: cargo build --target wasm32-unknown-unknown --no-default-features - displayName: "Build for wasm (no default features)" variables: TOOLCHAIN: nightly diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 37ffb8793..59930b238 100755 --- 
a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -3,7 +3,7 @@ set -ex TARGET=$1 -CMD="cargo test --all --no-default-features --target $TARGET" +CMD="cargo test --all --target $TARGET" $CMD $CMD --release From e4ac1399062c8aaba72cf1ac5280afe2730337ff Mon Sep 17 00:00:00 2001 From: Lokathor Date: Thu, 5 Sep 2019 08:32:26 -0600 Subject: [PATCH 0315/1459] swap stable to be unstable, checked is now debug_assertions --- libm/Cargo.toml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 3e6817851..37aff9a76 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -11,18 +11,16 @@ version = "0.2.0" edition = "2018" [features] -# only used to run our test suite -default = ['stable'] -stable = [] +default = [] + +# This tells the compiler to assume that a Nightly toolchain is being used and +# that it should activate any useful Nightly things accordingly. +unstable = [] # Generate tests which are random inputs and the outputs are calculated with # musl libc. musl-reference-tests = ['rand'] -# Used checked array indexing instead of unchecked array indexing in this -# library. 
-checked = [] - [workspace] members = [ "crates/compiler-builtins-smoke-test", From 85ee385b1d146ada02247a16c424e1ea9eceef94 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Thu, 5 Sep 2019 08:32:38 -0600 Subject: [PATCH 0316/1459] swap stable to be unstable --- libm/crates/libm-bench/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/crates/libm-bench/Cargo.toml b/libm/crates/libm-bench/Cargo.toml index ba65dbd5f..b09db339b 100644 --- a/libm/crates/libm-bench/Cargo.toml +++ b/libm/crates/libm-bench/Cargo.toml @@ -12,4 +12,4 @@ paste = "0.1.5" [features] default = [] -stable = [ "libm/stable" ] +unstable = [ "libm/unstable" ] From 951aa8bf342f3c5c2a5eb647bfc7a6da5734f84b Mon Sep 17 00:00:00 2001 From: Lokathor Date: Thu, 5 Sep 2019 08:33:03 -0600 Subject: [PATCH 0317/1459] swap stable to be unstable --- libm/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index b15857dbe..e9def4940 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -2,7 +2,7 @@ #![deny(warnings)] #![no_std] #![cfg_attr( - all(target_arch = "wasm32", not(feature = "stable")), + all(target_arch = "wasm32", feature = "unstable"), feature(core_intrinsics) )] From 3f7176dc4c1805c5ffcae31d3eee924b5464e56d Mon Sep 17 00:00:00 2001 From: Lokathor Date: Thu, 5 Sep 2019 08:33:23 -0600 Subject: [PATCH 0318/1459] swap stable to be unstable, use debug_assertions --- libm/src/math/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index fcf4e649c..c8d7bd819 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -6,7 +6,7 @@ macro_rules! force_eval { }; } -#[cfg(not(feature = "checked"))] +#[cfg(not(debug_assertions))] macro_rules! i { ($array:expr, $index:expr) => { unsafe { *$array.get_unchecked($index) } @@ -36,7 +36,7 @@ macro_rules! i { }; } -#[cfg(feature = "checked")] +#[cfg(debug_assertions)] macro_rules! 
i { ($array:expr, $index:expr) => { *$array.get($index).unwrap() @@ -60,7 +60,7 @@ macro_rules! i { macro_rules! llvm_intrinsically_optimized { (#[cfg($($clause:tt)*)] $e:expr) => { - #[cfg(all(not(feature = "stable"), $($clause)*))] + #[cfg(all(feature = "unstable", $($clause)*))] { if true { // thwart the dead code lint $e From 9170d3581591aa2f444ea19f9135b895f933571f Mon Sep 17 00:00:00 2001 From: Lokathor Date: Thu, 5 Sep 2019 08:33:34 -0600 Subject: [PATCH 0319/1459] use sebug_assertions --- libm/src/math/lgamma_r.rs | 4 ++-- libm/src/math/lgammaf_r.rs | 4 ++-- libm/src/math/rem_pio2_large.rs | 4 ++-- libm/src/math/sincos.rs | 4 ++-- libm/src/math/sincosf.rs | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/libm/src/math/lgamma_r.rs b/libm/src/math/lgamma_r.rs index 382a501fc..9533e882c 100644 --- a/libm/src/math/lgamma_r.rs +++ b/libm/src/math/lgamma_r.rs @@ -270,9 +270,9 @@ pub fn lgamma_r(mut x: f64) -> (f64, i32) { p2 = 1.0 + y * (V1 + y * (V2 + y * (V3 + y * (V4 + y * V5)))); r += -0.5 * y + p1 / p2; } - #[cfg(feature = "checked")] + #[cfg(debug_assertions)] _ => unreachable!(), - #[cfg(not(feature = "checked"))] + #[cfg(not(debug_assertions))] _ => {} } } else if ix < 0x40200000 { diff --git a/libm/src/math/lgammaf_r.rs b/libm/src/math/lgammaf_r.rs index 0745359a2..c5e559f46 100644 --- a/libm/src/math/lgammaf_r.rs +++ b/libm/src/math/lgammaf_r.rs @@ -205,9 +205,9 @@ pub fn lgammaf_r(mut x: f32) -> (f32, i32) { p2 = 1.0 + y * (V1 + y * (V2 + y * (V3 + y * (V4 + y * V5)))); r += -0.5 * y + p1 / p2; } - #[cfg(feature = "checked")] + #[cfg(debug_assertions)] _ => unreachable!(), - #[cfg(not(feature = "checked"))] + #[cfg(not(debug_assertions))] _ => {} } } else if ix < 0x41000000 { diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index 8533dc289..002ce2e21 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -461,9 +461,9 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut 
[f64], e0: i32, prec: usize) -> i!(y, 2, =, -fw); } } - #[cfg(feature = "checked")] + #[cfg(debug_assertions)] _ => unreachable!(), - #[cfg(not(feature = "checked"))] + #[cfg(not(debug_assertions))] _ => {} } n & 7 diff --git a/libm/src/math/sincos.rs b/libm/src/math/sincos.rs index 750908df4..d49f65c97 100644 --- a/libm/src/math/sincos.rs +++ b/libm/src/math/sincos.rs @@ -51,9 +51,9 @@ pub fn sincos(x: f64) -> (f64, f64) { 1 => (c, -s), 2 => (-s, -c), 3 => (-c, s), - #[cfg(feature = "checked")] + #[cfg(debug_assertions)] _ => unreachable!(), - #[cfg(not(feature = "checked"))] + #[cfg(not(debug_assertions))] _ => (0.0, 1.0), } } diff --git a/libm/src/math/sincosf.rs b/libm/src/math/sincosf.rs index bb9a00392..d4e0772d5 100644 --- a/libm/src/math/sincosf.rs +++ b/libm/src/math/sincosf.rs @@ -115,9 +115,9 @@ pub fn sincosf(x: f32) -> (f32, f32) { 1 => (c, -s), 2 => (-s, -c), 3 => (-c, s), - #[cfg(feature = "checked")] + #[cfg(debug_assertions)] _ => unreachable!(), - #[cfg(not(feature = "checked"))] + #[cfg(not(debug_assertions))] _ => (0.0, 1.0), } } From 8d0b4327ca53cd9ce43445032de5100ec1a799c2 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Thu, 5 Sep 2019 08:36:08 -0600 Subject: [PATCH 0320/1459] Update run.sh --- libm/ci/run.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 59930b238..ed253ab0d 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -5,11 +5,14 @@ TARGET=$1 CMD="cargo test --all --target $TARGET" +# stable by default $CMD $CMD --release -$CMD --features 'stable' -$CMD --release --features 'stable' +# unstable with a feature +$CMD --features 'unstable' +$CMD --release --features 'unstable' -$CMD --features 'stable checked musl-reference-tests' -$CMD --release --features 'stable checked musl-reference-tests' +# also run the reference tests +$CMD --features 'unstable musl-reference-tests' +$CMD --release --features 'unstable musl-reference-tests' From 
a4e3e8116a012fbd9071c69392ccaa8c480d3e59 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Thu, 5 Sep 2019 08:57:15 -0600 Subject: [PATCH 0321/1459] suppress useless clippy warnings --- libm/src/lib.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index e9def4940..e228af9b3 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -5,6 +5,15 @@ all(target_arch = "wasm32", feature = "unstable"), feature(core_intrinsics) )] +#![allow(clippy::unreadable_literal)] +#![allow(clippy::many_single_char_names)] +#![allow(clippy::needless_return)] +#![allow(clippy::int_plus_one)] +#![allow(clippy::deprecated_cfg_attr)] +#![allow(clippy::mixed_case_hex_literals)] +#![allow(clippy::float_cmp)] +#![allow(clippy::eq_op)] +#![allow(clippy::assign_op_pattern)] mod math; From a14ee75a2475307f7fcf8af2979a2f9df3ffafd6 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Fri, 6 Sep 2019 11:37:00 -0600 Subject: [PATCH 0322/1459] i was told to change this path to my repo and reset things --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index a71118ff9..23d29e11b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "libm"] path = libm - url = https://github.com/rust-lang-nursery/libm + url = https://github.com/Lokathor/libm From b3f16e25f2979a0d5443c8b896da6781b5c5fa05 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Fri, 6 Sep 2019 13:48:15 -0600 Subject: [PATCH 0323/1459] Update the libm submodule --- libm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm b/libm index 01bee72a9..52983bee3 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 01bee72a93ebaeea2883d0f963174c2b00d4fe68 +Subproject commit 52983bee338ac439bc0f1146d6053955e3b4c33a From 095f0201225487b8890deb472dc4c16832c0267e Mon Sep 17 00:00:00 2001 From: Lokathor Date: Fri, 6 Sep 2019 15:20:05 -0600 Subject: [PATCH 0324/1459] Update build.rs --- build.rs | 3 +++ 1 file changed, 3 insertions(+) 
diff --git a/build.rs b/build.rs index 490b6391d..5cdabf959 100644 --- a/build.rs +++ b/build.rs @@ -8,6 +8,9 @@ fn main() { println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display()); + // Activate libm's unstable features to make full use of Nightly. + println!("rustc-cfg=libm/unstable"); + // Emscripten's runtime includes all the builtins if target.contains("emscripten") { return; From 0a3e283c7c2c81e86ba697f28b8de4c0d3c8dc09 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Fri, 6 Sep 2019 18:02:11 -0600 Subject: [PATCH 0325/1459] Update build.rs --- build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.rs b/build.rs index 5cdabf959..b520b6247 100644 --- a/build.rs +++ b/build.rs @@ -9,7 +9,7 @@ fn main() { println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display()); // Activate libm's unstable features to make full use of Nightly. - println!("rustc-cfg=libm/unstable"); + println!("cargo:rustc-cfg=feature=\"unstable\""); // Emscripten's runtime includes all the builtins if target.contains("emscripten") { From 893fe7e5dec26332568629934137bb726d5f49ca Mon Sep 17 00:00:00 2001 From: Lokathor Date: Mon, 9 Sep 2019 19:43:37 -0600 Subject: [PATCH 0326/1459] update the libm submodule, again --- .gitmodules | 2 +- libm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 23d29e11b..a71118ff9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "libm"] path = libm - url = https://github.com/Lokathor/libm + url = https://github.com/rust-lang-nursery/libm diff --git a/libm b/libm index 52983bee3..8eedc2470 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 52983bee338ac439bc0f1146d6053955e3b4c33a +Subproject commit 8eedc2470531f51b978e4c873ee78a33c90e0fbd From 96690ecd9fd975853b70d8549f598520c706eb77 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 10 Sep 2019 10:07:17 -0500 Subject: [PATCH 0327/1459] Migrate from azure pipelines to Github actions 
(#226) Should make user management easier and also helps follow the repository when it's renamed. --- libm/.github/workflows/main.yml | 67 +++++++++++++++++++++++++++ libm/README.md | 4 +- libm/azure-pipelines.yml | 82 --------------------------------- libm/ci/azure-install-rust.yml | 25 ---------- 4 files changed, 68 insertions(+), 110 deletions(-) create mode 100644 libm/.github/workflows/main.yml delete mode 100644 libm/azure-pipelines.yml delete mode 100644 libm/ci/azure-install-rust.yml diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml new file mode 100644 index 000000000..80ce4ebd5 --- /dev/null +++ b/libm/.github/workflows/main.yml @@ -0,0 +1,67 @@ +name: CI +on: [push, pull_request] + +jobs: + docker: + name: Docker + runs-on: ubuntu-latest + strategy: + matrix: + target: + - aarch64-unknown-linux-gnu + - arm-unknown-linux-gnueabi + - arm-unknown-linux-gnueabihf + - armv7-unknown-linux-gnueabihf + - i686-unknown-linux-gnu + - mips-unknown-linux-gnu + - mips64-unknown-linux-gnuabi64 + - mips64el-unknown-linux-gnuabi64 + - powerpc-unknown-linux-gnu + - powerpc64-unknown-linux-gnu + - powerpc64le-unknown-linux-gnu + - x86_64-unknown-linux-gnu + steps: + - uses: actions/checkout@master + - name: Install Rust + run: rustup update nightly && rustup default nightly + - run: rustup target add ${{ matrix.target }} + - run: rustup target add x86_64-unknown-linux-musl + - run: cargo generate-lockfile + - run: ./ci/run-docker.sh ${{ matrix.target }} + + rustfmt: + name: Rustfmt + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - name: Install Rust + run: rustup update stable && rustup default stable && rustup component add rustfmt + - run: cargo fmt -- --check + + wasm: + name: WebAssembly + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - name: Install Rust + run: rustup update nightly && rustup default nightly + - run: rustup target add wasm32-unknown-unknown + - run: cargo build --target 
wasm32-unknown-unknown + + cb: + name: "The compiler-builtins crate works" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - name: Install Rust + run: rustup update nightly && rustup default nightly + - run: cargo build -p cb + + benchmarks: + name: Benchmarks + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - name: Install Rust + run: rustup update nightly && rustup default nightly + - run: cargo bench --all diff --git a/libm/README.md b/libm/README.md index e90b42d74..61a6ebfb3 100644 --- a/libm/README.md +++ b/libm/README.md @@ -1,7 +1,5 @@ # `libm` -[![Build Status](https://dev.azure.com/rust-lang/libm/_apis/build/status/rust-lang-nursery.libm?branchName=master)](https://dev.azure.com/rust-lang/libm/_build/latest?definitionId=7&branchName=master) - A port of [MUSL]'s libm to Rust. [MUSL]: https://www.musl-libc.org/ @@ -27,7 +25,7 @@ The API documentation can be found [here](https://docs.rs/libm). ## Benchmark [benchmark]: #benchmark -The benchmarks are located in `crates/libm-bench` and require a nightly Rust toolchain. +The benchmarks are located in `crates/libm-bench` and require a nightly Rust toolchain. 
To run all benchmarks: > cargo +nightly bench --all diff --git a/libm/azure-pipelines.yml b/libm/azure-pipelines.yml deleted file mode 100644 index 0d723c56d..000000000 --- a/libm/azure-pipelines.yml +++ /dev/null @@ -1,82 +0,0 @@ -trigger: - - master - -jobs: - - job: Docker - pool: - vmImage: ubuntu-16.04 - steps: - - template: ci/azure-install-rust.yml - - bash: rustup target add $TARGET - displayName: "add cross target" - - bash: rustup target add x86_64-unknown-linux-musl - displayName: "add musl target" - - bash: cargo generate-lockfile && ./ci/run-docker.sh $TARGET - displayName: "run tests" - strategy: - matrix: - aarch64: - TARGET: aarch64-unknown-linux-gnu - arm: - TARGET: arm-unknown-linux-gnueabi - armhf: - TARGET: arm-unknown-linux-gnueabihf - armv7: - TARGET: armv7-unknown-linux-gnueabihf - i686: - TARGET: i686-unknown-linux-gnu - mips: - TARGET: mips-unknown-linux-gnu - mips64: - TARGET: mips64-unknown-linux-gnuabi64 - mips64el: - TARGET: mips64el-unknown-linux-gnuabi64 - powerpc: - TARGET: powerpc-unknown-linux-gnu - powerpc64: - TARGET: powerpc64-unknown-linux-gnu - powerpc64le: - TARGET: powerpc64le-unknown-linux-gnu - x86_64: - TARGET: x86_64-unknown-linux-gnu - - - job: wasm - pool: - vmImage: ubuntu-16.04 - steps: - - template: ci/azure-install-rust.yml - - script: rustup target add wasm32-unknown-unknown - displayName: "Install rust wasm target" - - script: cargo build --target wasm32-unknown-unknown - displayName: "Build for wasm" - variables: - TOOLCHAIN: nightly - - - job: rustfmt - pool: - vmImage: ubuntu-16.04 - steps: - - template: ci/azure-install-rust.yml - - bash: rustup component add rustfmt - displayName: "install rustfmt" - - bash: cargo fmt --all -- --check - displayName: "check formatting" - - - job: compiler_builtins_works - pool: - vmImage: ubuntu-16.04 - steps: - - template: ci/azure-install-rust.yml - - bash: cargo build -p cb - displayName: "Check compiler-builtins still probably builds" - - - job: benchmarks - pool: - 
vmImage: ubuntu-16.04 - steps: - - template: ci/azure-install-rust.yml - - bash: cargo bench --all - displayName: "Benchmarks" - variables: - TOOLCHAIN: nightly - diff --git a/libm/ci/azure-install-rust.yml b/libm/ci/azure-install-rust.yml deleted file mode 100644 index f1cd87bcc..000000000 --- a/libm/ci/azure-install-rust.yml +++ /dev/null @@ -1,25 +0,0 @@ -steps: - - bash: | - set -e - toolchain=$TOOLCHAIN - if [ "$toolchain" = "" ]; then - toolchain=stable - fi - if command -v rustup; then - rustup update $toolchain - rustup default $toolchain - else - curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain $toolchain - echo "##vso[task.prependpath]$HOME/.cargo/bin" - fi - displayName: Install rust (unix) - condition: ne( variables['Agent.OS'], 'Windows_NT' ) - - - bash: rustup update stable-$TOOLCHAIN && rustup default stable-$TOOLCHAIN - displayName: Install rust (windows) - condition: eq( variables['Agent.OS'], 'Windows_NT' ) - - - script: | - rustc -Vv - cargo -V - displayName: Query rust and cargo versions From 1ac39708554b196f54ebc414f3e78574d40f2d8f Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 10 Sep 2019 18:16:26 -0700 Subject: [PATCH 0328/1459] Migrate to github actions from Azure Pipelines Less need to manage users, more concurrency, and interface is all in one! 
--- .github/workflows/main.yml | 146 +++++++++++++++++++++++++++++++++++++ README.md | 2 - azure-pipelines.yml | 80 -------------------- ci/azure-install-rust.yml | 30 -------- ci/azure-steps.yml | 28 ------- 5 files changed, 146 insertions(+), 140 deletions(-) create mode 100644 .github/workflows/main.yml delete mode 100644 azure-pipelines.yml delete mode 100644 ci/azure-install-rust.yml delete mode 100644 ci/azure-steps.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 000000000..f9fb844a9 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,146 @@ +name: CI +on: [push, pull_request] + +jobs: + test: + name: Test + runs-on: ${{ matrix.os }} + strategy: + matrix: + target: + - aarch64-unknown-linux-gnu + - arm-unknown-linux-gnueabi + - arm-unknown-linux-gnueabihf + - i586-unknown-linux-gnu + - i686-unknown-linux-gnu + - mips-unknown-linux-gnu + - mips64-unknown-linux-gnuabi64 + - mips64el-unknown-linux-gnuabi64 + - mipsel-unknown-linux-gnu + - powerpc-unknown-linux-gnu + - powerpc64-unknown-linux-gnu + - powerpc64le-unknown-linux-gnu + - thumbv6m-none-eabi + - thumbv7em-none-eabi + - thumbv7em-none-eabihf + - thumbv7m-none-eabi + - wasm32-unknown-unknown + - x86_64-unknown-linux-gnu + - x86_64-apple-darwin + - i686-pc-windows-msvc + - x86_64-pc-windows-msvc + - i686-pc-windows-gnu + - x86_64-pc-windows-gnu + include: + - target: aarch64-unknown-linux-gnu + os: ubuntu-latest + rust: nightly + - target: arm-unknown-linux-gnueabi + os: ubuntu-latest + rust: nightly + - target: arm-unknown-linux-gnueabihf + os: ubuntu-latest + rust: nightly + - target: i586-unknown-linux-gnu + os: ubuntu-latest + rust: nightly + - target: i686-unknown-linux-gnu + os: ubuntu-latest + rust: nightly + - target: mips-unknown-linux-gnu + os: ubuntu-latest + rust: nightly + - target: mips64-unknown-linux-gnuabi64 + os: ubuntu-latest + rust: nightly + - target: mips64el-unknown-linux-gnuabi64 + os: ubuntu-latest + rust: nightly + - 
target: mipsel-unknown-linux-gnu + os: ubuntu-latest + rust: nightly + - target: powerpc-unknown-linux-gnu + os: ubuntu-latest + rust: nightly + - target: powerpc64-unknown-linux-gnu + os: ubuntu-latest + rust: nightly + - target: powerpc64le-unknown-linux-gnu + os: ubuntu-latest + rust: nightly + - target: thumbv6m-none-eabi + os: ubuntu-latest + rust: nightly + - target: thumbv7em-none-eabi + os: ubuntu-latest + rust: nightly + - target: thumbv7em-none-eabihf + os: ubuntu-latest + rust: nightly + - target: thumbv7m-none-eabi + os: ubuntu-latest + rust: nightly + - target: wasm32-unknown-unknown + os: ubuntu-latest + rust: nightly + - target: x86_64-unknown-linux-gnu + os: ubuntu-latest + rust: nightly + - target: x86_64-apple-darwin + os: macos-latest + rust: nightly + - target: i686-pc-windows-msvc + os: windows-latest + rust: nightly + - target: x86_64-pc-windows-msvc + os: windows-latest + rust: nightly + - target: i686-pc-windows-gnu + os: windows-latest + rust: nightly-i686-gnu + - target: x86_64-pc-windows-gnu + os: windows-latest + rust: nightly-x86_64-gnu + steps: + - uses: actions/checkout@master + with: + submodules: true + - name: Install Rust (rustup) + run: rustup update ${{ matrix.rust }} --no-self-update && rustup default ${{ matrix.rust }} + if: matrix.os != 'macos-latest' + - name: Install Rust (macos) + run: | + curl https://sh.rustup.rs | sh -s -- -y --default-toolchain nightly + echo "##[add-path]$HOME/.cargo/bin" + if: matrix.os == 'macos-latest' + - run: rustup target add ${{ matrix.target }} + - name: Download compiler-rt reference sources + run: | + curl -L -o code.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/8.0-2019-03-18.tar.gz + tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-8.0-2019-03-18/compiler-rt + echo "##[set-env name=RUST_COMPILER_RT_ROOT]./compiler-rt" + shell: bash + + # Non-linux tests just use our raw script + - run: ./ci/run.sh ${{ matrix.target }} + if: matrix.os != 'ubuntu-latest' + shell: 
bash + + # Wasm is special and is just build as a smoke test + - run: cargo build --target ${{ matrix.target }} + if: matrix.target == 'wasm32-unknown-unknown' + + # Otherwise we use our docker containers to run builds + - run: cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} + if: matrix.target != 'wasm32-unknown-unknown' && matrix.os == 'ubuntu-latest' + + rustfmt: + name: Rustfmt + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + with: + submodules: true + - name: Install Rust + run: rustup update stable && rustup default stable && rustup component add rustfmt + - run: cargo fmt -- --check diff --git a/README.md b/README.md index f0724bffe..262e9843d 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,5 @@ # `compiler-builtins` -[![Build Status](https://dev.azure.com/rust-lang/compiler-builtins/_apis/build/status/rust-lang-nursery.compiler-builtins?branchName=master)](https://dev.azure.com/rust-lang/compiler-builtins/_build/latest?definitionId=6&branchName=master) - > Porting `compiler-rt` intrinsics to Rust See [rust-lang/rust#35437][0]. 
diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index eac3fb5e4..000000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,80 +0,0 @@ -trigger: - - master - -jobs: - - job: Linux - pool: - vmImage: ubuntu-16.04 - steps: - - template: ci/azure-steps.yml - strategy: - matrix: - aarch64: - TARGET: aarch64-unknown-linux-gnu - arm: - TARGET: arm-unknown-linux-gnueabi - armhf: - TARGET: arm-unknown-linux-gnueabihf - i586: - TARGET: i586-unknown-linux-gnu - i686: - TARGET: i686-unknown-linux-gnu - mips: - TARGET: mips-unknown-linux-gnu - mips64: - TARGET: mips64-unknown-linux-gnuabi64 - mips64el: - TARGET: mips64el-unknown-linux-gnuabi64 - mipsel: - TARGET: mipsel-unknown-linux-gnu - powerpc: - TARGET: powerpc-unknown-linux-gnu - powerpc64: - TARGET: powerpc64-unknown-linux-gnu - powerpc64le: - TARGET: powerpc64le-unknown-linux-gnu - thumbv6m: - TARGET: thumbv6m-none-eabi - thumbv7em: - TARGET: thumbv7em-none-eabi - thumbv7emhf: - TARGET: thumbv7em-none-eabihf - thumbv7m: - TARGET: thumbv7m-none-eabi - wasm32: - TARGET: wasm32-unknown-unknown - ONLY_BUILD: 1 - x86_64: - TARGET: x86_64-unknown-linux-gnu - - - job: macOS - pool: - vmImage: macos-10.13 - steps: - - template: ci/azure-steps.yml - strategy: - matrix: - x86_64: - TARGET: x86_64-apple-darwin - i686: - TARGET: i686-apple-darwin - - - job: Windows - pool: - vmImage: 'vs2017-win2016' - steps: - - template: ci/azure-steps.yml - strategy: - matrix: - i686-msvc: - TARGET: i686-pc-windows-msvc - x86_64-msvc: - TARGET: x86_64-pc-windows-msvc - i686-gnu: - TARGET: i686-pc-windows-gnu - INTRINSICS_FAILS_WITH_MEM_FEATURE: 1 - DEBUG_LTO_BUILD_DOESNT_WORK: 1 - x86_64-gnu: - TARGET: x86_64-pc-windows-gnu - INTRINSICS_FAILS_WITH_MEM_FEATURE: 1 - DEBUG_LTO_BUILD_DOESNT_WORK: 1 diff --git a/ci/azure-install-rust.yml b/ci/azure-install-rust.yml deleted file mode 100644 index d0255f85b..000000000 --- a/ci/azure-install-rust.yml +++ /dev/null @@ -1,30 +0,0 @@ -parameters: - toolchain: 'nightly' - 
-steps: - - bash: | - set -e - if command -v rustup; then - rustup update $TOOLCHAIN - rustup default $TOOLCHAIN - else - curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain $TOOLCHAIN - echo "##vso[task.prependpath]$HOME/.cargo/bin" - fi - displayName: Install rust - condition: ne( variables['Agent.OS'], 'Windows_NT' ) - env: - TOOLCHAIN: ${{ parameters.toolchain }} - - - script: | - rustup update --no-self-update %TOOLCHAIN%-%TARGET% - rustup default %TOOLCHAIN%-%TARGET% - displayName: Install rust - condition: eq( variables['Agent.OS'], 'Windows_NT' ) - env: - TOOLCHAIN: ${{ parameters.toolchain }} - - - script: | - rustc -Vv - cargo -V - displayName: Query rust and cargo versions diff --git a/ci/azure-steps.yml b/ci/azure-steps.yml deleted file mode 100644 index f08beca05..000000000 --- a/ci/azure-steps.yml +++ /dev/null @@ -1,28 +0,0 @@ -steps: - - checkout: self - submodules: true - - - template: azure-install-rust.yml - - - bash: rustup target add $TARGET - displayName: Install Rust target - - - bash: | - set -e - curl -L https://github.com/rust-lang/llvm-project/archive/rustc/8.0-2019-03-18.tar.gz | \ - tar xzf - --strip-components 1 llvm-project-rustc-8.0-2019-03-18/compiler-rt - echo '##vso[task.setvariable variable=RUST_COMPILER_RT_ROOT]./compiler-rt' - displayName: "Download compiler-rt reference sources" - - - bash: ./ci/run.sh $TARGET - condition: ne( variables['Agent.OS'], 'Linux' ) - displayName: Run test script - - - bash: | - if [ "$ONLY_BUILD" = "1" ]; then - cargo build --target $TARGET - else - cargo generate-lockfile && ./ci/run-docker.sh $TARGET - fi - condition: eq( variables['Agent.OS'], 'Linux' ) - displayName: Run docker test script From 711d17fb3cd8c5ad981fed8fe1710fa5c255cc43 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 25 Sep 2019 11:19:35 -0700 Subject: [PATCH 0329/1459] Update the gitmodule url for `libm` --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules 
b/.gitmodules index a71118ff9..726b1c5c6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "libm"] path = libm - url = https://github.com/rust-lang-nursery/libm + url = https://github.com/rust-lang/libm.git From ea5db23a6a7f9864ddebddf58eebc71a7793f9e5 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 25 Sep 2019 11:20:14 -0700 Subject: [PATCH 0330/1459] Remove `-nursery` from urls --- Cargo.toml | 4 ++-- README.md | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0705dbd9c..2e3c7038e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,8 +4,8 @@ name = "compiler_builtins" version = "0.1.19" license = "MIT/Apache-2.0" readme = "README.md" -repository = "https://github.com/rust-lang-nursery/compiler-builtins" -homepage = "https://github.com/rust-lang-nursery/compiler-builtins" +repository = "https://github.com/rust-lang/compiler-builtins" +homepage = "https://github.com/rust-lang/compiler-builtins" documentation = "https://docs.rs/compiler_builtins" description = """ Compiler intrinsics used by the Rust compiler. 
Also available for other targets diff --git a/README.md b/README.md index 262e9843d..a20d038f8 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ building: ``` toml # Cargo.toml [dependencies] -compiler_builtins = { git = "https://github.com/rust-lang-nursery/compiler-builtins" } +compiler_builtins = { git = "https://github.com/rust-lang/compiler-builtins" } ``` ``` rust @@ -40,11 +40,11 @@ implementation as well for unimplemented intrinsics: ```toml [dependencies.compiler_builtins] -git = "https://github.com/rust-lang-nursery/compiler-builtins" +git = "https://github.com/rust-lang/compiler-builtins" features = ["c"] ``` -[an issue]: https://github.com/rust-lang-nursery/compiler-builtins/issues +[an issue]: https://github.com/rust-lang/compiler-builtins/issues ## Contributing @@ -61,8 +61,8 @@ features = ["c"] [1]: https://github.com/rust-lang/compiler-rt/tree/8598065bd965d9713bfafb6c1e766d63a7b17b89/test/builtins/Unit [2]: https://github.com/rust-lang/compiler-rt/tree/8598065bd965d9713bfafb6c1e766d63a7b17b89/lib/builtins -[3]: https://github.com/rust-lang-nursery/compiler-builtins/blob/0ba07e49264a54cb5bbd4856fcea083bb3fbec15/build.rs#L180-L265 -[4]: https://travis-ci.org/rust-lang-nursery/compiler-builtins +[3]: https://github.com/rust-lang/compiler-builtins/blob/0ba07e49264a54cb5bbd4856fcea083bb3fbec15/build.rs#L180-L265 +[4]: https://travis-ci.org/rust-lang/compiler-builtins ### Porting Reminders From d47cfe6bc6ae66ce090329960606555b05983c6b Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 25 Sep 2019 11:20:49 -0700 Subject: [PATCH 0331/1459] Remove `-nursery` from urls --- libm/CONTRIBUTING.md | 6 +++--- libm/Cargo.toml | 2 +- libm/README.md | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/libm/CONTRIBUTING.md b/libm/CONTRIBUTING.md index a7e817e13..59c37a6f9 100644 --- a/libm/CONTRIBUTING.md +++ b/libm/CONTRIBUTING.md @@ -14,13 +14,13 @@ corresponding issue. 
- :tada: -[issue tracker]: https://github.com/rust-lang-nursery/libm/issues +[issue tracker]: https://github.com/rust-lang/libm/issues [src]: https://git.musl-libc.org/cgit/musl/tree/src/math -[`src/math/truncf.rs`]: https://github.com/rust-lang-nursery/libm/blob/master/src/math/truncf.rs +[`src/math/truncf.rs`]: https://github.com/rust-lang/libm/blob/master/src/math/truncf.rs Check [PR #65] for an example. -[PR #65]: https://github.com/rust-lang-nursery/libm/pull/65 +[PR #65]: https://github.com/rust-lang/libm/pull/65 ## Tips and tricks diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 37aff9a76..1d7d97146 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -6,7 +6,7 @@ documentation = "https://docs.rs/libm" keywords = ["libm", "math"] license = "MIT OR Apache-2.0" name = "libm" -repository = "https://github.com/rust-lang-nursery/libm" +repository = "https://github.com/rust-lang/libm" version = "0.2.0" edition = "2018" diff --git a/libm/README.md b/libm/README.md index 61a6ebfb3..5d9e9bddb 100644 --- a/libm/README.md +++ b/libm/README.md @@ -7,12 +7,12 @@ A port of [MUSL]'s libm to Rust. ## Goals The short term goal of this library is to [enable math support (e.g. `sin`, `atan2`) for the -`wasm32-unknown-unknown` target][wasm] (cf. [rust-lang-nursery/compiler-builtins][pr]). The longer +`wasm32-unknown-unknown` target][wasm] (cf. [rust-lang/compiler-builtins][pr]). The longer term goal is to enable [math support in the `core` crate][core]. 
-[wasm]: https://github.com/rust-lang-nursery/libm/milestone/1 -[pr]: https://github.com/rust-lang-nursery/compiler-builtins/pull/248 -[core]: https://github.com/rust-lang-nursery/libm/milestone/2 +[wasm]: https://github.com/rust-lang/libm/milestone/1 +[pr]: https://github.com/rust-lang/compiler-builtins/pull/248 +[core]: https://github.com/rust-lang/libm/milestone/2 ## Already usable From 462b73c1fe1f67a62223a3ccf830f02a2571c016 Mon Sep 17 00:00:00 2001 From: Ian Kronquist Date: Mon, 30 Sep 2019 09:02:47 -0700 Subject: [PATCH 0332/1459] Implement bcmp (#315) As of LLVM 9.0, certain calls to memcmp may be converted to bcmp, which I guess could save a single subtraction on some architectures. [1] bcmp is just like memcmp except instead of returning the difference between the two differing bytes, it returns non-zero instead. As such, memcmp is a valid implementation of bcmp. If we care about size, bcmp should just call memcmp. If we care about speed, we can change bcmp to look like this instead: ```rust pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { let mut i = 0; while i < n { let a = *s1.offset(i as isize); let b = *s2.offset(i as isize); if a != b { return 1; } i += 1; } 0 } ``` In this PR I do not address any changes which may or may not be needed for arm aebi as I lack proper test hardware. 
[1]: https://releases.llvm.org/9.0.0/docs/ReleaseNotes.html#noteworthy-optimizations --- src/mem.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/mem.rs b/src/mem.rs index 76372d9bc..7b8a37fde 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -63,6 +63,11 @@ pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { 0 } +#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] +pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { + memcmp(s1, s2, n) +} + // `bytes` must be a multiple of `mem::size_of::()` fn memcpy_element_unordered_atomic(dest: *mut T, src: *const T, bytes: usize) { unsafe { From ef9feeb4f6acd3b391bfee74ba67336589ee8eaf Mon Sep 17 00:00:00 2001 From: Christopher Serr Date: Fri, 18 Oct 2019 15:23:57 +0200 Subject: [PATCH 0333/1459] Fix sincosf(PI) (#229) Looks like the implementation was not ported correctly. Some negations were forgotten in a certain branch. Here is the original code in musl that has the negations: https://github.com/bpowers/musl/blob/94cb2ec2a0ffcb47d24dbf7a30e462505396cf54/src/math/sincosf.c#L66-L67 Resolves rust-lang/libm#228 --- libm/src/math/sincosf.rs | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/libm/src/math/sincosf.rs b/libm/src/math/sincosf.rs index d4e0772d5..df644f3b5 100644 --- a/libm/src/math/sincosf.rs +++ b/libm/src/math/sincosf.rs @@ -65,11 +65,11 @@ pub fn sincosf(x: f32) -> (f32, f32) { /* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */ else { if sign { - s = k_sinf((x + S2PIO2) as f64); - c = k_cosf((x + S2PIO2) as f64); + s = -k_sinf((x + S2PIO2) as f64); + c = -k_cosf((x + S2PIO2) as f64); } else { - s = k_sinf((x - S2PIO2) as f64); - c = k_cosf((x - S2PIO2) as f64); + s = -k_sinf((x - S2PIO2) as f64); + c = -k_cosf((x - S2PIO2) as f64); } } @@ -121,3 +121,16 @@ pub fn sincosf(x: f32) -> (f32, f32) { _ => (0.0, 1.0), } } + +#[cfg(test)] +mod tests { + use super::sincosf; + use 
crate::_eqf; + + #[test] + fn with_pi() { + let (s, c) = sincosf(core::f32::consts::PI); + _eqf(s.abs(), 0.0).unwrap(); + _eqf(c, -1.0).unwrap(); + } +} From 9823b956ced776a8f73ca668d476708450617afc Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 28 Oct 2019 07:39:04 -0700 Subject: [PATCH 0334/1459] Fix Github Actions for recent system changes --- .github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f9fb844a9..92ce9ca21 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -108,6 +108,7 @@ jobs: - name: Install Rust (rustup) run: rustup update ${{ matrix.rust }} --no-self-update && rustup default ${{ matrix.rust }} if: matrix.os != 'macos-latest' + shell: bash - name: Install Rust (macos) run: | curl https://sh.rustup.rs | sh -s -- -y --default-toolchain nightly From 7a8161dab7716a599002f4ff23dfcf3c79bd03da Mon Sep 17 00:00:00 2001 From: Oliver Scherer Date: Thu, 7 Nov 2019 20:04:11 +0100 Subject: [PATCH 0335/1459] Emit `_fltused` on `uefi` targets as a short-term workaround (#317) * Emit `_fltused` on `uefi` targets as a short-term workaround * Remove stray docker container --- src/x86_64.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/x86_64.rs b/src/x86_64.rs index 2360ab8a6..6940f8d9d 100644 --- a/src/x86_64.rs +++ b/src/x86_64.rs @@ -73,3 +73,10 @@ pub unsafe fn ___chkstk() { ); intrinsics::unreachable(); } + +// HACK(https://github.com/rust-lang/rust/issues/62785): x86_64-unknown-uefi needs special LLVM +// support unless we emit the _fltused +#[no_mangle] +#[used] +#[cfg(target_os = "uefi")] +static _fltused: i32 = 0; From a533ae9c5aa325db209659679535fe1f186eae81 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 7 Nov 2019 11:04:36 -0800 Subject: [PATCH 0336/1459] Bump to 0.1.20 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2e3c7038e..b667894ab 100644 --- 
a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.19" +version = "0.1.20" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 40ef6cb7499101310942b4a911a2667c1ac27208 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 11 Nov 2019 12:19:10 -0600 Subject: [PATCH 0337/1459] Allow FFI-unsafe warnings for u128/i128 (#323) * Allow FFI-unsafe warnings for u128/i128 Handle new warnings on nightly, and we shouldn't need to worry about these with compiler-builtins since this is tied to a particular compiler. * Clean up crate attributes * No need for stability marker * Rustdoc docs not used for this crate * Remove old build-system related cruft from rustc itself. * Run `cargo fmt` --- examples/intrinsics.rs | 1 - src/lib.rs | 33 +++++++++--------------------- testcrate/tests/aeabi_memclr.rs | 2 +- testcrate/tests/aeabi_memset.rs | 36 ++++++++++++++++----------------- 4 files changed, 28 insertions(+), 44 deletions(-) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 8b0ffa3a8..5ceebe132 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -7,7 +7,6 @@ #![cfg_attr(thumb, no_main)] #![deny(dead_code)] #![feature(asm)] -#![feature(compiler_builtins_lib)] #![feature(lang_items)] #![feature(start)] #![feature(allocator_api)] diff --git a/src/lib.rs b/src/lib.rs index ef5353a70..d90927aae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,34 +1,19 @@ -#![cfg_attr(not(stage0), deny(warnings))] -#![cfg_attr(not(test), no_std)] #![cfg_attr(feature = "compiler-builtins", compiler_builtins)] -#![crate_name = "compiler_builtins"] -#![crate_type = "rlib"] -#![doc( - html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk.png", - html_favicon_url = "https://doc.rust-lang.org/favicon.ico", - html_root_url = "https://doc.rust-lang.org/nightly/", - html_playground_url = "https://play.rust-lang.org/", - 
test(attr(deny(warnings))) -)] +#![feature(abi_unadjusted)] #![feature(asm)] #![feature(compiler_builtins)] #![feature(core_intrinsics)] +#![feature(lang_items)] +#![feature(linkage)] #![feature(naked_functions)] #![feature(repr_simd)] -#![feature(abi_unadjusted)] -#![feature(linkage)] -#![feature(lang_items)] -#![allow(unused_features)] #![no_builtins] -#![cfg_attr(feature = "compiler-builtins", feature(staged_api))] -#![cfg_attr( - feature = "compiler-builtins", - unstable( - feature = "compiler_builtins_lib", - reason = "Compiler builtins. Will never become stable.", - issue = "0" - ) -)] +#![no_std] +#![allow(unused_features)] +// We use `u128` in a whole bunch of places which we currently agree with the +// compiler on ABIs and such, so we should be "good enough" for now and changes +// to the `u128` ABI will be reflected here. +#![allow(improper_ctypes)] // We disable #[no_mangle] for tests so that we can verify the test results // against the native compiler-rt implementations of the builtins. 
diff --git a/testcrate/tests/aeabi_memclr.rs b/testcrate/tests/aeabi_memclr.rs index 326435c29..595076939 100644 --- a/testcrate/tests/aeabi_memclr.rs +++ b/testcrate/tests/aeabi_memclr.rs @@ -46,7 +46,7 @@ impl Aligned { #[test] fn memclr4() { - let mut aligned = Aligned::new();; + let mut aligned = Aligned::new(); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; diff --git a/testcrate/tests/aeabi_memset.rs b/testcrate/tests/aeabi_memset.rs index 3cfbfe5b0..f03729bed 100644 --- a/testcrate/tests/aeabi_memset.rs +++ b/testcrate/tests/aeabi_memset.rs @@ -45,7 +45,7 @@ impl Aligned { #[test] fn zero() { - let mut aligned = Aligned::new([0u8; 8]);; + let mut aligned = Aligned::new([0u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let c = 0xdeadbeef; @@ -54,7 +54,7 @@ fn zero() { assert_eq!(*xs, [0; 8]); - let mut aligned = Aligned::new([1u8; 8]);; + let mut aligned = Aligned::new([1u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let c = 0xdeadbeef; @@ -66,7 +66,7 @@ fn zero() { #[test] fn one() { - let mut aligned = Aligned::new([0u8; 8]);; + let mut aligned = Aligned::new([0u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let n = 1; @@ -76,7 +76,7 @@ fn one() { assert_eq!(*xs, [0xef, 0, 0, 0, 0, 0, 0, 0]); - let mut aligned = Aligned::new([1u8; 8]);; + let mut aligned = Aligned::new([1u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let c = 0xdeadbeef; @@ -88,7 +88,7 @@ fn one() { #[test] fn two() { - let mut aligned = Aligned::new([0u8; 8]);; + let mut aligned = Aligned::new([0u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let n = 2; @@ -98,7 +98,7 @@ fn two() { assert_eq!(*xs, [0xef, 0xef, 0, 0, 0, 0, 0, 0]); - let mut aligned = Aligned::new([1u8; 8]);; + let mut aligned = Aligned::new([1u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let c = 
0xdeadbeef; @@ -110,7 +110,7 @@ fn two() { #[test] fn three() { - let mut aligned = Aligned::new([0u8; 8]);; + let mut aligned = Aligned::new([0u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let n = 3; @@ -120,7 +120,7 @@ fn three() { assert_eq!(*xs, [0xef, 0xef, 0xef, 0, 0, 0, 0, 0]); - let mut aligned = Aligned::new([1u8; 8]);; + let mut aligned = Aligned::new([1u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let c = 0xdeadbeef; @@ -132,7 +132,7 @@ fn three() { #[test] fn four() { - let mut aligned = Aligned::new([0u8; 8]);; + let mut aligned = Aligned::new([0u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let n = 4; @@ -142,7 +142,7 @@ fn four() { assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0, 0, 0, 0]); - let mut aligned = Aligned::new([1u8; 8]);; + let mut aligned = Aligned::new([1u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let c = 0xdeadbeef; @@ -154,7 +154,7 @@ fn four() { #[test] fn five() { - let mut aligned = Aligned::new([0u8; 8]);; + let mut aligned = Aligned::new([0u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let n = 5; @@ -164,7 +164,7 @@ fn five() { assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0, 0, 0]); - let mut aligned = Aligned::new([1u8; 8]);; + let mut aligned = Aligned::new([1u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let c = 0xdeadbeef; @@ -176,7 +176,7 @@ fn five() { #[test] fn six() { - let mut aligned = Aligned::new([0u8; 8]);; + let mut aligned = Aligned::new([0u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let n = 6; @@ -186,7 +186,7 @@ fn six() { assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0, 0]); - let mut aligned = Aligned::new([1u8; 8]);; + let mut aligned = Aligned::new([1u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let c = 0xdeadbeef; @@ 
-198,7 +198,7 @@ fn six() { #[test] fn seven() { - let mut aligned = Aligned::new([0u8; 8]);; + let mut aligned = Aligned::new([0u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let n = 7; @@ -208,7 +208,7 @@ fn seven() { assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0]); - let mut aligned = Aligned::new([1u8; 8]);; + let mut aligned = Aligned::new([1u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let c = 0xdeadbeef; @@ -220,7 +220,7 @@ fn seven() { #[test] fn eight() { - let mut aligned = Aligned::new([0u8; 8]);; + let mut aligned = Aligned::new([0u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let n = 8; @@ -230,7 +230,7 @@ fn eight() { assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef]); - let mut aligned = Aligned::new([1u8; 8]);; + let mut aligned = Aligned::new([1u8; 8]); assert_eq!(mem::align_of_val(&aligned), 4); let xs = &mut aligned.array; let c = 0xdeadbeef; From d6a13419f648954ecaec9986c8757a8784ac30f6 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 11 Nov 2019 12:38:50 -0600 Subject: [PATCH 0338/1459] Gate atomic intrinsics on presence of instructions (#324) Don't emit the intrinsics for platforms which don't actually have the instructions to do atomic loads/stores. Closes #322 --- src/lib.rs | 1 + src/mem.rs | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index d90927aae..0e1a43537 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ #![cfg_attr(feature = "compiler-builtins", compiler_builtins)] #![feature(abi_unadjusted)] #![feature(asm)] +#![feature(cfg_target_has_atomic)] #![feature(compiler_builtins)] #![feature(core_intrinsics)] #![feature(lang_items)] diff --git a/src/mem.rs b/src/mem.rs index 7b8a37fde..24552ed85 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -129,50 +129,65 @@ where } intrinsics! 
{ + #[cfg(target_has_atomic_load_store = "8")] pub extern "C" fn __llvm_memcpy_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () { memcpy_element_unordered_atomic(dest, src, bytes); } + #[cfg(target_has_atomic_load_store = "16")] pub extern "C" fn __llvm_memcpy_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () { memcpy_element_unordered_atomic(dest, src, bytes); } + #[cfg(target_has_atomic_load_store = "32")] pub extern "C" fn __llvm_memcpy_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () { memcpy_element_unordered_atomic(dest, src, bytes); } + #[cfg(target_has_atomic_load_store = "64")] pub extern "C" fn __llvm_memcpy_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () { memcpy_element_unordered_atomic(dest, src, bytes); } + #[cfg(target_has_atomic_load_store = "128")] pub extern "C" fn __llvm_memcpy_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () { memcpy_element_unordered_atomic(dest, src, bytes); } + #[cfg(target_has_atomic_load_store = "8")] pub extern "C" fn __llvm_memmove_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () { memmove_element_unordered_atomic(dest, src, bytes); } + #[cfg(target_has_atomic_load_store = "16")] pub extern "C" fn __llvm_memmove_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () { memmove_element_unordered_atomic(dest, src, bytes); } + #[cfg(target_has_atomic_load_store = "32")] pub extern "C" fn __llvm_memmove_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () { memmove_element_unordered_atomic(dest, src, bytes); } + #[cfg(target_has_atomic_load_store = "64")] pub extern "C" fn __llvm_memmove_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () { memmove_element_unordered_atomic(dest, src, bytes); } + #[cfg(target_has_atomic_load_store = "128")] pub extern "C" fn 
__llvm_memmove_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () { memmove_element_unordered_atomic(dest, src, bytes); } + #[cfg(target_has_atomic_load_store = "8")] pub extern "C" fn __llvm_memset_element_unordered_atomic_1(s: *mut u8, c: u8, bytes: usize) -> () { memset_element_unordered_atomic(s, c, bytes); } + #[cfg(target_has_atomic_load_store = "16")] pub extern "C" fn __llvm_memset_element_unordered_atomic_2(s: *mut u16, c: u8, bytes: usize) -> () { memset_element_unordered_atomic(s, c, bytes); } + #[cfg(target_has_atomic_load_store = "32")] pub extern "C" fn __llvm_memset_element_unordered_atomic_4(s: *mut u32, c: u8, bytes: usize) -> () { memset_element_unordered_atomic(s, c, bytes); } + #[cfg(target_has_atomic_load_store = "64")] pub extern "C" fn __llvm_memset_element_unordered_atomic_8(s: *mut u64, c: u8, bytes: usize) -> () { memset_element_unordered_atomic(s, c, bytes); } + #[cfg(target_has_atomic_load_store = "128")] pub extern "C" fn __llvm_memset_element_unordered_atomic_16(s: *mut u128, c: u8, bytes: usize) -> () { memset_element_unordered_atomic(s, c, bytes); } From 49ad9018a9888fbbc253c5311d6b196fbc82843e Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 11 Nov 2019 10:39:08 -0800 Subject: [PATCH 0339/1459] Bump to 0.1.21 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b667894ab..200c41bf8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.20" +version = "0.1.21" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 0df0cf55d65940cb91bfb467acb603084c4822b8 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 11 Nov 2019 10:40:09 -0800 Subject: [PATCH 0340/1459] Update publishing instructions --- PUBLISHING.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git 
a/PUBLISHING.md b/PUBLISHING.md index ad100dee0..3df682ab0 100644 --- a/PUBLISHING.md +++ b/PUBLISHING.md @@ -10,8 +10,7 @@ greatly appreciated! 3. Commit this change 4. Run `git tag` to create a tag for this version 5. Delete the `libm/Cargo.toml` file -6. Comment out the `[dev-dependencies]` section of `Cargo.toml` -7. Run `cargo +nightly publish --allow-dirty` -8. Push the tag -9. Push the commit -10. Undo changes to `Cargo.toml` and the `libm` submodule +6. Run `cargo +nightly publish` +7. Push the tag +8. Push the commit +9. Undo changes to `Cargo.toml` and the `libm` submodule From 2e80112d70e9ff65b976e6548612f9fee1e01fb5 Mon Sep 17 00:00:00 2001 From: AJ Frantz Date: Fri, 22 Nov 2019 14:16:00 -0500 Subject: [PATCH 0341/1459] Fix sincosf for interval (7*pi/4, 9*pi/4) (#233) A mistake was made in porting musl's implementation which caused the sin and cos components to be reversed. closes rust-lang/libm#232 --- libm/src/math/sincosf.rs | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/libm/src/math/sincosf.rs b/libm/src/math/sincosf.rs index df644f3b5..2725caad6 100644 --- a/libm/src/math/sincosf.rs +++ b/libm/src/math/sincosf.rs @@ -89,11 +89,11 @@ pub fn sincosf(x: f32) -> (f32, f32) { } } else { if sign { - s = k_cosf((x + S4PIO2) as f64); - c = k_sinf((x + S4PIO2) as f64); + s = k_sinf((x + S4PIO2) as f64); + c = k_cosf((x + S4PIO2) as f64); } else { - s = k_cosf((x - S4PIO2) as f64); - c = k_sinf((x - S4PIO2) as f64); + s = k_sinf((x - S4PIO2) as f64); + c = k_cosf((x - S4PIO2) as f64); } } @@ -133,4 +133,22 @@ mod tests { _eqf(s.abs(), 0.0).unwrap(); _eqf(c, -1.0).unwrap(); } + + #[test] + fn rotational_symmetry() { + use core::f32::consts::PI; + const N: usize = 24; + for n in 0..N { + let theta = 2. * PI * (n as f32) / (N as f32); + let (s, c) = sincosf(theta); + let (s_plus, c_plus) = sincosf(theta + 2. * PI); + let (s_minus, c_minus) = sincosf(theta - 2. 
* PI); + + const TOLERANCE: f32 = 1e-6; + assert!((s - s_plus).abs() < TOLERANCE); + assert!((s - s_minus).abs() < TOLERANCE); + assert!((c - c_plus).abs() < TOLERANCE); + assert!((c - c_minus).abs() < TOLERANCE); + } + } } From c79fbf6c32ab5cbbf7f7ffebdf7a906f8f7a5cb4 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 22 Nov 2019 11:16:36 -0800 Subject: [PATCH 0342/1459] Bump to 0.2.1 --- libm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 1d7d97146..d9d668040 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -7,7 +7,7 @@ keywords = ["libm", "math"] license = "MIT OR Apache-2.0" name = "libm" repository = "https://github.com/rust-lang/libm" -version = "0.2.0" +version = "0.2.1" edition = "2018" [features] From f8c28c5c3b290f331ca8483f17439b4591628c11 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 3 Dec 2019 09:11:29 -0800 Subject: [PATCH 0343/1459] Update checkout action reference --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 92ce9ca21..1e1666ab5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -102,7 +102,7 @@ jobs: os: windows-latest rust: nightly-x86_64-gnu steps: - - uses: actions/checkout@master + - uses: actions/checkout@v1 with: submodules: true - name: Install Rust (rustup) @@ -139,7 +139,7 @@ jobs: name: Rustfmt runs-on: ubuntu-latest steps: - - uses: actions/checkout@master + - uses: actions/checkout@v1 with: submodules: true - name: Install Rust From 2566aa663bef6e8ebc6379de81b538b2d6327a9d Mon Sep 17 00:00:00 2001 From: Tyler Mandry Date: Fri, 6 Dec 2019 06:51:42 -0800 Subject: [PATCH 0344/1459] Add control flow information to __rust_probestack (#328) --- src/lib.rs | 1 + src/probestack.rs | 232 ++++++++++++++++++++++++++++------------------ 2 files changed, 144 insertions(+), 89 deletions(-) diff --git a/src/lib.rs 
b/src/lib.rs index 0e1a43537..e57a5ef3f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ #![cfg_attr(feature = "compiler-builtins", compiler_builtins)] #![feature(abi_unadjusted)] #![feature(asm)] +#![feature(global_asm)] #![feature(cfg_target_has_atomic)] #![feature(compiler_builtins)] #![feature(core_intrinsics)] diff --git a/src/probestack.rs b/src/probestack.rs index 9bcaf4fd1..933a60dd9 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -41,95 +41,149 @@ //! probes on any other architecture like ARM or PowerPC64. LLVM I'm sure would //! be more than welcome to accept such a change! -#![cfg(not(windows))] // Windows already has builtins to do this - -#[naked] -#[no_mangle] -#[cfg(all(target_arch = "x86_64", not(feature = "mangled-names")))] -pub unsafe extern "C" fn __rust_probestack() { - // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, - // ensuring that if any pages are unmapped we'll make a page fault. - // - // The ABI here is that the stack frame size is located in `%eax`. Upon - // return we're not supposed to modify `%esp` or `%eax`. - asm!(" - pushq %rbp - movq %rsp, %rbp - - mov %rax,%r11 // duplicate %rax as we're clobbering %r11 - - // Main loop, taken in one page increments. We're decrementing rsp by - // a page each time until there's less than a page remaining. We're - // guaranteed that this function isn't called unless there's more than a - // page needed. - // - // Note that we're also testing against `8(%rsp)` to account for the 8 - // bytes pushed on the stack orginally with our return address. Using - // `8(%rsp)` simulates us testing the stack pointer in the caller's - // context. - - // It's usually called when %rax >= 0x1000, but that's not always true. - // Dynamic stack allocation, which is needed to implement unsized - // rvalues, triggers stackprobe even if %rax < 0x1000. - // Thus we have to check %r11 first to avoid segfault. 
- cmp $$0x1000,%r11 - jna 3f - 2: - sub $$0x1000,%rsp - test %rsp,8(%rsp) - sub $$0x1000,%r11 - cmp $$0x1000,%r11 - ja 2b - - 3: - // Finish up the last remaining stack space requested, getting the last - // bits out of r11 - sub %r11,%rsp - test %rsp,8(%rsp) - - // Restore the stack pointer to what it previously was when entering - // this function. The caller will readjust the stack pointer after we - // return. - add %rax,%rsp - - leave - ret - " ::: "memory" : "volatile"); - ::core::intrinsics::unreachable(); +#![cfg(not(feature = "mangled-names"))] +// Windows already has builtins to do this. +#![cfg(not(windows))] +// We only define stack probing for these architectures today. +#![cfg(any(target_arch = "x86_64", target_arch = "x86"))] + +extern "C" { + pub fn __rust_probestack(); } -#[naked] -#[no_mangle] -#[cfg(all(target_arch = "x86", not(feature = "mangled-names")))] -pub unsafe extern "C" fn __rust_probestack() { - // This is the same as x86_64 above, only translated for 32-bit sizes. Note - // that on Unix we're expected to restore everything as it was, this - // function basically can't tamper with anything. - // - // The ABI here is the same as x86_64, except everything is 32-bits large. - asm!(" - push %ebp - mov %esp, %ebp - push %ecx - mov %eax,%ecx - - cmp $$0x1000,%ecx - jna 3f - 2: - sub $$0x1000,%esp - test %esp,8(%esp) - sub $$0x1000,%ecx - cmp $$0x1000,%ecx - ja 2b - - 3: - sub %ecx,%esp - test %esp,8(%esp) - - add %eax,%esp - pop %ecx - leave - ret - " ::: "memory" : "volatile"); - ::core::intrinsics::unreachable(); +// A wrapper for our implementation of __rust_probestack, which allows us to +// keep the assembly inline while controlling all CFI directives in the assembly +// emitted for the function. +// +// This is the ELF version. +#[cfg(not(target_vendor = "apple"))] +macro_rules! 
define_rust_probestack { + ($body: expr) => { + concat!( + " + .pushsection .text.__rust_probestack + .globl __rust_probestack + .type __rust_probestack, @function + __rust_probestack: + ", + $body, + " + .size __rust_probestack, . - __rust_probestack + .popsection + " + ) + }; +} + +// Same as above, but for Mach-O. +#[cfg(target_vendor = "apple")] +macro_rules! define_rust_probestack { + ($body: expr) => { + concat!( + " + .globl ___rust_probestack + ___rust_probestack: + ", + $body + ) + }; } + +// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, +// ensuring that if any pages are unmapped we'll make a page fault. +// +// The ABI here is that the stack frame size is located in `%rax`. Upon +// return we're not supposed to modify `%rsp` or `%rax`. +#[cfg(target_arch = "x86_64")] +global_asm!(define_rust_probestack!( + " + .cfi_startproc + pushq %rbp + .cfi_adjust_cfa_offset 8 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + + mov %rax,%r11 // duplicate %rax as we're clobbering %r11 + + // Main loop, taken in one page increments. We're decrementing rsp by + // a page each time until there's less than a page remaining. We're + // guaranteed that this function isn't called unless there's more than a + // page needed. + // + // Note that we're also testing against `8(%rsp)` to account for the 8 + // bytes pushed on the stack orginally with our return address. Using + // `8(%rsp)` simulates us testing the stack pointer in the caller's + // context. + + // It's usually called when %rax >= 0x1000, but that's not always true. + // Dynamic stack allocation, which is needed to implement unsized + // rvalues, triggers stackprobe even if %rax < 0x1000. + // Thus we have to check %r11 first to avoid segfault. 
+ cmp $0x1000,%r11 + jna 3f +2: + sub $0x1000,%rsp + test %rsp,8(%rsp) + sub $0x1000,%r11 + cmp $0x1000,%r11 + ja 2b + +3: + // Finish up the last remaining stack space requested, getting the last + // bits out of r11 + sub %r11,%rsp + test %rsp,8(%rsp) + + // Restore the stack pointer to what it previously was when entering + // this function. The caller will readjust the stack pointer after we + // return. + add %rax,%rsp + + leave + .cfi_def_cfa_register %rsp + .cfi_adjust_cfa_offset -8 + ret + .cfi_endproc + " +)); + +#[cfg(target_arch = "x86")] +// This is the same as x86_64 above, only translated for 32-bit sizes. Note +// that on Unix we're expected to restore everything as it was, this +// function basically can't tamper with anything. +// +// The ABI here is the same as x86_64, except everything is 32-bits large. +global_asm!(define_rust_probestack!( + " + .cfi_startproc + push %ebp + .cfi_adjust_cfa_offset 4 + .cfi_offset %ebp, -8 + mov %esp, %ebp + .cfi_def_cfa_register %ebp + push %ecx + mov %eax,%ecx + + cmp $0x1000,%ecx + jna 3f +2: + sub $0x1000,%esp + test %esp,8(%esp) + sub $0x1000,%ecx + cmp $0x1000,%ecx + ja 2b + +3: + sub %ecx,%esp + test %esp,8(%esp) + + add %eax,%esp + pop %ecx + leave + .cfi_def_cfa_register %esp + .cfi_adjust_cfa_offset -4 + ret + .cfi_endproc + " +)); From 59b3eb7df12bfb2e72f006d162adf636f33674ad Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 6 Dec 2019 06:54:50 -0800 Subject: [PATCH 0345/1459] Bump to 1.0.22 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 200c41bf8..4d5ffa712 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.21" +version = "0.1.22" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 9193bbee5d6523afc1652df4a2c233025182c7f8 Mon Sep 17 00:00:00 2001 From: Runji Wang Date: Wed, 11 Dec 
2019 01:02:14 +0800 Subject: [PATCH 0346/1459] Fix compile error on x86_64-unknown-uefi target (#331) * fix compile error on x86_64-unknown-uefi target * Fix tests on nightly --- src/probestack.rs | 4 ++-- testcrate/tests/count_leading_zeros.rs | 2 -- testcrate/tests/generated.rs | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/probestack.rs b/src/probestack.rs index 933a60dd9..70b33b8be 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -56,7 +56,7 @@ extern "C" { // emitted for the function. // // This is the ELF version. -#[cfg(not(target_vendor = "apple"))] +#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))] macro_rules! define_rust_probestack { ($body: expr) => { concat!( @@ -76,7 +76,7 @@ macro_rules! define_rust_probestack { } // Same as above, but for Mach-O. -#[cfg(target_vendor = "apple")] +#[cfg(any(target_vendor = "apple", target_os = "uefi"))] macro_rules! define_rust_probestack { ($body: expr) => { concat!( diff --git a/testcrate/tests/count_leading_zeros.rs b/testcrate/tests/count_leading_zeros.rs index b50a7ce84..022b2d852 100644 --- a/testcrate/tests/count_leading_zeros.rs +++ b/testcrate/tests/count_leading_zeros.rs @@ -1,5 +1,3 @@ -#![feature(compiler_builtins_lib)] - extern crate compiler_builtins; use compiler_builtins::int::__clzsi2; diff --git a/testcrate/tests/generated.rs b/testcrate/tests/generated.rs index ee575cba8..a296db22d 100644 --- a/testcrate/tests/generated.rs +++ b/testcrate/tests/generated.rs @@ -1,4 +1,3 @@ -#![feature(compiler_builtins_lib)] #![feature(lang_items)] #![allow(bad_style)] #![allow(unused_imports)] From 6de4f8f2e03197540419eb4c95ead0a5d5307e9c Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 10 Dec 2019 09:02:40 -0800 Subject: [PATCH 0347/1459] Bump to 0.1.23 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4d5ffa712..39c4553fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = 
["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.22" +version = "0.1.23" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From d7191b14c1d893558287817193d766d6482e50e9 Mon Sep 17 00:00:00 2001 From: Adam Schwalm Date: Mon, 6 Jan 2020 10:22:30 -0600 Subject: [PATCH 0348/1459] Add separate rust_probestack definition for uefi (#335) This is necessary because the Mach-O definition must have the triple underscore, but the UEFI one must not. --- src/probestack.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/probestack.rs b/src/probestack.rs index 70b33b8be..3797df0d5 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -75,8 +75,22 @@ macro_rules! define_rust_probestack { }; } -// Same as above, but for Mach-O. -#[cfg(any(target_vendor = "apple", target_os = "uefi"))] +#[cfg(target_os = "uefi")] +macro_rules! define_rust_probestack { + ($body: expr) => { + concat!( + " + .globl __rust_probestack + __rust_probestack: + ", + $body + ) + }; +} + +// Same as above, but for Mach-O. Note that the triple underscore +// is deliberate +#[cfg(target_vendor = "apple")] macro_rules! 
define_rust_probestack { ($body: expr) => { concat!( From 0a15c9bdfe97b76f61fdc06626cecc0573aa8269 Mon Sep 17 00:00:00 2001 From: Daniel Frampton Date: Tue, 14 Jan 2020 13:28:10 -0800 Subject: [PATCH 0349/1459] Don't modify the intrinsic abi for aarch64 windows (#337) --- Cargo.toml | 2 +- src/macros.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 39c4553fe..3203f1c6c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.23" +version = "0.1.24" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" diff --git a/src/macros.rs b/src/macros.rs index 2d11ba622..a86794072 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -165,13 +165,13 @@ macro_rules! intrinsics { $($rest:tt)* ) => ( - #[cfg(all(windows, target_pointer_width = "64"))] + #[cfg(all(windows, target_arch = "x86_64"))] $(#[$($attr)*])* pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { $($body)* } - #[cfg(all(windows, target_pointer_width = "64"))] + #[cfg(all(windows, target_arch = "x86_64"))] pub mod $name { intrinsics! { @@ -184,7 +184,7 @@ macro_rules! intrinsics { } } - #[cfg(not(all(windows, target_pointer_width = "64")))] + #[cfg(not(all(windows, target_arch = "x86_64")))] intrinsics! 
{ $(#[$($attr)*])* pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { From 955cea42cc00de189c00c657a78dafab9ba43625 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 28 Jan 2020 00:39:52 -0800 Subject: [PATCH 0350/1459] Update CI installation of Rust on macos --- .github/workflows/main.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1e1666ab5..469154997 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -107,13 +107,7 @@ jobs: submodules: true - name: Install Rust (rustup) run: rustup update ${{ matrix.rust }} --no-self-update && rustup default ${{ matrix.rust }} - if: matrix.os != 'macos-latest' shell: bash - - name: Install Rust (macos) - run: | - curl https://sh.rustup.rs | sh -s -- -y --default-toolchain nightly - echo "##[add-path]$HOME/.cargo/bin" - if: matrix.os == 'macos-latest' - run: rustup target add ${{ matrix.target }} - name: Download compiler-rt reference sources run: | From cae3e6ea23739166504f9f9fb50ec070097979d4 Mon Sep 17 00:00:00 2001 From: Tyler Mandry Date: Tue, 11 Feb 2020 22:21:12 -0800 Subject: [PATCH 0351/1459] Set probestack visibility to hidden on ELF targets (#340) --- src/probestack.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/probestack.rs b/src/probestack.rs index 3797df0d5..19307df4e 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -64,6 +64,7 @@ macro_rules! 
define_rust_probestack { .pushsection .text.__rust_probestack .globl __rust_probestack .type __rust_probestack, @function + .hidden __rust_probestack __rust_probestack: ", $body, From 3e6327aa59214133fa74b3c51743b5ebde39526f Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 11 Feb 2020 22:21:38 -0800 Subject: [PATCH 0352/1459] Bump to 0.1.25 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 3203f1c6c..d75189709 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.24" +version = "0.1.25" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 3f012a835eca1e71ec27d1717ce5c164f18cfa8f Mon Sep 17 00:00:00 2001 From: Yuxiang Zhu Date: Wed, 12 Feb 2020 10:44:10 +0800 Subject: [PATCH 0353/1459] add mips/mips64 compiler-rt fallbacks so that libgcc is not required This adds compiler-rt fallbacks for mips and mips64 arches. Solves linking issues like https://github.com/rust-lang/rust/issues/57820. 
Signed-off-by: Yuxiang Zhu --- .github/workflows/main.yml | 4 ++-- build.rs | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 469154997..750f288cf 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -111,8 +111,8 @@ jobs: - run: rustup target add ${{ matrix.target }} - name: Download compiler-rt reference sources run: | - curl -L -o code.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/8.0-2019-03-18.tar.gz - tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-8.0-2019-03-18/compiler-rt + curl -L -o code.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/10.0-2020-02-05.tar.gz + tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-10.0-2020-02-05/compiler-rt echo "##[set-env name=RUST_COMPILER_RT_ROOT]./compiler-rt" shell: bash diff --git a/build.rs b/build.rs index b520b6247..b3c65d5b9 100644 --- a/build.rs +++ b/build.rs @@ -396,6 +396,25 @@ mod c { } } + if target_arch == "mips" { + sources.extend(&[("__bswapsi2", "bswapsi2.c")]); + } + + if target_arch == "mips64" { + sources.extend(&[ + ("__extenddftf2", "extenddftf2.c"), + ("__netf2", "comparetf2.c"), + ("__addtf3", "addtf3.c"), + ("__multf3", "multf3.c"), + ("__subtf3", "subtf3.c"), + ("__fixtfsi", "fixtfsi.c"), + ("__floatsitf", "floatsitf.c"), + ("__fixunstfsi", "fixunstfsi.c"), + ("__floatunsitf", "floatunsitf.c"), + ("__fe_getround", "fp_mode.c"), + ]); + } + // Remove the assembly implementations that won't compile for the target if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" { let mut to_remove = Vec::new(); From 7094e1575c4d646eb6f5f1195ba22f70caf8bd90 Mon Sep 17 00:00:00 2001 From: Wolfgang Silbermayr Date: Fri, 21 Feb 2020 11:01:50 +0100 Subject: [PATCH 0354/1459] Use lower-case file extension for LICENSE.txt The `LICENSE.txt` file should be distributed to crates.io, but it wasn't due to the `Cargo.toml` entry in 
the `includes` field being `LICENSE.txt` with lower-case file extension while the file itself had an upper-case extension. --- LICENSE.TXT => LICENSE.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename LICENSE.TXT => LICENSE.txt (100%) diff --git a/LICENSE.TXT b/LICENSE.txt similarity index 100% rename from LICENSE.TXT rename to LICENSE.txt From 43bcb3b59f169d95c4fc7183836170eefe41f72e Mon Sep 17 00:00:00 2001 From: Andre Richter Date: Fri, 28 Feb 2020 20:01:22 +0100 Subject: [PATCH 0355/1459] aarch64: Exclude FP intrinsics on +nofp or +nosimd (#344) `AArch64` GCCs exit with an error condition when they encounter any kind of floating point code if the `nofp` and/or `nosimd` compiler flags have been set. Therefore, evaluate if those flags are present and set a boolean that causes any compiler-rt intrinsics that contain floating point source to be excluded for this target. This patch prepares https://github.com/rust-lang/rust/pull/68334 --- build.rs | 57 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/build.rs b/build.rs index b520b6247..c4a80223b 100644 --- a/build.rs +++ b/build.rs @@ -51,7 +51,7 @@ fn main() { // time). 
This can probably be removed in the future if !target.contains("wasm32") && !target.contains("nvptx") && !target.starts_with("riscv") { #[cfg(feature = "c")] - c::compile(&llvm_target); + c::compile(&llvm_target, &target); } } @@ -121,13 +121,28 @@ mod c { } /// Compile intrinsics from the compiler-rt C source code - pub fn compile(llvm_target: &[&str]) { + pub fn compile(llvm_target: &[&str], target: &String) { let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); let target_env = env::var("CARGO_CFG_TARGET_ENV").unwrap(); let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap(); let target_vendor = env::var("CARGO_CFG_TARGET_VENDOR").unwrap(); + let mut consider_float_intrinsics = true; let cfg = &mut cc::Build::new(); + // AArch64 GCCs exit with an error condition when they encounter any kind of floating point + // code if the `nofp` and/or `nosimd` compiler flags have been set. + // + // Therefore, evaluate if those flags are present and set a boolean that causes any + // compiler-rt intrinsics that contain floating point source to be excluded for this target. 
+ if target_arch == "aarch64" { + let cflags_key = String::from("CFLAGS_") + &(target.to_owned().replace("-", "_")); + if let Ok(cflags_value) = env::var(cflags_key) { + if cflags_value.contains("+nofp") || cflags_value.contains("+nosimd") { + consider_float_intrinsics = false; + } + } + } + cfg.warnings(false); if target_env == "msvc" { @@ -166,34 +181,39 @@ mod c { ("__cmpdi2", "cmpdi2.c"), ("__ctzdi2", "ctzdi2.c"), ("__ctzsi2", "ctzsi2.c"), - ("__divdc3", "divdc3.c"), - ("__divsc3", "divsc3.c"), - ("__divxc3", "divxc3.c"), - ("__extendhfsf2", "extendhfsf2.c"), ("__int_util", "int_util.c"), - ("__muldc3", "muldc3.c"), - ("__mulsc3", "mulsc3.c"), ("__mulvdi3", "mulvdi3.c"), ("__mulvsi3", "mulvsi3.c"), - ("__mulxc3", "mulxc3.c"), - ("__negdf2", "negdf2.c"), ("__negdi2", "negdi2.c"), - ("__negsf2", "negsf2.c"), ("__negvdi2", "negvdi2.c"), ("__negvsi2", "negvsi2.c"), ("__paritydi2", "paritydi2.c"), ("__paritysi2", "paritysi2.c"), ("__popcountdi2", "popcountdi2.c"), ("__popcountsi2", "popcountsi2.c"), - ("__powixf2", "powixf2.c"), ("__subvdi3", "subvdi3.c"), ("__subvsi3", "subvsi3.c"), - ("__truncdfhf2", "truncdfhf2.c"), - ("__truncdfsf2", "truncdfsf2.c"), - ("__truncsfhf2", "truncsfhf2.c"), ("__ucmpdi2", "ucmpdi2.c"), ]); + if consider_float_intrinsics { + sources.extend(&[ + ("__divdc3", "divdc3.c"), + ("__divsc3", "divsc3.c"), + ("__divxc3", "divxc3.c"), + ("__extendhfsf2", "extendhfsf2.c"), + ("__muldc3", "muldc3.c"), + ("__mulsc3", "mulsc3.c"), + ("__mulxc3", "mulxc3.c"), + ("__negdf2", "negdf2.c"), + ("__negsf2", "negsf2.c"), + ("__powixf2", "powixf2.c"), + ("__truncdfhf2", "truncdfhf2.c"), + ("__truncdfsf2", "truncdfsf2.c"), + ("__truncsfhf2", "truncsfhf2.c"), + ]); + } + // When compiling in rustbuild (the rust-lang/rust repo) this library // also needs to satisfy intrinsics that jemalloc or C in general may // need, so include a few more that aren't typically needed by @@ -214,12 +234,15 @@ mod c { ("__ffsti2", "ffsti2.c"), ("__mulvti3", "mulvti3.c"), 
("__negti2", "negti2.c"), - ("__negvti2", "negvti2.c"), ("__parityti2", "parityti2.c"), ("__popcountti2", "popcountti2.c"), ("__subvti3", "subvti3.c"), ("__ucmpti2", "ucmpti2.c"), ]); + + if consider_float_intrinsics { + sources.extend(&[("__negvti2", "negvti2.c")]); + } } if target_vendor == "apple" { @@ -372,7 +395,7 @@ mod c { ]); } - if target_arch == "aarch64" { + if target_arch == "aarch64" && consider_float_intrinsics { sources.extend(&[ ("__comparetf2", "comparetf2.c"), ("__extenddftf2", "extenddftf2.c"), From 72526f3811fa17babe3b37ca2e6371fa3786f06b Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 28 Feb 2020 11:01:42 -0800 Subject: [PATCH 0356/1459] Bump to 0.1.26 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index d75189709..f24093fdf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.25" +version = "0.1.26" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 2541f27e8c3d61505815c0492c045b7d17436e35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Mi=C4=85sko?= Date: Sat, 11 Apr 2020 00:00:50 +0200 Subject: [PATCH 0357/1459] Place intrinsics in individual object files (#349) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tomasz Miąsko --- src/macros.rs | 49 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/src/macros.rs b/src/macros.rs index a86794072..b02f3ea5c 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -173,14 +173,12 @@ macro_rules! intrinsics { #[cfg(all(windows, target_arch = "x86_64"))] pub mod $name { - - intrinsics! 
{ - pub extern $abi fn $name( $($argname: $ty),* ) - -> ::macros::win64_128bit_abi_hack::U64x2 - { - let e: $ret = super::$name($($argname),*); - ::macros::win64_128bit_abi_hack::U64x2::from(e) - } + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub extern $abi fn $name( $($argname: $ty),* ) + -> ::macros::win64_128bit_abi_hack::U64x2 + { + let e: $ret = super::$name($($argname),*); + ::macros::win64_128bit_abi_hack::U64x2::from(e) } } @@ -209,17 +207,23 @@ macro_rules! intrinsics { $($rest:tt)* ) => ( #[cfg(target_arch = "arm")] - #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { $($body)* } #[cfg(target_arch = "arm")] pub mod $name { - intrinsics! { - pub extern "aapcs" fn $alias( $($argname: $ty),* ) -> $ret { - super::$name($($argname),*) - } + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { + super::$name($($argname),*) + } + } + + #[cfg(target_arch = "arm")] + pub mod $alias { + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub extern "aapcs" fn $alias( $($argname: $ty),* ) -> $ret { + super::$name($($argname),*) } } @@ -234,9 +238,15 @@ macro_rules! intrinsics { intrinsics!($($rest)*); ); - // This is the final catch-all rule. At this point we just generate an + // This is the final catch-all rule. At this point we generate an // intrinsic with a conditional `#[no_mangle]` directive to avoid - // interfereing with duplicate symbols and whatnot during testing. + // interfering with duplicate symbols and whatnot during testing. + // + // The implementation is placed in a separate module, to take advantage + // of the fact that rustc partitions functions into code generation + // units based on module they are defined in. As a result we will have + // a separate object file for each intrinsic. 
For further details see + // corresponding PR in rustc https://github.com/rust-lang/rust/pull/70846 // // After the intrinsic is defined we just continue with the rest of the // input we were given. @@ -249,11 +259,18 @@ macro_rules! intrinsics { $($rest:tt)* ) => ( $(#[$($attr)*])* - #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { $($body)* } + pub mod $name { + $(#[$($attr)*])* + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { + super::$name($($argname),*) + } + } + intrinsics!($($rest)*); ); } From 25ea08da2824b8137b6ac1222b0171cb07cca083 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 13 Apr 2020 07:17:28 -0700 Subject: [PATCH 0358/1459] Bump to 0.1.27 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f24093fdf..41b33bf47 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.26" +version = "0.1.27" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From cde22bc180391e75de1c189fe29f442ada86ccde Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 29 Apr 2020 15:30:10 -0500 Subject: [PATCH 0359/1459] Switch to using `llvm_asm!` instead of `asm!` (#351) * Switch to using `llvm_asm!` instead of `asm!` * Run rustfmt * Fix how LTO is specified on nightly --- ci/run.sh | 6 +- examples/intrinsics.rs | 4 +- src/arm.rs | 140 +++++++++++++++++++++++------------------ src/int/mod.rs | 74 +++++++++++----------- src/lib.rs | 2 +- src/x86.rs | 6 +- src/x86_64.rs | 6 +- 7 files changed, 128 insertions(+), 110 deletions(-) diff --git a/ci/run.sh b/ci/run.sh index ae32806ec..c4cc6813d 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -85,9 +85,11 @@ RUSTFLAGS="-C debug-assertions=no" $build_intrinsics --features c --release if [ -z 
"$DEBUG_LTO_BUILD_DOESNT_WORK" ]; then RUSTFLAGS="-C debug-assertions=no" \ CARGO_INCREMENTAL=0 \ - $cargo rustc --features "$INTRINSICS_FEATURES" --target $1 --example intrinsics -- -C lto + CARGO_PROFILE_DEV_LTO=true \ + $cargo rustc --features "$INTRINSICS_FEATURES" --target $1 --example intrinsics fi -$cargo rustc --features "$INTRINSICS_FEATURES" --target $1 --example intrinsics --release -- -C lto +CARGO_PROFILE_RELEASE_LTO=true \ + $cargo rustc --features "$INTRINSICS_FEATURES" --target $1 --example intrinsics --release # Ensure no references to a panicking function for rlib in $(echo $path); do diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 5ceebe132..82762e076 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -6,7 +6,7 @@ #![allow(unused_features)] #![cfg_attr(thumb, no_main)] #![deny(dead_code)] -#![feature(asm)] +#![feature(llvm_asm)] #![feature(lang_items)] #![feature(start)] #![feature(allocator_api)] @@ -280,7 +280,7 @@ fn run() { // A copy of "test::black_box". 
Used to prevent LLVM from optimizing away the intrinsics during LTO fn bb(dummy: T) -> T { - unsafe { asm!("" : : "r"(&dummy)) } + unsafe { llvm_asm!("" : : "r"(&dummy)) } dummy } diff --git a/src/arm.rs b/src/arm.rs index 4cf73ef37..190bba726 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -8,13 +8,15 @@ use core::intrinsics; #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe fn __aeabi_uidivmod() { - asm!("push {lr} - sub sp, sp, #4 - mov r2, sp - bl __udivmodsi4 - ldr r1, [sp] - add sp, sp, #4 - pop {pc}" ::: "memory" : "volatile"); + llvm_asm!(" + push {lr} + sub sp, sp, #4 + mov r2, sp + bl __udivmodsi4 + ldr r1, [sp] + add sp, sp, #4 + pop {pc} + " ::: "memory" : "volatile"); intrinsics::unreachable(); } @@ -22,13 +24,15 @@ pub unsafe fn __aeabi_uidivmod() { #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe fn __aeabi_uidivmod() { - asm!("push {lr} - sub sp, sp, #4 - mov r2, sp - bl ___udivmodsi4 - ldr r1, [sp] - add sp, sp, #4 - pop {pc}" ::: "memory" : "volatile"); + llvm_asm!(" + push {lr} + sub sp, sp, #4 + mov r2, sp + bl ___udivmodsi4 + ldr r1, [sp] + add sp, sp, #4 + pop {pc} + " ::: "memory" : "volatile"); intrinsics::unreachable(); } @@ -36,15 +40,17 @@ pub unsafe fn __aeabi_uidivmod() { #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe fn __aeabi_uldivmod() { - asm!("push {r4, lr} - sub sp, sp, #16 - add r4, sp, #8 - str r4, [sp] - bl __udivmoddi4 - ldr r2, [sp, #8] - ldr r3, [sp, #12] - add sp, sp, #16 - pop {r4, pc}" ::: "memory" : "volatile"); + llvm_asm!(" + push {r4, lr} + sub sp, sp, #16 + add r4, sp, #8 + str r4, [sp] + bl __udivmoddi4 + ldr r2, [sp, #8] + ldr r3, [sp, #12] + add sp, sp, #16 + pop {r4, pc} + " ::: "memory" : "volatile"); intrinsics::unreachable(); } @@ -52,15 +58,17 @@ pub unsafe fn __aeabi_uldivmod() { #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe fn __aeabi_uldivmod() { - asm!("push {r4, lr} - sub sp, sp, #16 - add r4, sp, #8 
- str r4, [sp] - bl ___udivmoddi4 - ldr r2, [sp, #8] - ldr r3, [sp, #12] - add sp, sp, #16 - pop {r4, pc}" ::: "memory" : "volatile"); + llvm_asm!(" + push {r4, lr} + sub sp, sp, #16 + add r4, sp, #8 + str r4, [sp] + bl ___udivmoddi4 + ldr r2, [sp, #8] + ldr r3, [sp, #12] + add sp, sp, #16 + pop {r4, pc} + " ::: "memory" : "volatile"); intrinsics::unreachable(); } @@ -68,12 +76,14 @@ pub unsafe fn __aeabi_uldivmod() { #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe fn __aeabi_idivmod() { - asm!("push {r0, r1, r4, lr} - bl __aeabi_idiv - pop {r1, r2} - muls r2, r2, r0 - subs r1, r1, r2 - pop {r4, pc}" ::: "memory" : "volatile"); + llvm_asm!(" + push {r0, r1, r4, lr} + bl __aeabi_idiv + pop {r1, r2} + muls r2, r2, r0 + subs r1, r1, r2 + pop {r4, pc} + " ::: "memory" : "volatile"); intrinsics::unreachable(); } @@ -81,12 +91,14 @@ pub unsafe fn __aeabi_idivmod() { #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe fn __aeabi_idivmod() { - asm!("push {r0, r1, r4, lr} - bl ___aeabi_idiv - pop {r1, r2} - muls r2, r2, r0 - subs r1, r1, r2 - pop {r4, pc}" ::: "memory" : "volatile"); + llvm_asm!(" + push {r0, r1, r4, lr} + bl ___aeabi_idiv + pop {r1, r2} + muls r2, r2, r0 + subs r1, r1, r2 + pop {r4, pc} + " ::: "memory" : "volatile"); intrinsics::unreachable(); } @@ -94,15 +106,17 @@ pub unsafe fn __aeabi_idivmod() { #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe fn __aeabi_ldivmod() { - asm!("push {r4, lr} - sub sp, sp, #16 - add r4, sp, #8 - str r4, [sp] - bl __divmoddi4 - ldr r2, [sp, #8] - ldr r3, [sp, #12] - add sp, sp, #16 - pop {r4, pc}" ::: "memory" : "volatile"); + llvm_asm!(" + push {r4, lr} + sub sp, sp, #16 + add r4, sp, #8 + str r4, [sp] + bl __divmoddi4 + ldr r2, [sp, #8] + ldr r3, [sp, #12] + add sp, sp, #16 + pop {r4, pc} + " ::: "memory" : "volatile"); intrinsics::unreachable(); } @@ -110,15 +124,17 @@ pub unsafe fn __aeabi_ldivmod() { #[naked] #[cfg_attr(not(feature = 
"mangled-names"), no_mangle)] pub unsafe fn __aeabi_ldivmod() { - asm!("push {r4, lr} - sub sp, sp, #16 - add r4, sp, #8 - str r4, [sp] - bl ___divmoddi4 - ldr r2, [sp, #8] - ldr r3, [sp, #12] - add sp, sp, #16 - pop {r4, pc}" ::: "memory" : "volatile"); + llvm_asm!(" + push {r4, lr} + sub sp, sp, #16 + add r4, sp, #8 + str r4, [sp] + bl ___divmoddi4 + ldr r2, [sp, #8] + ldr r3, [sp, #12] + add sp, sp, #16 + pop {r4, pc} + " ::: "memory" : "volatile"); intrinsics::unreachable(); } diff --git a/src/int/mod.rs b/src/int/mod.rs index 7587bc69e..80ac4f9fa 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -88,55 +88,55 @@ fn unwrap(t: Option) -> T { macro_rules! int_impl_common { ($ty:ty, $bits:expr) => { - const BITS: u32 = $bits; + const BITS: u32 = $bits; - const ZERO: Self = 0; - const ONE: Self = 1; + const ZERO: Self = 0; + const ONE: Self = 1; - fn from_bool(b: bool) -> Self { - b as $ty - } + fn from_bool(b: bool) -> Self { + b as $ty + } - fn max_value() -> Self { - ::max_value() - } + fn max_value() -> Self { + ::max_value() + } - fn min_value() -> Self { - ::min_value() - } + fn min_value() -> Self { + ::min_value() + } - fn wrapping_add(self, other: Self) -> Self { - ::wrapping_add(self, other) - } + fn wrapping_add(self, other: Self) -> Self { + ::wrapping_add(self, other) + } - fn wrapping_mul(self, other: Self) -> Self { - ::wrapping_mul(self, other) - } + fn wrapping_mul(self, other: Self) -> Self { + ::wrapping_mul(self, other) + } - fn wrapping_sub(self, other: Self) -> Self { - ::wrapping_sub(self, other) - } + fn wrapping_sub(self, other: Self) -> Self { + ::wrapping_sub(self, other) + } - fn wrapping_shl(self, other: u32) -> Self { - ::wrapping_shl(self, other) - } + fn wrapping_shl(self, other: u32) -> Self { + ::wrapping_shl(self, other) + } - fn overflowing_add(self, other: Self) -> (Self, bool) { - ::overflowing_add(self, other) - } + fn overflowing_add(self, other: Self) -> (Self, bool) { + ::overflowing_add(self, other) + } - fn 
aborting_div(self, other: Self) -> Self { - unwrap(::checked_div(self, other)) - } + fn aborting_div(self, other: Self) -> Self { + unwrap(::checked_div(self, other)) + } - fn aborting_rem(self, other: Self) -> Self { - unwrap(::checked_rem(self, other)) - } + fn aborting_rem(self, other: Self) -> Self { + unwrap(::checked_rem(self, other)) + } - fn leading_zeros(self) -> u32 { - ::leading_zeros(self) - } - } + fn leading_zeros(self) -> u32 { + ::leading_zeros(self) + } + }; } macro_rules! int_impl { diff --git a/src/lib.rs b/src/lib.rs index e57a5ef3f..0ca770b1e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,6 @@ #![cfg_attr(feature = "compiler-builtins", compiler_builtins)] #![feature(abi_unadjusted)] -#![feature(asm)] +#![feature(llvm_asm)] #![feature(global_asm)] #![feature(cfg_target_has_atomic)] #![feature(compiler_builtins)] diff --git a/src/x86.rs b/src/x86.rs index 035c0a31c..5511c4572 100644 --- a/src/x86.rs +++ b/src/x86.rs @@ -12,7 +12,7 @@ use core::intrinsics; #[naked] #[no_mangle] pub unsafe fn ___chkstk_ms() { - asm!(" + llvm_asm!(" push %ecx push %eax cmp $$0x1000,%eax @@ -38,7 +38,7 @@ pub unsafe fn ___chkstk_ms() { #[naked] #[no_mangle] pub unsafe fn __alloca() { - asm!("jmp ___chkstk // Jump to ___chkstk since fallthrough may be unreliable" + llvm_asm!("jmp ___chkstk // Jump to ___chkstk since fallthrough may be unreliable" ::: "memory" : "volatile"); intrinsics::unreachable(); } @@ -47,7 +47,7 @@ pub unsafe fn __alloca() { #[naked] #[no_mangle] pub unsafe fn ___chkstk() { - asm!(" + llvm_asm!(" push %ecx cmp $$0x1000,%eax lea 8(%esp),%ecx // esp before calling this routine -> ecx diff --git a/src/x86_64.rs b/src/x86_64.rs index 6940f8d9d..6a0cd5668 100644 --- a/src/x86_64.rs +++ b/src/x86_64.rs @@ -12,7 +12,7 @@ use core::intrinsics; #[naked] #[no_mangle] pub unsafe fn ___chkstk_ms() { - asm!(" + llvm_asm!(" push %rcx push %rax cmp $$0x1000,%rax @@ -37,7 +37,7 @@ pub unsafe fn ___chkstk_ms() { #[naked] #[no_mangle] pub unsafe fn __alloca() 
{ - asm!("mov %rcx,%rax // x64 _alloca is a normal function with parameter in rcx + llvm_asm!("mov %rcx,%rax // x64 _alloca is a normal function with parameter in rcx jmp ___chkstk // Jump to ___chkstk since fallthrough may be unreliable" ::: "memory" : "volatile"); intrinsics::unreachable(); @@ -47,7 +47,7 @@ pub unsafe fn __alloca() { #[naked] #[no_mangle] pub unsafe fn ___chkstk() { - asm!( + llvm_asm!( " push %rcx cmp $$0x1000,%rax From 1108d2af9d9df3ae14b06d35a11190aa55097ab8 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 11 May 2020 22:29:49 -0700 Subject: [PATCH 0360/1459] Bump to 0.1.28 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 41b33bf47..75a7e1e93 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.27" +version = "0.1.28" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From c2eba93e28ae38cf7bdb2fbead7ecbc8dfe82332 Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Fri, 22 May 2020 14:12:06 -0700 Subject: [PATCH 0361/1459] Add more targets to automatically select `mem` feature. (#357) --- build.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build.rs b/build.rs index c4a80223b..abeac9bf1 100644 --- a/build.rs +++ b/build.rs @@ -27,6 +27,8 @@ fn main() { // provide them. 
if (target.contains("wasm32") && !target.contains("wasi")) || (target.contains("sgx") && target.contains("fortanix")) + || target.contains("-none") + || target.contains("nvptx") { println!("cargo:rustc-cfg=feature=\"mem\""); } From d0ea0765aa76b7e9ea146e86db1d24e9b17443ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Mi=C4=85sko?= Date: Tue, 26 May 2020 22:12:10 +0200 Subject: [PATCH 0362/1459] Use crate visibility for traits (#358) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tomasz Miąsko --- src/float/mod.rs | 2 +- src/int/mod.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/float/mod.rs b/src/float/mod.rs index 8b8039452..06e9aad4b 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -13,7 +13,7 @@ pub mod pow; pub mod sub; /// Trait for some basic operations on floats -pub trait Float: +pub(crate) trait Float: Copy + PartialEq + PartialOrd diff --git a/src/int/mod.rs b/src/int/mod.rs index 80ac4f9fa..d73bf6db9 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -19,7 +19,7 @@ pub mod shift; pub mod udiv; /// Trait for some basic operations on integers -pub trait Int: +pub(crate) trait Int: Copy + PartialEq + PartialOrd @@ -190,7 +190,7 @@ int_impl!(i64, u64, 64); int_impl!(i128, u128, 128); /// Trait to convert an integer to/from smaller parts -pub trait LargeInt: Int { +pub(crate) trait LargeInt: Int { type LowHalf: Int; type HighHalf: Int; @@ -232,7 +232,7 @@ large_int!(u128, u64, u64, 64); large_int!(i128, u64, i64, 64); /// Trait to express (possibly lossy) casting of integers -pub trait CastInto: Copy { +pub(crate) trait CastInto: Copy { fn cast(self) -> T; } @@ -256,7 +256,7 @@ cast_into!(i64); cast_into!(u128); cast_into!(i128); -pub trait WideInt: Int { +pub(crate) trait WideInt: Int { type Output: Int; fn wide_mul(self, other: Self) -> (Self, Self); From 156fcf1d674490a71f22e497e7522e2d5624d46c Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 26 
May 2020 13:12:24 -0700 Subject: [PATCH 0363/1459] Bump to 0.1.29 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 75a7e1e93..7c6f4bdb7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.28" +version = "0.1.29" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 8a9ff97081f17e53586713e5b3a6742f29cc7893 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 29 May 2020 14:16:51 -0500 Subject: [PATCH 0364/1459] Use macros for more division/array checks (#244) * Use macros for more division/array checks This commit moves over more array accesses to the `i!` macro to avoid bounds checks when debug assertions are disabled. This is surfaced from rust-lang/compiler-builtins#360 where recent changes in codegen units have caused some bounds checks to not get elided in release mode. This also adds a `div!` macro to work around rust-lang/rust#72751.
* Don't test/bench our shim crate It's not intended to run all our tests --- .../compiler-builtins-smoke-test/Cargo.toml | 5 ++++- libm/src/lib.rs | 5 +---- libm/src/math/atanf.rs | 8 ++++---- libm/src/math/exp.rs | 2 +- libm/src/math/exp2.rs | 6 +++--- libm/src/math/exp2f.rs | 2 +- libm/src/math/expf.rs | 2 +- libm/src/math/mod.rs | 18 ++++++++++++++++++ libm/src/math/pow.rs | 12 ++++++------ libm/src/math/powf.rs | 12 ++++++------ libm/src/math/rem_pio2.rs | 12 ++++++------ libm/src/math/rem_pio2_large.rs | 4 ++-- 12 files changed, 53 insertions(+), 35 deletions(-) diff --git a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml index 40e75dd22..ac192a913 100644 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -3,4 +3,7 @@ name = "cb" version = "0.1.0" authors = ["Jorge Aparicio "] -[dependencies] +[lib] +test = false +bench = false + diff --git a/libm/src/lib.rs b/libm/src/lib.rs index e228af9b3..bbc79ecad 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -1,10 +1,7 @@ //! 
libm in pure Rust #![deny(warnings)] #![no_std] -#![cfg_attr( - all(target_arch = "wasm32", feature = "unstable"), - feature(core_intrinsics) -)] +#![cfg_attr(all(feature = "unstable"), feature(core_intrinsics))] #![allow(clippy::unreadable_literal)] #![allow(clippy::many_single_char_names)] #![allow(clippy::needless_return)] diff --git a/libm/src/math/atanf.rs b/libm/src/math/atanf.rs index 73f3352e9..d042b3bc0 100644 --- a/libm/src/math/atanf.rs +++ b/libm/src/math/atanf.rs @@ -56,7 +56,7 @@ pub fn atanf(mut x: f32) -> f32 { if x.is_nan() { return x; } - z = ATAN_HI[3] + x1p_120; + z = i!(ATAN_HI, 3) + x1p_120; return if sign { -z } else { z }; } let id = if ix < 0x3ee00000 { @@ -97,13 +97,13 @@ pub fn atanf(mut x: f32) -> f32 { z = x * x; let w = z * z; /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */ - let s1 = z * (A_T[0] + w * (A_T[2] + w * A_T[4])); - let s2 = w * (A_T[1] + w * A_T[3]); + let s1 = z * (i!(A_T, 0) + w * (i!(A_T, 2) + w * i!(A_T, 4))); + let s2 = w * (i!(A_T, 1) + w * i!(A_T, 3)); if id < 0 { return x - x * (s1 + s2); } let id = id as usize; - let z = ATAN_HI[id] - ((x * (s1 + s2) - ATAN_LO[id]) - x); + let z = i!(ATAN_HI, id) - ((x * (s1 + s2) - i!(ATAN_LO, id)) - x); if sign { -z } else { diff --git a/libm/src/math/exp.rs b/libm/src/math/exp.rs index 5b163f954..d4994277f 100644 --- a/libm/src/math/exp.rs +++ b/libm/src/math/exp.rs @@ -124,7 +124,7 @@ pub fn exp(mut x: f64) -> f64 { /* if |x| > 0.5 ln2 */ if hx >= 0x3ff0a2b2 { /* if |x| >= 1.5 ln2 */ - k = (INVLN2 * x + HALF[sign as usize]) as i32; + k = (INVLN2 * x + i!(HALF, sign as usize)) as i32; } else { k = 1 - sign - sign; } diff --git a/libm/src/math/exp2.rs b/libm/src/math/exp2.rs index 8ea434dca..e0e385df2 100644 --- a/libm/src/math/exp2.rs +++ b/libm/src/math/exp2.rs @@ -374,14 +374,14 @@ pub fn exp2(mut x: f64) -> f64 { let mut i0 = ui as u32; i0 = i0.wrapping_add(TBLSIZE as u32 / 2); let ku = i0 / TBLSIZE as u32 * TBLSIZE as u32; - let ki = ku as i32 / TBLSIZE 
as i32; + let ki = div!(ku as i32, TBLSIZE as i32); i0 %= TBLSIZE as u32; let uf = f64::from_bits(ui) - redux; let mut z = x - uf; /* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */ - let t = f64::from_bits(TBL[2 * i0 as usize]); /* exp2t[i0] */ - z -= f64::from_bits(TBL[2 * i0 as usize + 1]); /* eps[i0] */ + let t = f64::from_bits(i!(TBL, 2 * i0 as usize)); /* exp2t[i0] */ + z -= f64::from_bits(i!(TBL, 2 * i0 as usize + 1)); /* eps[i0] */ let r = t + t * z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * p5)))); scalbn(r, ki) diff --git a/libm/src/math/exp2f.rs b/libm/src/math/exp2f.rs index 8a890b832..f4867b80e 100644 --- a/libm/src/math/exp2f.rs +++ b/libm/src/math/exp2f.rs @@ -126,7 +126,7 @@ pub fn exp2f(mut x: f32) -> f32 { uf -= redux; let z: f64 = (x - uf) as f64; /* Compute r = exp2(y) = exp2ft[i0] * p(z). */ - let r: f64 = f64::from_bits(EXP2FT[i0 as usize]); + let r: f64 = f64::from_bits(i!(EXP2FT, i0 as usize)); let t: f64 = r as f64 * z; let r: f64 = r + t * (p1 as f64 + z * p2 as f64) + t * (z * z) * (p3 as f64 + z * p4 as f64); diff --git a/libm/src/math/expf.rs b/libm/src/math/expf.rs index 47c1b2c46..a53aa90a6 100644 --- a/libm/src/math/expf.rs +++ b/libm/src/math/expf.rs @@ -70,7 +70,7 @@ pub fn expf(mut x: f32) -> f32 { /* if |x| > 0.5 ln2 */ if hx > 0x3f851592 { /* if |x| > 1.5 ln2 */ - k = (INV_LN2 * x + HALF[sign as usize]) as i32; + k = (INV_LN2 * x + i!(HALF, sign as usize)) as i32; } else { k = 1 - sign - sign; } diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index c8d7bd819..ceeee0b31 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -58,6 +58,24 @@ macro_rules! i { }; } +// Temporary macro to avoid panic codegen for division (in debug mode too). At +// the time of this writing this is only used in a few places, and once +// rust-lang/rust#72751 is fixed then this macro will no longer be necessary and +// the native `/` operator can be used and panics won't be codegen'd. 
+#[cfg(any(debug_assertions, not(feature = "unstable")))] +macro_rules! div { + ($a:expr, $b:expr) => { + $a / $b + }; +} + +#[cfg(all(not(debug_assertions), feature = "unstable"))] +macro_rules! div { + ($a:expr, $b:expr) => { + unsafe { core::intrinsics::unchecked_div($a, $b) } + }; +} + macro_rules! llvm_intrinsically_optimized { (#[cfg($($clause:tt)*)] $e:expr) => { #[cfg(all(feature = "unstable", $($clause)*))] diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index ce8e83ee6..c7fd0dfa1 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -299,8 +299,8 @@ pub fn pow(x: f64, y: f64) -> f64 { ax = with_set_high_word(ax, ix as u32); /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ - let u: f64 = ax - BP[k as usize]; /* bp[0]=1.0, bp[1]=1.5 */ - let v: f64 = 1.0 / (ax + BP[k as usize]); + let u: f64 = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */ + let v: f64 = 1.0 / (ax + i!(BP, k as usize)); let ss: f64 = u * v; let s_h = with_set_low_word(ss, 0); @@ -309,7 +309,7 @@ pub fn pow(x: f64, y: f64) -> f64 { 0.0, ((ix as u32 >> 1) | 0x20000000) + 0x00080000 + ((k as u32) << 18), ); - let t_l: f64 = ax - (t_h - BP[k as usize]); + let t_l: f64 = ax - (t_h - i!(BP, k as usize)); let s_l: f64 = v * ((u - s_h * t_h) - s_h * t_l); /* compute log(ax) */ @@ -328,12 +328,12 @@ pub fn pow(x: f64, y: f64) -> f64 { let p_h: f64 = with_set_low_word(u + v, 0); let p_l = v - (p_h - u); let z_h: f64 = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */ - let z_l: f64 = CP_L * p_h + p_l * CP + DP_L[k as usize]; + let z_l: f64 = CP_L * p_h + p_l * CP + i!(DP_L, k as usize); /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */ let t: f64 = n as f64; - t1 = with_set_low_word(((z_h + z_l) + DP_H[k as usize]) + t, 0); - t2 = z_l - (((t1 - t) - DP_H[k as usize]) - z_h); + t1 = with_set_low_word(((z_h + z_l) + i!(DP_H, k as usize)) + t, 0); + t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h); } /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ diff 
--git a/libm/src/math/powf.rs b/libm/src/math/powf.rs index f3cf76f9a..68d2083bb 100644 --- a/libm/src/math/powf.rs +++ b/libm/src/math/powf.rs @@ -238,8 +238,8 @@ pub fn powf(x: f32, y: f32) -> f32 { ax = f32::from_bits(ix as u32); /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ - u = ax - BP[k as usize]; /* bp[0]=1.0, bp[1]=1.5 */ - v = 1.0 / (ax + BP[k as usize]); + u = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */ + v = 1.0 / (ax + i!(BP, k as usize)); s = u * v; s_h = s; is = s_h.to_bits() as i32; @@ -247,7 +247,7 @@ pub fn powf(x: f32, y: f32) -> f32 { /* t_h=ax+bp[k] High */ is = (((ix as u32 >> 1) & 0xfffff000) | 0x20000000) as i32; t_h = f32::from_bits(is as u32 + 0x00400000 + ((k as u32) << 21)); - t_l = ax - (t_h - BP[k as usize]); + t_l = ax - (t_h - i!(BP, k as usize)); s_l = v * ((u - s_h * t_h) - s_h * t_l); /* compute log(ax) */ s2 = s * s; @@ -267,13 +267,13 @@ pub fn powf(x: f32, y: f32) -> f32 { p_h = f32::from_bits(is as u32 & 0xfffff000); p_l = v - (p_h - u); z_h = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */ - z_l = CP_L * p_h + p_l * CP + DP_L[k as usize]; + z_l = CP_L * p_h + p_l * CP + i!(DP_L, k as usize); /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */ t = n as f32; - t1 = ((z_h + z_l) + DP_H[k as usize]) + t; + t1 = ((z_h + z_l) + i!(DP_H, k as usize)) + t; is = t1.to_bits() as i32; t1 = f32::from_bits(is as u32 & 0xfffff000); - t2 = z_l - (((t1 - t) - DP_H[k as usize]) - z_h); + t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h); }; /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 6b7dbd348..46f7c38ff 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -167,21 +167,21 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { let mut z = f64::from_bits(ui); let mut tx = [0.0; 3]; for i in 0..2 { - tx[i] = z as i32 as f64; - z = (z - tx[i]) * x1p24; + i!(tx,i, =, z as i32 as f64); + z = (z - i!(tx, i)) * x1p24; } - 
tx[2] = z; + i!(tx,2, =, z); /* skip zero terms, first term is non-zero */ let mut i = 2; - while i != 0 && tx[i] == 0.0 { + while i != 0 && i!(tx, i) == 0.0 { i -= 1; } let mut ty = [0.0; 3]; let n = rem_pio2_large(&tx[..=i], &mut ty, ((ix as i32) >> 20) - (0x3ff + 23), 1); if sign != 0 { - return (-n, -ty[0], -ty[1]); + return (-n, -i!(ty, 0), -i!(ty, 1)); } - (n, ty[0], ty[1]) + (n, i!(ty, 0), i!(ty, 1)) } #[cfg(test)] diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index 002ce2e21..65473f0ab 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -242,12 +242,12 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> let mut iq: [i32; 20] = [0; 20]; /* initialize jk*/ - let jk = INIT_JK[prec]; + let jk = i!(INIT_JK, prec); let jp = jk; /* determine jx,jv,q0, note that 3>q0 */ let jx = nx - 1; - let mut jv = (e0 - 3) / 24; + let mut jv = div!(e0 - 3, 24); if jv < 0 { jv = 0; } From 4bf8cad593c06c7996cf4e25834bef159d59f48b Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 29 May 2020 14:38:29 -0500 Subject: [PATCH 0365/1459] Expand wasm32 testing on CI (#360) * Expand wasm32 testing on CI Run the full `run.sh` test script to get full assertions, including that nothing in the wasm compiler-builtins is panicking. Unfortunately it's currently panicking, so this is good to weed out! 
* Update libm --- .github/workflows/main.yml | 31 ++------------------- ci/docker/wasm32-unknown-unknown/Dockerfile | 6 ++++ ci/run.sh | 10 ++++--- examples/intrinsics.rs | 6 ++-- libm | 2 +- src/lib.rs | 2 +- 6 files changed, 19 insertions(+), 38 deletions(-) create mode 100644 ci/docker/wasm32-unknown-unknown/Dockerfile diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 469154997..25ab1b30a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -7,30 +7,6 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - target: - - aarch64-unknown-linux-gnu - - arm-unknown-linux-gnueabi - - arm-unknown-linux-gnueabihf - - i586-unknown-linux-gnu - - i686-unknown-linux-gnu - - mips-unknown-linux-gnu - - mips64-unknown-linux-gnuabi64 - - mips64el-unknown-linux-gnuabi64 - - mipsel-unknown-linux-gnu - - powerpc-unknown-linux-gnu - - powerpc64-unknown-linux-gnu - - powerpc64le-unknown-linux-gnu - - thumbv6m-none-eabi - - thumbv7em-none-eabi - - thumbv7em-none-eabihf - - thumbv7m-none-eabi - - wasm32-unknown-unknown - - x86_64-unknown-linux-gnu - - x86_64-apple-darwin - - i686-pc-windows-msvc - - x86_64-pc-windows-msvc - - i686-pc-windows-gnu - - x86_64-pc-windows-gnu include: - target: aarch64-unknown-linux-gnu os: ubuntu-latest @@ -109,6 +85,7 @@ jobs: run: rustup update ${{ matrix.rust }} --no-self-update && rustup default ${{ matrix.rust }} shell: bash - run: rustup target add ${{ matrix.target }} + - run: rustup component add llvm-tools-preview - name: Download compiler-rt reference sources run: | curl -L -o code.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/8.0-2019-03-18.tar.gz @@ -121,13 +98,9 @@ jobs: if: matrix.os != 'ubuntu-latest' shell: bash - # Wasm is special and is just build as a smoke test - - run: cargo build --target ${{ matrix.target }} - if: matrix.target == 'wasm32-unknown-unknown' - # Otherwise we use our docker containers to run builds - run: cargo generate-lockfile && ./ci/run-docker.sh ${{ 
matrix.target }} - if: matrix.target != 'wasm32-unknown-unknown' && matrix.os == 'ubuntu-latest' + if: matrix.os == 'ubuntu-latest' rustfmt: name: Rustfmt diff --git a/ci/docker/wasm32-unknown-unknown/Dockerfile b/ci/docker/wasm32-unknown-unknown/Dockerfile new file mode 100644 index 000000000..758d94d50 --- /dev/null +++ b/ci/docker/wasm32-unknown-unknown/Dockerfile @@ -0,0 +1,6 @@ +FROM ubuntu:20.04 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates + +ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=true diff --git a/ci/run.sh b/ci/run.sh index c4cc6813d..3c9dc0247 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -32,7 +32,10 @@ case $1 in ;; esac -NM=nm +NM=$(find $(rustc --print sysroot) -name llvm-nm) +if [ "$NM" = "" ]; then + NM=${PREFIX}nm +fi if [ -d /target ]; then path=/target/${1}/debug/deps/libcompiler_builtins-*.rlib @@ -47,8 +50,7 @@ for rlib in $(echo $path); do echo checking $rlib for duplicate symbols echo "================================================================" - stdout=$($PREFIX$NM -g --defined-only $rlib 2>&1) - + stdout=$($NM -g --defined-only $rlib 2>&1) # NOTE On i586, It's normal that the get_pc_thunk symbol appears several # times so ignore it # @@ -94,7 +96,7 @@ CARGO_PROFILE_RELEASE_LTO=true \ # Ensure no references to a panicking function for rlib in $(echo $path); do set +ex - $PREFIX$NM -u $rlib 2>&1 | grep panicking + $NM -u $rlib 2>&1 | grep panicking if test $? 
= 0; then exit 1 diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 82762e076..519cea2ae 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -14,7 +14,7 @@ extern crate panic_handler; -#[cfg(all(not(thumb), not(windows)))] +#[cfg(all(not(thumb), not(windows), not(target_arch = "wasm32")))] #[link(name = "c")] extern "C" {} @@ -340,11 +340,11 @@ fn run() { something_with_a_dtor(&|| assert_eq!(bb(1), 1)); extern "C" { - fn rust_begin_unwind(); + fn rust_begin_unwind(x: usize); } // if bb(false) { unsafe { - rust_begin_unwind(); + rust_begin_unwind(0); } // } } diff --git a/libm b/libm index 8eedc2470..fe396e00b 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 8eedc2470531f51b978e4c873ee78a33c90e0fbd +Subproject commit fe396e00b7e47821a81c4c87a481ddc6af1d2cdf diff --git a/src/lib.rs b/src/lib.rs index 0ca770b1e..564988654 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,7 +31,7 @@ extern crate core; fn abort() -> ! { - unsafe { core::intrinsics::abort() } + core::intrinsics::abort() } #[macro_use] From d3efbd29152be38df0231156097d5dc122139183 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 29 May 2020 13:41:09 -0700 Subject: [PATCH 0366/1459] Bump to 0.1.30 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7c6f4bdb7..0e9778e40 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.29" +version = "0.1.30" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From d837cce3d36ac99d899b5743aa7371bb887a39a4 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 29 May 2020 13:43:21 -0700 Subject: [PATCH 0367/1459] Add back in unsafe for bootstrapping And add an `#[allow]` for now to appease stage0 --- src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 
564988654..db05af5de 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,8 +30,9 @@ #[cfg(test)] extern crate core; +#[allow(unused_unsafe)] fn abort() -> ! { - core::intrinsics::abort() + unsafe { core::intrinsics::abort() } } #[macro_use] From 5488a098002d848d37d8775889f162812249876b Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 29 May 2020 13:43:57 -0700 Subject: [PATCH 0368/1459] Bump to 0.1.31 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0e9778e40..9a5f9984d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.30" +version = "0.1.31" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From f853d6d9b75454ed125c6d025e2c0256a5c9fbdf Mon Sep 17 00:00:00 2001 From: jethrogb Date: Mon, 1 Jun 2020 17:55:42 +0200 Subject: [PATCH 0369/1459] Manually patch ret instruction for LVI (#359) Co-authored-by: Jethro Beekman --- src/probestack.rs | 70 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/src/probestack.rs b/src/probestack.rs index 19307df4e..9c78faa1d 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -109,7 +109,12 @@ macro_rules! define_rust_probestack { // // The ABI here is that the stack frame size is located in `%rax`. Upon // return we're not supposed to modify `%rsp` or `%rax`. -#[cfg(target_arch = "x86_64")] +// +// Any changes to this function should be replicated to the SGX version below. +#[cfg(all( + target_arch = "x86_64", + not(all(target_env = "sgx", target_vendor = "fortanix")) +))] global_asm!(define_rust_probestack!( " .cfi_startproc @@ -163,6 +168,69 @@ global_asm!(define_rust_probestack!( " )); +// This function is the same as above, except that some instructions are +// [manually patched for LVI]. 
+// +// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions +#[cfg(all( + target_arch = "x86_64", + all(target_env = "sgx", target_vendor = "fortanix") +))] +global_asm!(define_rust_probestack!( + " + .cfi_startproc + pushq %rbp + .cfi_adjust_cfa_offset 8 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + + mov %rax,%r11 // duplicate %rax as we're clobbering %r11 + + // Main loop, taken in one page increments. We're decrementing rsp by + // a page each time until there's less than a page remaining. We're + // guaranteed that this function isn't called unless there's more than a + // page needed. + // + // Note that we're also testing against `8(%rsp)` to account for the 8 + // bytes pushed on the stack orginally with our return address. Using + // `8(%rsp)` simulates us testing the stack pointer in the caller's + // context. + + // It's usually called when %rax >= 0x1000, but that's not always true. + // Dynamic stack allocation, which is needed to implement unsized + // rvalues, triggers stackprobe even if %rax < 0x1000. + // Thus we have to check %r11 first to avoid segfault. + cmp $0x1000,%r11 + jna 3f +2: + sub $0x1000,%rsp + test %rsp,8(%rsp) + sub $0x1000,%r11 + cmp $0x1000,%r11 + ja 2b + +3: + // Finish up the last remaining stack space requested, getting the last + // bits out of r11 + sub %r11,%rsp + test %rsp,8(%rsp) + + // Restore the stack pointer to what it previously was when entering + // this function. The caller will readjust the stack pointer after we + // return. + add %rax,%rsp + + leave + .cfi_def_cfa_register %rsp + .cfi_adjust_cfa_offset -8 + pop %r11 + lfence + jmp *%r11 + .cfi_endproc + " +)); + #[cfg(target_arch = "x86")] // This is the same as x86_64 above, only translated for 32-bit sizes. 
Note // that on Unix we're expected to restore everything as it was, this From 7b996ca0fa969199332d703b81fb411d85e5f7c4 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 1 Jun 2020 08:56:11 -0700 Subject: [PATCH 0370/1459] Bump to 0.1.32 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9a5f9984d..d4d1ad67d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.31" +version = "0.1.32" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 05d1e27be7480ac9139953c2a53c0bc76ee95583 Mon Sep 17 00:00:00 2001 From: kellda <59569234+kellda@users.noreply.github.com> Date: Mon, 8 Jun 2020 16:11:11 +0200 Subject: [PATCH 0371/1459] Update CHANGELOG.md (#245) --- libm/CHANGELOG.md | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index 28e27055d..e8e9acf9b 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -7,6 +7,30 @@ This project adheres to [Semantic Versioning](http://semver.org/). ... +## [v0.2.1] - 2019-11-22 + +### Fixed +- sincosf + +## [v0.2.0] - 2019-10-18 + +### Added +- Benchmarks +- signum +- remainder +- remainderf +- nextafter +- nextafterf + +### Fixed +- Rounding to negative zero +- Overflows in rem_pio2 and remquo +- Overflows in fma +- sincosf + +### Removed +- F32Ext and F64Ext traits + ## [v0.1.4] - 2019-06-12 ### Fixed @@ -90,7 +114,9 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
- Initial release -[Unreleased]: https://github.com/japaric/libm/compare/v0.1.4...HEAD +[Unreleased]: https://github.com/japaric/libm/compare/v0.2.1...HEAD +[v0.2.1]: https://github.com/japaric/libm/compare/0.2.0...v0.2.1 +[v0.2.0]: https://github.com/japaric/libm/compare/0.1.4...v0.2.0 [v0.1.4]: https://github.com/japaric/libm/compare/0.1.3...v0.1.4 [v0.1.3]: https://github.com/japaric/libm/compare/v0.1.2...0.1.3 [v0.1.2]: https://github.com/japaric/libm/compare/v0.1.1...v0.1.2 From f3846bc05da87b8a71cd1a5a6ff9d980f46b2d0f Mon Sep 17 00:00:00 2001 From: Joseph Richey Date: Wed, 8 Jul 2020 07:07:19 -0700 Subject: [PATCH 0372/1459] lint: Allow improper_ctypes_definitions (#364) https://github.com/rust-lang/rust/pull/72700 caused the existing `allow(improper_ctypes)` guard to stop working, we now need `allow(improper_ctypes_definitions)` instead. We keep the old one to avoid any issues with older nightlies. Signed-off-by: Joe Richey --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index db05af5de..34397e0d2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,7 @@ // We use `u128` in a whole bunch of places which we currently agree with the // compiler on ABIs and such, so we should be "good enough" for now and changes // to the `u128` ABI will be reflected here. -#![allow(improper_ctypes)] +#![allow(improper_ctypes, improper_ctypes_definitions)] // We disable #[no_mangle] for tests so that we can verify the test results // against the native compiler-rt implementations of the builtins. 
From f4c7940d3b13ec879c9fdc218812f71a65149123 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Tue, 28 Jul 2020 13:09:18 -0500 Subject: [PATCH 0373/1459] Improve `__clzsi2` performance (#366) --- src/int/leading_zeros.rs | 143 +++++++++++++++++++++++++ src/int/mod.rs | 69 +----------- testcrate/Cargo.toml | 6 ++ testcrate/tests/count_leading_zeros.rs | 23 ---- testcrate/tests/leading_zeros.rs | 54 ++++++++++ 5 files changed, 206 insertions(+), 89 deletions(-) create mode 100644 src/int/leading_zeros.rs delete mode 100644 testcrate/tests/count_leading_zeros.rs create mode 100644 testcrate/tests/leading_zeros.rs diff --git a/src/int/leading_zeros.rs b/src/int/leading_zeros.rs new file mode 100644 index 000000000..78556f0bc --- /dev/null +++ b/src/int/leading_zeros.rs @@ -0,0 +1,143 @@ +// Note: these functions happen to produce the correct `usize::leading_zeros(0)` value +// without a explicit zero check. Zero is probably common enough that it could warrant +// adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`. +// Compilers will insert the check for zero in cases where it is needed. + +/// Returns the number of leading binary zeros in `x`. +pub fn usize_leading_zeros_default(x: usize) -> usize { + // The basic idea is to test if the higher bits of `x` are zero and bisect the number + // of leading zeros. It is possible for all branches of the bisection to use the same + // code path by conditionally shifting the higher parts down to let the next bisection + // step work on the higher or lower parts of `x`. Instead of starting with `z == 0` + // and adding to the number of zeros, it is slightly faster to start with + // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros, + // because it simplifies the final bisection step. 
+ let mut x = x; + // the number of potential leading zeros + let mut z = usize::MAX.count_ones() as usize; + // a temporary + let mut t: usize; + #[cfg(target_pointer_width = "64")] + { + t = x >> 32; + if t != 0 { + z -= 32; + x = t; + } + } + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + { + t = x >> 16; + if t != 0 { + z -= 16; + x = t; + } + } + t = x >> 8; + if t != 0 { + z -= 8; + x = t; + } + t = x >> 4; + if t != 0 { + z -= 4; + x = t; + } + t = x >> 2; + if t != 0 { + z -= 2; + x = t; + } + // the last two bisections are combined into one conditional + t = x >> 1; + if t != 0 { + z - 2 + } else { + z - x + } + + // We could potentially save a few cycles by using the LUT trick from + // "https://embeddedgurus.com/state-space/2014/09/ + // fast-deterministic-and-portable-counting-leading-zeros/". + // However, 256 bytes for a LUT is too large for embedded use cases. We could remove + // the last 3 bisections and use this 16 byte LUT for the rest of the work: + //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]; + //z -= LUT[x] as usize; + //z + // However, it ends up generating about the same number of instructions. When benchmarked + // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO + // execution effects. Changing to using a LUT and branching is risky for smaller cores. +} + +// The above method does not compile well on RISC-V (because of the lack of predicated +// instructions), producing code with many branches or using an excessively long +// branchless solution. This method takes advantage of the set-if-less-than instruction on +// RISC-V that allows `(x >= power-of-two) as usize` to be branchless. + +/// Returns the number of leading binary zeros in `x`. 
+pub fn usize_leading_zeros_riscv(x: usize) -> usize { + let mut x = x; + // the number of potential leading zeros + let mut z = usize::MAX.count_ones() as usize; + // a temporary + let mut t: usize; + + // RISC-V does not have a set-if-greater-than-or-equal instruction and + // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is + // still the most optimal method. A conditional set can only be turned into a single + // immediate instruction if `x` is compared with an immediate `imm` (that can fit into + // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the + // right). If we try to save an instruction by using `x < imm` for each bisection, we + // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, + // but the immediate will never fit into 12 bits and never save an instruction. + #[cfg(target_pointer_width = "64")] + { + // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise + // `t` is set to 0. + t = ((x >= (1 << 32)) as usize) << 5; + // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the + // next step to process. + x >>= t; + // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential + // leading zeros + z -= t; + } + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + { + t = ((x >= (1 << 16)) as usize) << 4; + x >>= t; + z -= t; + } + t = ((x >= (1 << 8)) as usize) << 3; + x >>= t; + z -= t; + t = ((x >= (1 << 4)) as usize) << 2; + x >>= t; + z -= t; + t = ((x >= (1 << 2)) as usize) << 1; + x >>= t; + z -= t; + t = (x >= (1 << 1)) as usize; + x >>= t; + z -= t; + // All bits except the LSB are guaranteed to be zero for this final bisection step. + // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. + z - x +} + +intrinsics! 
{ + #[maybe_use_optimized_c_shim] + #[cfg(any( + target_pointer_width = "16", + target_pointer_width = "32", + target_pointer_width = "64" + ))] + /// Returns the number of leading binary zeros in `x`. + pub extern "C" fn __clzsi2(x: usize) -> usize { + if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { + usize_leading_zeros_riscv(x) + } else { + usize_leading_zeros_default(x) + } + } +} diff --git a/src/int/mod.rs b/src/int/mod.rs index d73bf6db9..8a469d901 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -13,11 +13,14 @@ macro_rules! os_ty { } pub mod addsub; +pub mod leading_zeros; pub mod mul; pub mod sdiv; pub mod shift; pub mod udiv; +pub use self::leading_zeros::__clzsi2; + /// Trait for some basic operations on integers pub(crate) trait Int: Copy @@ -300,69 +303,3 @@ macro_rules! impl_wide_int { impl_wide_int!(u32, u64, 32); impl_wide_int!(u64, u128, 64); - -intrinsics! { - #[maybe_use_optimized_c_shim] - #[cfg(any( - target_pointer_width = "16", - target_pointer_width = "32", - target_pointer_width = "64" - ))] - pub extern "C" fn __clzsi2(x: usize) -> usize { - // TODO: const this? 
Would require const-if - // Note(Lokathor): the `intrinsics!` macro can't process mut inputs - let mut x = x; - let mut y: usize; - let mut n: usize = { - #[cfg(target_pointer_width = "64")] - { - 64 - } - #[cfg(target_pointer_width = "32")] - { - 32 - } - #[cfg(target_pointer_width = "16")] - { - 16 - } - }; - #[cfg(target_pointer_width = "64")] - { - y = x >> 32; - if y != 0 { - n -= 32; - x = y; - } - } - #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] - { - y = x >> 16; - if y != 0 { - n -= 16; - x = y; - } - } - y = x >> 8; - if y != 0 { - n -= 8; - x = y; - } - y = x >> 4; - if y != 0 { - n -= 4; - x = y; - } - y = x >> 2; - if y != 0 { - n -= 2; - x = y; - } - y = x >> 1; - if y != 0 { - n - 2 - } else { - n - x - } - } -} diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index 3b99b574e..61282af0b 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -11,6 +11,12 @@ doctest = false [build-dependencies] rand = "0.7" +[dev-dependencies] +# For fuzzing tests we want a deterministic seedable RNG. We also eliminate potential +# problems with system RNGs on the variety of platforms this crate is tested on. +# `xoshiro128**` is used for its quality, size, and speed at generating `u32` shift amounts. +rand_xoshiro = "0.4" + [dependencies.compiler_builtins] path = ".." 
default-features = false diff --git a/testcrate/tests/count_leading_zeros.rs b/testcrate/tests/count_leading_zeros.rs deleted file mode 100644 index 022b2d852..000000000 --- a/testcrate/tests/count_leading_zeros.rs +++ /dev/null @@ -1,23 +0,0 @@ -extern crate compiler_builtins; - -use compiler_builtins::int::__clzsi2; - -#[test] -fn __clzsi2_test() { - let mut i: usize = core::usize::MAX; - // Check all values above 0 - while i > 0 { - assert_eq!(__clzsi2(i) as u32, i.leading_zeros()); - i >>= 1; - } - // check 0 also - i = 0; - assert_eq!(__clzsi2(i) as u32, i.leading_zeros()); - // double check for bit patterns that aren't just solid 1s - i = 1; - for _ in 0..63 { - assert_eq!(__clzsi2(i) as u32, i.leading_zeros()); - i <<= 2; - i += 1; - } -} diff --git a/testcrate/tests/leading_zeros.rs b/testcrate/tests/leading_zeros.rs new file mode 100644 index 000000000..b857d9e0c --- /dev/null +++ b/testcrate/tests/leading_zeros.rs @@ -0,0 +1,54 @@ +use rand_xoshiro::rand_core::{RngCore, SeedableRng}; +use rand_xoshiro::Xoshiro128StarStar; + +use compiler_builtins::int::__clzsi2; +use compiler_builtins::int::leading_zeros::{ + usize_leading_zeros_default, usize_leading_zeros_riscv, +}; + +#[test] +fn __clzsi2_test() { + // Binary fuzzer. We cannot just send a random number directly to `__clzsi2()`, because we need + // large sequences of zeros to test. This XORs, ANDs, and ORs random length strings of 1s to + // `x`. ORs insure sequences of ones, ANDs insures sequences of zeros, and XORs are not often + // destructive but add entropy. + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut x = 0usize; + // creates a mask for indexing the bits of the type + let bit_indexing_mask = usize::MAX.count_ones() - 1; + // 10000 iterations is enough to make sure edge cases like single set bits are tested and to go + // through many paths. 
+ for _ in 0..10_000 { + let r0 = bit_indexing_mask & rng.next_u32(); + // random length of ones + let ones: usize = !0 >> r0; + let r1 = bit_indexing_mask & rng.next_u32(); + // random circular shift + let mask = ones.rotate_left(r1); + match rng.next_u32() % 4 { + 0 => x |= mask, + 1 => x &= mask, + // both 2 and 3 to make XORs as common as ORs and ANDs combined + _ => x ^= mask, + } + let lz = x.leading_zeros() as usize; + let lz0 = __clzsi2(x); + let lz1 = usize_leading_zeros_default(x); + let lz2 = usize_leading_zeros_riscv(x); + if lz0 != lz { + panic!("__clzsi2({}): expected: {}, found: {}", x, lz, lz0); + } + if lz1 != lz { + panic!( + "usize_leading_zeros_default({}): expected: {}, found: {}", + x, lz, lz1 + ); + } + if lz2 != lz { + panic!( + "usize_leading_zeros_riscv({}): expected: {}, found: {}", + x, lz, lz2 + ); + } + } +} From d242475b5c27a1f87bbdf4a954d654da77eaa28f Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Sun, 3 May 2020 22:38:04 -0500 Subject: [PATCH 0374/1459] regularize the location and documentation of division functions --- src/int/sdiv.rs | 44 +++++++++++++++++++++++++++----------------- src/int/udiv.rs | 14 ++++++-------- 2 files changed, 33 insertions(+), 25 deletions(-) diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index c9e252cc3..854ea00bb 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -60,42 +60,52 @@ impl Divmod for i64 {} intrinsics! 
{ #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_idiv] + /// Returns `n / d` pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 { a.div(b) } #[maybe_use_optimized_c_shim] - pub extern "C" fn __divdi3(a: i64, b: i64) -> i64 { - a.div(b) + /// Returns `n % d` + pub extern "C" fn __modsi3(a: i32, b: i32) -> i32 { + a.mod_(b) } - - #[win64_128bit_abi_hack] - pub extern "C" fn __divti3(a: i128, b: i128) -> i128 { - a.div(b) + + #[maybe_use_optimized_c_shim] + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn __divmodsi4(a: i32, b: i32, rem: &mut i32) -> i32 { + a.divmod(b, rem, |a, b| __divsi3(a, b)) } #[maybe_use_optimized_c_shim] - pub extern "C" fn __modsi3(a: i32, b: i32) -> i32 { - a.mod_(b) + /// Returns `n / d` + pub extern "C" fn __divdi3(a: i64, b: i64) -> i64 { + a.div(b) } #[maybe_use_optimized_c_shim] + /// Returns `n % d` pub extern "C" fn __moddi3(a: i64, b: i64) -> i64 { a.mod_(b) } - #[win64_128bit_abi_hack] - pub extern "C" fn __modti3(a: i128, b: i128) -> i128 { - a.mod_(b) + #[aapcs_on_arm] + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn __divmoddi4(a: i64, b: i64, rem: &mut i64) -> i64 { + a.divmod(b, rem, |a, b| __divdi3(a, b)) } - #[maybe_use_optimized_c_shim] - pub extern "C" fn __divmodsi4(a: i32, b: i32, rem: &mut i32) -> i32 { - a.divmod(b, rem, |a, b| __divsi3(a, b)) + #[win64_128bit_abi_hack] + /// Returns `n / d` + pub extern "C" fn __divti3(a: i128, b: i128) -> i128 { + a.div(b) } - #[aapcs_on_arm] - pub extern "C" fn __divmoddi4(a: i64, b: i64, rem: &mut i64) -> i64 { - a.divmod(b, rem, |a, b| __divdi3(a, b)) + #[win64_128bit_abi_hack] + /// Returns `n % d` + pub extern "C" fn __modti3(a: i128, b: i128) -> i128 { + a.mod_(b) } + + // LLVM does not currently have a `__divmodti4` function } diff --git a/src/int/udiv.rs b/src/int/udiv.rs index b393ac6db..1ee670c72 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -241,6 +241,11 @@ intrinsics! 
{ rem } + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn __udivmoddi4(n: u64, d: u64, rem: Option<&mut u64>) -> u64 { + udivmod_inner!(n, d, rem, u64) + } + #[win64_128bit_abi_hack] /// Returns `n / d` pub extern "C" fn __udivti3(n: u128, d: u128) -> u128 { @@ -255,16 +260,9 @@ intrinsics! { rem } - /// Returns `n / d` and sets `*rem = n % d` - pub extern "C" fn __udivmoddi4(n: u64, d: u64, rem: Option<&mut u64>) -> u64 { - udivmod_inner!(n, d, rem, u64) - } - #[win64_128bit_abi_hack] /// Returns `n / d` and sets `*rem = n % d` - pub extern "C" fn __udivmodti4(n: u128, - d: u128, - rem: Option<&mut u128>) -> u128 { + pub extern "C" fn __udivmodti4(n: u128, d: u128, rem: Option<&mut u128>) -> u128 { udivmod_inner!(n, d, rem, u128) } } From 83425b17ebfbb0c64536e0985ee8e5fbf8a54538 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Sun, 3 May 2020 22:40:54 -0500 Subject: [PATCH 0375/1459] replace old soft division code with new functions --- src/int/sdiv.rs | 23 +++++++------ src/int/udiv.rs | 86 +++++++++++-------------------------------------- 2 files changed, 33 insertions(+), 76 deletions(-) diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index 854ea00bb..983c34cf3 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -57,54 +57,59 @@ trait Divmod: Int { impl Divmod for i32 {} impl Divmod for i64 {} + intrinsics! 
{ #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_idiv] /// Returns `n / d` pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 { - a.div(b) + i32_div_rem(a, b).0 } #[maybe_use_optimized_c_shim] /// Returns `n % d` pub extern "C" fn __modsi3(a: i32, b: i32) -> i32 { - a.mod_(b) + i32_div_rem(a, b).1 } - + #[maybe_use_optimized_c_shim] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __divmodsi4(a: i32, b: i32, rem: &mut i32) -> i32 { - a.divmod(b, rem, |a, b| __divsi3(a, b)) + let quo_rem = i32_div_rem(a, b); + *rem = quo_rem.1; + quo_rem.0 } #[maybe_use_optimized_c_shim] /// Returns `n / d` pub extern "C" fn __divdi3(a: i64, b: i64) -> i64 { - a.div(b) + i64_div_rem(a, b).0 } #[maybe_use_optimized_c_shim] /// Returns `n % d` pub extern "C" fn __moddi3(a: i64, b: i64) -> i64 { - a.mod_(b) + i64_div_rem(a, b).1 } #[aapcs_on_arm] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __divmoddi4(a: i64, b: i64, rem: &mut i64) -> i64 { - a.divmod(b, rem, |a, b| __divdi3(a, b)) + let quo_rem = i64_div_rem(a, b); + *rem = quo_rem.1; + quo_rem.0 } #[win64_128bit_abi_hack] /// Returns `n / d` pub extern "C" fn __divti3(a: i128, b: i128) -> i128 { - a.div(b) + i128_div_rem(a, b).0 } #[win64_128bit_abi_hack] /// Returns `n % d` pub extern "C" fn __modti3(a: i128, b: i128) -> i128 { - a.mod_(b) + i128_div_rem(a, b).1 } // LLVM does not currently have a `__divmodti4` function diff --git a/src/int/udiv.rs b/src/int/udiv.rs index 1ee670c72..b312ca48b 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -156,113 +156,65 @@ intrinsics! 
{ #[arm_aeabi_alias = __aeabi_uidiv] /// Returns `n / d` pub extern "C" fn __udivsi3(n: u32, d: u32) -> u32 { - // Special cases - if d == 0 { - // NOTE This should be unreachable in safe Rust because the program will panic before - // this intrinsic is called - ::abort(); - } - - if n == 0 { - return 0; - } - - let mut sr = d.leading_zeros().wrapping_sub(n.leading_zeros()); - - // d > n - if sr > u32::BITS - 1 { - return 0; - } - - // d == 1 - if sr == u32::BITS - 1 { - return n; - } - - sr += 1; - - // 1 <= sr <= u32::BITS - 1 - let mut q = n << (u32::BITS - sr); - let mut r = n >> sr; - - let mut carry = 0; - - // Don't use a range because they may generate references to memcpy in unoptimized code - let mut i = 0; - while i < sr { - i += 1; - - // r:q = ((r:q) << 1) | carry - r = (r << 1) | (q >> (u32::BITS - 1)); - q = (q << 1) | carry; - - // carry = 0; - // if r > d { - // r -= d; - // carry = 1; - // } - - let s = (d.wrapping_sub(r).wrapping_sub(1)) as i32 >> (u32::BITS - 1); - carry = (s & 1) as u32; - r -= d & s as u32; - } - - (q << 1) | carry + u32_div_rem(n, d).0 } #[maybe_use_optimized_c_shim] /// Returns `n % d` pub extern "C" fn __umodsi3(n: u32, d: u32) -> u32 { - let q = __udivsi3(n, d); - n - q * d + u32_div_rem(n, d).1 } #[maybe_use_optimized_c_shim] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 { - let q = __udivsi3(n, d); + let quo_rem = u32_div_rem(n, d); if let Some(rem) = rem { - *rem = n - (q * d); + *rem = quo_rem.1; } - q + quo_rem.0 } #[maybe_use_optimized_c_shim] /// Returns `n / d` pub extern "C" fn __udivdi3(n: u64, d: u64) -> u64 { - __udivmoddi4(n, d, None) + u64_div_rem(n, d).0 } #[maybe_use_optimized_c_shim] /// Returns `n % d` pub extern "C" fn __umoddi3(n: u64, d: u64) -> u64 { - let mut rem = 0; - __udivmoddi4(n, d, Some(&mut rem)); - rem + u64_div_rem(n, d).1 } /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmoddi4(n: u64, d: u64, 
rem: Option<&mut u64>) -> u64 { - udivmod_inner!(n, d, rem, u64) + let quo_rem = u64_div_rem(n, d); + if let Some(rem) = rem { + *rem = quo_rem.1; + } + quo_rem.0 } #[win64_128bit_abi_hack] /// Returns `n / d` pub extern "C" fn __udivti3(n: u128, d: u128) -> u128 { - __udivmodti4(n, d, None) + u128_div_rem(n, d).0 } #[win64_128bit_abi_hack] /// Returns `n % d` pub extern "C" fn __umodti3(n: u128, d: u128) -> u128 { - let mut rem = 0; - __udivmodti4(n, d, Some(&mut rem)); - rem + u128_div_rem(n, d).1 } #[win64_128bit_abi_hack] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmodti4(n: u128, d: u128, rem: Option<&mut u128>) -> u128 { - udivmod_inner!(n, d, rem, u128) + let quo_rem = u128_div_rem(n, d); + if let Some(rem) = rem { + *rem = quo_rem.1; + } + quo_rem.0 } } From 752ab52a7a3d26ed5a976e9611d7cdcba253705b Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Sun, 19 Jul 2020 17:15:50 -0500 Subject: [PATCH 0376/1459] Remove erroneous `aapcs_on_arm` and add `maybe_use_optimized_c_shim` --- src/int/sdiv.rs | 2 +- src/int/udiv.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index 983c34cf3..682ebeda2 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -92,7 +92,7 @@ intrinsics! { i64_div_rem(a, b).1 } - #[aapcs_on_arm] + #[maybe_use_optimized_c_shim] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __divmoddi4(a: i64, b: i64, rem: &mut i64) -> i64 { let quo_rem = i64_div_rem(a, b); diff --git a/src/int/udiv.rs b/src/int/udiv.rs index b312ca48b..e517ca862 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -187,6 +187,7 @@ intrinsics! 
{ u64_div_rem(n, d).1 } + #[maybe_use_optimized_c_shim] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmoddi4(n: u64, d: u64, rem: Option<&mut u64>) -> u64 { let quo_rem = u64_div_rem(n, d); From 6aef025a369849dba4ed461bf2c80bda540dce1d Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Fri, 17 Jul 2020 15:17:40 -0500 Subject: [PATCH 0377/1459] Remove unused code --- src/int/mod.rs | 12 ---- src/int/sdiv.rs | 58 ------------------- src/int/udiv.rs | 151 ------------------------------------------------ 3 files changed, 221 deletions(-) diff --git a/src/int/mod.rs b/src/int/mod.rs index 8a469d901..7bafb82f8 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -1,17 +1,5 @@ use core::ops; -macro_rules! hty { - ($ty:ty) => { - <$ty as LargeInt>::HighHalf - }; -} - -macro_rules! os_ty { - ($ty:ty) => { - <$ty as Int>::OtherSign - }; -} - pub mod addsub; pub mod leading_zeros; pub mod mul; diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index 682ebeda2..d8bb9c093 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -1,63 +1,5 @@ use int::Int; -trait Div: Int { - /// Returns `a / b` - fn div(self, other: Self) -> Self { - let s_a = self >> (Self::BITS - 1); - let s_b = other >> (Self::BITS - 1); - // NOTE it's OK to overflow here because of the `.unsigned()` below. - // This whole operation is computing the absolute value of the inputs - // So some overflow will happen when dealing with e.g. 
`i64::MIN` - // where the absolute value is `(-i64::MIN) as u64` - let a = (self ^ s_a).wrapping_sub(s_a); - let b = (other ^ s_b).wrapping_sub(s_b); - let s = s_a ^ s_b; - - let r = a.unsigned().aborting_div(b.unsigned()); - (Self::from_unsigned(r) ^ s) - s - } -} - -impl Div for i32 {} -impl Div for i64 {} -impl Div for i128 {} - -trait Mod: Int { - /// Returns `a % b` - fn mod_(self, other: Self) -> Self { - let s = other >> (Self::BITS - 1); - // NOTE(wrapping_sub) see comment in the `div` - let b = (other ^ s).wrapping_sub(s); - let s = self >> (Self::BITS - 1); - let a = (self ^ s).wrapping_sub(s); - - let r = a.unsigned().aborting_rem(b.unsigned()); - (Self::from_unsigned(r) ^ s) - s - } -} - -impl Mod for i32 {} -impl Mod for i64 {} -impl Mod for i128 {} - -trait Divmod: Int { - /// Returns `a / b` and sets `*rem = n % d` - fn divmod(self, other: Self, rem: &mut Self, div: F) -> Self - where - F: Fn(Self, Self) -> Self, - { - let r = div(self, other); - // NOTE won't overflow because it's using the result from the - // previous division - *rem = self - r.wrapping_mul(other); - r - } -} - -impl Divmod for i32 {} -impl Divmod for i64 {} - - intrinsics! { #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_idiv] diff --git a/src/int/udiv.rs b/src/int/udiv.rs index e517ca862..491515949 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -1,156 +1,5 @@ use int::{Int, LargeInt}; -macro_rules! 
udivmod_inner { - ($n:expr, $d:expr, $rem:expr, $ty:ty) => {{ - let (n, d, rem) = ($n, $d, $rem); - // NOTE X is unknown, K != 0 - if n.high() == 0 { - if d.high() == 0 { - // 0 X - // --- - // 0 X - - if let Some(rem) = rem { - *rem = <$ty>::from(n.low().aborting_rem(d.low())); - } - return <$ty>::from(n.low().aborting_div(d.low())) - } else { - // 0 X - // --- - // K X - if let Some(rem) = rem { - *rem = n; - } - return 0; - }; - } - - let mut sr; - let mut q; - let mut r; - - if d.low() == 0 { - if d.high() == 0 { - // K X - // --- - // 0 0 - // NOTE This should be unreachable in safe Rust because the program will panic before - // this intrinsic is called - ::abort(); - } - - if n.low() == 0 { - // K 0 - // --- - // K 0 - if let Some(rem) = rem { - *rem = <$ty>::from_parts(0, n.high().aborting_rem(d.high())); - } - return <$ty>::from(n.high().aborting_div(d.high())) - } - - // K K - // --- - // K 0 - - if d.high().is_power_of_two() { - if let Some(rem) = rem { - *rem = <$ty>::from_parts(n.low(), n.high() & (d.high() - 1)); - } - return <$ty>::from(n.high() >> d.high().trailing_zeros()); - } - - sr = d.high().leading_zeros().wrapping_sub(n.high().leading_zeros()); - - // D > N - if sr > ::BITS - 2 { - if let Some(rem) = rem { - *rem = n; - } - return 0; - } - - sr += 1; - - // 1 <= sr <= ::BITS - 1 - q = n << (<$ty>::BITS - sr); - r = n >> sr; - } else if d.high() == 0 { - // K X - // --- - // 0 K - if d.low().is_power_of_two() { - if let Some(rem) = rem { - *rem = <$ty>::from(n.low() & (d.low() - 1)); - } - - if d.low() == 1 { - return n; - } else { - let sr = d.low().trailing_zeros(); - return n >> sr; - }; - } - - sr = 1 + ::BITS + d.low().leading_zeros() - n.high().leading_zeros(); - - // 2 <= sr <= u64::BITS - 1 - q = n << (<$ty>::BITS - sr); - r = n >> sr; - } else { - // K X - // --- - // K K - sr = d.high().leading_zeros().wrapping_sub(n.high().leading_zeros()); - - // D > N - if sr > ::BITS - 1 { - if let Some(rem) = rem { - *rem = n; - } - return 0; - 
} - - sr += 1; - - // 1 <= sr <= ::BITS - q = n << (<$ty>::BITS - sr); - r = n >> sr; - } - - // Not a special case - // q and r are initialized with - // q = n << (u64::BITS - sr) - // r = n >> sr - // 1 <= sr <= u64::BITS - 1 - let mut carry = 0; - - // Don't use a range because they may generate references to memcpy in unoptimized code - let mut i = 0; - while i < sr { - i += 1; - - // r:q = ((r:q) << 1) | carry - r = (r << 1) | (q >> (<$ty>::BITS - 1)); - q = (q << 1) | carry as $ty; - - // carry = 0 - // if r >= d { - // r -= d; - // carry = 1; - // } - let s = (d.wrapping_sub(r).wrapping_sub(1)) as os_ty!($ty) >> (<$ty>::BITS - 1); - carry = (s & 1) as hty!($ty); - r -= d & s as $ty; - } - - if let Some(rem) = rem { - *rem = r; - } - (q << 1) | carry as $ty - }} -} - intrinsics! { #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_uidiv] From 52386525152bb6b94492c9044c6b5451d3bfbd9e Mon Sep 17 00:00:00 2001 From: Max Audron Date: Thu, 13 Aug 2020 12:46:28 +0200 Subject: [PATCH 0378/1459] add 32 bit shift instructions * add 32 bit shift instructions to src/int/shift.rs __ashlsi3 __ashrsi3 __lshrsi3 * add int_impl! for 16 bit numbers and large_int! for i32 and u32 * add tests in testcrate/build.rs --- src/int/mod.rs | 3 +++ src/int/shift.rs | 18 ++++++++++++++++++ testcrate/build.rs | 13 +++++++++++++ 3 files changed, 34 insertions(+) diff --git a/src/int/mod.rs b/src/int/mod.rs index 8a469d901..128fdfdc5 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -188,6 +188,7 @@ macro_rules! int_impl { }; } +int_impl!(i16, u16, 16); int_impl!(i32, u32, 32); int_impl!(i64, u64, 64); int_impl!(i128, u128, 128); @@ -229,6 +230,8 @@ macro_rules! 
large_int { }; } +large_int!(u32, u16, u16, 16); +large_int!(i32, u16, i16, 16); large_int!(u64, u32, u32, 32); large_int!(i64, u32, i32, 32); large_int!(u128, u64, u64, 64); diff --git a/src/int/shift.rs b/src/int/shift.rs index 408f8f3cc..674c3ee8c 100644 --- a/src/int/shift.rs +++ b/src/int/shift.rs @@ -20,6 +20,7 @@ trait Ashl: Int + LargeInt { } } +impl Ashl for u32 {} impl Ashl for u64 {} impl Ashl for u128 {} @@ -47,6 +48,7 @@ trait Ashr: Int + LargeInt { } } +impl Ashr for i32 {} impl Ashr for i64 {} impl Ashr for i128 {} @@ -70,10 +72,16 @@ trait Lshr: Int + LargeInt { } } +impl Lshr for u32 {} impl Lshr for u64 {} impl Lshr for u128 {} intrinsics! { + #[maybe_use_optimized_c_shim] + pub extern "C" fn __ashlsi3(a: u32, b: u32) -> u32 { + a.ashl(b) + } + #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_llsl] pub extern "C" fn __ashldi3(a: u64, b: u32) -> u64 { @@ -84,6 +92,11 @@ intrinsics! { a.ashl(b) } + #[maybe_use_optimized_c_shim] + pub extern "C" fn __ashrsi3(a: i32, b: u32) -> i32 { + a.ashr(b) + } + #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_lasr] pub extern "C" fn __ashrdi3(a: i64, b: u32) -> i64 { @@ -94,6 +107,11 @@ intrinsics! 
{ a.ashr(b) } + #[maybe_use_optimized_c_shim] + pub extern "C" fn __lshrsi3(a: u32, b: u32) -> u32 { + a.lshr(b) + } + #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_llsr] pub extern "C" fn __lshrdi3(a: u64, b: u32) -> u64 { diff --git a/testcrate/build.rs b/testcrate/build.rs index e1d4cf9e8..5a91a120a 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -857,6 +857,10 @@ fn main() { ); // int/shift.rs + gen( + |(a, b): (MyU32, MyU32)| Some(a.0 << (b.0 % 32)), + "builtins::int::shift::__ashlsi3(a, b % 32)", + ); gen( |(a, b): (MyU64, MyU32)| Some(a.0 << (b.0 % 64)), "builtins::int::shift::__ashldi3(a, b % 64)", @@ -865,6 +869,10 @@ fn main() { |(a, b): (MyU128, MyU32)| Some(a.0 << (b.0 % 128)), "builtins::int::shift::__ashlti3(a, b % 128)", ); + gen( + |(a, b): (MyI32, MyU32)| Some(a.0 >> (b.0 % 32)), + "builtins::int::shift::__ashrsi3(a, b % 32)", + ); gen( |(a, b): (MyI64, MyU32)| Some(a.0 >> (b.0 % 64)), "builtins::int::shift::__ashrdi3(a, b % 64)", @@ -873,6 +881,10 @@ fn main() { |(a, b): (MyI128, MyU32)| Some(a.0 >> (b.0 % 128)), "builtins::int::shift::__ashrti3(a, b % 128)", ); + gen( + |(a, b): (MyU32, MyU32)| Some(a.0 >> (b.0 % 32)), + "builtins::int::shift::__lshrsi3(a, b % 32)", + ); gen( |(a, b): (MyU64, MyU32)| Some(a.0 >> (b.0 % 64)), "builtins::int::shift::__lshrdi3(a, b % 64)", @@ -1285,6 +1297,7 @@ my_integer! { struct MyI32(i32); struct MyI64(i64); struct MyI128(i128); + struct MyU16(u16); struct MyU32(u32); struct MyU64(u64); struct MyU128(u128); From 0f34e3bdb4af18f30ab493e84b9ba01007eee685 Mon Sep 17 00:00:00 2001 From: Xiaoyu Lu Date: Fri, 14 Aug 2020 10:28:15 +0800 Subject: [PATCH 0379/1459] Add uefi arch x86 probestack support 1. In UEFI x86 arch, probestack need triple underscore. 2. In UEFI, probestack function do things like _chkstk(in MSVC). MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp themselves. 
But current probestack doesn't adjust esp. And LLVM doesn't generate sub %eax, %esp after probestack. So we adjust esp in probestack like MSVC x32's _chkstk. --- src/probestack.rs | 68 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/src/probestack.rs b/src/probestack.rs index 9c78faa1d..2f37a104e 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -76,7 +76,7 @@ macro_rules! define_rust_probestack { }; } -#[cfg(target_os = "uefi")] +#[cfg(all(target_os = "uefi", target_arch = "x86_64"))] macro_rules! define_rust_probestack { ($body: expr) => { concat!( @@ -104,6 +104,20 @@ macro_rules! define_rust_probestack { }; } +// In UEFI x86 arch, triple underscore is deliberate. +#[cfg(all(target_os = "uefi", target_arch = "x86"))] +macro_rules! define_rust_probestack { + ($body: expr) => { + concat!( + " + .globl ___rust_probestack + ___rust_probestack: + ", + $body + ) + }; +} + // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, // ensuring that if any pages are unmapped we'll make a page fault. // @@ -231,7 +245,7 @@ global_asm!(define_rust_probestack!( " )); -#[cfg(target_arch = "x86")] +#[cfg(all(target_arch = "x86", not(target_os = "uefi")))] // This is the same as x86_64 above, only translated for 32-bit sizes. Note // that on Unix we're expected to restore everything as it was, this // function basically can't tamper with anything. @@ -270,3 +284,53 @@ global_asm!(define_rust_probestack!( .cfi_endproc " )); + +#[cfg(all(target_arch = "x86", target_os = "uefi"))] +// UEFI target is windows like target. LLVM will do _chkstk things like windows. +// probestack function will also do things like _chkstk in MSVC. +// So we need to sub %ax %sp in probestack when arch is x86. +// +// REF: Rust commit(74e80468347) +// rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805 +// Comments in LLVM: +// MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. 
+// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp +// themselves. +global_asm!(define_rust_probestack!( + " + .cfi_startproc + push %ebp + .cfi_adjust_cfa_offset 4 + .cfi_offset %ebp, -8 + mov %esp, %ebp + .cfi_def_cfa_register %ebp + push %ecx + push %edx + mov %eax,%ecx + + cmp $0x1000,%ecx + jna 3f +2: + sub $0x1000,%esp + test %esp,8(%esp) + sub $0x1000,%ecx + cmp $0x1000,%ecx + ja 2b + +3: + sub %ecx,%esp + test %esp,8(%esp) + mov 4(%ebp),%edx + mov %edx, 12(%esp) + add %eax,%esp + pop %edx + pop %ecx + leave + + sub %eax, %esp + .cfi_def_cfa_register %esp + .cfi_adjust_cfa_offset -4 + ret + .cfi_endproc + " +)); From 1621c6dbf9eb0c7a4acf1c47d52254b481bb58c2 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Sat, 11 Jul 2020 12:15:10 -0500 Subject: [PATCH 0380/1459] Use `specialized-div-rem` 1.0.0 for division algorithms --- Cargo.toml | 4 + src/int/mod.rs | 2 + src/int/sdiv.rs | 2 +- src/int/specialized_div_rem/asymmetric.rs | 169 ++++++ src/int/specialized_div_rem/binary_long.rs | 596 +++++++++++++++++++++ src/int/specialized_div_rem/delegate.rs | 226 ++++++++ src/int/specialized_div_rem/mod.rs | 295 ++++++++++ src/int/specialized_div_rem/norm_shift.rs | 106 ++++ src/int/specialized_div_rem/trifecta.rs | 441 +++++++++++++++ src/int/udiv.rs | 2 +- src/lib.rs | 1 + testcrate/Cargo.toml | 3 +- testcrate/tests/div_rem.rs | 143 +++++ 13 files changed, 1987 insertions(+), 3 deletions(-) create mode 100644 src/int/specialized_div_rem/asymmetric.rs create mode 100644 src/int/specialized_div_rem/binary_long.rs create mode 100644 src/int/specialized_div_rem/delegate.rs create mode 100644 src/int/specialized_div_rem/mod.rs create mode 100644 src/int/specialized_div_rem/norm_shift.rs create mode 100644 src/int/specialized_div_rem/trifecta.rs create mode 100644 testcrate/tests/div_rem.rs diff --git a/Cargo.toml b/Cargo.toml index d4d1ad67d..353d9a737 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,10 @@ panic-handler = { path = 
'crates/panic-handler' } [features] default = ["compiler-builtins"] +# Some algorithms benefit from inline assembly, but some compiler backends do +# not support it, so inline assembly is only enabled when this flag is set. +asm = [] + # Enable compilation of C code in compiler-rt, filling in some more optimized # implementations and also filling in unimplemented intrinsics c = ["cc"] diff --git a/src/int/mod.rs b/src/int/mod.rs index 7bafb82f8..5cfad1d6c 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -1,5 +1,7 @@ use core::ops; +mod specialized_div_rem; + pub mod addsub; pub mod leading_zeros; pub mod mul; diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index d8bb9c093..57ef03cda 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -1,4 +1,4 @@ -use int::Int; +use int::specialized_div_rem::*; intrinsics! { #[maybe_use_optimized_c_shim] diff --git a/src/int/specialized_div_rem/asymmetric.rs b/src/int/specialized_div_rem/asymmetric.rs new file mode 100644 index 000000000..861e91742 --- /dev/null +++ b/src/int/specialized_div_rem/asymmetric.rs @@ -0,0 +1,169 @@ +/// Creates unsigned and signed division functions optimized for dividing integers with the same +/// bitwidth as the largest operand in an asymmetrically sized division. For example, x86-64 has an +/// assembly instruction that can divide a 128 bit integer by a 64 bit integer if the quotient fits +/// in 64 bits. The 128 bit version of this algorithm would use that fast hardware division to +/// construct a full 128 bit by 128 bit division. +#[macro_export] +macro_rules! 
impl_asymmetric { + ( + $unsigned_name:ident, // name of the unsigned division function + $signed_name:ident, // name of the signed division function + $zero_div_fn:ident, // function called when division by zero is attempted + $half_division:ident, // function for division of a $uX by a $uX + $asymmetric_division:ident, // function for division of a $uD by a $uX + $n_h:expr, // the number of bits in a $iH or $uH + $uH:ident, // unsigned integer with half the bit width of $uX + $uX:ident, // unsigned integer with half the bit width of $uD + $uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` + $iD:ident, // signed integer type for the inputs and outputs of `$signed_name` + $($unsigned_attr:meta),*; // attributes for the unsigned function + $($signed_attr:meta),* // attributes for the signed function + ) => { + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + $( + #[$unsigned_attr] + )* + pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD,$uD) { + fn carrying_mul(lhs: $uX, rhs: $uX) -> ($uX, $uX) { + let tmp = (lhs as $uD).wrapping_mul(rhs as $uD); + (tmp as $uX, (tmp >> ($n_h * 2)) as $uX) + } + fn carrying_mul_add(lhs: $uX, mul: $uX, add: $uX) -> ($uX, $uX) { + let tmp = (lhs as $uD).wrapping_mul(mul as $uD).wrapping_add(add as $uD); + (tmp as $uX, (tmp >> ($n_h * 2)) as $uX) + } + + let n: u32 = $n_h * 2; + + // Many of these subalgorithms are taken from trifecta.rs, see that for better + // documentation. + + let duo_lo = duo as $uX; + let duo_hi = (duo >> n) as $uX; + let div_lo = div as $uX; + let div_hi = (div >> n) as $uX; + if div_hi == 0 { + if div_lo == 0 { + $zero_div_fn() + } + if duo_hi < div_lo { + // `$uD` by `$uX` division with a quotient that will fit into a `$uX` + let (quo, rem) = unsafe { $asymmetric_division(duo, div_lo) }; + return (quo as $uD, rem as $uD) + } else if (div_lo >> $n_h) == 0 { + // Short division of $uD by a $uH. 
+ + // Some x86_64 CPUs have bad division implementations that make specializing + // this case faster. + let div_0 = div_lo as $uH as $uX; + let (quo_hi, rem_3) = $half_division(duo_hi, div_0); + + let duo_mid = + ((duo >> $n_h) as $uH as $uX) + | (rem_3 << $n_h); + let (quo_1, rem_2) = $half_division(duo_mid, div_0); + + let duo_lo = + (duo as $uH as $uX) + | (rem_2 << $n_h); + let (quo_0, rem_1) = $half_division(duo_lo, div_0); + + return ( + (quo_0 as $uD) + | ((quo_1 as $uD) << $n_h) + | ((quo_hi as $uD) << n), + rem_1 as $uD + ) + } else { + // Short division using the $uD by $uX division + let (quo_hi, rem_hi) = $half_division(duo_hi, div_lo); + let tmp = unsafe { + $asymmetric_division((duo_lo as $uD) | ((rem_hi as $uD) << n), div_lo) + }; + return ((tmp.0 as $uD) | ((quo_hi as $uD) << n), tmp.1 as $uD) + } + } + + let duo_lz = duo_hi.leading_zeros(); + let div_lz = div_hi.leading_zeros(); + let rel_leading_sb = div_lz.wrapping_sub(duo_lz); + if rel_leading_sb < $n_h { + // Some x86_64 CPUs have bad hardware division implementations that make putting + // a two possibility algorithm here beneficial. We also avoid a full `$uD` + // multiplication. + let shift = n - duo_lz; + let duo_sig_n = (duo >> shift) as $uX; + let div_sig_n = (div >> shift) as $uX; + let quo = $half_division(duo_sig_n, div_sig_n).0; + let div_lo = div as $uX; + let div_hi = (div >> n) as $uX; + let (tmp_lo, carry) = carrying_mul(quo, div_lo); + let (tmp_hi, overflow) = carrying_mul_add(quo, div_hi, carry); + let tmp = (tmp_lo as $uD) | ((tmp_hi as $uD) << n); + if (overflow != 0) || (duo < tmp) { + return ( + (quo - 1) as $uD, + duo.wrapping_add(div).wrapping_sub(tmp) + ) + } else { + return ( + quo as $uD, + duo - tmp + ) + } + } else { + // This has been adapted from + // https://www.codeproject.com/tips/785014/uint-division-modulus which was in turn + // adapted from Hacker's Delight. 
This is similar to the two possibility algorithm + // in that it uses only more significant parts of `duo` and `div` to divide a large + // integer with a smaller division instruction. + + let div_extra = n - div_lz; + let div_sig_n = (div >> div_extra) as $uX; + let tmp = unsafe { + $asymmetric_division(duo >> 1, div_sig_n) + }; + + let mut quo = tmp.0 >> ((n - 1) - div_lz); + if quo != 0 { + quo -= 1; + } + + // Note that this is a full `$uD` multiplication being used here + let mut rem = duo - (quo as $uD).wrapping_mul(div); + if div <= rem { + quo += 1; + rem -= div; + } + return (quo as $uD, rem) + } + } + + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + $( + #[$signed_attr] + )* + pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) { + match (duo < 0, div < 0) { + (false, false) => { + let t = $unsigned_name(duo as $uD, div as $uD); + (t.0 as $iD, t.1 as $iD) + }, + (true, false) => { + let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD); + ((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg()) + }, + (false, true) => { + let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD); + ((t.0 as $iD).wrapping_neg(), t.1 as $iD) + }, + (true, true) => { + let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD); + (t.0 as $iD, (t.1 as $iD).wrapping_neg()) + }, + } + } + } +} diff --git a/src/int/specialized_div_rem/binary_long.rs b/src/int/specialized_div_rem/binary_long.rs new file mode 100644 index 000000000..4c63396a0 --- /dev/null +++ b/src/int/specialized_div_rem/binary_long.rs @@ -0,0 +1,596 @@ +/// Creates unsigned and signed division functions that use binary long division, designed for +/// computer architectures without division instructions. These functions have good performance for +/// microarchitectures with large branch miss penalties and architectures without the ability to +/// predicate instructions. 
For architectures with predicated instructions, one of the algorithms +/// described in the documentation of these functions probably has higher performance, and a custom +/// assembly routine should be used instead. +#[macro_export] +macro_rules! impl_binary_long { + ( + $unsigned_name:ident, // name of the unsigned division function + $signed_name:ident, // name of the signed division function + $zero_div_fn:ident, // function called when division by zero is attempted + $normalization_shift:ident, // function for finding the normalization shift + $n:tt, // the number of bits in a $iX or $uX + $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` + $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` + $($unsigned_attr:meta),*; // attributes for the unsigned function + $($signed_attr:meta),* // attributes for the signed function + ) => { + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + $( + #[$unsigned_attr] + )* + pub fn $unsigned_name(duo: $uX, div: $uX) -> ($uX, $uX) { + let mut duo = duo; + // handle edge cases before calling `$normalization_shift` + if div == 0 { + $zero_div_fn() + } + if duo < div { + return (0, duo) + } + + // There are many variations of binary division algorithm that could be used. This + // documentation gives a tour of different methods so that future readers wanting to + // optimize further do not have to painstakingly derive them. The SWAR variation is + // especially hard to understand without reading the less convoluted methods first. + + // You may notice that a `duo < div_original` check is included in many these + // algorithms. A critical optimization that many algorithms miss is handling of + // quotients that will turn out to have many trailing zeros or many leading zeros. This + // happens in cases of exact or close-to-exact divisions, divisions by power of two, and + // in cases where the quotient is small. 
The `duo < div_original` check handles these + // cases of early returns and ends up replacing other kinds of mundane checks that + // normally terminate a binary division algorithm. + // + // Something you may see in other algorithms that is not special-cased here is checks + // for division by powers of two. The `duo < div_original` check handles this case and + // more, however it can be checked up front before the bisection using the + // `((div > 0) && ((div & (div - 1)) == 0))` trick. This is not special-cased because + // compilers should handle most cases where divisions by power of two occur, and we do + // not want to add on a few cycles for every division operation just to save a few + // cycles rarely. + + // The following example is the most straightforward translation from the way binary + // long division is typically visualized: + // Dividing 178u8 (0b10110010) by 6u8 (0b110). `div` is shifted left by 5, according to + // the result from `$normalization_shift(duo, div, false)`. + // + // Step 0: `sub` is negative, so there is not full normalization, so no `quo` bit is set + // and `duo` is kept unchanged. + // duo:10110010, div_shifted:11000000, sub:11110010, quo:00000000, shl:5 + // + // Step 1: `sub` is positive, set a `quo` bit and update `duo` for next step. + // duo:10110010, div_shifted:01100000, sub:01010010, quo:00010000, shl:4 + // + // Step 2: Continue based on `sub`. The `quo` bits start accumulating. 
+ // duo:01010010, div_shifted:00110000, sub:00100010, quo:00011000, shl:3 + // duo:00100010, div_shifted:00011000, sub:00001010, quo:00011100, shl:2 + // duo:00001010, div_shifted:00001100, sub:11111110, quo:00011100, shl:1 + // duo:00001010, div_shifted:00000110, sub:00000100, quo:00011101, shl:0 + // The `duo < div_original` check terminates the algorithm with the correct quotient of + // 29u8 and remainder of 4u8 + /* + let div_original = div; + let mut shl = $normalization_shift(duo, div, false); + let mut quo = 0; + loop { + let div_shifted = div << shl; + let sub = duo.wrapping_sub(div_shifted); + // it is recommended to use `println!`s like this if functionality is unclear + /* + println!("duo:{:08b}, div_shifted:{:08b}, sub:{:08b}, quo:{:08b}, shl:{}", + duo, + div_shifted, + sub, + quo, + shl + ); + */ + if 0 <= (sub as $iX) { + duo = sub; + quo += 1 << shl; + if duo < div_original { + // this branch is optional + return (quo, duo) + } + } + if shl == 0 { + return (quo, duo) + } + shl -= 1; + } + */ + + // This restoring binary long division algorithm reduces the number of operations + // overall via: + // - `pow` can be shifted right instead of recalculating from `shl` + // - starting `div` shifted left and shifting it right for each step instead of + // recalculating from `shl` + // - The `duo < div_original` branch is used to terminate the algorithm instead of the + // `shl == 0` branch. This check is strong enough to prevent set bits of `pow` and + // `div` from being shifted off the end. This check also only occurs on half of steps + // on average, since it is behind the `(sub as $iX) >= 0` branch. + // - `shl` is now not needed by any aspect of the loop and thus only 3 variables are + // being updated between steps + // + // There are many variations of this algorithm, but this encompasses the largest number + // of architectures and does not rely on carry flags, add-with-carry, or SWAR + // complications to be decently fast.
+ /* + let div_original = div; + let shl = $normalization_shift(duo, div, false); + let mut div: $uX = div << shl; + let mut pow: $uX = 1 << shl; + let mut quo: $uX = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as $iX) { + duo = sub; + quo |= pow; + if duo < div_original { + return (quo, duo) + } + } + div >>= 1; + pow >>= 1; + } + */ + + // If the architecture has flags and predicated arithmetic instructions, it is possible + // to do binary long division without branching and in only 3 or 4 instructions. This is + // a variation of a 3 instruction central loop from + // http://www.chiark.greenend.org.uk/~theom/riscos/docs/ultimate/a252div.txt. + // + // What allows doing division in only 3 instructions is realizing that instead of + // keeping `duo` in place and shifting `div` right to align bits, `div` can be kept in + // place and `duo` can be shifted left. This means `div` does not have to be updated, + // but causes edge case problems and makes `duo < div_original` tests harder. Some + // architectures have an option to shift an argument in an arithmetic operation, which + // means `duo` can be shifted left and subtracted from in one instruction. The other two + // instructions are updating `quo` and undoing the subtraction if it turns out things + // were not normalized. + + /* + // Perform one binary long division step on the already normalized arguments before + // the main loop. Note that this does a full normalization since the central loop needs + // `duo.leading_zeros()` to be at least 1 more than `div.leading_zeros()`. The original + // variation only did normalization to the nearest 4 steps, but this makes handling edge + // cases much harder. We do a full normalization and perform a binary long division + // step.
In the edge case where the msbs of `duo` and `div` are set, it clears the msb + // of `duo`, then the edge case handler shifts `div` right and does another long + // division step to always insure `duo.leading_zeros() + 1 >= div.leading_zeros()`. + let div_original = div; + let mut shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + let mut quo: $uX = 1; + duo = duo.wrapping_sub(div); + if duo < div_original { + return (1 << shl, duo); + } + let div_neg: $uX; + if (div as $iX) < 0 { + // A very ugly edge case where the most significant bit of `div` is set (after + // shifting to match `duo` when its most significant bit is at the sign bit), which + // leads to the sign bit of `div_neg` being cut off and carries not happening when + // they should. This branch performs a long division step that keeps `duo` in place + // and shifts `div` down. + div >>= 1; + div_neg = div.wrapping_neg(); + let (sub, carry) = duo.overflowing_add(div_neg); + duo = sub; + quo = quo.wrapping_add(quo).wrapping_add(carry as $uX); + if !carry { + duo = duo.wrapping_add(div); + } + shl -= 1; + } else { + div_neg = div.wrapping_neg(); + } + // The add-with-carry that updates `quo` needs to have the carry set when a normalized + // subtract happens. Using `duo.wrapping_shl(1).overflowing_sub(div)` to do the + // subtraction generates a carry when an unnormalized subtract happens, which is the + // opposite of what we want. Instead, we use + // `duo.wrapping_shl(1).overflowing_add(div_neg)`, where `div_neg` is negative `div`. + let mut i = shl; + loop { + if i == 0 { + break; + } + i -= 1; + // `ADDS duo, div, duo, LSL #1` + // (add `div` to `duo << 1` and set flags) + let (sub, carry) = duo.wrapping_shl(1).overflowing_add(div_neg); + duo = sub; + // `ADC quo, quo, quo` + // (add with carry). Effectively shifts `quo` left by 1 and sets the least + // significant bit to the carry. 
+ quo = quo.wrapping_add(quo).wrapping_add(carry as $uX); + // `ADDCC duo, duo, div` + // (add if carry clear). Undoes the subtraction if no carry was generated. + if !carry { + duo = duo.wrapping_add(div); + } + } + return (quo, duo >> shl); + */ + + // This is the SWAR (SIMD within a register) restoring division algorithm. + // This combines several ideas of the above algorithms: + // - If `duo` is shifted left instead of shifting `div` right like in the 3 instruction + // restoring division algorithm, some architectures can do the shifting and + // subtraction step in one instruction. + // - `quo` can be constructed by adding powers-of-two to it or shifting it left by one + // and adding one. + // - Every time `duo` is shifted left, there is another unused 0 bit shifted into the + // LSB, so what if we use those bits to store `quo`? + // Through a complex setup, it is possible to manage `duo` and `quo` in the same + // register, and perform one step with 2 or 3 instructions. The only major downsides are + // that there is significant setup (it only saves instructions if `shl` is + // approximately more than 4), `duo < div_original` checks are impractical once SWAR is + // initiated, and the number of division steps taken has to be exact (we cannot do more + // division steps than `shl`, because it introduces edge cases where quotient bits in + // `duo` start to collide with the real part of `div`). + /* + // first step.
The quotient bit is stored in `quo` for now + let div_original = div; + let mut shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + duo = duo.wrapping_sub(div); + let mut quo: $uX = 1 << shl; + if duo < div_original { + return (quo, duo); + } + + let mask: $uX; + if (div as $iX) < 0 { + // deal with same edge case as the 3 instruction restoring division algorithm, but + // the quotient bit from this step also has to be stored in `quo` + div >>= 1; + shl -= 1; + let tmp = 1 << shl; + mask = tmp - 1; + let sub = duo.wrapping_sub(div); + if (sub as $iX) >= 0 { + // restore + duo = sub; + quo |= tmp; + } + if duo < div_original { + return (quo, duo); + } + } else { + mask = quo - 1; + } + // There is now room for quotient bits in `duo`. + + // Note that `div` is already shifted left and has `shl` unset bits. We subtract 1 from + // `div` and end up with the subset of `shl` bits all being set. This subset acts + // just like a two's complement negative one. The subset of `div` containing the divisor + // had 1 subtracted from it, but a carry will always be generated from the `shl` subset + // as long as the quotient stays positive. + // + // When the modified `div` is subtracted from `duo.wrapping_shl(1)`, the `shl` subset + // adds a quotient bit to the least significant bit. + // For example, 89 (0b01011001) divided by 3 (0b11): + // + // shl:4, div:0b00110000 + // first step: + // duo:0b01011001 + // + div_neg:0b11010000 + // ____________________ + // 0b00101001 + // quo is set to 0b00010000 and mask is set to 0b00001111 for later + // + // 1 is subtracted from `div`. I will differentiate the `shl` part of `div` and the + // quotient part of `duo` with `^` + // chars.
+ // div:0b00110000 + // ^^^^ + // + 0b11111111 + // ________________ + // 0b00101111 + // ^^^^ + // div_neg:0b11010001 + // + // first SWAR step: + // duo_shl1:0b01010010 + // ^ + // + div_neg:0b11010001 + // ____________________ + // 0b00100011 + // ^ + // second: + // duo_shl1:0b01000110 + // ^^ + // + div_neg:0b11010001 + // ____________________ + // 0b00010111 + // ^^ + // third: + // duo_shl1:0b00101110 + // ^^^ + // + div_neg:0b11010001 + // ____________________ + // 0b11111111 + // ^^^ + // 3 steps resulted in the quotient with 3 set bits as expected, but currently the real + // part of `duo` is negative and the third step was an unnormalized step. The restore + // branch then restores `duo`. Note that the restore branch does not shift `duo` left. + // + // duo:0b11111111 + // ^^^ + // + div:0b00101111 + // ^^^^ + // ________________ + // 0b00101110 + // ^^^ + // `duo` is now back in the `duo_shl1` state it was at in the the third step, with an + // unset quotient bit. + // + // final step (`shl` was 4, so exactly 4 steps must be taken) + // duo_shl1:0b01011100 + // ^^^^ + // + div_neg:0b11010001 + // ____________________ + // 0b00101101 + // ^^^^ + // The quotient includes the `^` bits added with the `quo` bits from the beginning that + // contained the first step and potential edge case step, + // `quo:0b00010000 + (duo:0b00101101 & mask:0b00001111) == 0b00011101 == 29u8`. + // The remainder is the bits remaining in `duo` that are not part of the quotient bits, + // `duo:0b00101101 >> shl == 0b0010 == 2u8`. 
+ let div: $uX = div.wrapping_sub(1); + let mut i = shl; + loop { + if i == 0 { + break; + } + i -= 1; + duo = duo.wrapping_shl(1).wrapping_sub(div); + if (duo as $iX) < 0 { + // restore + duo = duo.wrapping_add(div); + } + } + // unpack the results of SWAR + return ((duo & mask) | quo, duo >> shl); + */ + + // The problem with the conditional restoring SWAR algorithm above is that, in practice, + // it requires assembly code to bring out its full unrolled potential (It seems that + // LLVM can't use unrolled conditionals optimally and ends up erasing all the benefit + // that my algorithm intends. On architectures without predicated instructions, the code + // gen is especially bad. We need a default software division algorithm that is + // guaranteed to get decent code gen for the central loop. + + // For non-SWAR algorithms, there is a way to do binary long division without + // predication or even branching. This involves creating a mask from the sign bit and + // performing different kinds of steps using that. + /* + let shl = $normalization_shift(duo, div, true); + let mut div: $uX = div << shl; + let mut pow: $uX = 1 << shl; + let mut quo: $uX = 0; + loop { + let sub = duo.wrapping_sub(div); + let sign_mask = !((sub as $iX).wrapping_shr($n - 1) as $uX); + duo -= div & sign_mask; + quo |= pow & sign_mask; + div >>= 1; + pow >>= 1; + if pow == 0 { + break; + } + } + return (quo, duo); + */ + // However, it requires about 4 extra operations (smearing the sign bit, negating the + // mask, and applying the mask twice) on top of the operations done by the actual + // algorithm. With SWAR however, just 2 extra operations are needed, making it + // practical and even the most optimal algorithm for some architectures. + + // What we do is use custom assembly for predicated architectures that need software + // division, and for the default algorithm use a mask based restoring SWAR algorithm + // without conditionals or branches. 
On almost all architectures, this Rust code is + // guaranteed to compile down to 5 assembly instructions or less for each step, and LLVM + // will unroll it in a decent way. + + // standard opening for SWAR algorithm with first step and edge case handling + let div_original = div; + let mut shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + duo = duo.wrapping_sub(div); + let mut quo: $uX = 1 << shl; + if duo < div_original { + return (quo, duo); + } + let mask: $uX; + if (div as $iX) < 0 { + div >>= 1; + shl -= 1; + let tmp = 1 << shl; + mask = tmp - 1; + let sub = duo.wrapping_sub(div); + if (sub as $iX) >= 0 { + duo = sub; + quo |= tmp; + } + if duo < div_original { + return (quo, duo); + } + } else { + mask = quo - 1; + } + + // central loop + div = div.wrapping_sub(1); + let mut i = shl; + loop { + if i == 0 { + break + } + i -= 1; + // shift left 1 and subtract + duo = duo.wrapping_shl(1).wrapping_sub(div); + // create mask + let mask = (duo as $iX).wrapping_shr($n - 1) as $uX; + // restore + duo = duo.wrapping_add(div & mask); + } + // unpack + return ((duo & mask) | quo, duo >> shl); + + // miscellaneous binary long division algorithms that might be better for specific + // architectures + + // Another kind of long division uses an interesting fact that `div` and `pow` can be + // negated when `duo` is negative to perform a "negated" division step that works in + // place of any normalization mechanism. This is a non-restoring division algorithm that + // is very similar to the non-restoring division algorithms that can be found on the + // internet, except there is only one test for `duo < 0`. The subtraction from `quo` can + // be viewed as shifting the least significant set bit right (e.g. if we enter a series + // of negated binary long division steps starting with `quo == 0b1011_0000` and + // `pow == 0b0000_1000`, `quo` will progress like this: 0b1010_1000, 0b1010_0100, + // 0b1010_0010, 0b1010_0001).
+ /* + let div_original = div; + let shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + let mut pow: $uX = 1 << shl; + let mut quo: $uX = pow; + duo = duo.wrapping_sub(div); + if duo < div_original { + return (quo, duo); + } + div >>= 1; + pow >>= 1; + loop { + if (duo as $iX) < 0 { + // Negated binary long division step. + duo = duo.wrapping_add(div); + quo = quo.wrapping_sub(pow); + } else { + // Normal long division step. + if duo < div_original { + return (quo, duo) + } + duo = duo.wrapping_sub(div); + quo = quo.wrapping_add(pow); + } + pow >>= 1; + div >>= 1; + } + */ + + // This is the Nonrestoring SWAR algorithm, combining the nonrestoring algorithm with + // SWAR techniques that makes the only difference between steps be negation of `div`. + // If there was an architecture with an instruction that negated inputs to an adder + // based on conditionals, and in place shifting (or a three input addition operation + // that can have `duo` as two of the inputs to effectively shift it left by 1), then a + // single instruction central loop is possible. Microarchitectures often have inputs to + // their ALU that can invert the arguments and carry in of adders, but the architectures + // unfortunately do not have an instruction to dynamically invert this input based on + // conditionals. 
+ /* + // SWAR opening + let div_original = div; + let mut shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + duo = duo.wrapping_sub(div); + let mut quo: $uX = 1 << shl; + if duo < div_original { + return (quo, duo); + } + let mask: $uX; + if (div as $iX) < 0 { + div >>= 1; + shl -= 1; + let tmp = 1 << shl; + let sub = duo.wrapping_sub(div); + if (sub as $iX) >= 0 { + // restore + duo = sub; + quo |= tmp; + } + if duo < div_original { + return (quo, duo); + } + mask = tmp - 1; + } else { + mask = quo - 1; + } + + // central loop + let div: $uX = div.wrapping_sub(1); + let mut i = shl; + loop { + if i == 0 { + break; + } + i -= 1; + // note: the `wrapping_shl(1)` can be factored out, but would require another + // restoring division step to prevent `(duo as $iX)` from overflowing + if (duo as $iX) < 0 { + // Negated binary long division step. + duo = duo.wrapping_shl(1).wrapping_add(div); + } else { + // Normal long division step. + duo = duo.wrapping_shl(1).wrapping_sub(div); + } + } + if (duo as $iX) < 0 { + // Restore. This was not needed in the original nonrestoring algorithm because of + // the `duo < div_original` checks. + duo = duo.wrapping_add(div); + } + // unpack + return ((duo & mask) | quo, duo >> shl); + */ + } + + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + $( + #[$signed_attr] + )* + pub fn $signed_name(duo: $iX, div: $iX) -> ($iX, $iX) { + // There is a way of doing this without any branches, but requires too many extra + // operations to be faster. 
+ /* + let duo_s = duo >> ($n - 1); + let div_s = div >> ($n - 1); + let duo = (duo ^ duo_s).wrapping_sub(duo_s); + let div = (div ^ div_s).wrapping_sub(div_s); + let quo_s = duo_s ^ div_s; + let rem_s = duo_s; + let tmp = $unsigned_name(duo as $uX, div as $uX); + ( + ((tmp.0 as $iX) ^ quo_s).wrapping_sub(quo_s), + ((tmp.1 as $iX) ^ rem_s).wrapping_sub(rem_s), + ) + */ + + match (duo < 0, div < 0) { + (false, false) => { + let t = $unsigned_name(duo as $uX, div as $uX); + (t.0 as $iX, t.1 as $iX) + }, + (true, false) => { + let t = $unsigned_name(duo.wrapping_neg() as $uX, div as $uX); + ((t.0 as $iX).wrapping_neg(), (t.1 as $iX).wrapping_neg()) + }, + (false, true) => { + let t = $unsigned_name(duo as $uX, div.wrapping_neg() as $uX); + ((t.0 as $iX).wrapping_neg(), t.1 as $iX) + }, + (true, true) => { + let t = $unsigned_name(duo.wrapping_neg() as $uX, div.wrapping_neg() as $uX); + (t.0 as $iX, (t.1 as $iX).wrapping_neg()) + }, + } + } + } +} diff --git a/src/int/specialized_div_rem/delegate.rs b/src/int/specialized_div_rem/delegate.rs new file mode 100644 index 000000000..1ba72431d --- /dev/null +++ b/src/int/specialized_div_rem/delegate.rs @@ -0,0 +1,226 @@ +/// Creates unsigned and signed division functions that use a combination of hardware division and +/// binary long division to divide integers larger than what hardware division by itself can do. This +/// function is intended for microarchitectures that have division hardware, but not fast enough +/// multiplication hardware for `impl_trifecta` to be faster. +#[macro_export] +macro_rules! 
impl_delegate { + ( + $unsigned_name:ident, // name of the unsigned division function + $signed_name:ident, // name of the signed division function + $zero_div_fn:ident, // function called when division by zero is attempted + $half_normalization_shift:ident, // function for finding the normalization shift of $uX + $half_division:ident, // function for division of a $uX by a $uX + $n_h:expr, // the number of bits in $iH or $uH + $uH:ident, // unsigned integer with half the bit width of $uX + $uX:ident, // unsigned integer with half the bit width of $uD. + $uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` + $iD:ident, // signed integer type for the inputs and outputs of `$signed_name` + $($unsigned_attr:meta),*; // attributes for the unsigned function + $($signed_attr:meta),* // attributes for the signed function + ) => { + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + $( + #[$unsigned_attr] + )* + pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD, $uD) { + // The two possibility algorithm, undersubtracting long division algorithm, or any kind + // of reciprocal based algorithm will not be fastest, because they involve large + // multiplications that we assume to not be fast enough relative to the divisions to + // outweigh setup times. + + // the number of bits in a $uX + let n = $n_h * 2; + + let duo_lo = duo as $uX; + let duo_hi = (duo >> n) as $uX; + let div_lo = div as $uX; + let div_hi = (div >> n) as $uX; + + match (div_lo == 0, div_hi == 0, duo_hi == 0) { + (true, true, _) => { + $zero_div_fn() + } + (_, false, true) => { + // `duo` < `div` + return (0, duo) + } + (false, true, true) => { + // delegate to smaller division + let tmp = $half_division(duo_lo, div_lo); + return (tmp.0 as $uD, tmp.1 as $uD) + } + (false, true, false) => { + if duo_hi < div_lo { + // `quo_hi` will always be 0. 
This performs a binary long division algorithm + // to zero `duo_hi` followed by a half division. + + // We can calculate the normalization shift using only `$uX` size functions. + // If we calculated the normalization shift using + // `$half_normalization_shift(duo_hi, div_lo, false)`, it would break the + // assumption the function has that the first argument is more than the + // second argument. If the arguments are switched, the assumption holds true + // since `duo_hi < div_lo`. + let norm_shift = $half_normalization_shift(div_lo, duo_hi, false); + let shl = if norm_shift == 0 { + // Consider what happens if the msbs of `duo_hi` and `div_lo` align with + // no shifting. The normalization shift will always return + // `norm_shift == 0` regardless of whether it is fully normalized, + // because `duo_hi < div_lo`. In that edge case, `n - norm_shift` would + // result in shift overflow down the line. For the edge case, because + // both `duo_hi < div_lo` and we are comparing all the significant bits + // of `duo_hi` and `div`, we can make `shl = n - 1`. + n - 1 + } else { + // We also cannot just use `shl = n - norm_shift - 1` in the general + // case, because when we are not in the edge case comparing all the + // significant bits, then the full `duo < div` may not be true and thus + // breaks the division algorithm. + n - norm_shift + }; + + // The 3 variable restoring division algorithm (see binary_long.rs) is ideal + // for this task, since `pow` and `quo` can be `$uX` and the delegation + // check is simple. + let mut div: $uD = div << shl; + let mut pow_lo: $uX = 1 << shl; + let mut quo_lo: $uX = 0; + let mut duo = duo; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as $iD) { + duo = sub; + quo_lo |= pow_lo; + let duo_hi = (duo >> n) as $uX; + if duo_hi == 0 { + // Delegate to get the rest of the quotient. Note that the + // `div_lo` here is the original unshifted `div`.
+ let tmp = $half_division(duo as $uX, div_lo); + return ((quo_lo | tmp.0) as $uD, tmp.1 as $uD) + } + } + div >>= 1; + pow_lo >>= 1; + } + } else if duo_hi == div_lo { + // `quo_hi == 1`. This branch is cheap and helps with edge cases. + let tmp = $half_division(duo as $uX, div as $uX); + return ((1 << n) | (tmp.0 as $uD), tmp.1 as $uD) + } else { + // `div_lo < duo_hi` + // `rem_hi == 0` + if (div_lo >> $n_h) == 0 { + // Short division of $uD by a $uH, using $uX by $uX division + let div_0 = div_lo as $uH as $uX; + let (quo_hi, rem_3) = $half_division(duo_hi, div_0); + + let duo_mid = + ((duo >> $n_h) as $uH as $uX) + | (rem_3 << $n_h); + let (quo_1, rem_2) = $half_division(duo_mid, div_0); + + let duo_lo = + (duo as $uH as $uX) + | (rem_2 << $n_h); + let (quo_0, rem_1) = $half_division(duo_lo, div_0); + + return ( + (quo_0 as $uD) + | ((quo_1 as $uD) << $n_h) + | ((quo_hi as $uD) << n), + rem_1 as $uD + ) + } + + // This is basically a short division composed of a half division for the hi + // part, specialized 3 variable binary long division in the middle, and + // another half division for the lo part. + let duo_lo = duo as $uX; + let tmp = $half_division(duo_hi, div_lo); + let quo_hi = tmp.0; + let mut duo = (duo_lo as $uD) | ((tmp.1 as $uD) << n); + // This check is required to avoid breaking the long division below. + if duo < div { + return ((quo_hi as $uD) << n, duo); + } + + // The half division handled all shift alignments down to `n`, so this + // division can continue with a shift of `n - 1`. + let mut div: $uD = div << (n - 1); + let mut pow_lo: $uX = 1 << (n - 1); + let mut quo_lo: $uX = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as $iD) { + duo = sub; + quo_lo |= pow_lo; + let duo_hi = (duo >> n) as $uX; + if duo_hi == 0 { + // Delegate to get the rest of the quotient. Note that the + // `div_lo` here is the original unshifted `div`. 
+ let tmp = $half_division(duo as $uX, div_lo); + return ( + (tmp.0) as $uD | (quo_lo as $uD) | ((quo_hi as $uD) << n), + tmp.1 as $uD + ); + } + } + div >>= 1; + pow_lo >>= 1; + } + } + } + (_, false, false) => { + // Full $uD by $uD binary long division. `quo_hi` will always be 0. + if duo < div { + return (0, duo); + } + let div_original = div; + let shl = $half_normalization_shift(duo_hi, div_hi, false); + let mut duo = duo; + let mut div: $uD = div << shl; + let mut pow_lo: $uX = 1 << shl; + let mut quo_lo: $uX = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as $iD) { + duo = sub; + quo_lo |= pow_lo; + if duo < div_original { + return (quo_lo as $uD, duo) + } + } + div >>= 1; + pow_lo >>= 1; + } + } + } + } + + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + $( + #[$signed_attr] + )* + pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) { + match (duo < 0, div < 0) { + (false, false) => { + let t = $unsigned_name(duo as $uD, div as $uD); + (t.0 as $iD, t.1 as $iD) + }, + (true, false) => { + let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD); + ((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg()) + }, + (false, true) => { + let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD); + ((t.0 as $iD).wrapping_neg(), t.1 as $iD) + }, + (true, true) => { + let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD); + (t.0 as $iD, (t.1 as $iD).wrapping_neg()) + }, + } + } + } +} diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs new file mode 100644 index 000000000..5a034dcf1 --- /dev/null +++ b/src/int/specialized_div_rem/mod.rs @@ -0,0 +1,295 @@ +// TODO: when `unsafe_block_in_unsafe_fn` is stabilized, remove this +#![allow(unused_unsafe)] + +//! This `specialized_div_rem` module is originally from version 1.0.0 of the +//! `specialized-div-rem` crate. Note that `for` loops with ranges are not used in this +//! 
module, since unoptimized compilation may generate references to `memcpy`. +//! +//! The purpose of these macros is to easily change both the division algorithm used +//! for a given integer size and the half division used by that algorithm. The way +//! functions call each other is also constructed such that linkers will find the chain of +//! software and hardware divisions needed for every size of signed and unsigned division. +//! For example, most target compilations do the following: +//! +//! - Many 128 bit division functions like `u128::wrapping_div` use +//! `std::intrinsics::unchecked_div`, which gets replaced by `__udivti3` because there +//! is not a 128 bit by 128 bit hardware division function in most architectures. +//! `__udivti3` uses `u128_div_rem` (this extra level of function calls exists because +//! `__umodti3` and `__udivmodti4` also exist, and `specialized_div_rem` supplies just +//! one function to calculate both the quotient and remainder). If configuration flags +//! enable it, `impl_trifecta!` defines `u128_div_rem` to use the trifecta algorithm, +//! which requires the half sized division `u64_by_u64_div_rem`. If the architecture +//! supplies a 64 bit hardware division instruction, `u64_by_u64_div_rem` will be +//! reduced to those instructions. Note that we do not specify the half size division +//! directly to be `__udivdi3`, because hardware division would never be introduced. +//! - If the architecture does not supply a 64 bit hardware division instruction, u64 +//! divisions will use functions such as `__udivdi3`. This will call `u64_div_rem` +//! which is defined by `impl_delegate!`. The half division for this algorithm is +//! `u32_by_u32_div_rem` which in turn becomes hardware division instructions or more +//! software division algorithms. +//! - If the architecture does not supply a 32 bit hardware instruction, linkers will +//! look for `__udivsi3`. `impl_binary_long!` is used, but this algorithm uses no half +//! 
division, so the chain of calls ends here. +//! +//! On some architectures like x86_64, an asymmetrically sized division is supplied, in +//! which 128 bit numbers can be divided by 64 bit numbers. `impl_asymmetric!` is used to +//! extend the 128 by 64 bit division to a full 128 by 128 bit division. + +// `allow(dead_code)` is used in various places, because the configuration code would otherwise be +// ridiculously complex + +#[macro_use] +mod norm_shift; + +#[macro_use] +mod binary_long; + +#[macro_use] +mod delegate; + +#[macro_use] +mod trifecta; + +#[macro_use] +mod asymmetric; + +/// The behavior of all divisions by zero is controlled by this function. This function should be +/// impossible to reach by Rust users, unless `compiler-builtins` public division functions or +/// `core/std::unchecked_div/rem` are directly used without a zero check in front. +fn zero_div_fn() -> ! { + // TODO: change this once the algorithms are verified + //unsafe {core::hint::unreachable_unchecked()} + ::abort() +} + +// The `B` extension on RISC-V determines if a CLZ assembly instruction exists +#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +const USE_LZ: bool = cfg!(target_feature = "b"); + +#[cfg(target_arch = "arm")] +const USE_LZ: bool = if cfg!(target_feature = "thumb-mode") { + // ARM thumb targets have CLZ instructions if the instruction set of ARMv6T2 is supported. This + // is needed to successfully differentiate between targets like `thumbv8.base` and + // `thumbv8.main`. + cfg!(target_feature = "v6t2") +} else { + // Regular ARM targets have CLZ instructions if the ARMv5TE instruction set is supported. + // Technically, ARMv5T was the first to have CLZ, but the "v5t" target feature does not seem to + // work. 
+ cfg!(target_feature = "v5te") +}; + +// All other targets Rust supports have CLZ instructions +#[cfg(not(any(target_arch = "arm", target_arch = "riscv32", target_arch = "riscv64")))] +const USE_LZ: bool = true; + +impl_normalization_shift!( + u32_normalization_shift, + USE_LZ, + 32, + u32, + i32, + allow(dead_code) +); +impl_normalization_shift!( + u64_normalization_shift, + USE_LZ, + 64, + u64, + i64, + allow(dead_code) +); + +/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. +/// `checked_div` and `checked_rem` are used to avoid bringing in panic function +/// dependencies. +#[inline] +fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) { + if let Some(quo) = duo.checked_div(div) { + if let Some(rem) = duo.checked_rem(div) { + return (quo, rem); + } + } + zero_div_fn() +} + +// Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a +// microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is +// faster if the target pointer width is at least 64. +#[cfg(all( + not(all(feature = "asm", target_arch = "x86_64")), + not(any(target_pointer_width = "16", target_pointer_width = "32")) +))] +impl_trifecta!( + u128_div_rem, + i128_div_rem, + zero_div_fn, + u64_by_u64_div_rem, + 32, + u32, + u64, + u128, + i128,; +); + +// If the pointer width less than 64, then the target architecture almost certainly does not have +// the fast 64 to 128 bit widening multiplication needed for `trifecta` to be faster. +#[cfg(all( + not(all(feature = "asm", target_arch = "x86_64")), + any(target_pointer_width = "16", target_pointer_width = "32") +))] +impl_delegate!( + u128_div_rem, + i128_div_rem, + zero_div_fn, + u64_normalization_shift, + u64_by_u64_div_rem, + 32, + u32, + u64, + u128, + i128,; +); + +/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. 
+/// +/// # Safety +/// +/// If the quotient does not fit in a `u64`, a floating point exception occurs. +/// If `div == 0`, then a division by zero exception occurs. +#[cfg(all(feature = "asm", target_arch = "x86_64"))] +#[inline] +unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) { + let duo_lo = duo as u64; + let duo_hi = (duo >> 64) as u64; + let quo: u64; + let rem: u64; + unsafe { + // divides the combined registers rdx:rax (`duo` is split into two 64 bit parts to do this) + // by `div`. The quotient is stored in rax and the remainder in rdx. + asm!( + "div {0}", + in(reg) div, + inlateout("rax") duo_lo => quo, + inlateout("rdx") duo_hi => rem, + options(pure, nomem, nostack) + ); + } + (quo, rem) +} + +// use `asymmetric` instead of `trifecta` on x86_64 +#[cfg(all(feature = "asm", target_arch = "x86_64"))] +impl_asymmetric!( + u128_div_rem, + i128_div_rem, + zero_div_fn, + u64_by_u64_div_rem, + u128_by_u64_div_rem, + 32, + u32, + u64, + u128, + i128,; +); + +/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. +/// `checked_div` and `checked_rem` are used to avoid bringing in panic function +/// dependencies. +#[inline] +#[allow(dead_code)] +fn u32_by_u32_div_rem(duo: u32, div: u32) -> (u32, u32) { + if let Some(quo) = duo.checked_div(div) { + if let Some(rem) = duo.checked_rem(div) { + return (quo, rem); + } + } + zero_div_fn() +} + +// When not on x86 and the pointer width is not 64, use `delegate` since the division size is larger +// than register size. +#[cfg(all( + not(all(feature = "asm", target_arch = "x86")), + not(target_pointer_width = "64") +))] +impl_delegate!( + u64_div_rem, + i64_div_rem, + zero_div_fn, + u32_normalization_shift, + u32_by_u32_div_rem, + 16, + u16, + u32, + u64, + i64,; +); + +// When not on x86 and the pointer width is 64, use `binary_long`. 
+#[cfg(all( + not(all(feature = "asm", target_arch = "x86")), + target_pointer_width = "64" +))] +impl_binary_long!( + u64_div_rem, + i64_div_rem, + zero_div_fn, + u64_normalization_shift, + 64, + u64, + i64,; +); + +/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. +/// +/// # Safety +/// +/// If the quotient does not fit in a `u32`, a floating point exception occurs. +/// If `div == 0`, then a division by zero exception occurs. +#[cfg(all(feature = "asm", target_arch = "x86"))] +#[inline] +unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) { + let duo_lo = duo as u32; + let duo_hi = (duo >> 32) as u32; + let quo: u32; + let rem: u32; + unsafe { + // divides the combined registers rdx:rax (`duo` is split into two 32 bit parts to do this) + // by `div`. The quotient is stored in rax and the remainder in rdx. + asm!( + "div {0}", + in(reg) div, + inlateout("rax") duo_lo => quo, + inlateout("rdx") duo_hi => rem, + options(pure, nomem, nostack) + ); + } + (quo, rem) +} + +// use `asymmetric` instead of `delegate` on x86 +#[cfg(all(feature = "asm", target_arch = "x86"))] +impl_asymmetric!( + u64_div_rem, + i64_div_rem, + zero_div_fn, + u32_by_u32_div_rem, + u64_by_u32_div_rem, + 16, + u16, + u32, + u64, + i64,; +); + +// 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division +impl_binary_long!( + u32_div_rem, + i32_div_rem, + zero_div_fn, + u32_normalization_shift, + 32, + u32, + i32,; +); diff --git a/src/int/specialized_div_rem/norm_shift.rs b/src/int/specialized_div_rem/norm_shift.rs new file mode 100644 index 000000000..33348b373 --- /dev/null +++ b/src/int/specialized_div_rem/norm_shift.rs @@ -0,0 +1,106 @@ +/// Creates a function used by some division algorithms to compute the "normalization shift". +#[macro_export] +macro_rules! 
impl_normalization_shift { + ( + $name:ident, // name of the normalization shift function + // boolean for if `$uX::leading_zeros` should be used (if an architecture does not have a + // hardware instruction for `usize::leading_zeros`, then this should be `false`) + $use_lz:ident, + $n:tt, // the number of bits in a $iX or $uX + $uX:ident, // unsigned integer type for the inputs of `$name` + $iX:ident, // signed integer type for the inputs of `$name` + $($unsigned_attr:meta),* // attributes for the function + ) => { + /// Finds the shift left that the divisor `div` would need to be normalized for a binary + /// long division step with the dividend `duo`. NOTE: This function assumes that these edge + /// cases have been handled before reaching it: + /// ` + /// if div == 0 { + /// panic!("attempt to divide by zero") + /// } + /// if duo < div { + /// return (0, duo) + /// } + /// ` + /// + /// Normalization is defined as (where `shl` is the output of this function): + /// ` + /// if duo.leading_zeros() != (div << shl).leading_zeros() { + /// // If the most significant bits of `duo` and `div << shl` are not in the same place, + /// // then `div << shl` has one more leading zero than `duo`. + /// assert_eq!(duo.leading_zeros() + 1, (div << shl).leading_zeros()); + /// // Also, `duo` is less than `2*(div << shl)` (otherwise the first division step + /// // would not be able to clear the msb of `duo`) + /// assert!(duo < (div << (shl + 1))); + /// } + /// if full_normalization { + /// // Some algorithms do not need "full" normalization, which means that `duo` is + /// // larger than `div << shl` when the most significant bits are aligned. + /// assert!((div << shl) <= duo); + /// } + /// ` + /// + /// Note: If the software bisection algorithm is being used in this function, it happens + /// that full normalization always occurs, so be careful that new algorithms are not + /// invisibly depending on this invariant when `full_normalization` is set to `false`. 
+ $( + #[$unsigned_attr] + )* + fn $name(duo: $uX, div: $uX, full_normalization: bool) -> usize { + // We have to find the leading zeros of `div` to know where its msb (most significant + // set bit) is to even begin binary long division. It is also good to know where the msb + // of `duo` is so that useful work can be started instead of shifting `div` for all + // possible quotients (many division steps are wasted if `duo.leading_zeros()` is large + // and `div` starts out being shifted all the way to the msb). Aligning the msbs of + // `div` and `duo` could be done by shifting `div` left by + // `div.leading_zeros() - duo.leading_zeros()`, but some CPUs without division hardware + // also do not have single instructions for calculating `leading_zeros`. Instead of + // software doing two bisections to find the two `leading_zeros`, we do one bisection to + // find `div.leading_zeros() - duo.leading_zeros()` without actually knowing either of + // the leading zeros values. + + let mut shl: usize; + if $use_lz { + shl = (div.leading_zeros() - duo.leading_zeros()) as usize; + if full_normalization { + if duo < (div << shl) { + // when the msb of `duo` and `div` are aligned, the resulting `div` may be + // larger than `duo`, so we decrease the shift by 1. + shl -= 1; + } + } + } else { + let mut test = duo; + shl = 0usize; + let mut lvl = $n >> 1; + loop { + let tmp = test >> lvl; + // It happens that a final `duo < (div << shl)` check is not needed, because the + // `div <= tmp` check insures that the msb of `test` never passes the msb of + // `div`, and any set bits shifted off the end of `test` would still keep + // `div <= tmp` true. 
+ if div <= tmp { + test = tmp; + shl += lvl; + } + // narrow down bisection + lvl >>= 1; + if lvl == 0 { + break + } + } + } + // tests the invariants that should hold before beginning binary long division + /* + if full_normalization { + assert!((div << shl) <= duo); + } + if duo.leading_zeros() != (div << shl).leading_zeros() { + assert_eq!(duo.leading_zeros() + 1, (div << shl).leading_zeros()); + assert!(duo < (div << (shl + 1))); + } + */ + shl + } + } +} diff --git a/src/int/specialized_div_rem/trifecta.rs b/src/int/specialized_div_rem/trifecta.rs new file mode 100644 index 000000000..e76516f34 --- /dev/null +++ b/src/int/specialized_div_rem/trifecta.rs @@ -0,0 +1,441 @@ +/// Creates unsigned and signed division functions optimized for division of integers with bitwidths +/// larger than the largest hardware integer division supported. These functions use large radix +/// division algorithms that require both fast division and very fast widening multiplication on the +/// target microarchitecture. Otherwise, `impl_delegate` should be used instead. +#[macro_export] +macro_rules! impl_trifecta { + ( + $unsigned_name:ident, // name of the unsigned division function + $signed_name:ident, // name of the signed division function + $zero_div_fn:ident, // function called when division by zero is attempted + $half_division:ident, // function for division of a $uX by a $uX + $n_h:expr, // the number of bits in $iH or $uH + $uH:ident, // unsigned integer with half the bit width of $uX + $uX:ident, // unsigned integer with half the bit width of $uD + $uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` + $iD:ident, // signed integer type for the inputs and outputs of `$signed_name` + $($unsigned_attr:meta),*; // attributes for the unsigned function + $($signed_attr:meta),* // attributes for the signed function + ) => { + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. 
+ $( + #[$unsigned_attr] + )* + pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD, $uD) { + // This is called the trifecta algorithm because it uses three main algorithms: short + // division for small divisors, the two possibility algorithm for large divisors, and an + // undersubtracting long division algorithm for intermediate cases. + + // This replicates `carrying_mul` (rust-lang rfc #2417). LLVM correctly optimizes this + // to use a widening multiply to 128 bits on the relevant architectures. + fn carrying_mul(lhs: $uX, rhs: $uX) -> ($uX, $uX) { + let tmp = (lhs as $uD).wrapping_mul(rhs as $uD); + (tmp as $uX, (tmp >> ($n_h * 2)) as $uX) + } + fn carrying_mul_add(lhs: $uX, mul: $uX, add: $uX) -> ($uX, $uX) { + let tmp = (lhs as $uD).wrapping_mul(mul as $uD).wrapping_add(add as $uD); + (tmp as $uX, (tmp >> ($n_h * 2)) as $uX) + } + + // the number of bits in a $uX + let n = $n_h * 2; + + if div == 0 { + $zero_div_fn() + } + + // Trying to use a normalization shift function will cause inelegancies in the code and + // inefficiencies for architectures with a native count leading zeros instruction. The + // undersubtracting algorithm needs both values (keeping the original `div_lz` but + // updating `duo_lz` multiple times), so we assume hardware support for fast + // `leading_zeros` calculation. + let div_lz = div.leading_zeros(); + let mut duo_lz = duo.leading_zeros(); + + // the possible ranges of `duo` and `div` at this point: + // `0 <= duo < 2^n_d` + // `1 <= div < 2^n_d` + + // quotient is 0 or 1 branch + if div_lz <= duo_lz { + // The quotient cannot be more than 1. The highest set bit of `duo` needs to be at + // least one place higher than `div` for the quotient to be more than 1. 
+ if duo >= div { + return (1, duo - div) + } else { + return (0, duo) + } + } + + // `_sb` is the number of significant bits (from the ones place to the highest set bit) + // `{2, 2^div_sb} <= duo < 2^n_d` + // `1 <= div < {2^duo_sb, 2^(n_d - 1)}` + // smaller division branch + if duo_lz >= n { + // `duo < 2^n` so it will fit in a $uX. `div` will also fit in a $uX (because of the + // `div_lz <= duo_lz` branch) so no numerical error. + let (quo, rem) = $half_division(duo as $uX, div as $uX); + return ( + quo as $uD, + rem as $uD + ) + } + + // `{2^n, 2^div_sb} <= duo < 2^n_d` + // `1 <= div < {2^duo_sb, 2^(n_d - 1)}` + // short division branch + if div_lz >= (n + $n_h) { + // `1 <= div < {2^duo_sb, 2^n_h}` + + // It is barely possible to improve the performance of this by calculating the + // reciprocal and removing one `$half_division`, but only if the CPU can do fast + // multiplications in parallel. Other reciprocal based methods can remove two + // `$half_division`s, but have multiplications that cannot be done in parallel and + // reduce performance. I have decided to use this trivial short division method and + // rely on the CPU having quick divisions. 
+ + let duo_hi = (duo >> n) as $uX; + let div_0 = div as $uH as $uX; + let (quo_hi, rem_3) = $half_division(duo_hi, div_0); + + let duo_mid = + ((duo >> $n_h) as $uH as $uX) + | (rem_3 << $n_h); + let (quo_1, rem_2) = $half_division(duo_mid, div_0); + + let duo_lo = + (duo as $uH as $uX) + | (rem_2 << $n_h); + let (quo_0, rem_1) = $half_division(duo_lo, div_0); + + return ( + (quo_0 as $uD) + | ((quo_1 as $uD) << $n_h) + | ((quo_hi as $uD) << n), + rem_1 as $uD + ) + } + + // relative leading significant bits, cannot overflow because of above branches + let lz_diff = div_lz - duo_lz; + + // `{2^n, 2^div_sb} <= duo < 2^n_d` + // `2^n_h <= div < {2^duo_sb, 2^(n_d - 1)}` + // `mul` or `mul - 1` branch + if lz_diff < $n_h { + // Two possibility division algorithm + + // The most significant bits of `duo` and `div` are within `$n_h` bits of each + // other. If we take the `n` most significant bits of `duo` and divide them by the + // corresponding bits in `div`, it produces a quotient value `quo`. It happens that + // `quo` or `quo - 1` will always be the correct quotient for the whole number. In + // other words, the bits less significant than the `n` most significant bits of + // `duo` and `div` can only influence the quotient to be one of two values. + // Because there are only two possibilities, there only needs to be one `$uH` sized + // division, a `$uH` by `$uD` multiplication, and only one branch with a few simple + // operations. + // + // Proof that the true quotient can only be `quo` or `quo - 1`. + // All `/` operators here are floored divisions. + // + // `shift` is the number of bits not in the higher `n` significant bits of `duo`. + // (definitions) + // 0. shift = n - duo_lz + // 1. duo_sig_n == duo / 2^shift + // 2. div_sig_n == div / 2^shift + // 3. quo == duo_sig_n / div_sig_n + // + // + // We are trying to find the true quotient, `true_quo`. + // 4. true_quo = duo / div. 
(definition) + // + // This is true because of the bits that are cut off during the bit shift. + // 5. duo_sig_n * 2^shift <= duo < (duo_sig_n + 1) * 2^shift. + // 6. div_sig_n * 2^shift <= div < (div_sig_n + 1) * 2^shift. + // + // Dividing each bound of (5) by each bound of (6) gives 4 possibilities for what + // `true_quo == duo / div` is bounded by: + // (duo_sig_n * 2^shift) / (div_sig_n * 2^shift) + // (duo_sig_n * 2^shift) / ((div_sig_n + 1) * 2^shift) + // ((duo_sig_n + 1) * 2^shift) / (div_sig_n * 2^shift) + // ((duo_sig_n + 1) * 2^shift) / ((div_sig_n + 1) * 2^shift) + // + // Simplifying each of these four: + // duo_sig_n / div_sig_n + // duo_sig_n / (div_sig_n + 1) + // (duo_sig_n + 1) / div_sig_n + // (duo_sig_n + 1) / (div_sig_n + 1) + // + // Taking the smallest and the largest of these as the low and high bounds + // and replacing `duo / div` with `true_quo`: + // 7. duo_sig_n / (div_sig_n + 1) <= true_quo < (duo_sig_n + 1) / div_sig_n + // + // The `lz_diff < n_h` conditional on this branch makes sure that `div_sig_n` is at + // least `2^n_h`, and the `div_lz <= duo_lz` branch makes sure that the highest bit + // of `div_sig_n` is not the `2^(n - 1)` bit. + // 8. `2^(n - 1) <= duo_sig_n < 2^n` + // 9. `2^n_h <= div_sig_n < 2^(n - 1)` + // + // We want to prove that either + // `(duo_sig_n + 1) / div_sig_n == duo_sig_n / (div_sig_n + 1)` or that + // `(duo_sig_n + 1) / div_sig_n == duo_sig_n / (div_sig_n + 1) + 1`. + // + // We also want to prove that `quo` is one of these: + // `duo_sig_n / div_sig_n == duo_sig_n / (div_sig_n + 1)` or + // `duo_sig_n / div_sig_n == (duo_sig_n + 1) / div_sig_n`. + // + // When 1 is added to the numerator of `duo_sig_n / div_sig_n` to produce + // `(duo_sig_n + 1) / div_sig_n`, it is not possible that the value increases by + // more than 1 with floored integer arithmetic and `div_sig_n != 0`. Consider + // `x/y + 1 < (x + 1)/y` <=> `x/y + 1 < x/y + 1/y` <=> `1 < 1/y` <=> `y < 1`. 
+ // `div_sig_n` is a nonzero integer. Thus, + // 10. `duo_sig_n / div_sig_n == (duo_sig_n + 1) / div_sig_n` or + // `(duo_sig_n / div_sig_n) + 1 == (duo_sig_n + 1) / div_sig_n`. + // + // When 1 is added to the denominator of `duo_sig_n / div_sig_n` to produce + // `duo_sig_n / (div_sig_n + 1)`, it is not possible that the value decreases by + // more than 1 with the bounds (8) and (9). Consider `x/y - 1 < x/(y + 1)` <=> + // `(x - y)/y < x/(y + 1)` <=> `(y + 1)*(x - y) < x*y` <=> `x*y - y*y + x - y < x*y` + // <=> `x < y*y + y`. The smallest value of `div_sig_n` is `2^n_h` and the largest + // value of `duo_sig_n` is `2^n - 1`. Substituting reveals `2^n - 1 < 2^n + 2^n_h`. + // Thus, + // 11. `duo_sig_n / div_sig_n == duo_sig_n / (div_sig_n + 1)` or + // `(duo_sig_n / div_sig_n) - 1 == duo_sig_n / (div_sig_n + 1)` + // + // Combining both (10) and (11), we know that + // `quo - 1 <= duo_sig_n / (div_sig_n + 1) <= true_quo + // < (duo_sig_n + 1) / div_sig_n <= quo + 1` and therefore: + // 12. quo - 1 <= true_quo < quo + 1 + // + // In a lot of division algorithms using smaller divisions to construct a larger + // division, we often encounter a situation where the approximate `quo` value + // calculated from a smaller division is multiple increments away from the true + // `quo` value. In those algorithms, multiple correction steps have to be applied. + // Those correction steps may need more multiplications to test `duo - (quo*div)` + // again. Because of the fact that our `quo` can only be one of two values, we can + // see if `duo - (quo*div)` overflows. If it did overflow, then we know that we have + // the larger of the two values (since the true quotient is unique, and any larger + // quotient will cause `duo - (quo*div)` to be negative). Also because there is only + // one correction needed, we can calculate the remainder `duo - (true_quo*div) == + // duo - ((quo - 1)*div) == duo - (quo*div - div) == duo + div - quo*div`. 
+ // If `duo - (quo*div)` did not overflow, then we have the correct answer. + let shift = n - duo_lz; + let duo_sig_n = (duo >> shift) as $uX; + let div_sig_n = (div >> shift) as $uX; + let quo = $half_division(duo_sig_n, div_sig_n).0; + + // The larger `quo` value can overflow `$uD` in the right circumstances. This is a + // manual `carrying_mul_add` with overflow checking. + let div_lo = div as $uX; + let div_hi = (div >> n) as $uX; + let (tmp_lo, carry) = carrying_mul(quo, div_lo); + let (tmp_hi, overflow) = carrying_mul_add(quo, div_hi, carry); + let tmp = (tmp_lo as $uD) | ((tmp_hi as $uD) << n); + if (overflow != 0) || (duo < tmp) { + return ( + (quo - 1) as $uD, + // Both the addition and subtraction can overflow, but when combined end up + // as a correct positive number. + duo.wrapping_add(div).wrapping_sub(tmp) + ) + } else { + return ( + quo as $uD, + duo - tmp + ) + } + } + + // Undersubtracting long division algorithm. + // Instead of clearing a minimum of 1 bit from `duo` per iteration via binary long + // division, `n_h - 1` bits are cleared per iteration with this algorithm. It is a more + // complicated version of regular long division. Most integer division algorithms tend + // to guess a part of the quotient, and may have a larger quotient than the true + // quotient (which when multiplied by `div` will "oversubtract" the original dividend). + // They then check if the quotient was in fact too large and then have to correct it. + // This long division algorithm has been carefully constructed to always underguess the + // quotient by slim margins. This allows different subalgorithms to be blindly jumped to + // without needing an extra correction step. + // + // The only problem is that this subalgorithm will not work for many ranges of `duo` and + // `div`. Fortunately, the short division, two possibility algorithm, and other simple + // cases happen to exactly fill these gaps. 
+ // + // For an example, consider the division of 76543210 by 213 and assume that `n_h` is + // equal to two decimal digits (note: we are working with base 10 here for readability). + // The first `sig_n_h` part of the divisor (21) is taken and is incremented by 1 to + // prevent oversubtraction. We also record the number of extra places not a part of + // the `sig_n` or `sig_n_h` parts. + // + // sig_n_h == 2 digits, sig_n == 4 digits + // + // vvvv <- `duo_sig_n` + // 76543210 + // ^^^^ <- extra places in duo, `duo_extra == 4` + // + // vv <- `div_sig_n_h` + // 213 + // ^ <- extra places in div, `div_extra == 1` + // + // The difference in extra places, `duo_extra - div_extra == extra_shl == 3`, is used + // for shifting partial sums in the long division. + // + // In the first step, the first `sig_n` part of duo (7654) is divided by + // `div_sig_n_h_add_1` (22), which results in a partial quotient of 347. This is + // multiplied by the whole divisor to make 73911, which is shifted left by `extra_shl` + // and subtracted from duo. The partial quotient is also shifted left by `extra_shl` to + // be added to `quo`. + // + // 347 + // ________ + // |76543210 + // -73911 + // 2632210 + // + // Variables dependent on duo have to be updated: + // + // vvvv <- `duo_sig_n == 2632` + // 2632210 + // ^^^ <- `duo_extra == 3` + // + // `extra_shl == 2` + // + // Two more steps are taken after this and then duo fits into `n` bits, and then a final + // normal long division step is made. The partial quotients are all progressively added + // to each other in the actual algorithm, but here I have left them all in a tower that + // can be added together to produce the quotient, 359357. 
+ // + // 14 + // 443 + // 119 + // 347 + // ________ + // |76543210 + // -73911 + // 2632210 + // -25347 + // 97510 + // -94359 + // 3151 + // -2982 + // 169 <- the remainder + + let mut duo = duo; + let mut quo: $uD = 0; + + // The number of lesser significant bits not a part of `div_sig_n_h` + let div_extra = (n + $n_h) - div_lz; + + // The most significant `n_h` bits of div + let div_sig_n_h = (div >> div_extra) as $uH; + + // This needs to be a `$uX` in case of overflow from the increment + let div_sig_n_h_add1 = (div_sig_n_h as $uX) + 1; + + // `{2^n, 2^(div_sb + n_h)} <= duo < 2^n_d` + // `2^n_h <= div < {2^(duo_sb - n_h), 2^n}` + loop { + // The number of lesser significant bits not a part of `duo_sig_n` + let duo_extra = n - duo_lz; + + // The most significant `n` bits of `duo` + let duo_sig_n = (duo >> duo_extra) as $uX; + + // the two possibility algorithm requires that the difference between msbs is less + // than `n_h`, so the comparison is `<=` here. + if div_extra <= duo_extra { + // Undersubtracting long division step + let quo_part = $half_division(duo_sig_n, div_sig_n_h_add1).0 as $uD; + let extra_shl = duo_extra - div_extra; + + // Addition to the quotient. + quo += (quo_part << extra_shl); + + // Subtraction from `duo`. At least `n_h - 1` bits are cleared from `duo` here. 
+ duo -= (div.wrapping_mul(quo_part) << extra_shl); + } else { + // Two possibility algorithm + let shift = n - duo_lz; + let duo_sig_n = (duo >> shift) as $uX; + let div_sig_n = (div >> shift) as $uX; + let quo_part = $half_division(duo_sig_n, div_sig_n).0; + let div_lo = div as $uX; + let div_hi = (div >> n) as $uX; + + let (tmp_lo, carry) = carrying_mul(quo_part, div_lo); + // The undersubtracting long division algorithm has already run once, so + // overflow beyond `$uD` bits is not possible here + let (tmp_hi, _) = carrying_mul_add(quo_part, div_hi, carry); + let tmp = (tmp_lo as $uD) | ((tmp_hi as $uD) << n); + + if duo < tmp { + return ( + quo + ((quo_part - 1) as $uD), + duo.wrapping_add(div).wrapping_sub(tmp) + ) + } else { + return ( + quo + (quo_part as $uD), + duo - tmp + ) + } + } + + duo_lz = duo.leading_zeros(); + + if div_lz <= duo_lz { + // quotient can have 0 or 1 added to it + if div <= duo { + return ( + quo + 1, + duo - div + ) + } else { + return ( + quo, + duo + ) + } + } + + // This can only happen if `div_sd < n` (because of previous "quo = 0 or 1" + // branches), but it is not worth it to unroll further. + if n <= duo_lz { + // simple division and addition + let tmp = $half_division(duo as $uX, div as $uX); + return ( + quo + (tmp.0 as $uD), + tmp.1 as $uD + ) + } + } + } + + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. 
+ $( + #[$signed_attr] + )* + pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) { + match (duo < 0, div < 0) { + (false, false) => { + let t = $unsigned_name(duo as $uD, div as $uD); + (t.0 as $iD, t.1 as $iD) + }, + (true, false) => { + let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD); + ((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg()) + }, + (false, true) => { + let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD); + ((t.0 as $iD).wrapping_neg(), t.1 as $iD) + }, + (true, true) => { + let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD); + (t.0 as $iD, (t.1 as $iD).wrapping_neg()) + }, + } + } + } +} diff --git a/src/int/udiv.rs b/src/int/udiv.rs index 491515949..3cd9be93c 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -1,4 +1,4 @@ -use int::{Int, LargeInt}; +use int::specialized_div_rem::*; intrinsics! { #[maybe_use_optimized_c_shim] diff --git a/src/lib.rs b/src/lib.rs index 34397e0d2..cbd23850b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ #![cfg_attr(feature = "compiler-builtins", compiler_builtins)] +#![cfg_attr(feature = "asm", feature(asm))] #![feature(abi_unadjusted)] #![feature(llvm_asm)] #![feature(global_asm)] diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index 61282af0b..5c3df7961 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -28,7 +28,8 @@ utest-cortex-m-qemu = { default-features = false, git = "https://github.com/japa utest-macros = { git = "https://github.com/japaric/utest" } [features] +default = ["asm", "mangled-names"] +asm = ["compiler_builtins/asm"] c = ["compiler_builtins/c"] mem = ["compiler_builtins/mem"] mangled-names = ["compiler_builtins/mangled-names"] -default = ["mangled-names"] diff --git a/testcrate/tests/div_rem.rs b/testcrate/tests/div_rem.rs new file mode 100644 index 000000000..2a154f5eb --- /dev/null +++ b/testcrate/tests/div_rem.rs @@ -0,0 +1,143 @@ +use rand_xoshiro::rand_core::{RngCore, SeedableRng}; +use 
rand_xoshiro::Xoshiro128StarStar; + +use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divti3, __modti3}; +use compiler_builtins::int::udiv::{__udivmoddi4, __udivmodsi4, __udivmodti4}; + +// because `__divmodti4` does not exist, we synthesize it +fn __divmodti4(a: i128, b: i128, rem: &mut i128) -> i128 { + *rem = __modti3(a, b); + __divti3(a, b) +} + +/// Creates intensive test functions for division functions of a certain size +macro_rules! test { + ( + $n:expr, // the number of bits in a $iX or $uX + $uX:ident, // unsigned integer that will be shifted + $iX:ident, // signed version of $uX + $test_name:ident, // name of the test function + $unsigned_name:ident, // unsigned division function + $signed_name:ident // signed division function + ) => { + #[test] + fn $test_name() { + fn assert_invariants(lhs: $uX, rhs: $uX) { + let rem: &mut $uX = &mut 0; + let quo: $uX = $unsigned_name(lhs, rhs, Some(rem)); + let rem = *rem; + if rhs <= rem || (lhs != rhs.wrapping_mul(quo).wrapping_add(rem)) { + panic!( + "unsigned division function failed with lhs:{} rhs:{} \ + expected:({}, {}) found:({}, {})", + lhs, + rhs, + lhs.wrapping_div(rhs), + lhs.wrapping_rem(rhs), + quo, + rem + ); + } + + // test the signed division function also + let lhs = lhs as $iX; + let rhs = rhs as $iX; + let mut rem: $iX = 0; + let quo: $iX = $signed_name(lhs, rhs, &mut rem); + // We cannot just test that + // `lhs == rhs.wrapping_mul(quo).wrapping_add(rem)`, but also + // need to make sure the remainder isn't larger than the divisor + // and has the correct sign. + let incorrect_rem = if rem == 0 { + false + } else if rhs == $iX::MIN { + // `rhs.wrapping_abs()` would overflow, so handle this case + // separately. 
+ (lhs.is_negative() != rem.is_negative()) || (rem == $iX::MIN) + } else { + (lhs.is_negative() != rem.is_negative()) + || (rhs.wrapping_abs() <= rem.wrapping_abs()) + }; + if incorrect_rem || lhs != rhs.wrapping_mul(quo).wrapping_add(rem) { + panic!( + "signed division function failed with lhs:{} rhs:{} \ + expected:({}, {}) found:({}, {})", + lhs, + rhs, + lhs.wrapping_div(rhs), + lhs.wrapping_rem(rhs), + quo, + rem + ); + } + } + + // Specially designed random fuzzer + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut lhs: $uX = 0; + let mut rhs: $uX = 0; + // all ones constant + let ones: $uX = !0; + // Alternating ones and zeros (e.g. 0b1010101010101010). This catches second-order + // problems that might occur for algorithms with two modes of operation (potentially + // there is some invariant that can be broken for large `duo` and maintained via + // alternating between modes, breaking the algorithm when it reaches the end). + let mut alt_ones: $uX = 1; + for _ in 0..($n / 2) { + alt_ones <<= 2; + alt_ones |= 1; + } + // creates a mask for indexing the bits of the type + let bit_indexing_mask = $n - 1; + for _ in 0..1_000_000 { + // Randomly OR, AND, and XOR randomly sized and shifted continuous strings of + // ones with `lhs` and `rhs`.
This results in excellent fuzzing entropy such as: + // lhs:10101010111101000000000100101010 rhs: 1010101010000000000000001000001 + // lhs:10101010111101000000000101001010 rhs: 1010101010101010101010100010100 + // lhs:10101010111101000000000101001010 rhs:11101010110101010101010100001110 + // lhs:10101010000000000000000001001010 rhs:10100010100000000000000000001010 + // lhs:10101010000000000000000001001010 rhs: 10101010101010101000 + // lhs:10101010000000000000000001100000 rhs:11111111111101010101010101001111 + // lhs:10101010000000101010101011000000 rhs:11111111111101010101010100000111 + // lhs:10101010101010101010101011101010 rhs: 1010100000000000000 + // lhs:11111111110101101010101011010111 rhs: 1010100000000000000 + // The msb is set half of the time by the fuzzer, but `assert_invariants` tests + // both the signed and unsigned functions. + let r0: u32 = bit_indexing_mask & rng.next_u32(); + let r1: u32 = bit_indexing_mask & rng.next_u32(); + let mask = ones.wrapping_shr(r0).rotate_left(r1); + match rng.next_u32() % 8 { + 0 => lhs |= mask, + 1 => lhs &= mask, + // both 2 and 3 to make XORs as common as ORs and ANDs combined, otherwise + // the entropy gets destroyed too often + 2 | 3 => lhs ^= mask, + 4 => rhs |= mask, + 5 => rhs &= mask, + _ => rhs ^= mask, + } + // do the same for alternating ones and zeros + let r0: u32 = bit_indexing_mask & rng.next_u32(); + let r1: u32 = bit_indexing_mask & rng.next_u32(); + let mask = alt_ones.wrapping_shr(r0).rotate_left(r1); + match rng.next_u32() % 8 { + 0 => lhs |= mask, + 1 => lhs &= mask, + // both 2 and 3 to make XORs as common as ORs and ANDs combined, otherwise + // the entropy gets destroyed too often + 2 | 3 => lhs ^= mask, + 4 => rhs |= mask, + 5 => rhs &= mask, + _ => rhs ^= mask, + } + if rhs != 0 { + assert_invariants(lhs, rhs); + } + } + } + }; +} + +test!(32, u32, i32, div_rem_si4, __udivmodsi4, __divmodsi4); +test!(64, u64, i64, div_rem_di4, __udivmoddi4, __divmoddi4); +test!(128, u128, i128, div_rem_ti4, 
__udivmodti4, __divmodti4); From 0e6d75d746b2a0e8a948f662eec6347cc627be6d Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Tue, 21 Jul 2020 23:16:52 -0500 Subject: [PATCH 0381/1459] Change inlining to favor three underlying division functions --- src/int/specialized_div_rem/mod.rs | 35 ++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs index 5a034dcf1..0304aa2e7 100644 --- a/src/int/specialized_div_rem/mod.rs +++ b/src/int/specialized_div_rem/mod.rs @@ -113,6 +113,13 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) { zero_div_fn() } +// `inline(never)` is placed on unsigned division functions so that there are just three division +// functions (`u32_div_rem`, `u64_div_rem`, and `u128_div_rem`) backing all `compiler-builtins` +// division functions. The signed functions like `i32_div_rem` will get inlined into the +// `compiler-builtins` signed division functions, so that they directly call the three division +// functions. Otherwise, LLVM may try to inline the unsigned division functions 4 times into the +// signed division functions, which results in an explosion in code size. + // Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a // microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is // faster if the target pointer width is at least 64. @@ -129,7 +136,9 @@ impl_trifecta!( u32, u64, u128, - i128,; + i128, + inline(never); + inline ); // If the pointer width less than 64, then the target architecture almost certainly does not have @@ -148,7 +157,9 @@ impl_delegate!( u32, u64, u128, - i128,; + i128, + inline(never); + inline ); /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. 
@@ -190,7 +201,9 @@ impl_asymmetric!( u32, u64, u128, - i128,; + i128, + inline(never); + inline ); /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. @@ -223,7 +236,9 @@ impl_delegate!( u16, u32, u64, - i64,; + i64, + inline(never); + inline ); // When not on x86 and the pointer width is 64, use `binary_long`. @@ -238,7 +253,9 @@ impl_binary_long!( u64_normalization_shift, 64, u64, - i64,; + i64, + inline(never); + inline ); /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. @@ -280,7 +297,9 @@ impl_asymmetric!( u16, u32, u64, - i64,; + i64, + inline(never); + inline ); // 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division @@ -291,5 +310,7 @@ impl_binary_long!( u32_normalization_shift, 32, u32, - i32,; + i32, + inline(never); + inline ); From bc0646543eb23d48caf7d8fdcf5318704b6020e6 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Fri, 14 Aug 2020 15:28:48 -0500 Subject: [PATCH 0382/1459] Use unreachable_unchecked --- src/int/specialized_div_rem/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs index 0304aa2e7..f7dc044fa 100644 --- a/src/int/specialized_div_rem/mod.rs +++ b/src/int/specialized_div_rem/mod.rs @@ -57,9 +57,7 @@ mod asymmetric; /// impossible to reach by Rust users, unless `compiler-builtins` public division functions or /// `core/std::unchecked_div/rem` are directly used without a zero check in front. fn zero_div_fn() -> ! 
{ - // TODO: change this once the algorithms are verified - //unsafe {core::hint::unreachable_unchecked()} - ::abort() + unsafe { core::hint::unreachable_unchecked() } } // The `B` extension on RISC-V determines if a CLZ assembly instruction exists From 6a67566b203daf3f2ca0ae9c60df0760ae0b912f Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 23 Aug 2020 23:31:51 +0100 Subject: [PATCH 0383/1459] Fix CI url for compiler-rt source --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1237bc032..e4bcc9c10 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -88,8 +88,8 @@ jobs: - run: rustup component add llvm-tools-preview - name: Download compiler-rt reference sources run: | - curl -L -o code.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/10.0-2020-02-05.tar.gz - tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-10.0-2020-02-05/compiler-rt + curl -L -o code.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/10.0-2020-05-05.tar.gz + tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-10.0-2020-05-05/compiler-rt echo "##[set-env name=RUST_COMPILER_RT_ROOT]./compiler-rt" shell: bash From c172ebdfd3bfc47c8ea71fa26758baf87a8a25d2 Mon Sep 17 00:00:00 2001 From: Thomas Vigouroux Date: Thu, 6 Aug 2020 10:59:15 +0200 Subject: [PATCH 0384/1459] add compiler-rt fallbacks on aarch64-musl --- build.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/build.rs b/build.rs index a3b722ab7..f948edba9 100644 --- a/build.rs +++ b/build.rs @@ -419,6 +419,18 @@ mod c { if target_os != "windows" { sources.extend(&[("__multc3", "multc3.c")]); } + + if target_env == "musl" { + sources.extend(&[ + ("__addtf3", "addtf3.c"), + ("__multf3", "multf3.c"), + ("__subtf3", "subtf3.c"), + ("__divtf3", "divtf3.c"), + ("__powitf2", "powitf2.c"), + ("__fe_getround", "fp_mode.c"), + ("__fe_raise_inexact", 
"fp_mode.c"), + ]); + } } if target_arch == "mips" { From 0fd1e004bc649707c7f5dc17f6ce9ded6e46707a Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Mon, 24 Aug 2020 12:45:58 +0100 Subject: [PATCH 0385/1459] Bump to 0.1.33 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index d4d1ad67d..957fd7beb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.32" +version = "0.1.33" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From f60b31725c5d569456f2609c0448ce1601de2abc Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 27 Aug 2020 17:49:46 +0100 Subject: [PATCH 0386/1459] Bump to 0.1.34 0.1.33 was published without the libm submodule --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 957fd7beb..60429681d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.33" +version = "0.1.34" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 2635ae9a6c7c7677df1ea5a74d49b537f88a8d32 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 28 Aug 2020 01:35:29 +0100 Subject: [PATCH 0387/1459] Bump to 0.1.35 This time using the proper procedure for including libm. 
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 60429681d..b4fbb0b1e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.34" +version = "0.1.35" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 26fe6ff9361f2855a7b1448ecba167316f09650f Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Sat, 29 Aug 2020 18:02:57 -0500 Subject: [PATCH 0388/1459] Add `__divmodti4` --- src/int/sdiv.rs | 9 ++++++++- testcrate/build.rs | 13 +++++++++++++ testcrate/tests/div_rem.rs | 8 +------- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index 57ef03cda..3d0c3afc1 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -54,5 +54,12 @@ intrinsics! { i128_div_rem(a, b).1 } - // LLVM does not currently have a `__divmodti4` function + // LLVM does not currently have a `__divmodti4` function, but GCC does + #[maybe_use_optimized_c_shim] + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn __divmodti4(a: i128, b: i128, rem: &mut i128) -> i128 { + let quo_rem = i128_div_rem(a, b); + *rem = quo_rem.1; + quo_rem.0 + } } diff --git a/testcrate/build.rs b/testcrate/build.rs index e1d4cf9e8..656fd2d20 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -805,6 +805,19 @@ fn main() { (builtins::int::sdiv::__divmodsi4(a, b, &mut r), r) }", ); + gen( + |(a, b): (MyI128, MyI128)| { + if b.0 == 0 { + None + } else { + Some((a.0 / b.0, a.0 % b.0)) + } + }, + "{ + let mut r = 0; + (builtins::int::sdiv::__divmodti4(a, b, &mut r), r) + }", + ); gen( |(a, b): (MyI32, MyI32)| { if b.0 == 0 { diff --git a/testcrate/tests/div_rem.rs b/testcrate/tests/div_rem.rs index 2a154f5eb..199fa9db7 100644 --- a/testcrate/tests/div_rem.rs +++ b/testcrate/tests/div_rem.rs @@ -1,15 +1,9 @@ use rand_xoshiro::rand_core::{RngCore, SeedableRng}; use 
rand_xoshiro::Xoshiro128StarStar; -use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divti3, __modti3}; +use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4}; use compiler_builtins::int::udiv::{__udivmoddi4, __udivmodsi4, __udivmodti4}; -// because `__divmodti4` does not exist, we synthesize it -fn __divmodti4(a: i128, b: i128, rem: &mut i128) -> i128 { - *rem = __modti3(a, b); - __divti3(a, b) -} - /// Creates intensive test functions for division functions of a certain size macro_rules! test { ( From eff506cd49b637f1ab5931625a33cef7e91fbbf6 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 13 Sep 2020 20:47:25 +0100 Subject: [PATCH 0389/1459] Bump to 0.1.36 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4ee769465..9262bb6d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.35" +version = "0.1.36" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From e9688c68a5dd3ebe98f27386c6459afaf87c0468 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Fri, 2 Oct 2020 16:35:55 -0500 Subject: [PATCH 0390/1459] Remove unneeded code from asymmetric.rs Rebenchmarking this showed that perf changed for the worse only on really low end CPUs --- src/int/specialized_div_rem/asymmetric.rs | 105 +++++----------------- 1 file changed, 20 insertions(+), 85 deletions(-) diff --git a/src/int/specialized_div_rem/asymmetric.rs b/src/int/specialized_div_rem/asymmetric.rs index 861e91742..16ab2baad 100644 --- a/src/int/specialized_div_rem/asymmetric.rs +++ b/src/int/specialized_div_rem/asymmetric.rs @@ -25,20 +25,8 @@ macro_rules! 
impl_asymmetric { #[$unsigned_attr] )* pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD,$uD) { - fn carrying_mul(lhs: $uX, rhs: $uX) -> ($uX, $uX) { - let tmp = (lhs as $uD).wrapping_mul(rhs as $uD); - (tmp as $uX, (tmp >> ($n_h * 2)) as $uX) - } - fn carrying_mul_add(lhs: $uX, mul: $uX, add: $uX) -> ($uX, $uX) { - let tmp = (lhs as $uD).wrapping_mul(mul as $uD).wrapping_add(add as $uD); - (tmp as $uX, (tmp >> ($n_h * 2)) as $uX) - } - let n: u32 = $n_h * 2; - // Many of these subalgorithms are taken from trifecta.rs, see that for better - // documentation. - let duo_lo = duo as $uX; let duo_hi = (duo >> n) as $uX; let div_lo = div as $uX; @@ -51,30 +39,6 @@ macro_rules! impl_asymmetric { // `$uD` by `$uX` division with a quotient that will fit into a `$uX` let (quo, rem) = unsafe { $asymmetric_division(duo, div_lo) }; return (quo as $uD, rem as $uD) - } else if (div_lo >> $n_h) == 0 { - // Short division of $uD by a $uH. - - // Some x86_64 CPUs have bad division implementations that make specializing - // this case faster. - let div_0 = div_lo as $uH as $uX; - let (quo_hi, rem_3) = $half_division(duo_hi, div_0); - - let duo_mid = - ((duo >> $n_h) as $uH as $uX) - | (rem_3 << $n_h); - let (quo_1, rem_2) = $half_division(duo_mid, div_0); - - let duo_lo = - (duo as $uH as $uX) - | (rem_2 << $n_h); - let (quo_0, rem_1) = $half_division(duo_lo, div_0); - - return ( - (quo_0 as $uD) - | ((quo_1 as $uD) << $n_h) - | ((quo_hi as $uD) << n), - rem_1 as $uD - ) } else { // Short division using the $uD by $uX division let (quo_hi, rem_hi) = $half_division(duo_hi, div_lo); @@ -85,59 +49,30 @@ macro_rules! impl_asymmetric { } } - let duo_lz = duo_hi.leading_zeros(); + // This has been adapted from + // https://www.codeproject.com/tips/785014/uint-division-modulus which was in turn + // adapted from Hacker's Delight. 
This is similar to the two possibility algorithm + // in that it uses only more significant parts of `duo` and `div` to divide a large + // integer with a smaller division instruction. let div_lz = div_hi.leading_zeros(); - let rel_leading_sb = div_lz.wrapping_sub(duo_lz); - if rel_leading_sb < $n_h { - // Some x86_64 CPUs have bad hardware division implementations that make putting - // a two possibility algorithm here beneficial. We also avoid a full `$uD` - // multiplication. - let shift = n - duo_lz; - let duo_sig_n = (duo >> shift) as $uX; - let div_sig_n = (div >> shift) as $uX; - let quo = $half_division(duo_sig_n, div_sig_n).0; - let div_lo = div as $uX; - let div_hi = (div >> n) as $uX; - let (tmp_lo, carry) = carrying_mul(quo, div_lo); - let (tmp_hi, overflow) = carrying_mul_add(quo, div_hi, carry); - let tmp = (tmp_lo as $uD) | ((tmp_hi as $uD) << n); - if (overflow != 0) || (duo < tmp) { - return ( - (quo - 1) as $uD, - duo.wrapping_add(div).wrapping_sub(tmp) - ) - } else { - return ( - quo as $uD, - duo - tmp - ) - } - } else { - // This has been adapted from - // https://www.codeproject.com/tips/785014/uint-division-modulus which was in turn - // adapted from Hacker's Delight. This is similar to the two possibility algorithm - // in that it uses only more significant parts of `duo` and `div` to divide a large - // integer with a smaller division instruction. 
- - let div_extra = n - div_lz; - let div_sig_n = (div >> div_extra) as $uX; - let tmp = unsafe { - $asymmetric_division(duo >> 1, div_sig_n) - }; + let div_extra = n - div_lz; + let div_sig_n = (div >> div_extra) as $uX; + let tmp = unsafe { + $asymmetric_division(duo >> 1, div_sig_n) + }; - let mut quo = tmp.0 >> ((n - 1) - div_lz); - if quo != 0 { - quo -= 1; - } + let mut quo = tmp.0 >> ((n - 1) - div_lz); + if quo != 0 { + quo -= 1; + } - // Note that this is a full `$uD` multiplication being used here - let mut rem = duo - (quo as $uD).wrapping_mul(div); - if div <= rem { - quo += 1; - rem -= div; - } - return (quo as $uD, rem) + // Note that this is a full `$uD` multiplication being used here + let mut rem = duo - (quo as $uD).wrapping_mul(div); + if div <= rem { + quo += 1; + rem -= div; } + return (quo as $uD, rem) } /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a From 1d15e4e5043edcde12b8a2412f084c387f07ecc6 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Fri, 2 Oct 2020 22:04:05 -0500 Subject: [PATCH 0391/1459] Construct signed division functions differently --- src/int/sdiv.rs | 207 +++++++++++++++------ src/int/specialized_div_rem/asymmetric.rs | 53 +----- src/int/specialized_div_rem/binary_long.rs | 64 +------ src/int/specialized_div_rem/delegate.rs | 76 ++------ src/int/specialized_div_rem/mod.rs | 45 +---- src/int/specialized_div_rem/trifecta.rs | 105 +++-------- 6 files changed, 221 insertions(+), 329 deletions(-) diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index 3d0c3afc1..e1e3f33bb 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -1,65 +1,166 @@ -use int::specialized_div_rem::*; +use int::udiv::*; -intrinsics! 
{ - #[maybe_use_optimized_c_shim] - #[arm_aeabi_alias = __aeabi_idiv] - /// Returns `n / d` - pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 { - i32_div_rem(a, b).0 - } - - #[maybe_use_optimized_c_shim] - /// Returns `n % d` - pub extern "C" fn __modsi3(a: i32, b: i32) -> i32 { - i32_div_rem(a, b).1 - } - - #[maybe_use_optimized_c_shim] - /// Returns `n / d` and sets `*rem = n % d` - pub extern "C" fn __divmodsi4(a: i32, b: i32, rem: &mut i32) -> i32 { - let quo_rem = i32_div_rem(a, b); - *rem = quo_rem.1; - quo_rem.0 +macro_rules! sdivmod { + ( + $unsigned_fn:ident, // name of the unsigned division function + $signed_fn:ident, // name of the signed division function + $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` + $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` + $($attr:tt),* // attributes + ) => { + intrinsics! { + $( + #[$attr] + )* + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn $signed_fn(a: $iX, b: $iX, rem: &mut $iX) -> $iX { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + let mut r = *rem as $uX; + let t = $unsigned_fn(a as $uX, b as $uX, Some(&mut r)) as $iX; + let mut r = r as $iX; + if a_neg { + r = r.wrapping_neg(); + } + *rem = r; + if a_neg != b_neg { + t.wrapping_neg() + } else { + t + } + } + } } +} - #[maybe_use_optimized_c_shim] - /// Returns `n / d` - pub extern "C" fn __divdi3(a: i64, b: i64) -> i64 { - i64_div_rem(a, b).0 +macro_rules! sdiv { + ( + $unsigned_fn:ident, // name of the unsigned division function + $signed_fn:ident, // name of the signed division function + $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` + $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` + $($attr:tt),* // attributes + ) => { + intrinsics! 
{ + $( + #[$attr] + )* + /// Returns `n / d` + pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + let t = $unsigned_fn(a as $uX, b as $uX) as $iX; + if a_neg != b_neg { + t.wrapping_neg() + } else { + t + } + } + } } +} - #[maybe_use_optimized_c_shim] - /// Returns `n % d` - pub extern "C" fn __moddi3(a: i64, b: i64) -> i64 { - i64_div_rem(a, b).1 +macro_rules! smod { + ( + $unsigned_fn:ident, // name of the unsigned division function + $signed_fn:ident, // name of the signed division function + $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` + $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` + $($attr:tt),* // attributes + ) => { + intrinsics! { + $( + #[$attr] + )* + /// Returns `n % d` + pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + let r = $unsigned_fn(a as $uX, b as $uX) as $iX; + if a_neg { + r.wrapping_neg() + } else { + r + } + } + } } +} +sdivmod!( + __udivmodsi4, + __divmodsi4, + u32, + i32, + maybe_use_optimized_c_shim +); +// The `#[arm_aeabi_alias = __aeabi_idiv]` attribute cannot be made to work with `intrinsics!` in macros +intrinsics! 
{ #[maybe_use_optimized_c_shim] - /// Returns `n / d` and sets `*rem = n % d` - pub extern "C" fn __divmoddi4(a: i64, b: i64, rem: &mut i64) -> i64 { - let quo_rem = i64_div_rem(a, b); - *rem = quo_rem.1; - quo_rem.0 - } - - #[win64_128bit_abi_hack] + #[arm_aeabi_alias = __aeabi_idiv] /// Returns `n / d` - pub extern "C" fn __divti3(a: i128, b: i128) -> i128 { - i128_div_rem(a, b).0 + pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + let t = __udivsi3(a as u32, b as u32) as i32; + if a_neg != b_neg { + t.wrapping_neg() + } else { + t + } } +} +smod!(__umodsi3, __modsi3, u32, i32, maybe_use_optimized_c_shim); - #[win64_128bit_abi_hack] - /// Returns `n % d` - pub extern "C" fn __modti3(a: i128, b: i128) -> i128 { - i128_div_rem(a, b).1 - } +sdivmod!( + __udivmoddi4, + __divmoddi4, + u64, + i64, + maybe_use_optimized_c_shim +); +sdiv!(__udivdi3, __divdi3, u64, i64, maybe_use_optimized_c_shim); +smod!(__umoddi3, __moddi3, u64, i64, maybe_use_optimized_c_shim); - // LLVM does not currently have a `__divmodti4` function, but GCC does - #[maybe_use_optimized_c_shim] - /// Returns `n / d` and sets `*rem = n % d` - pub extern "C" fn __divmodti4(a: i128, b: i128, rem: &mut i128) -> i128 { - let quo_rem = i128_div_rem(a, b); - *rem = quo_rem.1; - quo_rem.0 - } -} +// LLVM does not currently have a `__divmodti4` function, but GCC does +sdivmod!( + __udivmodti4, + __divmodti4, + u128, + i128, + maybe_use_optimized_c_shim +); +sdiv!(__udivti3, __divti3, u128, i128, win64_128bit_abi_hack); +smod!(__umodti3, __modti3, u128, i128, win64_128bit_abi_hack); diff --git a/src/int/specialized_div_rem/asymmetric.rs b/src/int/specialized_div_rem/asymmetric.rs index 16ab2baad..bbb77722d 100644 --- a/src/int/specialized_div_rem/asymmetric.rs +++ b/src/int/specialized_div_rem/asymmetric.rs @@ -1,4 +1,4 @@ -/// Creates unsigned and 
signed division functions optimized for dividing integers with the same +/// Creates an unsigned division function optimized for dividing integers with the same /// bitwidth as the largest operand in an asymmetrically sized division. For example, x86-64 has an /// assembly instruction that can divide a 128 bit integer by a 64 bit integer if the quotient fits /// in 64 bits. The 128 bit version of this algorithm would use that fast hardware division to @@ -6,25 +6,18 @@ #[macro_export] macro_rules! impl_asymmetric { ( - $unsigned_name:ident, // name of the unsigned division function - $signed_name:ident, // name of the signed division function + $fn:ident, // name of the unsigned division function $zero_div_fn:ident, // function called when division by zero is attempted $half_division:ident, // function for division of a $uX by a $uX $asymmetric_division:ident, // function for division of a $uD by a $uX $n_h:expr, // the number of bits in a $iH or $uH $uH:ident, // unsigned integer with half the bit width of $uX $uX:ident, // unsigned integer with half the bit width of $uD - $uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` - $iD:ident, // signed integer type for the inputs and outputs of `$signed_name` - $($unsigned_attr:meta),*; // attributes for the unsigned function - $($signed_attr:meta),* // attributes for the signed function + $uD:ident // unsigned integer type for the inputs and outputs of `$fn` ) => { /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a /// tuple. - $( - #[$unsigned_attr] - )* - pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD,$uD) { + pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) { let n: u32 = $n_h * 2; let duo_lo = duo as $uX; @@ -38,14 +31,14 @@ macro_rules! 
impl_asymmetric { if duo_hi < div_lo { // `$uD` by `$uX` division with a quotient that will fit into a `$uX` let (quo, rem) = unsafe { $asymmetric_division(duo, div_lo) }; - return (quo as $uD, rem as $uD) + return (quo as $uD, rem as $uD); } else { // Short division using the $uD by $uX division let (quo_hi, rem_hi) = $half_division(duo_hi, div_lo); let tmp = unsafe { $asymmetric_division((duo_lo as $uD) | ((rem_hi as $uD) << n), div_lo) }; - return ((tmp.0 as $uD) | ((quo_hi as $uD) << n), tmp.1 as $uD) + return ((tmp.0 as $uD) | ((quo_hi as $uD) << n), tmp.1 as $uD); } } @@ -57,9 +50,7 @@ macro_rules! impl_asymmetric { let div_lz = div_hi.leading_zeros(); let div_extra = n - div_lz; let div_sig_n = (div >> div_extra) as $uX; - let tmp = unsafe { - $asymmetric_division(duo >> 1, div_sig_n) - }; + let tmp = unsafe { $asymmetric_division(duo >> 1, div_sig_n) }; let mut quo = tmp.0 >> ((n - 1) - div_lz); if quo != 0 { @@ -72,33 +63,7 @@ macro_rules! impl_asymmetric { quo += 1; rem -= div; } - return (quo as $uD, rem) + return (quo as $uD, rem); } - - /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a - /// tuple. 
- $( - #[$signed_attr] - )* - pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) { - match (duo < 0, div < 0) { - (false, false) => { - let t = $unsigned_name(duo as $uD, div as $uD); - (t.0 as $iD, t.1 as $iD) - }, - (true, false) => { - let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD); - ((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg()) - }, - (false, true) => { - let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD); - ((t.0 as $iD).wrapping_neg(), t.1 as $iD) - }, - (true, true) => { - let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD); - (t.0 as $iD, (t.1 as $iD).wrapping_neg()) - }, - } - } - } + }; } diff --git a/src/int/specialized_div_rem/binary_long.rs b/src/int/specialized_div_rem/binary_long.rs index 4c63396a0..0f5e870b0 100644 --- a/src/int/specialized_div_rem/binary_long.rs +++ b/src/int/specialized_div_rem/binary_long.rs @@ -1,4 +1,4 @@ -/// Creates unsigned and signed division functions that use binary long division, designed for +/// Creates an unsigned division function that uses binary long division, designed for /// computer architectures without division instructions. These functions have good performance for /// microarchitectures with large branch miss penalties and architectures without the ability to /// predicate instructions. For architectures with predicated instructions, one of the algorithms @@ -7,29 +7,23 @@ #[macro_export] macro_rules! 
impl_binary_long { ( - $unsigned_name:ident, // name of the unsigned division function - $signed_name:ident, // name of the signed division function + $fn:ident, // name of the unsigned division function $zero_div_fn:ident, // function called when division by zero is attempted $normalization_shift:ident, // function for finding the normalization shift $n:tt, // the number of bits in a $iX or $uX - $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` - $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` - $($unsigned_attr:meta),*; // attributes for the unsigned function - $($signed_attr:meta),* // attributes for the signed function + $uX:ident, // unsigned integer type for the inputs and outputs of `$fn` + $iX:ident // signed integer type with same bitwidth as `$uX` ) => { /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a /// tuple. - $( - #[$unsigned_attr] - )* - pub fn $unsigned_name(duo: $uX, div: $uX) -> ($uX, $uX) { + pub fn $fn(duo: $uX, div: $uX) -> ($uX, $uX) { let mut duo = duo; // handle edge cases before calling `$normalization_shift` if div == 0 { $zero_div_fn() } if duo < div { - return (0, duo) + return (0, duo); } // There are many variations of binary division algorithm that could be used. This @@ -430,7 +424,7 @@ macro_rules! impl_binary_long { let mut i = shl; loop { if i == 0 { - break + break; } i -= 1; // shift left 1 and subtract @@ -550,47 +544,5 @@ macro_rules! impl_binary_long { return ((duo & mask) | quo, duo >> shl); */ } - - /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a - /// tuple. - $( - #[$signed_attr] - )* - pub fn $signed_name(duo: $iX, div: $iX) -> ($iX, $iX) { - // There is a way of doing this without any branches, but requires too many extra - // operations to be faster. 
- /* - let duo_s = duo >> ($n - 1); - let div_s = div >> ($n - 1); - let duo = (duo ^ duo_s).wrapping_sub(duo_s); - let div = (div ^ div_s).wrapping_sub(div_s); - let quo_s = duo_s ^ div_s; - let rem_s = duo_s; - let tmp = $unsigned_name(duo as $uX, div as $uX); - ( - ((tmp.0 as $iX) ^ quo_s).wrapping_sub(quo_s), - ((tmp.1 as $iX) ^ rem_s).wrapping_sub(rem_s), - ) - */ - - match (duo < 0, div < 0) { - (false, false) => { - let t = $unsigned_name(duo as $uX, div as $uX); - (t.0 as $iX, t.1 as $iX) - }, - (true, false) => { - let t = $unsigned_name(duo.wrapping_neg() as $uX, div as $uX); - ((t.0 as $iX).wrapping_neg(), (t.1 as $iX).wrapping_neg()) - }, - (false, true) => { - let t = $unsigned_name(duo as $uX, div.wrapping_neg() as $uX); - ((t.0 as $iX).wrapping_neg(), t.1 as $iX) - }, - (true, true) => { - let t = $unsigned_name(duo.wrapping_neg() as $uX, div.wrapping_neg() as $uX); - (t.0 as $iX, (t.1 as $iX).wrapping_neg()) - }, - } - } - } + }; } diff --git a/src/int/specialized_div_rem/delegate.rs b/src/int/specialized_div_rem/delegate.rs index 1ba72431d..a74bdac02 100644 --- a/src/int/specialized_div_rem/delegate.rs +++ b/src/int/specialized_div_rem/delegate.rs @@ -1,29 +1,23 @@ -/// Creates unsigned and signed division functions that use a combination of hardware division and +/// Creates an unsigned division function that uses a combination of hardware division and /// binary long division to divide integers larger than what hardware division by itself can do. This /// function is intended for microarchitectures that have division hardware, but not fast enough /// multiplication hardware for `impl_trifecta` to be faster. #[macro_export] macro_rules! 
impl_delegate { ( - $unsigned_name:ident, // name of the unsigned division function - $signed_name:ident, // name of the signed division function + $fn:ident, // name of the unsigned division function $zero_div_fn:ident, // function called when division by zero is attempted $half_normalization_shift:ident, // function for finding the normalization shift of $uX $half_division:ident, // function for division of a $uX by a $uX $n_h:expr, // the number of bits in $iH or $uH $uH:ident, // unsigned integer with half the bit width of $uX $uX:ident, // unsigned integer with half the bit width of $uD. - $uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` - $iD:ident, // signed integer type for the inputs and outputs of `$signed_name` - $($unsigned_attr:meta),*; // attributes for the unsigned function - $($signed_attr:meta),* // attributes for the signed function + $uD:ident, // unsigned integer type for the inputs and outputs of `$fn` + $iD:ident // signed integer type with the same bitwidth as `$uD` ) => { /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a /// tuple. - $( - #[$unsigned_attr] - )* - pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD, $uD) { + pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) { // The two possibility algorithm, undersubtracting long division algorithm, or any kind // of reciprocal based algorithm will not be fastest, because they involve large // multiplications that we assume to not be fast enough relative to the divisions to @@ -38,17 +32,15 @@ macro_rules! 
impl_delegate { let div_hi = (div >> n) as $uX; match (div_lo == 0, div_hi == 0, duo_hi == 0) { - (true, true, _) => { - $zero_div_fn() - } + (true, true, _) => $zero_div_fn(), (_, false, true) => { // `duo` < `div` - return (0, duo) + return (0, duo); } (false, true, true) => { // delegate to smaller division let tmp = $half_division(duo_lo, div_lo); - return (tmp.0 as $uD, tmp.1 as $uD) + return (tmp.0 as $uD, tmp.1 as $uD); } (false, true, false) => { if duo_hi < div_lo { @@ -96,7 +88,7 @@ macro_rules! impl_delegate { // Delegate to get the rest of the quotient. Note that the // `div_lo` here is the original unshifted `div`. let tmp = $half_division(duo as $uX, div_lo); - return ((quo_lo | tmp.0) as $uD, tmp.1 as $uD) + return ((quo_lo | tmp.0) as $uD, tmp.1 as $uD); } } div >>= 1; @@ -105,7 +97,7 @@ macro_rules! impl_delegate { } else if duo_hi == div_lo { // `quo_hi == 1`. This branch is cheap and helps with edge cases. let tmp = $half_division(duo as $uX, div as $uX); - return ((1 << n) | (tmp.0 as $uD), tmp.1 as $uD) + return ((1 << n) | (tmp.0 as $uD), tmp.1 as $uD); } else { // `div_lo < duo_hi` // `rem_hi == 0` @@ -114,22 +106,16 @@ macro_rules! impl_delegate { let div_0 = div_lo as $uH as $uX; let (quo_hi, rem_3) = $half_division(duo_hi, div_0); - let duo_mid = - ((duo >> $n_h) as $uH as $uX) - | (rem_3 << $n_h); + let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h); let (quo_1, rem_2) = $half_division(duo_mid, div_0); - let duo_lo = - (duo as $uH as $uX) - | (rem_2 << $n_h); + let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h); let (quo_0, rem_1) = $half_division(duo_lo, div_0); return ( - (quo_0 as $uD) - | ((quo_1 as $uD) << $n_h) - | ((quo_hi as $uD) << n), - rem_1 as $uD - ) + (quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n), + rem_1 as $uD, + ); } // This is basically a short division composed of a half division for the hi @@ -161,7 +147,7 @@ macro_rules! 
impl_delegate { let tmp = $half_division(duo as $uX, div_lo); return ( (tmp.0) as $uD | (quo_lo as $uD) | ((quo_hi as $uD) << n), - tmp.1 as $uD + tmp.1 as $uD, ); } } @@ -187,7 +173,7 @@ macro_rules! impl_delegate { duo = sub; quo_lo |= pow_lo; if duo < div_original { - return (quo_lo as $uD, duo) + return (quo_lo as $uD, duo); } } div >>= 1; @@ -196,31 +182,5 @@ macro_rules! impl_delegate { } } } - - /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a - /// tuple. - $( - #[$signed_attr] - )* - pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) { - match (duo < 0, div < 0) { - (false, false) => { - let t = $unsigned_name(duo as $uD, div as $uD); - (t.0 as $iD, t.1 as $iD) - }, - (true, false) => { - let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD); - ((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg()) - }, - (false, true) => { - let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD); - ((t.0 as $iD).wrapping_neg(), t.1 as $iD) - }, - (true, true) => { - let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD); - (t.0 as $iD, (t.1 as $iD).wrapping_neg()) - }, - } - } - } + }; } diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs index f7dc044fa..3ac341b6f 100644 --- a/src/int/specialized_div_rem/mod.rs +++ b/src/int/specialized_div_rem/mod.rs @@ -111,13 +111,6 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) { zero_div_fn() } -// `inline(never)` is placed on unsigned division functions so that there are just three division -// functions (`u32_div_rem`, `u64_div_rem`, and `u128_div_rem`) backing all `compiler-builtins` -// division functions. The signed functions like `i32_div_rem` will get inlined into the -// `compiler-builtins` signed division functions, so that they directly call the three division -// functions. 
Otherwise, LLVM may try to inline the unsigned division functions 4 times into the -// signed division functions, which results in an explosion in code size. - // Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a // microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is // faster if the target pointer width is at least 64. @@ -127,16 +120,12 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) { ))] impl_trifecta!( u128_div_rem, - i128_div_rem, zero_div_fn, u64_by_u64_div_rem, 32, u32, u64, - u128, - i128, - inline(never); - inline + u128 ); // If the pointer width less than 64, then the target architecture almost certainly does not have @@ -147,7 +136,6 @@ impl_trifecta!( ))] impl_delegate!( u128_div_rem, - i128_div_rem, zero_div_fn, u64_normalization_shift, u64_by_u64_div_rem, @@ -155,9 +143,7 @@ impl_delegate!( u32, u64, u128, - i128, - inline(never); - inline + i128 ); /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. @@ -191,17 +177,13 @@ unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) { #[cfg(all(feature = "asm", target_arch = "x86_64"))] impl_asymmetric!( u128_div_rem, - i128_div_rem, zero_div_fn, u64_by_u64_div_rem, u128_by_u64_div_rem, 32, u32, u64, - u128, - i128, - inline(never); - inline + u128 ); /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. @@ -226,7 +208,6 @@ fn u32_by_u32_div_rem(duo: u32, div: u32) -> (u32, u32) { ))] impl_delegate!( u64_div_rem, - i64_div_rem, zero_div_fn, u32_normalization_shift, u32_by_u32_div_rem, @@ -234,9 +215,7 @@ impl_delegate!( u16, u32, u64, - i64, - inline(never); - inline + i64 ); // When not on x86 and the pointer width is 64, use `binary_long`. 
@@ -246,14 +225,11 @@ impl_delegate!( ))] impl_binary_long!( u64_div_rem, - i64_div_rem, zero_div_fn, u64_normalization_shift, 64, u64, - i64, - inline(never); - inline + i64 ); /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. @@ -287,28 +263,21 @@ unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) { #[cfg(all(feature = "asm", target_arch = "x86"))] impl_asymmetric!( u64_div_rem, - i64_div_rem, zero_div_fn, u32_by_u32_div_rem, u64_by_u32_div_rem, 16, u16, u32, - u64, - i64, - inline(never); - inline + u64 ); // 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division impl_binary_long!( u32_div_rem, - i32_div_rem, zero_div_fn, u32_normalization_shift, 32, u32, - i32, - inline(never); - inline + i32 ); diff --git a/src/int/specialized_div_rem/trifecta.rs b/src/int/specialized_div_rem/trifecta.rs index e76516f34..65ce1c3f0 100644 --- a/src/int/specialized_div_rem/trifecta.rs +++ b/src/int/specialized_div_rem/trifecta.rs @@ -1,28 +1,21 @@ -/// Creates unsigned and signed division functions optimized for division of integers with bitwidths +/// Creates an unsigned division function optimized for division of integers with bitwidths /// larger than the largest hardware integer division supported. These functions use large radix /// division algorithms that require both fast division and very fast widening multiplication on the /// target microarchitecture. Otherwise, `impl_delegate` should be used instead. #[macro_export] macro_rules! 
impl_trifecta { ( - $unsigned_name:ident, // name of the unsigned division function - $signed_name:ident, // name of the signed division function + $fn:ident, // name of the unsigned division function $zero_div_fn:ident, // function called when division by zero is attempted $half_division:ident, // function for division of a $uX by a $uX $n_h:expr, // the number of bits in $iH or $uH $uH:ident, // unsigned integer with half the bit width of $uX $uX:ident, // unsigned integer with half the bit width of $uD - $uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` - $iD:ident, // signed integer type for the inputs and outputs of `$signed_name` - $($unsigned_attr:meta),*; // attributes for the unsigned function - $($signed_attr:meta),* // attributes for the signed function + $uD:ident // unsigned integer type for the inputs and outputs of `$unsigned_name` ) => { /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a /// tuple. - $( - #[$unsigned_attr] - )* - pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD, $uD) { + pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) { // This is called the trifecta algorithm because it uses three main algorithms: short // division for small divisors, the two possibility algorithm for large divisors, and an // undersubtracting long division algorithm for intermediate cases. @@ -34,7 +27,9 @@ macro_rules! impl_trifecta { (tmp as $uX, (tmp >> ($n_h * 2)) as $uX) } fn carrying_mul_add(lhs: $uX, mul: $uX, add: $uX) -> ($uX, $uX) { - let tmp = (lhs as $uD).wrapping_mul(mul as $uD).wrapping_add(add as $uD); + let tmp = (lhs as $uD) + .wrapping_mul(mul as $uD) + .wrapping_add(add as $uD); (tmp as $uX, (tmp >> ($n_h * 2)) as $uX) } @@ -62,9 +57,9 @@ macro_rules! impl_trifecta { // The quotient cannot be more than 1. The highest set bit of `duo` needs to be at // least one place higher than `div` for the quotient to be more than 1. 
if duo >= div { - return (1, duo - div) + return (1, duo - div); } else { - return (0, duo) + return (0, duo); } } @@ -76,10 +71,7 @@ macro_rules! impl_trifecta { // `duo < 2^n` so it will fit in a $uX. `div` will also fit in a $uX (because of the // `div_lz <= duo_lz` branch) so no numerical error. let (quo, rem) = $half_division(duo as $uX, div as $uX); - return ( - quo as $uD, - rem as $uD - ) + return (quo as $uD, rem as $uD); } // `{2^n, 2^div_sb} <= duo < 2^n_d` @@ -99,22 +91,16 @@ macro_rules! impl_trifecta { let div_0 = div as $uH as $uX; let (quo_hi, rem_3) = $half_division(duo_hi, div_0); - let duo_mid = - ((duo >> $n_h) as $uH as $uX) - | (rem_3 << $n_h); + let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h); let (quo_1, rem_2) = $half_division(duo_mid, div_0); - let duo_lo = - (duo as $uH as $uX) - | (rem_2 << $n_h); + let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h); let (quo_0, rem_1) = $half_division(duo_lo, div_0); return ( - (quo_0 as $uD) - | ((quo_1 as $uD) << $n_h) - | ((quo_hi as $uD) << n), - rem_1 as $uD - ) + (quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n), + rem_1 as $uD, + ); } // relative leading significant bits, cannot overflow because of above branches @@ -237,13 +223,10 @@ macro_rules! impl_trifecta { (quo - 1) as $uD, // Both the addition and subtraction can overflow, but when combined end up // as a correct positive number. - duo.wrapping_add(div).wrapping_sub(tmp) - ) + duo.wrapping_add(div).wrapping_sub(tmp), + ); } else { - return ( - quo as $uD, - duo - tmp - ) + return (quo as $uD, duo - tmp); } } @@ -372,13 +355,10 @@ macro_rules! impl_trifecta { if duo < tmp { return ( quo + ((quo_part - 1) as $uD), - duo.wrapping_add(div).wrapping_sub(tmp) - ) + duo.wrapping_add(div).wrapping_sub(tmp), + ); } else { - return ( - quo + (quo_part as $uD), - duo - tmp - ) + return (quo + (quo_part as $uD), duo - tmp); } } @@ -387,15 +367,9 @@ macro_rules! 
impl_trifecta { if div_lz <= duo_lz { // quotient can have 0 or 1 added to it if div <= duo { - return ( - quo + 1, - duo - div - ) + return (quo + 1, duo - div); } else { - return ( - quo, - duo - ) + return (quo, duo); } } @@ -404,38 +378,9 @@ macro_rules! impl_trifecta { if n <= duo_lz { // simple division and addition let tmp = $half_division(duo as $uX, div as $uX); - return ( - quo + (tmp.0 as $uD), - tmp.1 as $uD - ) + return (quo + (tmp.0 as $uD), tmp.1 as $uD); } } } - - /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a - /// tuple. - $( - #[$signed_attr] - )* - pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) { - match (duo < 0, div < 0) { - (false, false) => { - let t = $unsigned_name(duo as $uD, div as $uD); - (t.0 as $iD, t.1 as $iD) - }, - (true, false) => { - let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD); - ((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg()) - }, - (false, true) => { - let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD); - ((t.0 as $iD).wrapping_neg(), t.1 as $iD) - }, - (true, true) => { - let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD); - (t.0 as $iD, (t.1 as $iD).wrapping_neg()) - }, - } - } - } + }; } From d65785b8eebf0ee4b7af13e5120544481b4b9d37 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Fri, 2 Oct 2020 23:26:12 -0500 Subject: [PATCH 0392/1459] Hide macros and functions These macros and functions are only in the public interface for testing purposes or because of `#[macro_export]` pollution --- src/int/leading_zeros.rs | 2 ++ src/int/specialized_div_rem/asymmetric.rs | 1 + src/int/specialized_div_rem/binary_long.rs | 1 + src/int/specialized_div_rem/delegate.rs | 1 + src/int/specialized_div_rem/norm_shift.rs | 1 + src/int/specialized_div_rem/trifecta.rs | 1 + 6 files changed, 7 insertions(+) diff --git a/src/int/leading_zeros.rs b/src/int/leading_zeros.rs index 78556f0bc..e4a9e5eb2 100644 --- a/src/int/leading_zeros.rs 
+++ b/src/int/leading_zeros.rs @@ -4,6 +4,7 @@ // Compilers will insert the check for zero in cases where it is needed. /// Returns the number of leading binary zeros in `x`. +#[doc(hidden)] pub fn usize_leading_zeros_default(x: usize) -> usize { // The basic idea is to test if the higher bits of `x` are zero and bisect the number // of leading zeros. It is possible for all branches of the bisection to use the same @@ -75,6 +76,7 @@ pub fn usize_leading_zeros_default(x: usize) -> usize { // RISC-V that allows `(x >= power-of-two) as usize` to be branchless. /// Returns the number of leading binary zeros in `x`. +#[doc(hidden)] pub fn usize_leading_zeros_riscv(x: usize) -> usize { let mut x = x; // the number of potential leading zeros diff --git a/src/int/specialized_div_rem/asymmetric.rs b/src/int/specialized_div_rem/asymmetric.rs index bbb77722d..45da657e9 100644 --- a/src/int/specialized_div_rem/asymmetric.rs +++ b/src/int/specialized_div_rem/asymmetric.rs @@ -3,6 +3,7 @@ /// assembly instruction that can divide a 128 bit integer by a 64 bit integer if the quotient fits /// in 64 bits. The 128 bit version of this algorithm would use that fast hardware division to /// construct a full 128 bit by 128 bit division. +#[doc(hidden)] #[macro_export] macro_rules! impl_asymmetric { ( diff --git a/src/int/specialized_div_rem/binary_long.rs b/src/int/specialized_div_rem/binary_long.rs index 0f5e870b0..7de10e852 100644 --- a/src/int/specialized_div_rem/binary_long.rs +++ b/src/int/specialized_div_rem/binary_long.rs @@ -4,6 +4,7 @@ /// predicate instructions. For architectures with predicated instructions, one of the algorithms /// described in the documentation of these functions probably has higher performance, and a custom /// assembly routine should be used instead. +#[doc(hidden)] #[macro_export] macro_rules! 
impl_binary_long { ( diff --git a/src/int/specialized_div_rem/delegate.rs b/src/int/specialized_div_rem/delegate.rs index a74bdac02..8310c1429 100644 --- a/src/int/specialized_div_rem/delegate.rs +++ b/src/int/specialized_div_rem/delegate.rs @@ -2,6 +2,7 @@ /// binary long division to divide integers larger than what hardware division by itself can do. This /// function is intended for microarchitectures that have division hardware, but not fast enough /// multiplication hardware for `impl_trifecta` to be faster. +#[doc(hidden)] #[macro_export] macro_rules! impl_delegate { ( diff --git a/src/int/specialized_div_rem/norm_shift.rs b/src/int/specialized_div_rem/norm_shift.rs index 33348b373..be95d1b92 100644 --- a/src/int/specialized_div_rem/norm_shift.rs +++ b/src/int/specialized_div_rem/norm_shift.rs @@ -1,4 +1,5 @@ /// Creates a function used by some division algorithms to compute the "normalization shift". +#[doc(hidden)] #[macro_export] macro_rules! impl_normalization_shift { ( diff --git a/src/int/specialized_div_rem/trifecta.rs b/src/int/specialized_div_rem/trifecta.rs index 65ce1c3f0..a9ea60301 100644 --- a/src/int/specialized_div_rem/trifecta.rs +++ b/src/int/specialized_div_rem/trifecta.rs @@ -2,6 +2,7 @@ /// larger than the largest hardware integer division supported. These functions use large radix /// division algorithms that require both fast division and very fast widening multiplication on the /// target microarchitecture. Otherwise, `impl_delegate` should be used instead. +#[doc(hidden)] #[macro_export] macro_rules! 
impl_trifecta { ( From bc235bccb9a34a7f9ceca226981922b0c31780e4 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 15 Oct 2020 00:10:38 +0800 Subject: [PATCH 0393/1459] Use weak linkage for aeabi memory functions (#385) --- src/arm.rs | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/arm.rs b/src/arm.rs index 190bba726..3a2506091 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -138,18 +138,20 @@ pub unsafe fn __aeabi_ldivmod() { intrinsics::unreachable(); } +// The following functions use weak linkage to allow users to override +// with custom implementation. // FIXME: The `*4` and `*8` variants should be defined as aliases. #[cfg(not(target_os = "ios"))] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[cfg_attr(thumb, linkage = "weak")] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) { ::mem::memcpy(dest, src, n); } #[cfg(not(target_os = "ios"))] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[cfg_attr(thumb, linkage = "weak")] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, mut n: usize) { // We are guaranteed 4-alignment, so accessing at u32 is okay. 
let mut dest = dest as *mut u32; @@ -167,35 +169,35 @@ pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, mut #[cfg(not(target_os = "ios"))] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[cfg_attr(thumb, linkage = "weak")] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) { __aeabi_memcpy4(dest, src, n); } #[cfg(not(target_os = "ios"))] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[cfg_attr(thumb, linkage = "weak")] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) { ::mem::memmove(dest, src, n); } #[cfg(not(any(target_os = "ios", target_env = "msvc")))] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[cfg_attr(thumb, linkage = "weak")] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) { __aeabi_memmove(dest, src, n); } #[cfg(not(any(target_os = "ios", target_env = "msvc")))] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[cfg_attr(thumb, linkage = "weak")] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) { __aeabi_memmove(dest, src, n); } #[cfg(not(target_os = "ios"))] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[cfg_attr(thumb, linkage = "weak")] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { // Note the different argument order ::mem::memset(dest, c, n); @@ -203,7 +205,7 @@ pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { #[cfg(not(target_os = "ios"))] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[cfg_attr(thumb, linkage = "weak")] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, mut n: usize, c: i32) { let mut dest = dest as *mut u32; @@ -221,28 +223,28 @@ pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, mut n: usize, c: 
i32 #[cfg(not(target_os = "ios"))] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[cfg_attr(thumb, linkage = "weak")] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) { __aeabi_memset4(dest, n, c); } #[cfg(not(target_os = "ios"))] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[cfg_attr(thumb, linkage = "weak")] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) { __aeabi_memset(dest, n, 0); } #[cfg(not(any(target_os = "ios", target_env = "msvc")))] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[cfg_attr(thumb, linkage = "weak")] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) { __aeabi_memset4(dest, n, 0); } #[cfg(not(any(target_os = "ios", target_env = "msvc")))] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[cfg_attr(thumb, linkage = "weak")] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { __aeabi_memset4(dest, n, 0); } From 33ad3669db0ac78a917aee787304d1391b6dc676 Mon Sep 17 00:00:00 2001 From: Joseph Richey Date: Sat, 24 Oct 2020 08:58:04 -0700 Subject: [PATCH 0394/1459] Use REP MOVSQ/STOSQ on x86_64 (#365) * mem: Move mem* functions to separate directory Signed-off-by: Joe Richey * memcpy: Create separate memcpy.rs file Signed-off-by: Joe Richey * benches: Add benchmarks for mem* functions This allows comparing the "normal" implementations to the implementations provided by this crate. 
Signed-off-by: Joe Richey * mem: Add REP MOVSB/STOSB implementations The assembly generated seems correct: https://rust.godbolt.org/z/GGnec8 Signed-off-by: Joe Richey * mem: Add documentation for REP string instructions Signed-off-by: Joe Richey * Use quad-word rep string instructions Signed-off-by: Joe Richey * Prevent panic when compiled in debug mode Signed-off-by: Joe Richey * Add tests for mem* functions Signed-off-by: Joe Richey * Add build/test with the "asm" feature Signed-off-by: Joe Richey * Add byte length to Bencher Signed-off-by: Joe Richey --- ci/run.sh | 4 + src/mem/memcpy.rs | 41 ++++++++++ src/{mem.rs => mem/mod.rs} | 43 +--------- src/mem/x86_64.rs | 79 ++++++++++++++++++ testcrate/benches/mem.rs | 162 +++++++++++++++++++++++++++++++++++++ testcrate/tests/mem.rs | 133 ++++++++++++++++++++++++++++++ 6 files changed, 423 insertions(+), 39 deletions(-) create mode 100644 src/mem/memcpy.rs rename src/{mem.rs => mem/mod.rs} (84%) create mode 100644 src/mem/x86_64.rs create mode 100644 testcrate/benches/mem.rs create mode 100644 testcrate/tests/mem.rs diff --git a/ci/run.sh b/ci/run.sh index 3c9dc0247..9d1632333 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -12,12 +12,16 @@ else $run --release $run --features c $run --features c --release + $run --features asm + $run --features asm --release fi cargo build --target $1 cargo build --target $1 --release cargo build --target $1 --features c cargo build --target $1 --release --features c +cargo build --target $1 --features asm +cargo build --target $1 --release --features asm PREFIX=$(echo $1 | sed -e 's/unknown-//')- case $1 in diff --git a/src/mem/memcpy.rs b/src/mem/memcpy.rs new file mode 100644 index 000000000..8fada9bca --- /dev/null +++ b/src/mem/memcpy.rs @@ -0,0 +1,41 @@ +use super::c_int; + +#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] +pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { + let mut i = 0; + while i < n { +
*dest.offset(i as isize) = *src.offset(i as isize); + i += 1; + } + dest +} + +#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] +pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { + if src < dest as *const u8 { + // copy from end + let mut i = n; + while i != 0 { + i -= 1; + *dest.offset(i as isize) = *src.offset(i as isize); + } + } else { + // copy from beginning + let mut i = 0; + while i < n { + *dest.offset(i as isize) = *src.offset(i as isize); + i += 1; + } + } + dest +} + +#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] +pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 { + let mut i = 0; + while i < n { + *s.offset(i as isize) = c as u8; + i += 1; + } + s +} diff --git a/src/mem.rs b/src/mem/mod.rs similarity index 84% rename from src/mem.rs rename to src/mem/mod.rs index 24552ed85..aa9d4b61d 100644 --- a/src/mem.rs +++ b/src/mem/mod.rs @@ -9,45 +9,10 @@ use core::intrinsics::{atomic_load_unordered, atomic_store_unordered, exact_div} use core::mem; use core::ops::{BitOr, Shl}; -#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] -pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { - let mut i = 0; - while i < n { - *dest.offset(i as isize) = *src.offset(i as isize); - i += 1; - } - dest -} - -#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] -pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { - if src < dest as *const u8 { - // copy from end - let mut i = n; - while i != 0 { - i -= 1; - *dest.offset(i as isize) = *src.offset(i as isize); - } - } else { - // copy from beginning - let mut i = 0; - while i < n { - *dest.offset(i as isize) = *src.offset(i as isize); - i += 1; - } - } - dest -} - -#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] -pub unsafe extern "C" fn memset(s: *mut u8, c: 
c_int, n: usize) -> *mut u8 { - let mut i = 0; - while i < n { - *s.offset(i as isize) = c as u8; - i += 1; - } - s -} +// memcpy/memmove/memset have optimized implementations on some architectures +#[cfg_attr(all(feature = "asm", target_arch = "x86_64"), path = "x86_64.rs")] +mod memcpy; +pub use self::memcpy::*; #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs new file mode 100644 index 000000000..1ecffce45 --- /dev/null +++ b/src/mem/x86_64.rs @@ -0,0 +1,79 @@ +use super::c_int; + +// On most modern Intel and AMD processors, "rep movsq" and "rep stosq" have +// been enhanced to perform better than an simple qword loop, making them ideal +// for implementing memcpy/memset. Note that "rep cmps" has received no such +// enhancement, so it is not used to implement memcmp. +// +// On certain recent Intel processors, "rep movsb" and "rep stosb" have been +// further enhanced to automatically select the best microarchitectural +// implementation based on length and alignment. See the following features from +// the "Intel® 64 and IA-32 Architectures Optimization Reference Manual": +// - ERMSB - Enhanced REP MOVSB and STOSB (Ivy Bridge and later) +// - FSRM - Fast Short REP MOV (Ice Lake and later) +// - Fast Zero-Length MOVSB (On no current hardware) +// - Fast Short STOSB (On no current hardware) +// However, to avoid run-time feature detection, we don't use these byte-based +// instructions for most of the copying, preferring the qword variants. 
+ +#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] +pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, count: usize) -> *mut u8 { + let qword_count = count >> 3; + let byte_count = count & 0b111; + asm!( + "rep movsq [rdi], [rsi]", + "mov ecx, {byte_count:e}", + "rep movsb [rdi], [rsi]", + byte_count = in(reg) byte_count, + inout("rcx") qword_count => _, + inout("rdi") dest => _, + inout("rsi") src => _, + options(nostack, preserves_flags) + ); + dest +} + +#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] +pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, count: usize) -> *mut u8 { + let delta = (dest as usize).wrapping_sub(src as usize); + if delta >= count { + // We can copy forwards because either dest is far enough ahead of src, + // or src is ahead of dest (and delta overflowed). + return self::memcpy(dest, src, count); + } + // copy backwards + let qword_count = count >> 3; + let byte_count = count & 0b111; + asm!( + "std", + "rep movsq [rdi], [rsi]", + "mov ecx, {byte_count:e}", + "add rdi, 7", + "add rsi, 7", + "rep movsb [rdi], [rsi]", + "cld", + byte_count = in(reg) byte_count, + inout("rcx") qword_count => _, + inout("rdi") dest.offset(count as isize).wrapping_sub(8) => _, + inout("rsi") src.offset(count as isize).wrapping_sub(8) => _, + options(nostack) + ); + dest +} + +#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] +pub unsafe extern "C" fn memset(dest: *mut u8, c: c_int, count: usize) -> *mut u8 { + let qword_count = count >> 3; + let byte_count = count & 0b111; + asm!( + "rep stosq [rdi], rax", + "mov ecx, {byte_count:e}", + "rep stosb [rdi], al", + byte_count = in(reg) byte_count, + inout("rcx") qword_count => _, + inout("rdi") dest => _, + in("rax") (c as u8 as u64) * 0x0101010101010101, + options(nostack, preserves_flags) + ); + dest +} diff --git a/testcrate/benches/mem.rs b/testcrate/benches/mem.rs new file mode 100644 index 
000000000..57d575086 --- /dev/null +++ b/testcrate/benches/mem.rs @@ -0,0 +1,162 @@ +#![feature(test)] + +extern crate test; +use test::{black_box, Bencher}; + +extern crate compiler_builtins; +use compiler_builtins::mem::{memcmp, memcpy, memmove, memset}; + +fn memcpy_builtin(b: &mut Bencher, n: usize) { + let v1 = vec![1u8; n]; + let mut v2 = vec![0u8; n]; + b.bytes = n as u64; + b.iter(|| { + let src: &[u8] = black_box(&v1); + let dst: &mut [u8] = black_box(&mut v2); + dst.copy_from_slice(src); + }) +} + +fn memcpy_rust(b: &mut Bencher, n: usize) { + let v1 = vec![1u8; n]; + let mut v2 = vec![0u8; n]; + b.bytes = n as u64; + b.iter(|| { + let src: &[u8] = black_box(&v1); + let dst: &mut [u8] = black_box(&mut v2); + unsafe { memcpy(dst.as_mut_ptr(), src.as_ptr(), n) } + }) +} + +fn memset_builtin(b: &mut Bencher, n: usize) { + let mut v1 = vec![0u8; n]; + b.bytes = n as u64; + b.iter(|| { + let dst: &mut [u8] = black_box(&mut v1); + let val: u8 = black_box(27); + for b in dst { + *b = val; + } + }) +} + +fn memset_rust(b: &mut Bencher, n: usize) { + let mut v1 = vec![0u8; n]; + b.bytes = n as u64; + b.iter(|| { + let dst: &mut [u8] = black_box(&mut v1); + let val = black_box(27); + unsafe { memset(dst.as_mut_ptr(), val, n) } + }) +} + +fn memcmp_builtin(b: &mut Bencher, n: usize) { + let v1 = vec![0u8; n]; + let mut v2 = vec![0u8; n]; + v2[n - 1] = 1; + b.bytes = n as u64; + b.iter(|| { + let s1: &[u8] = black_box(&v1); + let s2: &[u8] = black_box(&v2); + s1.cmp(s2) + }) +} + +fn memcmp_rust(b: &mut Bencher, n: usize) { + let v1 = vec![0u8; n]; + let mut v2 = vec![0u8; n]; + v2[n - 1] = 1; + b.bytes = n as u64; + b.iter(|| { + let s1: &[u8] = black_box(&v1); + let s2: &[u8] = black_box(&v2); + unsafe { memcmp(s1.as_ptr(), s2.as_ptr(), n) } + }) +} + +fn memmove_builtin(b: &mut Bencher, n: usize) { + let mut v = vec![0u8; n + n / 2]; + b.bytes = n as u64; + b.iter(|| { + let s: &mut [u8] = black_box(&mut v); + s.copy_within(0..n, n / 2); + }) +} + +fn 
memmove_rust(b: &mut Bencher, n: usize) { + let mut v = vec![0u8; n + n / 2]; + b.bytes = n as u64; + b.iter(|| { + let dst: *mut u8 = black_box(&mut v[n / 2..]).as_mut_ptr(); + let src: *const u8 = black_box(&v).as_ptr(); + unsafe { memmove(dst, src, n) }; + }) +} + +#[bench] +fn memcpy_builtin_4096(b: &mut Bencher) { + memcpy_builtin(b, 4096) +} +#[bench] +fn memcpy_rust_4096(b: &mut Bencher) { + memcpy_rust(b, 4096) +} +#[bench] +fn memcpy_builtin_1048576(b: &mut Bencher) { + memcpy_builtin(b, 1048576) +} +#[bench] +fn memcpy_rust_1048576(b: &mut Bencher) { + memcpy_rust(b, 1048576) +} + +#[bench] +fn memset_builtin_4096(b: &mut Bencher) { + memset_builtin(b, 4096) +} +#[bench] +fn memset_rust_4096(b: &mut Bencher) { + memset_rust(b, 4096) +} +#[bench] +fn memset_builtin_1048576(b: &mut Bencher) { + memset_builtin(b, 1048576) +} +#[bench] +fn memset_rust_1048576(b: &mut Bencher) { + memset_rust(b, 1048576) +} + +#[bench] +fn memcmp_builtin_4096(b: &mut Bencher) { + memcmp_builtin(b, 4096) +} +#[bench] +fn memcmp_rust_4096(b: &mut Bencher) { + memcmp_rust(b, 4096) +} +#[bench] +fn memcmp_builtin_1048576(b: &mut Bencher) { + memcmp_builtin(b, 1048576) +} +#[bench] +fn memcmp_rust_1048576(b: &mut Bencher) { + memcmp_rust(b, 1048576) +} + +#[bench] +fn memmove_builtin_4096(b: &mut Bencher) { + memmove_builtin(b, 4096) +} +#[bench] +fn memmove_rust_4096(b: &mut Bencher) { + memmove_rust(b, 4096) +} +#[bench] +fn memmove_builtin_1048576(b: &mut Bencher) { + memmove_builtin(b, 1048576) +} +#[bench] +fn memmove_rust_1048576(b: &mut Bencher) { + memmove_rust(b, 1048576) +} diff --git a/testcrate/tests/mem.rs b/testcrate/tests/mem.rs new file mode 100644 index 000000000..a5596b281 --- /dev/null +++ b/testcrate/tests/mem.rs @@ -0,0 +1,133 @@ +extern crate compiler_builtins; +use compiler_builtins::mem::{memcmp, memcpy, memmove, memset}; + +#[test] +fn memcpy_3() { + let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; + unsafe { + let src = 
arr.as_ptr().offset(9); + let dst = arr.as_mut_ptr().offset(1); + assert_eq!(memcpy(dst, src, 3), dst); + assert_eq!(arr, [0, 9, 10, 11, 4, 5, 6, 7, 8, 9, 10, 11]); + } + arr = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; + unsafe { + let src = arr.as_ptr().offset(1); + let dst = arr.as_mut_ptr().offset(9); + assert_eq!(memcpy(dst, src, 3), dst); + assert_eq!(arr, [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3]); + } +} + +#[test] +fn memcpy_10() { + let arr: [u8; 18] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]; + let mut dst: [u8; 12] = [0; 12]; + unsafe { + let src = arr.as_ptr().offset(1); + assert_eq!(memcpy(dst.as_mut_ptr(), src, 10), dst.as_mut_ptr()); + assert_eq!(dst, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0]); + } + unsafe { + let src = arr.as_ptr().offset(8); + assert_eq!(memcpy(dst.as_mut_ptr(), src, 10), dst.as_mut_ptr()); + assert_eq!(dst, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 0, 0]); + } +} + +#[test] +fn memcpy_big() { + // Make the arrays cross 3 pages + const SIZE: usize = 8193; + let src: [u8; SIZE] = [22; SIZE]; + struct Dst { + start: usize, + buf: [u8; SIZE], + end: usize, + } + + let mut dst = Dst { + start: 0, + buf: [0; SIZE], + end: 0, + }; + unsafe { + assert_eq!( + memcpy(dst.buf.as_mut_ptr(), src.as_ptr(), SIZE), + dst.buf.as_mut_ptr() + ); + assert_eq!(dst.start, 0); + assert_eq!(dst.buf, [22; SIZE]); + assert_eq!(dst.end, 0); + } +} + +#[test] +fn memmove_forward() { + let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; + unsafe { + let src = arr.as_ptr().offset(6); + let dst = arr.as_mut_ptr().offset(3); + assert_eq!(memmove(dst, src, 5), dst); + assert_eq!(arr, [0, 1, 2, 6, 7, 8, 9, 10, 8, 9, 10, 11]); + } +} + +#[test] +fn memmove_backward() { + let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; + unsafe { + let src = arr.as_ptr().offset(3); + let dst = arr.as_mut_ptr().offset(6); + assert_eq!(memmove(dst, src, 5), dst); + assert_eq!(arr, [0, 1, 2, 3, 4, 5, 3, 4, 5, 6, 7, 11]); + } +} + +#[test] +fn 
memset_zero() { + let mut arr: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + unsafe { + let ptr = arr.as_mut_ptr().offset(5); + assert_eq!(memset(ptr, 0, 2), ptr); + assert_eq!(arr, [0, 1, 2, 3, 4, 0, 0, 7]); + + // Only the LSB matters for a memset + assert_eq!(memset(arr.as_mut_ptr(), 0x2000, 8), arr.as_mut_ptr()); + assert_eq!(arr, [0, 0, 0, 0, 0, 0, 0, 0]); + } +} + +#[test] +fn memset_nonzero() { + let mut arr: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + unsafe { + let ptr = arr.as_mut_ptr().offset(2); + assert_eq!(memset(ptr, 22, 3), ptr); + assert_eq!(arr, [0, 1, 22, 22, 22, 5, 6, 7]); + + // Only the LSB matters for a memset + assert_eq!(memset(arr.as_mut_ptr(), 0x2009, 8), arr.as_mut_ptr()); + assert_eq!(arr, [9, 9, 9, 9, 9, 9, 9, 9]); + } +} + +#[test] +fn memcmp_eq() { + let arr1: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + let arr2: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + unsafe { + assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 8), 0); + assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 3), 0); + } +} + +#[test] +fn memcmp_ne() { + let arr1: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + let arr2: [u8; 8] = [0, 1, 2, 3, 4, 5, 7, 7]; + unsafe { + assert!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 8) < 0); + assert!(memcmp(arr2.as_ptr(), arr1.as_ptr(), 8) > 0); + } +} From 0f2271e566f78dbe7aa3441c9e497c655592acef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Mon, 26 Oct 2020 17:37:02 +0100 Subject: [PATCH 0395/1459] math: add {fmin,fmax}{f,} for thumb*-none-eabi* (#389) These are exposed in core::f32 close #354 c.f. rust-lang/rust#62729 Patch from @whitequark (https://paste.debian.net/1168430/) --- src/math.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/math.rs b/src/math.rs index 4b27cb80f..94f91124b 100644 --- a/src/math.rs +++ b/src/math.rs @@ -92,6 +92,10 @@ no_mangle! { // only for the thumb*-none-eabi* targets #[cfg(all(target_arch = "arm", target_os = "none"))] no_mangle! 
{ + fn fmin(x: f64, y: f64) -> f64; + fn fminf(x: f32, y: f32) -> f32; + fn fmax(x: f64, y: f64) -> f64; + fn fmaxf(x: f32, y: f32) -> f32; // `f64 % f64` fn fmod(x: f64, y: f64) -> f64; // `f32 % f32` From 63c0091a6132a5b732ca425e816e67a66b004754 Mon Sep 17 00:00:00 2001 From: Joseph Richey Date: Tue, 3 Nov 2020 06:57:08 -0800 Subject: [PATCH 0396/1459] Use REP MOVSB/STOSB when the ERMSB feature is present (#392) * Reorganize mem functions This reduces the amount of platform-specific code Signed-off-by: Joe Richey * Use ERMSB implementations if the feature is set Signed-off-by: Joe Richey * Add non-aligned benchmarks Signed-off-by: Joe Richey --- src/mem/impls.rs | 29 +++++++++++++++ src/mem/memcpy.rs | 41 -------------------- src/mem/mod.rs | 28 +++++++++++++- src/mem/x86_64.rs | 58 ++++++++++++++++++----------- testcrate/benches/mem.rs | 80 ++++++++++++++++++++++++++++------------ 5 files changed, 148 insertions(+), 88 deletions(-) create mode 100644 src/mem/impls.rs delete mode 100644 src/mem/memcpy.rs diff --git a/src/mem/impls.rs b/src/mem/impls.rs new file mode 100644 index 000000000..6bd1a7ba1 --- /dev/null +++ b/src/mem/impls.rs @@ -0,0 +1,29 @@ +use super::c_int; + +#[inline(always)] +pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, n: usize) { + let mut i = 0; + while i < n { + *dest.offset(i as isize) = *src.offset(i as isize); + i += 1; + } +} + +#[inline(always)] +pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, n: usize) { + // copy from end + let mut i = n; + while i != 0 { + i -= 1; + *dest.offset(i as isize) = *src.offset(i as isize); + } +} + +#[inline(always)] +pub unsafe fn set_bytes(s: *mut u8, c: u8, n: usize) { + let mut i = 0; + while i < n { + *s.offset(i as isize) = c; + i += 1; + } +} diff --git a/src/mem/memcpy.rs b/src/mem/memcpy.rs deleted file mode 100644 index 8fada9bca..000000000 --- a/src/mem/memcpy.rs +++ /dev/null @@ -1,41 +0,0 @@ -use super::c_int; - -#[cfg_attr(all(feature = "mem", not(feature = 
"mangled-names")), no_mangle)] -pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { - let mut i = 0; - while i < n { - *dest.offset(i as isize) = *src.offset(i as isize); - i += 1; - } - dest -} - -#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] -pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { - if src < dest as *const u8 { - // copy from end - let mut i = n; - while i != 0 { - i -= 1; - *dest.offset(i as isize) = *src.offset(i as isize); - } - } else { - // copy from beginning - let mut i = 0; - while i < n { - *dest.offset(i as isize) = *src.offset(i as isize); - i += 1; - } - } - dest -} - -#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] -pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 { - let mut i = 0; - while i < n { - *s.offset(i as isize) = c as u8; - i += 1; - } - s -} diff --git a/src/mem/mod.rs b/src/mem/mod.rs index aa9d4b61d..6bc76337c 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -11,8 +11,32 @@ use core::ops::{BitOr, Shl}; // memcpy/memmove/memset have optimized implementations on some architectures #[cfg_attr(all(feature = "asm", target_arch = "x86_64"), path = "x86_64.rs")] -mod memcpy; -pub use self::memcpy::*; +mod impls; + +#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] +pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { + impls::copy_forward(dest, src, n); + dest +} + +#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] +pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { + let delta = (dest as usize).wrapping_sub(src as usize); + if delta >= n { + // We can copy forwards because either dest is far enough ahead of src, + // or src is ahead of dest (and delta overflowed). 
+ impls::copy_forward(dest, src, n); + } else { + impls::copy_backward(dest, src, n); + } + dest +} + +#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] +pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 { + impls::set_bytes(s, c as u8, n); + s +} #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 1ecffce45..7eefd8099 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -1,5 +1,3 @@ -use super::c_int; - // On most modern Intel and AMD processors, "rep movsq" and "rep stosq" have // been enhanced to perform better than an simple qword loop, making them ideal // for implementing memcpy/memset. Note that "rep cmps" has received no such @@ -13,11 +11,26 @@ use super::c_int; // - FSRM - Fast Short REP MOV (Ice Lake and later) // - Fast Zero-Length MOVSB (On no current hardware) // - Fast Short STOSB (On no current hardware) -// However, to avoid run-time feature detection, we don't use these byte-based -// instructions for most of the copying, preferring the qword variants. +// +// To simplify things, we switch to using the byte-based variants if the "ermsb" +// feature is present at compile-time. We don't bother detecting other features. +// Note that ERMSB does not enhance the backwards (DF=1) "rep movsb". 
+ +#[inline(always)] +#[cfg(target_feature = "ermsb")] +pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { + asm!( + "rep movsb [rdi], [rsi]", + inout("rcx") count => _, + inout("rdi") dest => _, + inout("rsi") src => _, + options(nostack, preserves_flags) + ); +} -#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] -pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, count: usize) -> *mut u8 { +#[inline(always)] +#[cfg(not(target_feature = "ermsb"))] +pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { let qword_count = count >> 3; let byte_count = count & 0b111; asm!( @@ -30,18 +43,10 @@ pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, count: usize) -> inout("rsi") src => _, options(nostack, preserves_flags) ); - dest } -#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] -pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, count: usize) -> *mut u8 { - let delta = (dest as usize).wrapping_sub(src as usize); - if delta >= count { - // We can copy forwards because either dest is far enough ahead of src, - // or src is ahead of dest (and delta overflowed). 
- return self::memcpy(dest, src, count); - } - // copy backwards +#[inline(always)] +pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { let qword_count = count >> 3; let byte_count = count & 0b111; asm!( @@ -58,11 +63,23 @@ pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, count: usize) -> inout("rsi") src.offset(count as isize).wrapping_sub(8) => _, options(nostack) ); - dest } -#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] -pub unsafe extern "C" fn memset(dest: *mut u8, c: c_int, count: usize) -> *mut u8 { +#[inline(always)] +#[cfg(target_feature = "ermsb")] +pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { + asm!( + "rep stosb [rdi], al", + inout("rcx") count => _, + inout("rdi") dest => _, + inout("al") c => _, + options(nostack, preserves_flags) + ) +} + +#[inline(always)] +#[cfg(not(target_feature = "ermsb"))] +pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { let qword_count = count >> 3; let byte_count = count & 0b111; asm!( @@ -72,8 +89,7 @@ pub unsafe extern "C" fn memset(dest: *mut u8, c: c_int, count: usize) -> *mut u byte_count = in(reg) byte_count, inout("rcx") qword_count => _, inout("rdi") dest => _, - in("rax") (c as u8 as u64) * 0x0101010101010101, + in("rax") (c as u64) * 0x0101010101010101, options(nostack, preserves_flags) ); - dest } diff --git a/testcrate/benches/mem.rs b/testcrate/benches/mem.rs index 57d575086..cee64ae4d 100644 --- a/testcrate/benches/mem.rs +++ b/testcrate/benches/mem.rs @@ -6,33 +6,33 @@ use test::{black_box, Bencher}; extern crate compiler_builtins; use compiler_builtins::mem::{memcmp, memcpy, memmove, memset}; -fn memcpy_builtin(b: &mut Bencher, n: usize) { - let v1 = vec![1u8; n]; - let mut v2 = vec![0u8; n]; +fn memcpy_builtin(b: &mut Bencher, n: usize, offset: usize) { + let v1 = vec![1u8; n + offset]; + let mut v2 = vec![0u8; n + offset]; b.bytes = n as u64; b.iter(|| { - let src: &[u8] = black_box(&v1); - let dst: &mut 
[u8] = black_box(&mut v2); + let src: &[u8] = black_box(&v1[offset..]); + let dst: &mut [u8] = black_box(&mut v2[offset..]); dst.copy_from_slice(src); }) } -fn memcpy_rust(b: &mut Bencher, n: usize) { - let v1 = vec![1u8; n]; - let mut v2 = vec![0u8; n]; +fn memcpy_rust(b: &mut Bencher, n: usize, offset: usize) { + let v1 = vec![1u8; n + offset]; + let mut v2 = vec![0u8; n + offset]; b.bytes = n as u64; b.iter(|| { - let src: &[u8] = black_box(&v1); - let dst: &mut [u8] = black_box(&mut v2); + let src: &[u8] = black_box(&v1[offset..]); + let dst: &mut [u8] = black_box(&mut v2[offset..]); unsafe { memcpy(dst.as_mut_ptr(), src.as_ptr(), n) } }) } -fn memset_builtin(b: &mut Bencher, n: usize) { - let mut v1 = vec![0u8; n]; +fn memset_builtin(b: &mut Bencher, n: usize, offset: usize) { + let mut v1 = vec![0u8; n + offset]; b.bytes = n as u64; b.iter(|| { - let dst: &mut [u8] = black_box(&mut v1); + let dst: &mut [u8] = black_box(&mut v1[offset..]); let val: u8 = black_box(27); for b in dst { *b = val; @@ -40,11 +40,11 @@ fn memset_builtin(b: &mut Bencher, n: usize) { }) } -fn memset_rust(b: &mut Bencher, n: usize) { - let mut v1 = vec![0u8; n]; +fn memset_rust(b: &mut Bencher, n: usize, offset: usize) { + let mut v1 = vec![0u8; n + offset]; b.bytes = n as u64; b.iter(|| { - let dst: &mut [u8] = black_box(&mut v1); + let dst: &mut [u8] = black_box(&mut v1[offset..]); let val = black_box(27); unsafe { memset(dst.as_mut_ptr(), val, n) } }) @@ -95,36 +95,68 @@ fn memmove_rust(b: &mut Bencher, n: usize) { #[bench] fn memcpy_builtin_4096(b: &mut Bencher) { - memcpy_builtin(b, 4096) + memcpy_builtin(b, 4096, 0) } #[bench] fn memcpy_rust_4096(b: &mut Bencher) { - memcpy_rust(b, 4096) + memcpy_rust(b, 4096, 0) } #[bench] fn memcpy_builtin_1048576(b: &mut Bencher) { - memcpy_builtin(b, 1048576) + memcpy_builtin(b, 1048576, 0) } #[bench] fn memcpy_rust_1048576(b: &mut Bencher) { - memcpy_rust(b, 1048576) + memcpy_rust(b, 1048576, 0) +} +#[bench] +fn memcpy_builtin_4096_offset(b: 
&mut Bencher) { + memcpy_builtin(b, 4096, 65) +} +#[bench] +fn memcpy_rust_4096_offset(b: &mut Bencher) { + memcpy_rust(b, 4096, 65) +} +#[bench] +fn memcpy_builtin_1048576_offset(b: &mut Bencher) { + memcpy_builtin(b, 1048576, 65) +} +#[bench] +fn memcpy_rust_1048576_offset(b: &mut Bencher) { + memcpy_rust(b, 1048576, 65) } #[bench] fn memset_builtin_4096(b: &mut Bencher) { - memset_builtin(b, 4096) + memset_builtin(b, 4096, 0) } #[bench] fn memset_rust_4096(b: &mut Bencher) { - memset_rust(b, 4096) + memset_rust(b, 4096, 0) } #[bench] fn memset_builtin_1048576(b: &mut Bencher) { - memset_builtin(b, 1048576) + memset_builtin(b, 1048576, 0) } #[bench] fn memset_rust_1048576(b: &mut Bencher) { - memset_rust(b, 1048576) + memset_rust(b, 1048576, 0) +} +#[bench] +fn memset_builtin_4096_offset(b: &mut Bencher) { + memset_builtin(b, 4096, 65) +} +#[bench] +fn memset_rust_4096_offset(b: &mut Bencher) { + memset_rust(b, 4096, 65) +} +#[bench] +fn memset_builtin_1048576_offset(b: &mut Bencher) { + memset_builtin(b, 1048576, 65) +} +#[bench] +fn memset_rust_1048576_offset(b: &mut Bencher) { + memset_rust(b, 1048576, 65) } #[bench] From 63ccaf11f08fb5d0b39cc33884c5a1a63f547ace Mon Sep 17 00:00:00 2001 From: Joseph Richey Date: Mon, 9 Nov 2020 07:24:25 -0800 Subject: [PATCH 0397/1459] Move from an "asm" flag to a "no-asm" feature flag (#386) * Use a no-asm feature instead of an asm feature This works better as core/alloc/std have trouble supporting default features in this crate.
Signed-off-by: Joe Richey * Have no-asm disable arm assembly intrinsics Signed-off-by: Joe Richey --- Cargo.toml | 8 ++++---- ci/run.sh | 8 ++++---- src/arm.rs | 2 ++ src/int/specialized_div_rem/mod.rs | 16 ++++++++-------- src/lib.rs | 6 +++--- src/mem/mod.rs | 5 ++++- src/probestack.rs | 2 ++ src/x86.rs | 21 ++++++++++++++++++--- src/x86_64.rs | 21 ++++++++++++++++++--- testcrate/Cargo.toml | 4 ++-- 10 files changed, 65 insertions(+), 28 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9262bb6d6..0c5fd1531 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,14 +40,14 @@ panic-handler = { path = 'crates/panic-handler' } [features] default = ["compiler-builtins"] -# Some algorithms benefit from inline assembly, but some compiler backends do -# not support it, so inline assembly is only enabled when this flag is set. -asm = [] - # Enable compilation of C code in compiler-rt, filling in some more optimized # implementations and also filling in unimplemented intrinsics c = ["cc"] +# Workaround for the Cranelift codegen backend. Disables any implementations +# which use inline assembly and fall back to pure Rust versions (if avalible). 
+no-asm = [] + # Flag this library as the unstable compiler-builtins lib compiler-builtins = [] diff --git a/ci/run.sh b/ci/run.sh index 9d1632333..44ec30fb7 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -12,16 +12,16 @@ else $run --release $run --features c $run --features c --release - $run --features asm - $run --features asm --release + $run --features no-asm + $run --features no-asm --release fi cargo build --target $1 cargo build --target $1 --release cargo build --target $1 --features c cargo build --target $1 --release --features c -cargo build --target $1 --features asm -cargo build --target $1 --release --features asm +cargo build --target $1 --features no-asm +cargo build --target $1 --release --features no-asm PREFIX=$(echo $1 | sed -e 's/unknown-//')- case $1 in diff --git a/src/arm.rs b/src/arm.rs index 3a2506091..2b17b4f96 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -1,3 +1,5 @@ +#![cfg(not(feature = "no-asm"))] + use core::intrinsics; // NOTE This function and the ones below are implemented using assembly because they using a custom diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs index 3ac341b6f..7f37f6220 100644 --- a/src/int/specialized_div_rem/mod.rs +++ b/src/int/specialized_div_rem/mod.rs @@ -115,7 +115,7 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) { // microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is // faster if the target pointer width is at least 64. #[cfg(all( - not(all(feature = "asm", target_arch = "x86_64")), + not(all(not(feature = "no-asm"), target_arch = "x86_64")), not(any(target_pointer_width = "16", target_pointer_width = "32")) ))] impl_trifecta!( @@ -131,7 +131,7 @@ impl_trifecta!( // If the pointer width less than 64, then the target architecture almost certainly does not have // the fast 64 to 128 bit widening multiplication needed for `trifecta` to be faster. 
#[cfg(all( - not(all(feature = "asm", target_arch = "x86_64")), + not(all(not(feature = "no-asm"), target_arch = "x86_64")), any(target_pointer_width = "16", target_pointer_width = "32") ))] impl_delegate!( @@ -152,7 +152,7 @@ impl_delegate!( /// /// If the quotient does not fit in a `u64`, a floating point exception occurs. /// If `div == 0`, then a division by zero exception occurs. -#[cfg(all(feature = "asm", target_arch = "x86_64"))] +#[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))] #[inline] unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) { let duo_lo = duo as u64; @@ -174,7 +174,7 @@ unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) { } // use `asymmetric` instead of `trifecta` on x86_64 -#[cfg(all(feature = "asm", target_arch = "x86_64"))] +#[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))] impl_asymmetric!( u128_div_rem, zero_div_fn, @@ -203,7 +203,7 @@ fn u32_by_u32_div_rem(duo: u32, div: u32) -> (u32, u32) { // When not on x86 and the pointer width is not 64, use `delegate` since the division size is larger // than register size. #[cfg(all( - not(all(feature = "asm", target_arch = "x86")), + not(all(not(feature = "no-asm"), target_arch = "x86")), not(target_pointer_width = "64") ))] impl_delegate!( @@ -220,7 +220,7 @@ impl_delegate!( // When not on x86 and the pointer width is 64, use `binary_long`. #[cfg(all( - not(all(feature = "asm", target_arch = "x86")), + not(all(not(feature = "no-asm"), target_arch = "x86")), target_pointer_width = "64" ))] impl_binary_long!( @@ -238,7 +238,7 @@ impl_binary_long!( /// /// If the quotient does not fit in a `u32`, a floating point exception occurs. /// If `div == 0`, then a division by zero exception occurs. 
-#[cfg(all(feature = "asm", target_arch = "x86"))] +#[cfg(all(not(feature = "no-asm"), target_arch = "x86"))] #[inline] unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) { let duo_lo = duo as u32; @@ -260,7 +260,7 @@ unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) { } // use `asymmetric` instead of `delegate` on x86 -#[cfg(all(feature = "asm", target_arch = "x86"))] +#[cfg(all(not(feature = "no-asm"), target_arch = "x86"))] impl_asymmetric!( u64_div_rem, zero_div_fn, diff --git a/src/lib.rs b/src/lib.rs index cbd23850b..4a7c746a2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,8 @@ #![cfg_attr(feature = "compiler-builtins", compiler_builtins)] -#![cfg_attr(feature = "asm", feature(asm))] +#![cfg_attr(not(feature = "no-asm"), feature(asm))] #![feature(abi_unadjusted)] -#![feature(llvm_asm)] -#![feature(global_asm)] +#![cfg_attr(not(feature = "no-asm"), feature(llvm_asm))] +#![cfg_attr(not(feature = "no-asm"), feature(global_asm))] #![feature(cfg_target_has_atomic)] #![feature(compiler_builtins)] #![feature(core_intrinsics)] diff --git a/src/mem/mod.rs b/src/mem/mod.rs index 6bc76337c..adb7c2c64 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -10,7 +10,10 @@ use core::mem; use core::ops::{BitOr, Shl}; // memcpy/memmove/memset have optimized implementations on some architectures -#[cfg_attr(all(feature = "asm", target_arch = "x86_64"), path = "x86_64.rs")] +#[cfg_attr( + all(not(feature = "no-asm"), target_arch = "x86_64"), + path = "x86_64.rs" +)] mod impls; #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] diff --git a/src/probestack.rs b/src/probestack.rs index 2f37a104e..ac3ae1ebb 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -44,6 +44,8 @@ #![cfg(not(feature = "mangled-names"))] // Windows already has builtins to do this. #![cfg(not(windows))] +// All these builtins require assembly +#![cfg(not(feature = "no-asm"))] // We only define stack probing for these architectures today. 
#![cfg(any(target_arch = "x86_64", target_arch = "x86"))] diff --git a/src/x86.rs b/src/x86.rs index 5511c4572..e038231bb 100644 --- a/src/x86.rs +++ b/src/x86.rs @@ -8,7 +8,12 @@ use core::intrinsics; // NOTE These functions are never mangled as they are not tested against compiler-rt // and mangling ___chkstk would break the `jmp ___chkstk` instruction in __alloca -#[cfg(all(windows, target_env = "gnu", not(feature = "mangled-names")))] +#[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm"), + not(feature = "mangled-names") +))] #[naked] #[no_mangle] pub unsafe fn ___chkstk_ms() { @@ -34,7 +39,12 @@ pub unsafe fn ___chkstk_ms() { } // FIXME: __alloca should be an alias to __chkstk -#[cfg(all(windows, target_env = "gnu", not(feature = "mangled-names")))] +#[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm"), + not(feature = "mangled-names") +))] #[naked] #[no_mangle] pub unsafe fn __alloca() { @@ -43,7 +53,12 @@ pub unsafe fn __alloca() { intrinsics::unreachable(); } -#[cfg(all(windows, target_env = "gnu", not(feature = "mangled-names")))] +#[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm"), + not(feature = "mangled-names") +))] #[naked] #[no_mangle] pub unsafe fn ___chkstk() { diff --git a/src/x86_64.rs b/src/x86_64.rs index 6a0cd5668..91c0f24fc 100644 --- a/src/x86_64.rs +++ b/src/x86_64.rs @@ -8,7 +8,12 @@ use core::intrinsics; // NOTE These functions are never mangled as they are not tested against compiler-rt // and mangling ___chkstk would break the `jmp ___chkstk` instruction in __alloca -#[cfg(all(windows, target_env = "gnu", not(feature = "mangled-names")))] +#[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm"), + not(feature = "mangled-names") +))] #[naked] #[no_mangle] pub unsafe fn ___chkstk_ms() { @@ -33,7 +38,12 @@ pub unsafe fn ___chkstk_ms() { intrinsics::unreachable(); } -#[cfg(all(windows, target_env = "gnu", not(feature = "mangled-names")))] +#[cfg(all( + windows, + target_env 
= "gnu", + not(feature = "no-asm"), + not(feature = "mangled-names") +))] #[naked] #[no_mangle] pub unsafe fn __alloca() { @@ -43,7 +53,12 @@ pub unsafe fn __alloca() { intrinsics::unreachable(); } -#[cfg(all(windows, target_env = "gnu", not(feature = "mangled-names")))] +#[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm"), + not(feature = "mangled-names") +))] #[naked] #[no_mangle] pub unsafe fn ___chkstk() { diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index 5c3df7961..ce8df2d12 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -28,8 +28,8 @@ utest-cortex-m-qemu = { default-features = false, git = "https://github.com/japa utest-macros = { git = "https://github.com/japaric/utest" } [features] -default = ["asm", "mangled-names"] -asm = ["compiler_builtins/asm"] +default = ["mangled-names"] c = ["compiler_builtins/c"] +no-asm = ["compiler_builtins/no-asm"] mem = ["compiler_builtins/mem"] mangled-names = ["compiler_builtins/mangled-names"] From 9c137ced80e8a013cff9a03c32dadc88c3d5bc83 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 20 Nov 2020 10:29:35 -0600 Subject: [PATCH 0398/1459] Update CI to fix on GitHub Actions (#394) Should hopefully fix the master branch... 
--- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e4bcc9c10..01dffb845 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -90,7 +90,7 @@ jobs: run: | curl -L -o code.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/10.0-2020-05-05.tar.gz tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-10.0-2020-05-05/compiler-rt - echo "##[set-env name=RUST_COMPILER_RT_ROOT]./compiler-rt" + echo RUST_COMPILER_RT_ROOT=./compiler-rt >> $GITHUB_ENV shell: bash # Non-linux tests just use our raw script From c975b0e9fe9369031b0a818ac09ebd09ed7f2866 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Mon, 23 Nov 2020 09:05:02 -0600 Subject: [PATCH 0399/1459] fix division on SPARC (#393) --- src/int/specialized_div_rem/delegate.rs | 130 ++++++++++++++++++++++++ src/int/specialized_div_rem/mod.rs | 49 +++++---- src/int/udiv.rs | 38 +++++-- 3 files changed, 190 insertions(+), 27 deletions(-) diff --git a/src/int/specialized_div_rem/delegate.rs b/src/int/specialized_div_rem/delegate.rs index 8310c1429..135d3402a 100644 --- a/src/int/specialized_div_rem/delegate.rs +++ b/src/int/specialized_div_rem/delegate.rs @@ -185,3 +185,133 @@ macro_rules! impl_delegate { } }; } + +/// Returns `n / d` and sets `*rem = n % d`. +/// +/// This specialization exists because: +/// - The LLVM backend for 32-bit SPARC cannot compile functions that return `(u128, u128)`, +/// so we have to use an old fashioned `&mut u128` argument to return the remainder. +/// - 64-bit SPARC does not have u64 * u64 => u128 widening multiplication, which makes the +/// delegate algorithm strategy the only reasonably fast way to perform `u128` division. 
+#[doc(hidden)] +pub fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { + use super::*; + let duo_lo = duo as u64; + let duo_hi = (duo >> 64) as u64; + let div_lo = div as u64; + let div_hi = (div >> 64) as u64; + + match (div_lo == 0, div_hi == 0, duo_hi == 0) { + (true, true, _) => zero_div_fn(), + (_, false, true) => { + *rem = duo; + return 0; + } + (false, true, true) => { + let tmp = u64_by_u64_div_rem(duo_lo, div_lo); + *rem = tmp.1 as u128; + return tmp.0 as u128; + } + (false, true, false) => { + if duo_hi < div_lo { + let norm_shift = u64_normalization_shift(div_lo, duo_hi, false); + let shl = if norm_shift == 0 { + 64 - 1 + } else { + 64 - norm_shift + }; + + let mut div: u128 = div << shl; + let mut pow_lo: u64 = 1 << shl; + let mut quo_lo: u64 = 0; + let mut duo = duo; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as i128) { + duo = sub; + quo_lo |= pow_lo; + let duo_hi = (duo >> 64) as u64; + if duo_hi == 0 { + let tmp = u64_by_u64_div_rem(duo as u64, div_lo); + *rem = tmp.1 as u128; + return (quo_lo | tmp.0) as u128; + } + } + div >>= 1; + pow_lo >>= 1; + } + } else if duo_hi == div_lo { + let tmp = u64_by_u64_div_rem(duo as u64, div as u64); + *rem = tmp.1 as u128; + return (1 << 64) | (tmp.0 as u128); + } else { + if (div_lo >> 32) == 0 { + let div_0 = div_lo as u32 as u64; + let (quo_hi, rem_3) = u64_by_u64_div_rem(duo_hi, div_0); + + let duo_mid = ((duo >> 32) as u32 as u64) | (rem_3 << 32); + let (quo_1, rem_2) = u64_by_u64_div_rem(duo_mid, div_0); + + let duo_lo = (duo as u32 as u64) | (rem_2 << 32); + let (quo_0, rem_1) = u64_by_u64_div_rem(duo_lo, div_0); + + *rem = rem_1 as u128; + return (quo_0 as u128) | ((quo_1 as u128) << 32) | ((quo_hi as u128) << 64); + } + + let duo_lo = duo as u64; + let tmp = u64_by_u64_div_rem(duo_hi, div_lo); + let quo_hi = tmp.0; + let mut duo = (duo_lo as u128) | ((tmp.1 as u128) << 64); + if duo < div { + *rem = duo; + return (quo_hi as u128) << 64; + } + + let mut div: u128 = div 
<< (64 - 1); + let mut pow_lo: u64 = 1 << (64 - 1); + let mut quo_lo: u64 = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as i128) { + duo = sub; + quo_lo |= pow_lo; + let duo_hi = (duo >> 64) as u64; + if duo_hi == 0 { + let tmp = u64_by_u64_div_rem(duo as u64, div_lo); + *rem = tmp.1 as u128; + return (tmp.0) as u128 | (quo_lo as u128) | ((quo_hi as u128) << 64); + } + } + div >>= 1; + pow_lo >>= 1; + } + } + } + (_, false, false) => { + if duo < div { + *rem = duo; + return 0; + } + let div_original = div; + let shl = u64_normalization_shift(duo_hi, div_hi, false); + let mut duo = duo; + let mut div: u128 = div << shl; + let mut pow_lo: u64 = 1 << shl; + let mut quo_lo: u64 = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as i128) { + duo = sub; + quo_lo |= pow_lo; + if duo < div_original { + *rem = duo; + return quo_lo as u128; + } + } + div >>= 1; + pow_lo >>= 1; + } + } + } +} diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs index 7f37f6220..eaeb030e3 100644 --- a/src/int/specialized_div_rem/mod.rs +++ b/src/int/specialized_div_rem/mod.rs @@ -46,6 +46,7 @@ mod binary_long; #[macro_use] mod delegate; +pub use self::delegate::u128_divide_sparc; #[macro_use] mod trifecta; @@ -60,27 +61,31 @@ fn zero_div_fn() -> ! { unsafe { core::hint::unreachable_unchecked() } } -// The `B` extension on RISC-V determines if a CLZ assembly instruction exists -#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] -const USE_LZ: bool = cfg!(target_feature = "b"); - -#[cfg(target_arch = "arm")] -const USE_LZ: bool = if cfg!(target_feature = "thumb-mode") { - // ARM thumb targets have CLZ instructions if the instruction set of ARMv6T2 is supported. This - // is needed to successfully differentiate between targets like `thumbv8.base` and - // `thumbv8.main`. - cfg!(target_feature = "v6t2") -} else { - // Regular ARM targets have CLZ instructions if the ARMv5TE instruction set is supported. 
- // Technically, ARMv5T was the first to have CLZ, but the "v5t" target feature does not seem to - // work. - cfg!(target_feature = "v5te") +const USE_LZ: bool = { + if cfg!(target_arch = "arm") { + if cfg!(target_feature = "thumb-mode") { + // ARM thumb targets have CLZ instructions if the instruction set of ARMv6T2 is + // supported. This is needed to successfully differentiate between targets like + // `thumbv8.base` and `thumbv8.main`. + cfg!(target_feature = "v6t2") + } else { + // Regular ARM targets have CLZ instructions if the ARMv5TE instruction set is + // supported. Technically, ARMv5T was the first to have CLZ, but the "v5t" target + // feature does not seem to work. + cfg!(target_feature = "v5te") + } + } else if cfg!(any(target_arch = "sparc", target_arch = "sparc64")) { + // LZD or LZCNT on SPARC only exists for the VIS 3 extension and later. + cfg!(target_feature = "vis3") + } else if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { + // The `B` extension on RISC-V determines if a CLZ assembly instruction exists + cfg!(target_feature = "b") + } else { + // All other common targets Rust supports should have CLZ instructions + true + } }; -// All other targets Rust supports have CLZ instructions -#[cfg(not(any(target_arch = "arm", target_arch = "riscv32", target_arch = "riscv64")))] -const USE_LZ: bool = true; - impl_normalization_shift!( u32_normalization_shift, USE_LZ, @@ -115,8 +120,9 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) { // microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is // faster if the target pointer width is at least 64. 
#[cfg(all( + not(any(target_pointer_width = "16", target_pointer_width = "32")), not(all(not(feature = "no-asm"), target_arch = "x86_64")), - not(any(target_pointer_width = "16", target_pointer_width = "32")) + not(any(target_arch = "sparc", target_arch = "sparc64")) ))] impl_trifecta!( u128_div_rem, @@ -131,8 +137,9 @@ impl_trifecta!( // If the pointer width less than 64, then the target architecture almost certainly does not have // the fast 64 to 128 bit widening multiplication needed for `trifecta` to be faster. #[cfg(all( + any(target_pointer_width = "16", target_pointer_width = "32"), not(all(not(feature = "no-asm"), target_arch = "x86_64")), - any(target_pointer_width = "16", target_pointer_width = "32") + not(any(target_arch = "sparc", target_arch = "sparc64")) ))] impl_delegate!( u128_div_rem, diff --git a/src/int/udiv.rs b/src/int/udiv.rs index 3cd9be93c..d97178078 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -1,3 +1,4 @@ +pub use int::specialized_div_rem::u128_divide_sparc; use int::specialized_div_rem::*; intrinsics! { @@ -46,25 +47,50 @@ intrinsics! { quo_rem.0 } + // Note: we use block configuration and not `if cfg!(...)`, because we need to entirely disable + // the existence of `u128_div_rem` to get 32-bit SPARC to compile, see `u128_divide_sparc` docs. 
+ #[win64_128bit_abi_hack] /// Returns `n / d` pub extern "C" fn __udivti3(n: u128, d: u128) -> u128 { - u128_div_rem(n, d).0 + #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { + u128_div_rem(n, d).0 + } + #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] { + u128_divide_sparc(n, d, &mut 0) + } } #[win64_128bit_abi_hack] /// Returns `n % d` pub extern "C" fn __umodti3(n: u128, d: u128) -> u128 { - u128_div_rem(n, d).1 + #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { + u128_div_rem(n, d).1 + } + #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] { + let mut rem = 0; + u128_divide_sparc(n, d, &mut rem); + rem + } } #[win64_128bit_abi_hack] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmodti4(n: u128, d: u128, rem: Option<&mut u128>) -> u128 { - let quo_rem = u128_div_rem(n, d); - if let Some(rem) = rem { - *rem = quo_rem.1; + #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { + let quo_rem = u128_div_rem(n, d); + if let Some(rem) = rem { + *rem = quo_rem.1; + } + quo_rem.0 + } + #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] { + let mut tmp = 0; + let quo = u128_divide_sparc(n, d, &mut tmp); + if let Some(rem) = rem { + *rem = tmp; + } + quo } - quo_rem.0 } } From 080f6d3d6cf9bf97b524ed475218540df1b92ebf Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Tue, 13 Oct 2020 00:06:27 -0500 Subject: [PATCH 0400/1459] Remove `aapcs_on_arm` mistake --- src/int/mul.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/int/mul.rs b/src/int/mul.rs index 42f13913e..1e32560c2 100644 --- a/src/int/mul.rs +++ b/src/int/mul.rs @@ -90,7 +90,6 @@ intrinsics! 
{ a.mul(b) } - #[aapcs_on_arm] pub extern "C" fn __multi3(a: i128, b: i128) -> i128 { a.mul(b) } From b1a7a00e488c0c27eac0fa510238121bde5a1298 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Mon, 12 Oct 2020 17:58:02 -0500 Subject: [PATCH 0401/1459] Introduce the `DInt` and `HInt` traits and add various methods that will be used for improved fuzzing --- src/int/mod.rs | 203 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 202 insertions(+), 1 deletion(-) diff --git a/src/int/mod.rs b/src/int/mod.rs index 5e695d5f7..da2263f6e 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -12,13 +12,15 @@ pub mod udiv; pub use self::leading_zeros::__clzsi2; /// Trait for some basic operations on integers -pub(crate) trait Int: +#[doc(hidden)] +pub trait Int: Copy + PartialEq + PartialOrd + ops::AddAssign + ops::BitAndAssign + ops::BitOrAssign + + ops::BitXorAssign + ops::ShlAssign + ops::ShrAssign + ops::Add @@ -41,6 +43,14 @@ pub(crate) trait Int: const ZERO: Self; const ONE: Self; + const MIN: Self; + + /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing + /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, + /// 112,119,120,125,126,127]. + const FUZZ_LENGTHS: [u8; 20]; + /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. + const FUZZ_NUM: usize; /// Extracts the sign from self and returns a tuple. 
/// @@ -59,17 +69,25 @@ pub(crate) trait Int: fn from_bool(b: bool) -> Self; + /// Prevents the need for excessive conversions between signed and unsigned + fn logical_shr(self, other: u32) -> Self; + // copied from primitive integers, but put in a trait + fn is_zero(self) -> bool; fn max_value() -> Self; fn min_value() -> Self; + fn wrapping_neg(self) -> Self; fn wrapping_add(self, other: Self) -> Self; fn wrapping_mul(self, other: Self) -> Self; fn wrapping_sub(self, other: Self) -> Self; fn wrapping_shl(self, other: u32) -> Self; + fn wrapping_shr(self, other: u32) -> Self; + fn rotate_left(self, other: u32) -> Self; fn overflowing_add(self, other: Self) -> (Self, bool); fn aborting_div(self, other: Self) -> Self; fn aborting_rem(self, other: Self) -> Self; fn leading_zeros(self) -> u32; + fn count_ones(self) -> u32; } fn unwrap(t: Option) -> T { @@ -85,11 +103,78 @@ macro_rules! int_impl_common { const ZERO: Self = 0; const ONE: Self = 1; + const MIN: Self = ::MIN; + + const FUZZ_LENGTHS: [u8; 20] = { + let bits = ::BITS; + let mut v = [0u8; 20]; + v[0] = 0; + v[1] = 1; + v[2] = 2; // important for parity and the iX::MIN case when reversed + let mut i = 3; + // No need for any more until the byte boundary, because there should be no algorithms + // that are sensitive to anything not next to byte boundaries after 2. We also scale + // in powers of two, which is important to prevent u128 corner tests from getting too + // big. + let mut l = 8; + loop { + if l >= ((bits / 2) as u8) { + break; + } + // get both sides of the byte boundary + v[i] = l - 1; + i += 1; + v[i] = l; + i += 1; + l *= 2; + } + + if bits != 8 { + // add the lower side of the middle boundary + v[i] = ((bits / 2) - 1) as u8; + i += 1; + } + + // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS + // boundary because of algorithms that split the high part up. We reverse the scaling + // as we go to Self::BITS. 
+ let mid = i; + let mut j = 1; + loop { + v[i] = (bits as u8) - (v[mid - j]) - 1; + if j == mid { + break; + } + i += 1; + j += 1; + } + v + }; + + const FUZZ_NUM: usize = { + let log2 = (::BITS - 1).count_ones() as usize; + if log2 == 3 { + // case for u8 + 6 + } else { + // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate + // boundaries. + 8 + (4 * (log2 - 4)) + } + }; fn from_bool(b: bool) -> Self { b as $ty } + fn logical_shr(self, other: u32) -> Self { + Self::from_unsigned(self.unsigned().wrapping_shr(other)) + } + + fn is_zero(self) -> bool { + self == Self::ZERO + } + fn max_value() -> Self { ::max_value() } @@ -98,6 +183,10 @@ macro_rules! int_impl_common { ::min_value() } + fn wrapping_neg(self) -> Self { + ::wrapping_neg(self) + } + fn wrapping_add(self, other: Self) -> Self { ::wrapping_add(self, other) } @@ -114,6 +203,14 @@ macro_rules! int_impl_common { ::wrapping_shl(self, other) } + fn wrapping_shr(self, other: u32) -> Self { + ::wrapping_shr(self, other) + } + + fn rotate_left(self, other: u32) -> Self { + ::rotate_left(self, other) + } + fn overflowing_add(self, other: Self) -> (Self, bool) { ::overflowing_add(self, other) } @@ -129,6 +226,10 @@ macro_rules! int_impl_common { fn leading_zeros(self) -> u32 { ::leading_zeros(self) } + + fn count_ones(self) -> u32 { + ::count_ones(self) + } }; } @@ -178,11 +279,111 @@ macro_rules! int_impl { }; } +int_impl!(isize, usize, usize::MAX.count_ones()); +int_impl!(i8, u8, 8); int_impl!(i16, u16, 16); int_impl!(i32, u32, 32); int_impl!(i64, u64, 64); int_impl!(i128, u128, 128); +/// Trait for integers twice the bit width of another integer. This is implemented for all +/// primitives except for `u8`, because there is not a smaller primitive. 
+#[doc(hidden)] +pub trait DInt: Int { + /// Integer that is half the bit width of the integer this trait is implemented for + type H: HInt + Int; + + /// Returns the low half of `self` + fn lo(self) -> Self::H; + /// Returns the high half of `self` + fn hi(self) -> Self::H; + /// Returns the low and high halves of `self` as a tuple + fn lo_hi(self) -> (Self::H, Self::H); + /// Constructs an integer using lower and higher half parts + fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self; +} + +/// Trait for integers half the bit width of another integer. This is implemented for all +/// primitives except for `u128`, because there is not a larger primitive. +#[doc(hidden)] +pub trait HInt: Int { + /// Integer that is double the bit width of the integer this trait is implemented for + type D: DInt + Int; + + /// Widens (using default extension) the integer to have double bit width + fn widen(self) -> Self::D; + /// Widens (zero extension only) the integer to have double bit width. This is needed to get + /// around problems with associated type bounds (such as `Int`) being unstable + fn zero_widen(self) -> Self::D; + /// Widens the integer to have double bit width and shifts the integer into the higher bits + fn widen_hi(self) -> Self::D; + /// Widening multiplication with zero widening. This cannot overflow. + fn zero_widen_mul(self, rhs: Self) -> Self::D; + /// Widening multiplication. This cannot overflow. + fn widen_mul(self, rhs: Self) -> Self::D; +} + +macro_rules! impl_d_int { + ($($X:ident $D:ident),*) => { + $( + impl DInt for $D { + type H = $X; + + fn lo(self) -> Self::H { + self as $X + } + fn hi(self) -> Self::H { + (self >> <$X as Int>::BITS) as $X + } + fn lo_hi(self) -> (Self::H, Self::H) { + (self.lo(), self.hi()) + } + fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { + lo.zero_widen() | hi.widen_hi() + } + } + )* + }; +} + +macro_rules!
impl_h_int { + ($($H:ident $uH:ident $X:ident),*) => { + $( + impl HInt for $H { + type D = $X; + + fn widen(self) -> Self::D { + self as $X + } + fn zero_widen(self) -> Self::D { + (self as $uH) as $X + } + fn widen_hi(self) -> Self::D { + (self as $X) << <$H as Int>::BITS + } + fn zero_widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen().wrapping_mul(rhs.zero_widen()) + } + fn widen_mul(self, rhs: Self) -> Self::D { + self.widen().wrapping_mul(rhs.widen()) + } + } + )* + }; +} + +impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); +impl_h_int!( + u8 u8 u16, + u16 u16 u32, + u32 u32 u64, + u64 u64 u128, + i8 u8 i16, + i16 u16 i32, + i32 u32 i64, + i64 u64 i128 +); + /// Trait to convert an integer to/from smaller parts pub(crate) trait LargeInt: Int { type LowHalf: Int; From d1960ecb0cd698e28812899cb8461d80f4898b03 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Mon, 12 Oct 2020 17:59:34 -0500 Subject: [PATCH 0402/1459] Overhaul overflowing multiplication impls --- src/float/conv.rs | 6 +-- src/int/mul.rs | 123 +++++++++++++++++++++++++--------------------- 2 files changed, 71 insertions(+), 58 deletions(-) diff --git a/src/float/conv.rs b/src/float/conv.rs index 8a0fc6cb4..e9ca0f138 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -11,7 +11,7 @@ macro_rules! int_to_float { let mant_dig = <$fty>::SIGNIFICAND_BITS + 1; let exponent_bias = <$fty>::EXPONENT_BIAS; - let n = <$ity>::BITS; + let n = <$ity as Int>::BITS; let (s, a) = i.extract_sign(); let mut a = a; @@ -21,7 +21,7 @@ macro_rules! int_to_float { // exponent let mut e = sd - 1; - if <$ity>::BITS < mant_dig { + if <$ity as Int>::BITS < mant_dig { return <$fty>::from_parts( s, (e + exponent_bias) as <$fty as Float>::Int, @@ -165,7 +165,7 @@ macro_rules! 
float_to_int { let f = $f; let fixint_min = <$ity>::min_value(); let fixint_max = <$ity>::max_value(); - let fixint_bits = <$ity>::BITS as usize; + let fixint_bits = <$ity as Int>::BITS as usize; let fixint_unsigned = fixint_min == 0; let sign_bit = <$fty>::SIGN_MASK; diff --git a/src/int/mul.rs b/src/int/mul.rs index 1e32560c2..e5c0afc1e 100644 --- a/src/int/mul.rs +++ b/src/int/mul.rs @@ -1,7 +1,5 @@ -use core::ops; - -use int::Int; use int::LargeInt; +use int::{DInt, HInt, Int}; trait Mul: LargeInt { fn mul(self, other: Self) -> Self { @@ -29,59 +27,72 @@ trait Mul: LargeInt { impl Mul for u64 {} impl Mul for i128 {} -trait Mulo: Int + ops::Neg { - fn mulo(self, other: Self, overflow: &mut i32) -> Self { - *overflow = 0; - let result = self.wrapping_mul(other); - if self == Self::min_value() { - if other != Self::ZERO && other != Self::ONE { - *overflow = 1; +pub(crate) trait UMulo: Int + DInt { + fn mulo(self, rhs: Self) -> (Self, bool) { + match (self.hi().is_zero(), rhs.hi().is_zero()) { + // overflow is guaranteed + (false, false) => (self.wrapping_mul(rhs), true), + (true, false) => { + let mul_lo = self.lo().widen_mul(rhs.lo()); + let mul_hi = self.lo().widen_mul(rhs.hi()); + let (mul, o) = mul_lo.overflowing_add(mul_hi.lo().widen_hi()); + (mul, o || !mul_hi.hi().is_zero()) } - return result; - } - if other == Self::min_value() { - if self != Self::ZERO && self != Self::ONE { - *overflow = 1; + (false, true) => { + let mul_lo = rhs.lo().widen_mul(self.lo()); + let mul_hi = rhs.lo().widen_mul(self.hi()); + let (mul, o) = mul_lo.overflowing_add(mul_hi.lo().widen_hi()); + (mul, o || !mul_hi.hi().is_zero()) } - return result; + // overflow is guaranteed to not happen, and use a smaller widening multiplication + (true, true) => (self.lo().widen_mul(rhs.lo()), false), } + } +} - let sa = self >> (Self::BITS - 1); - let abs_a = (self ^ sa) - sa; - let sb = other >> (Self::BITS - 1); - let abs_b = (other ^ sb) - sb; - let two = Self::ONE + Self::ONE; - if abs_a < 
two || abs_b < two { - return result; - } - if sa == sb { - if abs_a > Self::max_value().aborting_div(abs_b) { - *overflow = 1; +impl UMulo for u32 {} +impl UMulo for u64 {} +impl UMulo for u128 {} + +macro_rules! impl_signed_mulo { + ($fn:ident, $iD:ident, $uD:ident) => { + fn $fn(lhs: $iD, rhs: $iD) -> ($iD, bool) { + let mut lhs = lhs; + let mut rhs = rhs; + // the test against `mul_neg` below fails without this early return + if lhs == 0 || rhs == 0 { + return (0, false); } - } else { - if abs_a > Self::min_value().aborting_div(-abs_b) { - *overflow = 1; + + let lhs_neg = lhs < 0; + let rhs_neg = rhs < 0; + if lhs_neg { + lhs = lhs.wrapping_neg(); } - } - result - } -} + if rhs_neg { + rhs = rhs.wrapping_neg(); + } + let mul_neg = lhs_neg != rhs_neg; -impl Mulo for i32 {} -impl Mulo for i64 {} -impl Mulo for i128 {} + let (mul, o) = (lhs as $uD).mulo(rhs as $uD); + let mut mul = mul as $iD; -trait UMulo: Int { - fn mulo(self, other: Self, overflow: &mut i32) -> Self { - *overflow = 0; - let result = self.wrapping_mul(other); - if self > Self::max_value().aborting_div(other) { - *overflow = 1; + if mul_neg { + mul = mul.wrapping_neg(); + } + if (mul < 0) != mul_neg { + // this one check happens to catch all edge cases related to `$iD::MIN` + (mul, true) + } else { + (mul, o) + } } - result - } + }; } -impl UMulo for u128 {} + +impl_signed_mulo!(i32_overflowing_mul, i32, u32); +impl_signed_mulo!(i64_overflowing_mul, i64, u64); +impl_signed_mulo!(i128_overflowing_mul, i128, u128); intrinsics! { #[maybe_use_optimized_c_shim] @@ -95,27 +106,29 @@ intrinsics! 
{ } pub extern "C" fn __mulosi4(a: i32, b: i32, oflow: &mut i32) -> i32 { - a.mulo(b, oflow) + let (mul, o) = i32_overflowing_mul(a, b); + *oflow = o as i32; + mul } pub extern "C" fn __mulodi4(a: i64, b: i64, oflow: &mut i32) -> i64 { - a.mulo(b, oflow) + let (mul, o) = i64_overflowing_mul(a, b); + *oflow = o as i32; + mul } #[unadjusted_on_win64] pub extern "C" fn __muloti4(a: i128, b: i128, oflow: &mut i32) -> i128 { - a.mulo(b, oflow) + let (mul, o) = i128_overflowing_mul(a, b); + *oflow = o as i32; + mul } pub extern "C" fn __rust_i128_mulo(a: i128, b: i128) -> (i128, bool) { - let mut oflow = 0; - let r = __muloti4(a, b, &mut oflow); - (r, oflow != 0) + i128_overflowing_mul(a, b) } pub extern "C" fn __rust_u128_mulo(a: u128, b: u128) -> (u128, bool) { - let mut oflow = 0; - let r = a.mulo(b, &mut oflow); - (r, oflow != 0) + a.mulo(b) } } From 400c5042d8fcc951e1c27252615cb4b9557ad084 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Mon, 12 Oct 2020 23:58:55 -0500 Subject: [PATCH 0403/1459] Completely replace `LargeInt` --- src/int/addsub.rs | 84 +++++++++++++---------------------------------- src/int/mod.rs | 44 ------------------------- src/int/mul.rs | 45 +++++++++++++------------ src/int/shift.rs | 72 ++++++++++++++++++---------------------- src/macros.rs | 8 ++--- 5 files changed, 83 insertions(+), 170 deletions(-) diff --git a/src/int/addsub.rs b/src/int/addsub.rs index 0a88e2fc8..f4841e90f 100644 --- a/src/int/addsub.rs +++ b/src/int/addsub.rs @@ -1,25 +1,16 @@ -use int::Int; -use int::LargeInt; +use int::{DInt, Int}; -trait UAddSub: LargeInt { +trait UAddSub: DInt { fn uadd(self, other: Self) -> Self { - let (low, carry) = self.low().overflowing_add(other.low()); - let high = self.high().wrapping_add(other.high()); - let carry = if carry { - Self::HighHalf::ONE - } else { - Self::HighHalf::ZERO - }; - Self::from_parts(low, high.wrapping_add(carry)) + let (lo, carry) = self.lo().overflowing_add(other.lo()); + let hi = self.hi().wrapping_add(other.hi()); 
+ let carry = if carry { Self::H::ONE } else { Self::H::ZERO }; + Self::from_lo_hi(lo, hi.wrapping_add(carry)) } fn uadd_one(self) -> Self { - let (low, carry) = self.low().overflowing_add(Self::LowHalf::ONE); - let carry = if carry { - Self::HighHalf::ONE - } else { - Self::HighHalf::ZERO - }; - Self::from_parts(low, self.high().wrapping_add(carry)) + let (lo, carry) = self.lo().overflowing_add(Self::H::ONE); + let carry = if carry { Self::H::ONE } else { Self::H::ZERO }; + Self::from_lo_hi(lo, self.hi().wrapping_add(carry)) } fn usub(self, other: Self) -> Self { let uneg = (!other).uadd_one(); @@ -48,19 +39,9 @@ trait Addo: AddSub where ::UnsignedInt: UAddSub, { - fn addo(self, other: Self, overflow: &mut i32) -> Self { - *overflow = 0; - let result = AddSub::add(self, other); - if other >= Self::ZERO { - if result < self { - *overflow = 1; - } - } else { - if result >= self { - *overflow = 1; - } - } - result + fn addo(self, other: Self) -> (Self, bool) { + let sum = AddSub::add(self, other); + (sum, (other < Self::ZERO) != (sum < self)) } } @@ -71,19 +52,9 @@ trait Subo: AddSub where ::UnsignedInt: UAddSub, { - fn subo(self, other: Self, overflow: &mut i32) -> Self { - *overflow = 0; - let result = AddSub::sub(self, other); - if other >= Self::ZERO { - if result > self { - *overflow = 1; - } - } else { - if result <= self { - *overflow = 1; - } - } - result + fn subo(self, other: Self) -> (Self, bool) { + let sum = AddSub::sub(self, other); + (sum, (other < Self::ZERO) != (self < sum)) } } @@ -92,43 +63,34 @@ impl Subo for u128 {} intrinsics! 
{ pub extern "C" fn __rust_i128_add(a: i128, b: i128) -> i128 { - __rust_u128_add(a as _, b as _) as _ + AddSub::add(a,b) } pub extern "C" fn __rust_i128_addo(a: i128, b: i128) -> (i128, bool) { - let mut oflow = 0; - let r = a.addo(b, &mut oflow); - (r, oflow != 0) + a.addo(b) } pub extern "C" fn __rust_u128_add(a: u128, b: u128) -> u128 { - a.add(b) + AddSub::add(a,b) } pub extern "C" fn __rust_u128_addo(a: u128, b: u128) -> (u128, bool) { - let mut oflow = 0; - let r = a.addo(b, &mut oflow); - (r, oflow != 0) + a.addo(b) } - pub extern "C" fn __rust_i128_sub(a: i128, b: i128) -> i128 { - __rust_u128_sub(a as _, b as _) as _ + AddSub::sub(a,b) } pub extern "C" fn __rust_i128_subo(a: i128, b: i128) -> (i128, bool) { - let mut oflow = 0; - let r = a.subo(b, &mut oflow); - (r, oflow != 0) + a.subo(b) } pub extern "C" fn __rust_u128_sub(a: u128, b: u128) -> u128 { - a.sub(b) + AddSub::sub(a,b) } pub extern "C" fn __rust_u128_subo(a: u128, b: u128) -> (u128, bool) { - let mut oflow = 0; - let r = a.subo(b, &mut oflow); - (r, oflow != 0) + a.subo(b) } } diff --git a/src/int/mod.rs b/src/int/mod.rs index da2263f6e..1ce3d92da 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -384,50 +384,6 @@ impl_h_int!( i64 u64 i128 ); -/// Trait to convert an integer to/from smaller parts -pub(crate) trait LargeInt: Int { - type LowHalf: Int; - type HighHalf: Int; - - fn low(self) -> Self::LowHalf; - fn low_as_high(low: Self::LowHalf) -> Self::HighHalf; - fn high(self) -> Self::HighHalf; - fn high_as_low(low: Self::HighHalf) -> Self::LowHalf; - fn from_parts(low: Self::LowHalf, high: Self::HighHalf) -> Self; -} - -macro_rules! 
large_int { - ($ty:ty, $tylow:ty, $tyhigh:ty, $halfbits:expr) => { - impl LargeInt for $ty { - type LowHalf = $tylow; - type HighHalf = $tyhigh; - - fn low(self) -> $tylow { - self as $tylow - } - fn low_as_high(low: $tylow) -> $tyhigh { - low as $tyhigh - } - fn high(self) -> $tyhigh { - (self >> $halfbits) as $tyhigh - } - fn high_as_low(high: $tyhigh) -> $tylow { - high as $tylow - } - fn from_parts(low: $tylow, high: $tyhigh) -> $ty { - low as $ty | ((high as $ty) << $halfbits) - } - } - }; -} - -large_int!(u32, u16, u16, 16); -large_int!(i32, u16, i16, 16); -large_int!(u64, u32, u32, 32); -large_int!(i64, u32, i32, 32); -large_int!(u128, u64, u64, 64); -large_int!(i128, u64, i64, 64); - /// Trait to express (possibly lossy) casting of integers pub(crate) trait CastInto: Copy { fn cast(self) -> T; diff --git a/src/int/mul.rs b/src/int/mul.rs index e5c0afc1e..a5238eeac 100644 --- a/src/int/mul.rs +++ b/src/int/mul.rs @@ -1,26 +1,29 @@ -use int::LargeInt; use int::{DInt, HInt, Int}; -trait Mul: LargeInt { - fn mul(self, other: Self) -> Self { - let half_bits = Self::BITS / 4; - let lower_mask = !<::LowHalf>::ZERO >> half_bits; - let mut low = (self.low() & lower_mask).wrapping_mul(other.low() & lower_mask); - let mut t = low >> half_bits; - low &= lower_mask; - t += (self.low() >> half_bits).wrapping_mul(other.low() & lower_mask); - low += (t & lower_mask) << half_bits; - let mut high = Self::low_as_high(t >> half_bits); - t = low >> half_bits; - low &= lower_mask; - t += (other.low() >> half_bits).wrapping_mul(self.low() & lower_mask); - low += (t & lower_mask) << half_bits; - high += Self::low_as_high(t >> half_bits); - high += Self::low_as_high((self.low() >> half_bits).wrapping_mul(other.low() >> half_bits)); - high = high - .wrapping_add(self.high().wrapping_mul(Self::low_as_high(other.low()))) - .wrapping_add(Self::low_as_high(self.low()).wrapping_mul(other.high())); - Self::from_parts(low, high) +trait Mul: DInt +where + Self::H: DInt, +{ + fn mul(self, 
rhs: Self) -> Self { + // In order to prevent infinite recursion, we cannot use the `widen_mul` in this: + //self.lo().widen_mul(rhs.lo()) + // .wrapping_add(self.lo().wrapping_mul(rhs.hi()).widen_hi()) + // .wrapping_add(self.hi().wrapping_mul(rhs.lo()).widen_hi()) + + let lhs_lo = self.lo(); + let rhs_lo = rhs.lo(); + // construct the widening multiplication using only `Self::H` sized multiplications + let tmp_0 = lhs_lo.lo().zero_widen_mul(rhs_lo.lo()); + let tmp_1 = lhs_lo.lo().zero_widen_mul(rhs_lo.hi()); + let tmp_2 = lhs_lo.hi().zero_widen_mul(rhs_lo.lo()); + let tmp_3 = lhs_lo.hi().zero_widen_mul(rhs_lo.hi()); + // sum up all widening partials + let mul = Self::from_lo_hi(tmp_0, tmp_3) + .wrapping_add(tmp_1.zero_widen() << (Self::BITS / 4)) + .wrapping_add(tmp_2.zero_widen() << (Self::BITS / 4)); + // add the higher partials + mul.wrapping_add(lhs_lo.wrapping_mul(rhs.hi()).widen_hi()) + .wrapping_add(self.hi().wrapping_mul(rhs_lo).widen_hi()) } } diff --git a/src/int/shift.rs b/src/int/shift.rs index 674c3ee8c..20561786b 100644 --- a/src/int/shift.rs +++ b/src/int/shift.rs @@ -1,20 +1,18 @@ -use int::{Int, LargeInt}; +use int::{DInt, HInt, Int}; -trait Ashl: Int + LargeInt { +trait Ashl: DInt { /// Returns `a << b`, requires `b < Self::BITS` - fn ashl(self, offset: u32) -> Self - where - Self: LargeInt::LowHalf>, - { - let half_bits = Self::BITS / 2; - if offset & half_bits != 0 { - Self::from_parts(Int::ZERO, self.low() << (offset - half_bits)) - } else if offset == 0 { + fn ashl(self, shl: u32) -> Self { + let n_h = Self::H::BITS; + if shl & n_h != 0 { + // we only need `self.lo()` because `self.hi()` will be shifted out entirely + (self.lo() << (shl - n_h)).widen_hi() + } else if shl == 0 { self } else { - Self::from_parts( - self.low() << offset, - (self.high() << offset) | (self.low() >> (half_bits - offset)), + Self::from_lo_hi( + self.lo() << shl, + self.lo().logical_shr(n_h - shl) | (self.hi() << shl), ) } } @@ -24,25 +22,22 @@ impl Ashl for u32 {} 
impl Ashl for u64 {} impl Ashl for u128 {} -trait Ashr: Int + LargeInt { +trait Ashr: DInt { /// Returns arithmetic `a >> b`, requires `b < Self::BITS` - fn ashr(self, offset: u32) -> Self - where - Self: LargeInt::HighHalf as Int>::UnsignedInt>, - { - let half_bits = Self::BITS / 2; - if offset & half_bits != 0 { - Self::from_parts( - (self.high() >> (offset - half_bits)).unsigned(), - self.high() >> (half_bits - 1), + fn ashr(self, shr: u32) -> Self { + let n_h = Self::H::BITS; + if shr & n_h != 0 { + Self::from_lo_hi( + self.hi() >> (shr - n_h), + // smear the sign bit + self.hi() >> (n_h - 1), ) - } else if offset == 0 { + } else if shr == 0 { self } else { - let high_unsigned = self.high().unsigned(); - Self::from_parts( - (high_unsigned << (half_bits - offset)) | (self.low() >> offset), - self.high() >> offset, + Self::from_lo_hi( + self.lo().logical_shr(shr) | (self.hi() << (n_h - shr)), + self.hi() >> shr, ) } } @@ -52,21 +47,18 @@ impl Ashr for i32 {} impl Ashr for i64 {} impl Ashr for i128 {} -trait Lshr: Int + LargeInt { +trait Lshr: DInt { /// Returns logical `a >> b`, requires `b < Self::BITS` - fn lshr(self, offset: u32) -> Self - where - Self: LargeInt::LowHalf>, - { - let half_bits = Self::BITS / 2; - if offset & half_bits != 0 { - Self::from_parts(self.high() >> (offset - half_bits), Int::ZERO) - } else if offset == 0 { + fn lshr(self, shr: u32) -> Self { + let n_h = Self::H::BITS; + if shr & n_h != 0 { + self.hi().logical_shr(shr - n_h).zero_widen() + } else if shr == 0 { self } else { - Self::from_parts( - (self.high() << (half_bits - offset)) | (self.low() >> offset), - self.high() >> offset, + Self::from_lo_hi( + self.lo().logical_shr(shr) | (self.hi() << (n_h - shr)), + self.hi().logical_shr(shr), ) } } diff --git a/src/macros.rs b/src/macros.rs index b02f3ea5c..56f27164a 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -284,16 +284,16 @@ pub mod win64_128bit_abi_hack { impl From for U64x2 { fn from(i: i128) -> U64x2 { - use int::LargeInt; + 
use int::DInt; let j = i as u128; - U64x2(j.low(), j.high()) + U64x2(j.lo(), j.hi()) } } impl From for U64x2 { fn from(i: u128) -> U64x2 { - use int::LargeInt; - U64x2(i.low(), i.high()) + use int::DInt; + U64x2(i.lo(), i.hi()) } } } From 35e323aa0097208d4b7db75fcb8d73d0c83ff3d3 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Tue, 1 Dec 2020 23:16:26 -0600 Subject: [PATCH 0404/1459] Remove `WideInt` --- src/float/cmp.rs | 19 ++++--------------- src/float/div.rs | 10 +++++----- src/float/mod.rs | 11 ++++++++--- src/float/mul.rs | 30 +++++++++++++++++++----------- src/int/mod.rs | 45 --------------------------------------------- 5 files changed, 36 insertions(+), 79 deletions(-) diff --git a/src/float/cmp.rs b/src/float/cmp.rs index 20ab92e4b..79c26b099 100644 --- a/src/float/cmp.rs +++ b/src/float/cmp.rs @@ -1,7 +1,7 @@ #![allow(unreachable_code)] use float::Float; -use int::{CastInto, Int}; +use int::Int; #[derive(Clone, Copy)] enum Result { @@ -31,13 +31,7 @@ impl Result { } } -fn cmp(a: F, b: F) -> Result -where - u32: CastInto, - F::Int: CastInto, - i32: CastInto, - F::Int: CastInto, -{ +fn cmp(a: F, b: F) -> Result { let one = F::Int::ONE; let zero = F::Int::ZERO; let szero = F::SignedInt::ZERO; @@ -90,13 +84,8 @@ where } } } -fn unord(a: F, b: F) -> bool -where - u32: CastInto, - F::Int: CastInto, - i32: CastInto, - F::Int: CastInto, -{ + +fn unord(a: F, b: F) -> bool { let one = F::Int::ONE; let sign_bit = F::SIGN_MASK as F::Int; diff --git a/src/float/div.rs b/src/float/div.rs index 7c582a440..dd6467f88 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -1,5 +1,5 @@ use float::Float; -use int::{CastInto, Int, WideInt}; +use int::{CastInto, DInt, HInt, Int}; fn div32(a: F, b: F) -> F where @@ -7,7 +7,7 @@ where F::Int: CastInto, i32: CastInto, F::Int: CastInto, - F::Int: WideInt, + F::Int: HInt, { let one = F::Int::ONE; let zero = F::Int::ZERO; @@ -156,7 +156,7 @@ where // is the error in the reciprocal of b scaled by the maximum // possible value of a. 
As a consequence of this error bound, // either q or nextafter(q) is the correctly rounded - let (mut quotient, _) = ::wide_mul(a_significand << 1, reciprocal.cast()); + let mut quotient = (a_significand << 1).widen_mul(reciprocal.cast()).hi(); // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). // In either case, we are going to compute a residual of the form @@ -211,7 +211,7 @@ where F::Int: CastInto, i64: CastInto, F::Int: CastInto, - F::Int: WideInt, + F::Int: HInt, { let one = F::Int::ONE; let zero = F::Int::ZERO; @@ -394,7 +394,7 @@ where // We need a 64 x 64 multiply high to compute q, which isn't a basic // operation in C, so we need to be a little bit fussy. - let (mut quotient, _) = ::wide_mul(a_significand << 2, reciprocal.cast()); + let mut quotient = (a_significand << 2).widen_mul(reciprocal.cast()).hi(); // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). // In either case, we are going to compute a residual of the form diff --git a/src/float/mod.rs b/src/float/mod.rs index 06e9aad4b..34b3c6ac1 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -13,7 +13,8 @@ pub mod pow; pub mod sub; /// Trait for some basic operations on floats -pub(crate) trait Float: +#[doc(hidden)] +pub trait Float: Copy + PartialEq + PartialOrd @@ -66,7 +67,6 @@ pub(crate) trait Float: /// Returns `self` transmuted to `Self::SignedInt` fn signed_repr(self) -> Self::SignedInt; - #[cfg(test)] /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be /// represented in multiple different ways. This method returns `true` if two NaNs are /// compared. @@ -80,6 +80,9 @@ pub(crate) trait Float: /// Returns (normalized exponent, normalized significand) fn normalize(significand: Self::Int) -> (i32, Self::Int); + + /// Returns if `self` is subnormal + fn is_subnormal(&self) -> bool; } // FIXME: Some of this can be removed if RFC Issue #1424 is resolved @@ -106,7 +109,6 @@ macro_rules! 
float_impl { fn signed_repr(self) -> Self::SignedInt { unsafe { mem::transmute(self) } } - #[cfg(test)] fn eq_repr(self, rhs: Self) -> bool { if self.is_nan() && rhs.is_nan() { true @@ -133,6 +135,9 @@ macro_rules! float_impl { significand << shift as Self::Int, ) } + fn is_subnormal(&self) -> bool { + (self.repr() & Self::EXPONENT_MASK) == Self::Int::ZERO + } } }; } diff --git a/src/float/mul.rs b/src/float/mul.rs index 7b28793c8..540e7bdcf 100644 --- a/src/float/mul.rs +++ b/src/float/mul.rs @@ -1,5 +1,5 @@ use float::Float; -use int::{CastInto, Int, WideInt}; +use int::{CastInto, DInt, HInt, Int}; fn mul(a: F, b: F) -> F where @@ -7,7 +7,7 @@ where F::Int: CastInto, i32: CastInto, F::Int: CastInto, - F::Int: WideInt, + F::Int: HInt, { let one = F::Int::ONE; let zero = F::Int::ZERO; @@ -112,8 +112,9 @@ where // have (exponentBits + 2) integral digits, all but two of which must be // zero. Normalizing this result is just a conditional left-shift by one // and bumping the exponent accordingly. - let (mut product_high, mut product_low) = - ::wide_mul(a_significand, b_significand << exponent_bits); + let (mut product_low, mut product_high) = a_significand + .widen_mul(b_significand << exponent_bits) + .lo_hi(); let a_exponent_i32: i32 = a_exponent.cast(); let b_exponent_i32: i32 = b_exponent.cast(); @@ -126,7 +127,8 @@ where if (product_high & implicit_bit) != zero { product_exponent = product_exponent.wrapping_add(1); } else { - ::wide_shift_left(&mut product_high, &mut product_low, 1); + product_high = (product_high << 1) | (product_low >> (bits - 1)); + product_low <<= 1; } // If we have overflowed the type, return +/- infinity. @@ -142,17 +144,23 @@ where // handle this case separately, but we make it a special case to // simplify the shift logic. 
let shift = one.wrapping_sub(product_exponent.cast()).cast(); - if shift >= bits as i32 { + if shift >= bits { return F::from_repr(product_sign); } // Otherwise, shift the significand of the result so that the round // bit is the high bit of productLo. - ::wide_shift_right_with_sticky( - &mut product_high, - &mut product_low, - shift, - ) + if shift < bits { + let sticky = product_low << (bits - shift); + product_low = product_high << (bits - shift) | product_low >> shift | sticky; + product_high >>= shift; + } else if shift < (2 * bits) { + let sticky = product_high << (2 * bits - shift) | product_low; + product_low = product_high >> (shift - bits) | sticky; + product_high = zero; + } else { + product_high = zero; + } } else { // Result is normal before rounding; insert the exponent. product_high &= significand_mask; diff --git a/src/int/mod.rs b/src/int/mod.rs index 1ce3d92da..d23028263 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -408,48 +408,3 @@ cast_into!(u64); cast_into!(i64); cast_into!(u128); cast_into!(i128); - -pub(crate) trait WideInt: Int { - type Output: Int; - - fn wide_mul(self, other: Self) -> (Self, Self); - fn wide_shift_left(&mut self, low: &mut Self, count: i32); - fn wide_shift_right_with_sticky(&mut self, low: &mut Self, count: i32); -} - -macro_rules! 
impl_wide_int { - ($ty:ty, $tywide:ty, $bits:expr) => { - impl WideInt for $ty { - type Output = $ty; - - fn wide_mul(self, other: Self) -> (Self, Self) { - let product = (self as $tywide).wrapping_mul(other as $tywide); - ((product >> ($bits as $ty)) as $ty, product as $ty) - } - - fn wide_shift_left(&mut self, low: &mut Self, count: i32) { - *self = (*self << count) | (*low >> ($bits - count)); - *low = *low << count; - } - - fn wide_shift_right_with_sticky(&mut self, low: &mut Self, count: i32) { - if count < $bits { - let sticky = *low << ($bits - count); - *low = *self << ($bits - count) | *low >> count | sticky; - *self = *self >> count; - } else if count < 2 * $bits { - let sticky = *self << (2 * $bits - count) | *low; - *low = *self >> (count - $bits) | sticky; - *self = 0; - } else { - let sticky = *self | *low; - *self = sticky; - *self = 0; - } - } - } - }; -} - -impl_wide_int!(u32, u64, 32); -impl_wide_int!(u64, u128, 64); From 96eaffff5a9bc300114d2ba135707e17ffb0ba92 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Tue, 1 Dec 2020 23:09:57 -0600 Subject: [PATCH 0405/1459] replace some transmutes --- src/float/mod.rs | 9 +++------ testcrate/build.rs | 8 ++++---- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/float/mod.rs b/src/float/mod.rs index 34b3c6ac1..c4b690161 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -1,4 +1,3 @@ -use core::mem; use core::ops; use super::int::Int; @@ -85,8 +84,6 @@ pub trait Float: fn is_subnormal(&self) -> bool; } -// FIXME: Some of this can be removed if RFC Issue #1424 is resolved -// https://github.com/rust-lang/rfcs/issues/1424 macro_rules! float_impl { ($ty:ident, $ity:ident, $sity:ident, $bits:expr, $significand_bits:expr) => { impl Float for $ty { @@ -104,10 +101,10 @@ macro_rules! 
float_impl { const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK); fn repr(self) -> Self::Int { - unsafe { mem::transmute(self) } + self.to_bits() } fn signed_repr(self) -> Self::SignedInt { - unsafe { mem::transmute(self) } + self.to_bits() as Self::SignedInt } fn eq_repr(self, rhs: Self) -> bool { if self.is_nan() && rhs.is_nan() { @@ -117,7 +114,7 @@ macro_rules! float_impl { } } fn from_repr(a: Self::Int) -> Self { - unsafe { mem::transmute(a) } + Self::from_bits(a) } fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self { Self::from_repr( diff --git a/testcrate/build.rs b/testcrate/build.rs index 1baa6a966..1ecd0179e 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -633,7 +633,7 @@ fn main() { if a.0.is_nan() || b.0.is_nan() || c.is_nan() - || c.abs() <= unsafe { mem::transmute(4503599627370495u64) } + || c.abs() <= f64::from_bits(4503599627370495u64) { None } else { @@ -651,7 +651,7 @@ fn main() { if a.0.is_nan() || b.0.is_nan() || c.is_nan() - || c.abs() <= unsafe { mem::transmute(16777215u32) } + || c.abs() <= f32::from_bits(16777215u32) { None } else { @@ -671,7 +671,7 @@ fn main() { if a.0.is_nan() || b.0.is_nan() || c.is_nan() - || c.abs() <= unsafe { mem::transmute(4503599627370495u64) } + || c.abs() <= f64::from_bits(4503599627370495u64) { None } else { @@ -689,7 +689,7 @@ fn main() { if a.0.is_nan() || b.0.is_nan() || c.is_nan() - || c.abs() <= unsafe { mem::transmute(16777215u32) } + || c.abs() <= f32::from_bits(16777215u32) { None } else { From 430c0b41d08821cd81ec5afb748d86606fdc4d40 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Sat, 7 Nov 2020 13:09:51 -0600 Subject: [PATCH 0406/1459] fix some clippy warnings --- src/mem/impls.rs | 8 +++----- src/mem/mod.rs | 4 ++-- src/mem/x86_64.rs | 4 ++-- testcrate/build.rs | 5 +---- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/mem/impls.rs b/src/mem/impls.rs index 6bd1a7ba1..b3eef9901 100644 --- a/src/mem/impls.rs +++ 
b/src/mem/impls.rs @@ -1,10 +1,8 @@ -use super::c_int; - #[inline(always)] pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, n: usize) { let mut i = 0; while i < n { - *dest.offset(i as isize) = *src.offset(i as isize); + *dest.add(i) = *src.add(i); i += 1; } } @@ -15,7 +13,7 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, n: usize) { let mut i = n; while i != 0 { i -= 1; - *dest.offset(i as isize) = *src.offset(i as isize); + *dest.add(i) = *src.add(i); } } @@ -23,7 +21,7 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, n: usize) { pub unsafe fn set_bytes(s: *mut u8, c: u8, n: usize) { let mut i = 0; while i < n { - *s.offset(i as isize) = c; + *s.add(i) = c; i += 1; } } diff --git a/src/mem/mod.rs b/src/mem/mod.rs index adb7c2c64..107762c43 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -45,8 +45,8 @@ pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 { pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { let mut i = 0; while i < n { - let a = *s1.offset(i as isize); - let b = *s2.offset(i as isize); + let a = *s1.add(i); + let b = *s2.add(i); if a != b { return a as i32 - b as i32; } diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 7eefd8099..8cbbdf779 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -59,8 +59,8 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { "cld", byte_count = in(reg) byte_count, inout("rcx") qword_count => _, - inout("rdi") dest.offset(count as isize).wrapping_sub(8) => _, - inout("rsi") src.offset(count as isize).wrapping_sub(8) => _, + inout("rdi") dest.add(count).wrapping_sub(8) => _, + inout("rsi") src.add(count).wrapping_sub(8) => _, options(nostack) ); } diff --git a/testcrate/build.rs b/testcrate/build.rs index 1ecd0179e..39c2486c6 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -648,10 +648,7 @@ fn main() { return None; } let c = a.0 / b.0; - if a.0.is_nan() - || b.0.is_nan() - || 
c.is_nan() - || c.abs() <= f32::from_bits(16777215u32) + if a.0.is_nan() || b.0.is_nan() || c.is_nan() || c.abs() <= f32::from_bits(16777215u32) { None } else { From c2ff1b3119dafb4c56e8e9b8b75f20b9fd4ba3ed Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Mon, 7 Dec 2020 23:25:42 -0600 Subject: [PATCH 0407/1459] Completely overhaul fuzz testing adds testing for almost every numerical intrinsic --- src/int/mod.rs | 11 ++ testcrate/Cargo.toml | 2 +- testcrate/src/lib.rs | 258 +++++++++++++++++++++++++++++++ testcrate/tests/addsub.rs | 109 +++++++++++++ testcrate/tests/cmp.rs | 52 +++++++ testcrate/tests/conv.rs | 125 +++++++++++++++ testcrate/tests/div_rem.rs | 147 +++++++++--------- testcrate/tests/leading_zeros.rs | 54 ------- testcrate/tests/misc.rs | 134 ++++++++++++++++ testcrate/tests/mul.rs | 114 ++++++++++++++ testcrate/tests/shift.rs | 60 +++++++ 11 files changed, 937 insertions(+), 129 deletions(-) create mode 100644 testcrate/tests/addsub.rs create mode 100644 testcrate/tests/cmp.rs create mode 100644 testcrate/tests/conv.rs delete mode 100644 testcrate/tests/leading_zeros.rs create mode 100644 testcrate/tests/misc.rs create mode 100644 testcrate/tests/mul.rs create mode 100644 testcrate/tests/shift.rs diff --git a/src/int/mod.rs b/src/int/mod.rs index d23028263..cb94803a4 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -72,6 +72,9 @@ pub trait Int: /// Prevents the need for excessive conversions between signed and unsigned fn logical_shr(self, other: u32) -> Self; + /// Absolute difference between two integers. + fn abs_diff(self, other: Self) -> Self::UnsignedInt; + // copied from primitive integers, but put in a trait fn is_zero(self) -> bool; fn max_value() -> Self; @@ -251,6 +254,10 @@ macro_rules! int_impl { me } + fn abs_diff(self, other: Self) -> Self { + (self.wrapping_sub(other) as $ity).wrapping_abs() as $uty + } + int_impl_common!($uty, $bits); } @@ -274,6 +281,10 @@ macro_rules! 
int_impl { me as $ity } + fn abs_diff(self, other: Self) -> $uty { + self.wrapping_sub(other).wrapping_abs() as $uty + } + int_impl_common!($ity, $bits); } }; diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index ce8df2d12..ff9a6a453 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -11,7 +11,7 @@ doctest = false [build-dependencies] rand = "0.7" -[dev-dependencies] +[dependencies] # For fuzzing tests we want a deterministic seedable RNG. We also eliminate potential # problems with system RNGs on the variety of platforms this crate is tested on. # `xoshiro128**` is used for its quality, size, and speed at generating `u32` shift amounts. diff --git a/testcrate/src/lib.rs b/testcrate/src/lib.rs index 0c9ac1ac8..9bd155f6f 100644 --- a/testcrate/src/lib.rs +++ b/testcrate/src/lib.rs @@ -1 +1,259 @@ +//! This crate is for integration testing and fuzz testing of functions in `compiler-builtins`. This +//! includes publicly documented intrinsics and some internal alternative implementation functions +//! such as `usize_leading_zeros_riscv` (which are tested because they are configured for +//! architectures not tested by the CI). +//! +//! The general idea is to use a combination of edge case testing and randomized fuzz testing. The +//! edge case testing is crucial for checking cases like where both inputs are equal or equal to +//! special values such as `i128::MIN`, which is unlikely for the random fuzzer by itself to +//! encounter. The randomized fuzz testing is specially designed to cover wide swaths of search +//! space in as few iterations as possible. See `fuzz_values` in `testcrate/tests/misc.rs` for an +//! example. +//! +//! Some floating point tests are disabled for specific architectures, because they do not have +//! correct rounding. 
#![no_std] + +use compiler_builtins::float::Float; +use compiler_builtins::int::Int; + +use rand_xoshiro::rand_core::{RngCore, SeedableRng}; +use rand_xoshiro::Xoshiro128StarStar; + +/// Sets the number of fuzz iterations run for most tests. In practice, the vast majority of bugs +/// are caught by the edge case testers. Most of the remaining bugs triggered by more complex +/// sequences are caught well within 10_000 fuzz iterations. For classes of algorithms like division +/// that are vulnerable to rare edge cases, we want 1_000_000 iterations to be more confident. In +/// practical CI, however, we only want to run the more strenuous test once to catch algorithmic +/// level bugs, and run the 10_000 iteration test on most targets. Target-dependent bugs are likely +/// to involve miscompilation and misconfiguration that is likely to break algorithms in quickly +/// caught ways. We choose to configure `N = 1_000_000` iterations for `x86_64` targets (and if +/// debug assertions are disabled. Tests without `--release` would take too long) which are likely +/// to have fast hardware, and run `N = 10_000` for all other targets. +pub const N: u32 = if cfg!(target_arch = "x86_64") && !cfg!(debug_assertions) { + 1_000_000 +} else { + 10_000 +}; + +/// Random fuzzing step. When run several times, it results in excellent fuzzing entropy such as: +/// 11110101010101011110111110011111 +/// 10110101010100001011101011001010 +/// 1000000000000000 +/// 10000000000000110111110000001010 +/// 1111011111111101010101111110101 +/// 101111111110100000000101000000 +/// 10000000110100000000100010101 +/// 1010101010101000 +fn fuzz_step(rng: &mut Xoshiro128StarStar, x: &mut I) { + let ones = !I::ZERO; + let bit_indexing_mask: u32 = I::BITS - 1; + // It happens that all the RNG we need can come from one call. 
7 bits are needed to index a + // worst case 128 bit integer, and there are 4 indexes that need to be made plus 4 bits for + // selecting operations + let rng32 = rng.next_u32(); + + // Randomly OR, AND, and XOR randomly sized and shifted continuous strings of + // ones with `lhs` and `rhs`. + let r0 = bit_indexing_mask & rng32; + let r1 = bit_indexing_mask & (rng32 >> 7); + let mask = ones.wrapping_shl(r0).rotate_left(r1); + match (rng32 >> 14) % 4 { + 0 => *x |= mask, + 1 => *x &= mask, + // both 2 and 3 to make XORs as common as ORs and ANDs combined + _ => *x ^= mask, + } + + // Alternating ones and zeros (e.x. 0b1010101010101010). This catches second-order + // problems that might occur for algorithms with two modes of operation (potentially + // there is some invariant that can be broken and maintained via alternating between modes, + // breaking the algorithm when it reaches the end). + let mut alt_ones = I::ONE; + for _ in 0..(I::BITS / 2) { + alt_ones <<= 2; + alt_ones |= I::ONE; + } + let r0 = bit_indexing_mask & (rng32 >> 16); + let r1 = bit_indexing_mask & (rng32 >> 23); + let mask = alt_ones.wrapping_shl(r0).rotate_left(r1); + match rng32 >> 30 { + 0 => *x |= mask, + 1 => *x &= mask, + _ => *x ^= mask, + } +} + +// We need macros like this, because `#![no_std]` prevents us from using iterators +macro_rules! edge_cases { + ($I:ident, $case:ident, $inner:block) => { + for i0 in 0..$I::FUZZ_NUM { + let mask_lo = (!$I::UnsignedInt::ZERO).wrapping_shr($I::FUZZ_LENGTHS[i0] as u32); + for i1 in i0..I::FUZZ_NUM { + let mask_hi = + (!$I::UnsignedInt::ZERO).wrapping_shl($I::FUZZ_LENGTHS[i1 - i0] as u32); + let $case = I::from_unsigned(mask_lo & mask_hi); + $inner + } + } + }; +} + +/// Feeds a series of fuzzing inputs to `f`. The fuzzer first uses an algorithm designed to find +/// edge cases, followed by a more random fuzzer that runs `n` times. +pub fn fuzz(n: u32, mut f: F) { + // edge case tester. Calls `f` 210 times for u128. 
+ // zero gets skipped by the loop + f(I::ZERO); + edge_cases!(I, case, { + f(case); + }); + + // random fuzzer + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut x: I = Int::ZERO; + for _ in 0..n { + fuzz_step(&mut rng, &mut x); + f(x) + } +} + +/// The same as `fuzz`, except `f` has two inputs. +pub fn fuzz_2(n: u32, f: F) { + // Check cases where the first and second inputs are zero. Both call `f` 210 times for `u128`. + edge_cases!(I, case, { + f(I::ZERO, case); + }); + edge_cases!(I, case, { + f(case, I::ZERO); + }); + // Nested edge tester. Calls `f` 44100 times for `u128`. + edge_cases!(I, case0, { + edge_cases!(I, case1, { + f(case0, case1); + }) + }); + + // random fuzzer + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut x: I = I::ZERO; + let mut y: I = I::ZERO; + for _ in 0..n { + fuzz_step(&mut rng, &mut x); + fuzz_step(&mut rng, &mut y); + f(x, y) + } +} + +/// Tester for shift functions +pub fn fuzz_shift(f: F) { + // Shift functions are very simple and do not need anything other than shifting a small + // set of random patterns for every fuzz length. + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut x: I = Int::ZERO; + for i in 0..I::FUZZ_NUM { + fuzz_step(&mut rng, &mut x); + f(x, Int::ZERO); + f(x, I::FUZZ_LENGTHS[i] as u32); + } +} + +fn fuzz_float_step(rng: &mut Xoshiro128StarStar, f: &mut F) { + let rng32 = rng.next_u32(); + // we need to fuzz the different parts of the float separately, because the masking on larger + // significands will tend to set the exponent to all ones or all zeros frequently + + // sign bit fuzzing + let sign = (rng32 & 1) != 0; + + // exponent fuzzing. Only 4 bits for the selector needed. + let ones = (F::Int::ONE << F::EXPONENT_BITS) - F::Int::ONE; + let r0 = (rng32 >> 1) % F::EXPONENT_BITS; + let r1 = (rng32 >> 5) % F::EXPONENT_BITS; + // custom rotate shift. Note that `F::Int` is unsigned, so we can shift right without smearing + // the sign bit. 
+ let mask = if r1 == 0 { + ones.wrapping_shr(r0) + } else { + let tmp = ones.wrapping_shr(r0); + (tmp.wrapping_shl(r1) | tmp.wrapping_shr(F::EXPONENT_BITS - r1)) & ones + }; + let mut exp = (f.repr() & F::EXPONENT_MASK) >> F::SIGNIFICAND_BITS; + match (rng32 >> 9) % 4 { + 0 => exp |= mask, + 1 => exp &= mask, + _ => exp ^= mask, + } + + // significand fuzzing + let mut sig = f.repr() & F::SIGNIFICAND_MASK; + fuzz_step(rng, &mut sig); + sig &= F::SIGNIFICAND_MASK; + + *f = F::from_parts(sign, exp, sig); +} + +macro_rules! float_edge_cases { + ($F:ident, $case:ident, $inner:block) => { + for exponent in [ + F::Int::ZERO, + F::Int::ONE, + F::Int::ONE << (F::EXPONENT_BITS / 2), + (F::Int::ONE << (F::EXPONENT_BITS - 1)) - F::Int::ONE, + F::Int::ONE << (F::EXPONENT_BITS - 1), + (F::Int::ONE << (F::EXPONENT_BITS - 1)) + F::Int::ONE, + (F::Int::ONE << F::EXPONENT_BITS) - F::Int::ONE, + ] + .iter() + { + for significand in [ + F::Int::ZERO, + F::Int::ONE, + F::Int::ONE << (F::SIGNIFICAND_BITS / 2), + (F::Int::ONE << (F::SIGNIFICAND_BITS - 1)) - F::Int::ONE, + F::Int::ONE << (F::SIGNIFICAND_BITS - 1), + (F::Int::ONE << (F::SIGNIFICAND_BITS - 1)) + F::Int::ONE, + (F::Int::ONE << F::SIGNIFICAND_BITS) - F::Int::ONE, + ] + .iter() + { + for sign in [false, true].iter() { + let $case = F::from_parts(*sign, *exponent, *significand); + $inner + } + } + } + }; +} + +pub fn fuzz_float(n: u32, f: E) { + float_edge_cases!(F, case, { + f(case); + }); + + // random fuzzer + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut x = F::ZERO; + for _ in 0..n { + fuzz_float_step(&mut rng, &mut x); + f(x); + } +} + +pub fn fuzz_float_2(n: u32, f: E) { + float_edge_cases!(F, case0, { + float_edge_cases!(F, case1, { + f(case0, case1); + }); + }); + + // random fuzzer + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut x = F::ZERO; + let mut y = F::ZERO; + for _ in 0..n { + fuzz_float_step(&mut rng, &mut x); + fuzz_float_step(&mut rng, &mut y); + f(x, y) + } +} diff --git 
a/testcrate/tests/addsub.rs b/testcrate/tests/addsub.rs new file mode 100644 index 000000000..ff56668b7 --- /dev/null +++ b/testcrate/tests/addsub.rs @@ -0,0 +1,109 @@ +use testcrate::*; + +macro_rules! sum { + ($($i:ty, $fn_add:ident, $fn_sub:ident);*;) => { + $( + fuzz_2(N, |x: $i, y: $i| { + let add0 = x.wrapping_add(y); + let sub0 = x.wrapping_sub(y); + let add1: $i = $fn_add(x, y); + let sub1: $i = $fn_sub(x, y); + if add0 != add1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn_add), x, y, add0, add1 + ); + } + if sub0 != sub1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn_sub), x, y, sub0, sub1 + ); + } + }); + )* + }; +} + +macro_rules! overflowing_sum { + ($($i:ty, $fn_add:ident, $fn_sub:ident);*;) => { + $( + fuzz_2(N, |x: $i, y: $i| { + let add0 = x.overflowing_add(y); + let sub0 = x.overflowing_sub(y); + let add1: ($i, bool) = $fn_add(x, y); + let sub1: ($i, bool) = $fn_sub(x, y); + if add0.0 != add1.0 || add0.1 != add1.1 { + panic!( + "{}({}, {}): std: {:?}, builtins: {:?}", + stringify!($fn_add), x, y, add0, add1 + ); + } + if sub0.0 != sub1.0 || sub0.1 != sub1.1 { + panic!( + "{}({}, {}): std: {:?}, builtins: {:?}", + stringify!($fn_sub), x, y, sub0, sub1 + ); + } + }); + )* + }; +} + +#[test] +fn addsub() { + use compiler_builtins::int::addsub::{ + __rust_i128_add, __rust_i128_addo, __rust_i128_sub, __rust_i128_subo, __rust_u128_add, + __rust_u128_addo, __rust_u128_sub, __rust_u128_subo, + }; + + // Integer addition and subtraction is very simple, so 100 fuzzing passes should be plenty. + sum!( + u128, __rust_u128_add, __rust_u128_sub; + i128, __rust_i128_add, __rust_i128_sub; + ); + overflowing_sum!( + u128, __rust_u128_addo, __rust_u128_subo; + i128, __rust_i128_addo, __rust_i128_subo; + ); +} + +macro_rules! 
float_sum { + ($($f:ty, $fn_add:ident, $fn_sub:ident);*;) => { + $( + fuzz_float_2(N, |x: $f, y: $f| { + let add0 = x + y; + let sub0 = x - y; + let add1: $f = $fn_add(x, y); + let sub1: $f = $fn_sub(x, y); + if !Float::eq_repr(add0, add1) { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn_add), x, y, add0, add1 + ); + } + if !Float::eq_repr(sub0, sub1) { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn_sub), x, y, sub0, sub1 + ); + } + }); + )* + }; +} + +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +#[test] +fn float_addsub() { + use compiler_builtins::float::{ + add::{__adddf3, __addsf3}, + sub::{__subdf3, __subsf3}, + Float, + }; + + float_sum!( + f32, __addsf3, __subsf3; + f64, __adddf3, __subdf3; + ); +} diff --git a/testcrate/tests/cmp.rs b/testcrate/tests/cmp.rs new file mode 100644 index 000000000..d359b65d7 --- /dev/null +++ b/testcrate/tests/cmp.rs @@ -0,0 +1,52 @@ +use testcrate::*; + +macro_rules! cmp { + ($x:ident, $y:ident, $($unordered_val:expr, $fn:ident);*;) => { + $( + let cmp0 = if $x.is_nan() || $y.is_nan() { + $unordered_val + } else if $x < $y { + -1 + } else if $x == $y { + 0 + } else { + 1 + }; + let cmp1 = $fn($x, $y); + if cmp0 != cmp1 { + panic!("{}({}, {}): std: {}, builtins: {}", stringify!($fn_builtins), $x, $y, cmp0, cmp1); + } + )* + }; +} + +#[test] +fn float_comparisons() { + use compiler_builtins::float::cmp::{ + __eqdf2, __eqsf2, __gedf2, __gesf2, __gtdf2, __gtsf2, __ledf2, __lesf2, __ltdf2, __ltsf2, + __nedf2, __nesf2, __unorddf2, __unordsf2, + }; + + fuzz_float_2(N, |x: f32, y: f32| { + assert_eq!(__unordsf2(x, y) != 0, x.is_nan() || y.is_nan()); + cmp!(x, y, + 1, __ltsf2; + 1, __lesf2; + 1, __eqsf2; + -1, __gesf2; + -1, __gtsf2; + 1, __nesf2; + ); + }); + fuzz_float_2(N, |x: f64, y: f64| { + assert_eq!(__unorddf2(x, y) != 0, x.is_nan() || y.is_nan()); + cmp!(x, y, + 1, __ltdf2; + 1, __ledf2; + 1, __eqdf2; + -1, __gedf2; + -1, __gtdf2; + 1, __nedf2; + ); + }); +} diff 
--git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs new file mode 100644 index 000000000..7cdbf9fbb --- /dev/null +++ b/testcrate/tests/conv.rs @@ -0,0 +1,125 @@ +use testcrate::*; + +macro_rules! i_to_f { + ($($from:ty, $into:ty, $fn:ident);*;) => { + $( + fuzz(N, |x: $from| { + let f0 = x as $into; + let f1: $into = $fn(x); + // This makes sure that the conversion produced the best rounding possible, and does + // this independent of `x as $into` rounding correctly. + // This assumes that float to integer conversion is correct. + let y_minus_ulp = <$into>::from_bits(f1.to_bits().wrapping_sub(1)) as $from; + let y = f1 as $from; + let y_plus_ulp = <$into>::from_bits(f1.to_bits().wrapping_add(1)) as $from; + let error_minus = <$from as Int>::abs_diff(y_minus_ulp, x); + let error = <$from as Int>::abs_diff(y, x); + let error_plus = <$from as Int>::abs_diff(y_plus_ulp, x); + // The first two conditions check that none of the two closest float values are + // strictly closer in representation to `x`. The second makes sure that rounding is + // towards even significand if two float values are equally close to the integer. + if error_minus < error + || error_plus < error + || ((error_minus == error || error_plus == error) + && ((f0.to_bits() & 1) != 0)) + { + panic!( + "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})", + stringify!($fn), + x, + f1.to_bits(), + y_minus_ulp, + y, + y_plus_ulp, + error_minus, + error, + error_plus, + ); + } + // Test against native conversion. We disable testing on all `x86` because of + // rounding bugs with `i686`. `powerpc` also has the same rounding bug. 
+ if f0 != f1 && !cfg!(any( + target_arch = "x86", + target_arch = "powerpc", + target_arch = "powerpc64" + )) { + panic!( + "{}({}): std: {}, builtins: {}", + stringify!($fn), + x, + f0, + f1, + ); + } + }); + )* + }; +} + +#[test] +fn int_to_float() { + use compiler_builtins::float::conv::{ + __floatdidf, __floatdisf, __floatsidf, __floatsisf, __floattidf, __floattisf, + __floatundidf, __floatundisf, __floatunsidf, __floatunsisf, __floatuntidf, __floatuntisf, + }; + use compiler_builtins::int::Int; + + i_to_f!( + u32, f32, __floatunsisf; + u32, f64, __floatunsidf; + i32, f32, __floatsisf; + i32, f64, __floatsidf; + u64, f32, __floatundisf; + u64, f64, __floatundidf; + i64, f32, __floatdisf; + i64, f64, __floatdidf; + u128, f32, __floatuntisf; + u128, f64, __floatuntidf; + i128, f32, __floattisf; + i128, f64, __floattidf; + ); +} + +macro_rules! f_to_i { + ($x:ident, $($f:ty, $fn:ident);*;) => { + $( + // it is undefined behavior in the first place to do conversions with NaNs + if !$x.is_nan() { + let conv0 = $x as $f; + let conv1: $f = $fn($x); + if conv0 != conv1 { + panic!("{}({}): std: {}, builtins: {}", stringify!($fn), $x, conv0, conv1); + } + } + )* + }; +} + +#[test] +fn float_to_int() { + use compiler_builtins::float::conv::{ + __fixdfdi, __fixdfsi, __fixdfti, __fixsfdi, __fixsfsi, __fixsfti, __fixunsdfdi, + __fixunsdfsi, __fixunsdfti, __fixunssfdi, __fixunssfsi, __fixunssfti, + }; + + fuzz_float(N, |x: f32| { + f_to_i!(x, + u32, __fixunssfsi; + u64, __fixunssfdi; + u128, __fixunssfti; + i32, __fixsfsi; + i64, __fixsfdi; + i128, __fixsfti; + ); + }); + fuzz_float(N, |x: f64| { + f_to_i!(x, + u32, __fixunsdfsi; + u64, __fixunsdfdi; + u128, __fixunsdfti; + i32, __fixdfsi; + i64, __fixdfdi; + i128, __fixdfti; + ); + }); +} diff --git a/testcrate/tests/div_rem.rs b/testcrate/tests/div_rem.rs index 199fa9db7..0007c15ae 100644 --- a/testcrate/tests/div_rem.rs +++ b/testcrate/tests/div_rem.rs @@ -1,8 +1,9 @@ -use rand_xoshiro::rand_core::{RngCore, SeedableRng}; 
-use rand_xoshiro::Xoshiro128StarStar; - use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4}; -use compiler_builtins::int::udiv::{__udivmoddi4, __udivmodsi4, __udivmodti4}; +use compiler_builtins::int::udiv::{__udivmoddi4, __udivmodsi4, __udivmodti4, u128_divide_sparc}; +use testcrate::*; + +// Division algorithms have by far the nastiest and largest number of edge cases, and experience shows +// that sometimes 100_000 iterations of the random fuzzer is needed. /// Creates intensive test functions for division functions of a certain size macro_rules! test { @@ -16,14 +17,17 @@ macro_rules! test { ) => { #[test] fn $test_name() { - fn assert_invariants(lhs: $uX, rhs: $uX) { - let rem: &mut $uX = &mut 0; - let quo: $uX = $unsigned_name(lhs, rhs, Some(rem)); - let rem = *rem; + fuzz_2(N, |lhs, rhs| { + if rhs == 0 { + return; + } + + let mut rem: $uX = 0; + let quo: $uX = $unsigned_name(lhs, rhs, Some(&mut rem)); if rhs <= rem || (lhs != rhs.wrapping_mul(quo).wrapping_add(rem)) { panic!( "unsigned division function failed with lhs:{} rhs:{} \ - expected:({}, {}) found:({}, {})", + std:({}, {}) builtins:({}, {})", lhs, rhs, lhs.wrapping_div(rhs), @@ -55,7 +59,7 @@ macro_rules! test { if incorrect_rem || lhs != rhs.wrapping_mul(quo).wrapping_add(rem) { panic!( "signed division function failed with lhs:{} rhs:{} \ - expected:({}, {}) found:({}, {})", + std:({}, {}) builtins:({}, {})", lhs, rhs, lhs.wrapping_div(rhs), @@ -64,70 +68,7 @@ macro_rules! test { rem ); } - } - - // Specially designed random fuzzer - let mut rng = Xoshiro128StarStar::seed_from_u64(0); - let mut lhs: $uX = 0; - let mut rhs: $uX = 0; - // all ones constant - let ones: $uX = !0; - // Alternating ones and zeros (e.x. 0b1010101010101010). 
This catches second-order - // problems that might occur for algorithms with two modes of operation (potentially - // there is some invariant that can be broken for large `duo` and maintained via - // alternating between modes, breaking the algorithm when it reaches the end). - let mut alt_ones: $uX = 1; - for _ in 0..($n / 2) { - alt_ones <<= 2; - alt_ones |= 1; - } - // creates a mask for indexing the bits of the type - let bit_indexing_mask = $n - 1; - for _ in 0..1_000_000 { - // Randomly OR, AND, and XOR randomly sized and shifted continuous strings of - // ones with `lhs` and `rhs`. This results in excellent fuzzing entropy such as: - // lhs:10101010111101000000000100101010 rhs: 1010101010000000000000001000001 - // lhs:10101010111101000000000101001010 rhs: 1010101010101010101010100010100 - // lhs:10101010111101000000000101001010 rhs:11101010110101010101010100001110 - // lhs:10101010000000000000000001001010 rhs:10100010100000000000000000001010 - // lhs:10101010000000000000000001001010 rhs: 10101010101010101000 - // lhs:10101010000000000000000001100000 rhs:11111111111101010101010101001111 - // lhs:10101010000000101010101011000000 rhs:11111111111101010101010100000111 - // lhs:10101010101010101010101011101010 rhs: 1010100000000000000 - // lhs:11111111110101101010101011010111 rhs: 1010100000000000000 - // The msb is set half of the time by the fuzzer, but `assert_invariants` tests - // both the signed and unsigned functions. 
- let r0: u32 = bit_indexing_mask & rng.next_u32(); - let r1: u32 = bit_indexing_mask & rng.next_u32(); - let mask = ones.wrapping_shr(r0).rotate_left(r1); - match rng.next_u32() % 8 { - 0 => lhs |= mask, - 1 => lhs &= mask, - // both 2 and 3 to make XORs as common as ORs and ANDs combined, otherwise - // the entropy gets destroyed too often - 2 | 3 => lhs ^= mask, - 4 => rhs |= mask, - 5 => rhs &= mask, - _ => rhs ^= mask, - } - // do the same for alternating ones and zeros - let r0: u32 = bit_indexing_mask & rng.next_u32(); - let r1: u32 = bit_indexing_mask & rng.next_u32(); - let mask = alt_ones.wrapping_shr(r0).rotate_left(r1); - match rng.next_u32() % 8 { - 0 => lhs |= mask, - 1 => lhs &= mask, - // both 2 and 3 to make XORs as common as ORs and ANDs combined, otherwise - // the entropy gets destroyed too often - 2 | 3 => lhs ^= mask, - 4 => rhs |= mask, - 5 => rhs &= mask, - _ => rhs ^= mask, - } - if rhs != 0 { - assert_invariants(lhs, rhs); - } - } + }); } }; } @@ -135,3 +76,61 @@ macro_rules! test { test!(32, u32, i32, div_rem_si4, __udivmodsi4, __divmodsi4); test!(64, u64, i64, div_rem_di4, __udivmoddi4, __divmoddi4); test!(128, u128, i128, div_rem_ti4, __udivmodti4, __divmodti4); + +#[test] +fn divide_sparc() { + fuzz_2(N, |lhs, rhs| { + if rhs == 0 { + return; + } + + let mut rem: u128 = 0; + let quo: u128 = u128_divide_sparc(lhs, rhs, &mut rem); + if rhs <= rem || (lhs != rhs.wrapping_mul(quo).wrapping_add(rem)) { + panic!( + "u128_divide_sparc({}, {}): \ + std:({}, {}), builtins:({}, {})", + lhs, + rhs, + lhs.wrapping_div(rhs), + lhs.wrapping_rem(rhs), + quo, + rem + ); + } + }); +} + +macro_rules! 
float { + ($($i:ty, $fn:ident);*;) => { + $( + fuzz_float_2(N, |x: $i, y: $i| { + let quo0 = x / y; + let quo1: $i = $fn(x, y); + // division of subnormals is not currently handled + if !(Float::is_subnormal(&quo0) || Float::is_subnormal(&quo1)) { + if !Float::eq_repr(quo0, quo1) { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn), x, y, quo0, quo1 + ); + } + } + }); + )* + }; +} + +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +#[test] +fn float_div() { + use compiler_builtins::float::{ + div::{__divdf3, __divsf3}, + Float, + }; + + float!( + f32, __divsf3; + f64, __divdf3; + ); +} diff --git a/testcrate/tests/leading_zeros.rs b/testcrate/tests/leading_zeros.rs deleted file mode 100644 index b857d9e0c..000000000 --- a/testcrate/tests/leading_zeros.rs +++ /dev/null @@ -1,54 +0,0 @@ -use rand_xoshiro::rand_core::{RngCore, SeedableRng}; -use rand_xoshiro::Xoshiro128StarStar; - -use compiler_builtins::int::__clzsi2; -use compiler_builtins::int::leading_zeros::{ - usize_leading_zeros_default, usize_leading_zeros_riscv, -}; - -#[test] -fn __clzsi2_test() { - // Binary fuzzer. We cannot just send a random number directly to `__clzsi2()`, because we need - // large sequences of zeros to test. This XORs, ANDs, and ORs random length strings of 1s to - // `x`. ORs insure sequences of ones, ANDs insures sequences of zeros, and XORs are not often - // destructive but add entropy. - let mut rng = Xoshiro128StarStar::seed_from_u64(0); - let mut x = 0usize; - // creates a mask for indexing the bits of the type - let bit_indexing_mask = usize::MAX.count_ones() - 1; - // 10000 iterations is enough to make sure edge cases like single set bits are tested and to go - // through many paths. 
- for _ in 0..10_000 { - let r0 = bit_indexing_mask & rng.next_u32(); - // random length of ones - let ones: usize = !0 >> r0; - let r1 = bit_indexing_mask & rng.next_u32(); - // random circular shift - let mask = ones.rotate_left(r1); - match rng.next_u32() % 4 { - 0 => x |= mask, - 1 => x &= mask, - // both 2 and 3 to make XORs as common as ORs and ANDs combined - _ => x ^= mask, - } - let lz = x.leading_zeros() as usize; - let lz0 = __clzsi2(x); - let lz1 = usize_leading_zeros_default(x); - let lz2 = usize_leading_zeros_riscv(x); - if lz0 != lz { - panic!("__clzsi2({}): expected: {}, found: {}", x, lz, lz0); - } - if lz1 != lz { - panic!( - "usize_leading_zeros_default({}): expected: {}, found: {}", - x, lz, lz1 - ); - } - if lz2 != lz { - panic!( - "usize_leading_zeros_riscv({}): expected: {}, found: {}", - x, lz, lz2 - ); - } - } -} diff --git a/testcrate/tests/misc.rs b/testcrate/tests/misc.rs new file mode 100644 index 000000000..d31e3e6b6 --- /dev/null +++ b/testcrate/tests/misc.rs @@ -0,0 +1,134 @@ +use testcrate::*; + +/// Make sure that the the edge case tester and randomized tester don't break, and list examples of +/// fuzz values for documentation purposes. 
+#[test] +fn fuzz_values() { + const VALS: [u16; 47] = [ + 0b0, // edge cases + 0b1111111111111111, + 0b1111111111111110, + 0b1111111111111100, + 0b1111111110000000, + 0b1111111100000000, + 0b1110000000000000, + 0b1100000000000000, + 0b1000000000000000, + 0b111111111111111, + 0b111111111111110, + 0b111111111111100, + 0b111111110000000, + 0b111111100000000, + 0b110000000000000, + 0b100000000000000, + 0b11111111111111, + 0b11111111111110, + 0b11111111111100, + 0b11111110000000, + 0b11111100000000, + 0b10000000000000, + 0b111111111, + 0b111111110, + 0b111111100, + 0b110000000, + 0b100000000, + 0b11111111, + 0b11111110, + 0b11111100, + 0b10000000, + 0b111, + 0b110, + 0b100, + 0b11, + 0b10, + 0b1, + 0b1010110100000, // beginning of random fuzzing + 0b1100011001011010, + 0b1001100101001111, + 0b1101010100011010, + 0b100010001, + 0b1000000000000000, + 0b1100000000000101, + 0b1100111101010101, + 0b1100010111111111, + 0b1111110101111111, + ]; + let mut i = 0; + fuzz(10, |x: u16| { + assert_eq!(x, VALS[i]); + i += 1; + }); +} + +#[test] +fn leading_zeros() { + use compiler_builtins::int::__clzsi2; + use compiler_builtins::int::leading_zeros::{ + usize_leading_zeros_default, usize_leading_zeros_riscv, + }; + fuzz(N, |x: usize| { + let lz = x.leading_zeros() as usize; + let lz0 = __clzsi2(x); + let lz1 = usize_leading_zeros_default(x); + let lz2 = usize_leading_zeros_riscv(x); + if lz0 != lz { + panic!("__clzsi2({}): std: {}, builtins: {}", x, lz, lz0); + } + if lz1 != lz { + panic!( + "usize_leading_zeros_default({}): std: {}, builtins: {}", + x, lz, lz1 + ); + } + if lz2 != lz { + panic!( + "usize_leading_zeros_riscv({}): std: {}, builtins: {}", + x, lz, lz2 + ); + } + }) +} + +#[test] +fn float_extend() { + fuzz_float(N, |x: f32| { + let tmp0 = x as f64; + let tmp1: f64 = compiler_builtins::float::extend::__extendsfdf2(x); + if !compiler_builtins::float::Float::eq_repr(tmp0, tmp1) { + panic!("__extendsfdf2({}): std: {}, builtins: {}", x, tmp0, tmp1); + } + }); +} + +// This 
doesn't quite work because of issues related to +// https://github.com/rust-lang/rust/issues/73920. +// TODO how do we resolve this? +/* +macro_rules! pow { + ($($f:ty, $fn:ident);*;) => { + $( + fuzz_float_2(N, |x: $f, y: $f| { + let n = y as i32; + let tmp0: $f = x.powi(n); + let tmp1: $f = $fn(x, n); + if tmp0 != tmp1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn), x, y, tmp0, tmp1 + ); + } + }); + )* + }; +} + +#[test] +fn float_pow() { + use compiler_builtins::float::pow::{__powidf2, __powisf2}; + + pow!( + f32, __powisf2; + f64, __powidf2; + ); +} +*/ diff --git a/testcrate/tests/mul.rs b/testcrate/tests/mul.rs new file mode 100644 index 000000000..8b97ea46c --- /dev/null +++ b/testcrate/tests/mul.rs @@ -0,0 +1,114 @@ +use testcrate::*; + +macro_rules! mul { + ($($i:ty, $fn:ident);*;) => { + $( + fuzz_2(N, |x: $i, y: $i| { + let mul0 = x.wrapping_mul(y); + let mul1: $i = $fn(x, y); + if mul0 != mul1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn), x, y, mul0, mul1 + ); + } + }); + )* + }; +} + +#[test] +fn mul() { + use compiler_builtins::int::mul::{__muldi3, __multi3}; + + mul!( + u64, __muldi3; + i128, __multi3; + ); +} + +macro_rules! 
overflowing_mul { + ($($i:ty, $fn:ident);*;) => { + $( + fuzz_2(N, |x: $i, y: $i| { + let (mul0, o0) = x.overflowing_mul(y); + let mut o1 = 0i32; + let mul1: $i = $fn(x, y, &mut o1); + let o1 = o1 != 0; + if mul0 != mul1 || o0 != o1 { + panic!( + "{}({}, {}): std: ({}, {}), builtins: ({}, {})", + stringify!($fn), x, y, mul0, o0, mul1, o1 + ); + } + }); + )* + }; +} + +#[test] +fn overflowing_mul() { + use compiler_builtins::int::mul::{ + __mulodi4, __mulosi4, __muloti4, __rust_i128_mulo, __rust_u128_mulo, + }; + + overflowing_mul!( + i32, __mulosi4; + i64, __mulodi4; + i128, __muloti4; + ); + fuzz_2(N, |x: u128, y: u128| { + let (mul0, o0) = x.overflowing_mul(y); + let (mul1, o1) = __rust_u128_mulo(x, y); + if mul0 != mul1 || o0 != o1 { + panic!( + "__rust_u128_mulo({}, {}): std: ({}, {}), builtins: ({}, {})", + x, y, mul0, o0, mul1, o1 + ); + } + let x = x as i128; + let y = y as i128; + let (mul0, o0) = x.overflowing_mul(y); + let (mul1, o1) = __rust_i128_mulo(x, y); + if mul0 != mul1 || o0 != o1 { + panic!( + "__rust_i128_mulo({}, {}): std: ({}, {}), builtins: ({}, {})", + x, y, mul0, o0, mul1, o1 + ); + } + }); +} + +macro_rules! 
float_mul { + ($($f:ty, $fn:ident);*;) => { + $( + fuzz_float_2(N, |x: $f, y: $f| { + let mul0 = x * y; + let mul1: $f = $fn(x, y); + // multiplication of subnormals is not currently handled + if !(Float::is_subnormal(&mul0) || Float::is_subnormal(&mul1)) { + if !Float::eq_repr(mul0, mul1) { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn), x, y, mul0, mul1 + ); + } + } + }); + )* + }; +} + +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +#[test] +fn float_mul() { + use compiler_builtins::float::{ + mul::{__muldf3, __mulsf3}, + Float, + }; + + float_mul!( + f32, __mulsf3; + f64, __muldf3; + ); +} diff --git a/testcrate/tests/shift.rs b/testcrate/tests/shift.rs new file mode 100644 index 000000000..ecb13a133 --- /dev/null +++ b/testcrate/tests/shift.rs @@ -0,0 +1,60 @@ +use testcrate::*; + +macro_rules! shift { + ($($i:ty, $fn_std:ident, $fn_builtins:ident);*;) => { + $( + fuzz_shift(|x: $i, s: u32| { + let tmp0: $i = x.$fn_std(s); + let tmp1: $i = $fn_builtins(x, s); + if tmp0 != tmp1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn_builtins), x, s, tmp0, tmp1 + ); + } + }); + )* + }; +} + +macro_rules! 
overflowing_shift { + ($($i:ty, $fn_std:ident, $fn_builtins:ident);*;) => { + $( + fuzz_shift(|x: $i, s: u32| { + let tmp0: $i = x.$fn_std(s); + let (tmp1, o1): ($i, bool) = $fn_builtins(x, s.into()); + if tmp0 != tmp1 || o1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn_builtins), x, s, tmp0, tmp1 + ); + } + }); + )* + }; +} + +#[test] +fn shift() { + use compiler_builtins::int::shift::{ + __ashldi3, __ashlsi3, __ashlti3, __ashrdi3, __ashrsi3, __ashrti3, __lshrdi3, __lshrsi3, + __lshrti3, __rust_i128_shlo, __rust_i128_shro, __rust_u128_shlo, __rust_u128_shro, + }; + shift!( + u32, wrapping_shl, __ashlsi3; + u64, wrapping_shl, __ashldi3; + u128, wrapping_shl, __ashlti3; + i32, wrapping_shr, __ashrsi3; + i64, wrapping_shr, __ashrdi3; + i128, wrapping_shr, __ashrti3; + u32, wrapping_shr, __lshrsi3; + u64, wrapping_shr, __lshrdi3; + u128, wrapping_shr, __lshrti3; + ); + overflowing_shift!( + u128, wrapping_shl, __rust_u128_shlo; + i128, wrapping_shl, __rust_i128_shlo; + u128, wrapping_shr, __rust_u128_shro; + i128, wrapping_shr, __rust_i128_shro; + ); +} From 793465e1bd728c51e8776349aa60433fbe7f8936 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 9 Dec 2020 12:17:29 +0000 Subject: [PATCH 0408/1459] Bump to 0.1.37 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0c5fd1531..c9ac6d0a7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.36" +version = "0.1.37" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 4d5ca5b66afaf7b0d1d98eba831de317e75b7f23 Mon Sep 17 00:00:00 2001 From: Yuki Okushi Date: Fri, 11 Dec 2020 21:56:10 +0900 Subject: [PATCH 0409/1459] Use the AT&T syntax to support old LLVM on rust-lang/rust --- src/int/specialized_div_rem/mod.rs | 6 +++-- src/mem/x86_64.rs | 41 +++++++++++++++++------------- 2 files changed, 
27 insertions(+), 20 deletions(-) diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs index eaeb030e3..14e758fc5 100644 --- a/src/int/specialized_div_rem/mod.rs +++ b/src/int/specialized_div_rem/mod.rs @@ -169,12 +169,13 @@ unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) { unsafe { // divides the combined registers rdx:rax (`duo` is split into two 64 bit parts to do this) // by `div`. The quotient is stored in rax and the remainder in rdx. + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. asm!( "div {0}", in(reg) div, inlateout("rax") duo_lo => quo, inlateout("rdx") duo_hi => rem, - options(pure, nomem, nostack) + options(att_syntax, pure, nomem, nostack) ); } (quo, rem) @@ -255,12 +256,13 @@ unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) { unsafe { // divides the combined registers rdx:rax (`duo` is split into two 32 bit parts to do this) // by `div`. The quotient is stored in rax and the remainder in rdx. + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. asm!( "div {0}", in(reg) div, inlateout("rax") duo_lo => quo, inlateout("rdx") duo_hi => rem, - options(pure, nomem, nostack) + options(att_syntax, pure, nomem, nostack) ); } (quo, rem) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 8cbbdf779..abdb8eb67 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -19,12 +19,13 @@ #[inline(always)] #[cfg(target_feature = "ermsb")] pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. 
asm!( - "rep movsb [rdi], [rsi]", + "repe movsb (%rsi), (%rdi)", inout("rcx") count => _, inout("rdi") dest => _, inout("rsi") src => _, - options(nostack, preserves_flags) + options(att_syntax, nostack, preserves_flags) ); } @@ -33,15 +34,16 @@ pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { let qword_count = count >> 3; let byte_count = count & 0b111; + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. asm!( - "rep movsq [rdi], [rsi]", - "mov ecx, {byte_count:e}", - "rep movsb [rdi], [rsi]", + "repe movsq (%rsi), (%rdi)", + "mov {byte_count:e}, %ecx", + "repe movsb (%rsi), (%rdi)", byte_count = in(reg) byte_count, inout("rcx") qword_count => _, inout("rdi") dest => _, inout("rsi") src => _, - options(nostack, preserves_flags) + options(att_syntax, nostack, preserves_flags) ); } @@ -49,31 +51,33 @@ pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { let qword_count = count >> 3; let byte_count = count & 0b111; + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. asm!( "std", - "rep movsq [rdi], [rsi]", - "mov ecx, {byte_count:e}", - "add rdi, 7", - "add rsi, 7", - "rep movsb [rdi], [rsi]", + "repe movsq (%rsi), (%rdi)", + "movl {byte_count:e}, %ecx", + "addq $7, %rdi", + "addq $7, %rsi", + "repe movsb (%rsi), (%rdi)", "cld", byte_count = in(reg) byte_count, inout("rcx") qword_count => _, inout("rdi") dest.add(count).wrapping_sub(8) => _, inout("rsi") src.add(count).wrapping_sub(8) => _, - options(nostack) + options(att_syntax, nostack) ); } #[inline(always)] #[cfg(target_feature = "ermsb")] pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. 
asm!( - "rep stosb [rdi], al", + "repe stosb %al, (%rdi)", inout("rcx") count => _, inout("rdi") dest => _, inout("al") c => _, - options(nostack, preserves_flags) + options(att_syntax, nostack, preserves_flags) ) } @@ -82,14 +86,15 @@ pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { let qword_count = count >> 3; let byte_count = count & 0b111; + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. asm!( - "rep stosq [rdi], rax", - "mov ecx, {byte_count:e}", - "rep stosb [rdi], al", + "repe stosq %rax, (%rdi)", + "mov {byte_count:e}, %ecx", + "repe stosb %al, (%rdi)", byte_count = in(reg) byte_count, inout("rcx") qword_count => _, inout("rdi") dest => _, in("rax") (c as u64) * 0x0101010101010101, - options(nostack, preserves_flags) + options(att_syntax, nostack, preserves_flags) ); } From 6dd4eb718384f1844610350721445436bd9bf951 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sat, 12 Dec 2020 12:11:21 +0000 Subject: [PATCH 0410/1459] Bump to 0.1.38 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c9ac6d0a7..901b33c3f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.37" +version = "0.1.38" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 702146718f016691597f9e42e95e4e903b3128a0 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Mon, 4 Jan 2021 09:17:44 -0600 Subject: [PATCH 0411/1459] Remove `count_ones` (#399) --- src/int/mod.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/int/mod.rs b/src/int/mod.rs index cb94803a4..06054c84a 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -90,7 +90,6 @@ pub trait Int: fn aborting_div(self, other: Self) -> Self; fn aborting_rem(self, other: Self) -> Self; fn leading_zeros(self) -> u32; - fn 
count_ones(self) -> u32; } fn unwrap(t: Option) -> T { @@ -229,10 +228,6 @@ macro_rules! int_impl_common { fn leading_zeros(self) -> u32 { ::leading_zeros(self) } - - fn count_ones(self) -> u32 { - ::count_ones(self) - } }; } From e6fd1b272ff4cc34810e20126ffe17888a708f39 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 6 Jan 2021 23:39:48 +0000 Subject: [PATCH 0412/1459] Bump to 0.1.39 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 901b33c3f..f1052d59d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.38" +version = "0.1.39" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 0c6d35782fc97f2515cbff42291ca64932780e5d Mon Sep 17 00:00:00 2001 From: est31 Date: Fri, 5 Feb 2021 23:40:17 +0100 Subject: [PATCH 0413/1459] Use the newly stabilized BITS constant on the integer types --- src/int/mod.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/int/mod.rs b/src/int/mod.rs index 06054c84a..e50b2e608 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -100,8 +100,8 @@ fn unwrap(t: Option) -> T { } macro_rules! int_impl_common { - ($ty:ty, $bits:expr) => { - const BITS: u32 = $bits; + ($ty:ty) => { + const BITS: u32 = ::BITS; const ZERO: Self = 0; const ONE: Self = 1; @@ -232,7 +232,7 @@ macro_rules! int_impl_common { } macro_rules! int_impl { - ($ity:ty, $uty:ty, $bits:expr) => { + ($ity:ty, $uty:ty) => { impl Int for $uty { type OtherSign = $ity; type UnsignedInt = $uty; @@ -253,7 +253,7 @@ macro_rules! int_impl { (self.wrapping_sub(other) as $ity).wrapping_abs() as $uty } - int_impl_common!($uty, $bits); + int_impl_common!($uty); } impl Int for $ity { @@ -280,17 +280,17 @@ macro_rules! 
int_impl { self.wrapping_sub(other).wrapping_abs() as $uty } - int_impl_common!($ity, $bits); + int_impl_common!($ity); } }; } -int_impl!(isize, usize, usize::MAX.count_ones()); -int_impl!(i8, u8, 8); -int_impl!(i16, u16, 16); -int_impl!(i32, u32, 32); -int_impl!(i64, u64, 64); -int_impl!(i128, u128, 128); +int_impl!(isize, usize); +int_impl!(i8, u8); +int_impl!(i16, u16); +int_impl!(i32, u32); +int_impl!(i64, u64); +int_impl!(i128, u128); /// Trait for integers twice the bit width of another integer. This is implemented for all /// primitives except for `u8`, because there is not a smaller primitive. From 745ad0f43bedb32ac5270064c154a2fc3414ee45 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 21 Feb 2021 11:36:47 +0100 Subject: [PATCH 0414/1459] Remove unused __rust_* shift intrinsics They are rust specific and used by neither cg_llvm nor cg_clif --- src/int/shift.rs | 16 ---------------- testcrate/tests/shift.rs | 25 +------------------------ 2 files changed, 1 insertion(+), 40 deletions(-) diff --git a/src/int/shift.rs b/src/int/shift.rs index 20561786b..59909929e 100644 --- a/src/int/shift.rs +++ b/src/int/shift.rs @@ -113,20 +113,4 @@ intrinsics! { pub extern "C" fn __lshrti3(a: u128, b: u32) -> u128 { a.lshr(b) } - - pub extern "C" fn __rust_i128_shlo(a: i128, b: u128) -> (i128, bool) { - (__ashlti3(a as _, b as _) as _, b >= 128) - } - - pub extern "C" fn __rust_u128_shlo(a: u128, b: u128) -> (u128, bool) { - (__ashlti3(a, b as _), b >= 128) - } - - pub extern "C" fn __rust_i128_shro(a: i128, b: u128) -> (i128, bool) { - (__ashrti3(a, b as _), b >= 128) - } - - pub extern "C" fn __rust_u128_shro(a: u128, b: u128) -> (u128, bool) { - (__lshrti3(a, b as _), b >= 128) - } } diff --git a/testcrate/tests/shift.rs b/testcrate/tests/shift.rs index ecb13a133..7a76b1646 100644 --- a/testcrate/tests/shift.rs +++ b/testcrate/tests/shift.rs @@ -17,28 +17,11 @@ macro_rules! shift { }; } -macro_rules! 
overflowing_shift { - ($($i:ty, $fn_std:ident, $fn_builtins:ident);*;) => { - $( - fuzz_shift(|x: $i, s: u32| { - let tmp0: $i = x.$fn_std(s); - let (tmp1, o1): ($i, bool) = $fn_builtins(x, s.into()); - if tmp0 != tmp1 || o1 { - panic!( - "{}({}, {}): std: {}, builtins: {}", - stringify!($fn_builtins), x, s, tmp0, tmp1 - ); - } - }); - )* - }; -} - #[test] fn shift() { use compiler_builtins::int::shift::{ __ashldi3, __ashlsi3, __ashlti3, __ashrdi3, __ashrsi3, __ashrti3, __lshrdi3, __lshrsi3, - __lshrti3, __rust_i128_shlo, __rust_i128_shro, __rust_u128_shlo, __rust_u128_shro, + __lshrti3, }; shift!( u32, wrapping_shl, __ashlsi3; @@ -51,10 +34,4 @@ fn shift() { u64, wrapping_shr, __lshrdi3; u128, wrapping_shr, __lshrti3; ); - overflowing_shift!( - u128, wrapping_shl, __rust_u128_shlo; - i128, wrapping_shl, __rust_i128_shlo; - u128, wrapping_shr, __rust_u128_shro; - i128, wrapping_shr, __rust_i128_shro; - ); } From d9b646cc07545d738007e8c0749c359daf9d3983 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Sun, 28 Mar 2021 12:24:08 +0200 Subject: [PATCH 0415/1459] add "readme"-key to Cargo.toml in order for this crate to have a preview on crates.io --- libm/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index d9d668040..106de51df 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -6,6 +6,7 @@ documentation = "https://docs.rs/libm" keywords = ["libm", "math"] license = "MIT OR Apache-2.0" name = "libm" +readme = "README.md" repository = "https://github.com/rust-lang/libm" version = "0.2.1" edition = "2018" From 9e76b9115fa9571501d378b6329ce557266908b7 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 2 Apr 2021 12:36:57 +0100 Subject: [PATCH 0416/1459] Disable AArch64 FP-to-int tests This is a temporary workaround for https://github.com/rust-lang/rust/issues/83467 --- testcrate/tests/conv.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 
7cdbf9fbb..17c31a8a0 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -95,6 +95,8 @@ macro_rules! f_to_i { }; } +// AArch64 tests are currently broken due to https://github.com/rust-lang/rust/issues/83467 +#[cfg(not(target_arch = "aarch64"))] #[test] fn float_to_int() { use compiler_builtins::float::conv::{ From a0d09b00f8bdf4f8d18e227162c10b8de7c44ed2 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Thu, 10 Dec 2020 17:00:45 -0600 Subject: [PATCH 0417/1459] fix abs_diff bug --- src/int/mod.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/int/mod.rs b/src/int/mod.rs index e50b2e608..a186a95aa 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -250,7 +250,11 @@ macro_rules! int_impl { } fn abs_diff(self, other: Self) -> Self { - (self.wrapping_sub(other) as $ity).wrapping_abs() as $uty + if self < other { + other.wrapping_sub(self) + } else { + self.wrapping_sub(other) + } } int_impl_common!($uty); From 01eaf808c6e491632492c7d67f239ad0086d0cbd Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Tue, 8 Dec 2020 19:28:05 -0600 Subject: [PATCH 0418/1459] refactor float conversion --- src/float/conv.rs | 299 ++++++++++++++++++------------------- src/float/mod.rs | 39 ++++- src/float/pow.rs | 3 +- src/int/mod.rs | 70 +++------ src/lib.rs | 5 - testcrate/tests/div_rem.rs | 2 +- testcrate/tests/mul.rs | 2 +- 7 files changed, 198 insertions(+), 222 deletions(-) diff --git a/src/float/conv.rs b/src/float/conv.rs index e9ca0f138..8c46e4d2e 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -1,90 +1,88 @@ use float::Float; -use int::Int; - -macro_rules! 
int_to_float { - ($i:expr, $ity:ty, $fty:ty) => {{ - let i = $i; - if i == 0 { - return 0.0; - } - - let mant_dig = <$fty>::SIGNIFICAND_BITS + 1; - let exponent_bias = <$fty>::EXPONENT_BIAS; - - let n = <$ity as Int>::BITS; - let (s, a) = i.extract_sign(); - let mut a = a; - - // number of significant digits - let sd = n - a.leading_zeros(); - - // exponent - let mut e = sd - 1; +use int::{CastInto, Int}; + +fn int_to_float(i: I) -> F +where + F::Int: CastInto, + F::Int: CastInto, + I::UnsignedInt: CastInto, + u32: CastInto, +{ + if i == I::ZERO { + return F::ZERO; + } - if <$ity as Int>::BITS < mant_dig { - return <$fty>::from_parts( - s, - (e + exponent_bias) as <$fty as Float>::Int, - (a as <$fty as Float>::Int) << (mant_dig - e - 1), - ); - } + let two = I::UnsignedInt::ONE + I::UnsignedInt::ONE; + let four = two + two; + let sign = i < I::ZERO; + let mut x = Int::abs_diff(i, I::ZERO); + + // number of significant digits in the integer + let i_sd = I::BITS - x.leading_zeros(); + // significant digits for the float, including implicit bit + let f_sd = F::SIGNIFICAND_BITS + 1; + + // exponent + let mut exp = i_sd - 1; + + if I::BITS < f_sd { + return F::from_parts( + sign, + (exp + F::EXPONENT_BIAS).cast(), + x.cast() << (f_sd - exp - 1), + ); + } - a = if sd > mant_dig { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit MANT_DIG-1 bits to the right of 1 - * Q = bit MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - let mant_dig_plus_one = mant_dig + 1; - let mant_dig_plus_two = mant_dig + 2; - a = if sd == mant_dig_plus_one { - a << 1 - } else if sd == mant_dig_plus_two { - a - } else { - (a >> (sd - mant_dig_plus_two)) as <$ity as Int>::UnsignedInt - | ((a & <$ity as Int>::UnsignedInt::max_value()) - .wrapping_shl((n + mant_dig_plus_two) - sd) - != 0) as <$ity as 
Int>::UnsignedInt - }; - - /* finish: */ - a |= ((a & 4) != 0) as <$ity as Int>::UnsignedInt; /* Or P into R */ - a += 1; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - - /* a is now rounded to mant_dig or mant_dig+1 bits */ - if (a & (1 << mant_dig)) != 0 { - a >>= 1; - e += 1; - } - a - /* a is now rounded to mant_dig bits */ + x = if i_sd > f_sd { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = the implicit bit + // P = bit f_sd-1 bits to the right of 1 + // Q = bit f_sd bits to the right of 1 + // R = "or" of all bits to the right of Q + let f_sd_add2 = f_sd + 2; + x = if i_sd == (f_sd + 1) { + x << 1 + } else if i_sd == f_sd_add2 { + x } else { - a.wrapping_shl(mant_dig - sd) - /* a is now rounded to mant_dig bits */ + (x >> (i_sd - f_sd_add2)) + | Int::from_bool( + (x & I::UnsignedInt::MAX).wrapping_shl((I::BITS + f_sd_add2) - i_sd) + != Int::ZERO, + ) }; - <$fty>::from_parts( - s, - (e + exponent_bias) as <$fty as Float>::Int, - a as <$fty as Float>::Int, - ) - }}; + // R |= P + x |= Int::from_bool((x & four) != I::UnsignedInt::ZERO); + // round - this step may add a significant bit + x += Int::ONE; + // dump Q and R + x >>= 2; + + // a is now rounded to f_sd or f_sd+1 bits + if (x & (I::UnsignedInt::ONE << f_sd)) != Int::ZERO { + x >>= 1; + exp += 1; + } + x + } else { + x.wrapping_shl(f_sd - i_sd) + }; + + F::from_parts(sign, (exp + F::EXPONENT_BIAS).cast(), x.cast()) } intrinsics! { #[arm_aeabi_alias = __aeabi_i2f] pub extern "C" fn __floatsisf(i: i32) -> f32 { - int_to_float!(i, i32, f32) + int_to_float(i) } #[arm_aeabi_alias = __aeabi_i2d] pub extern "C" fn __floatsidf(i: i32) -> f64 { - int_to_float!(i, i32, f64) + int_to_float(i) } #[maybe_use_optimized_c_shim] @@ -95,7 +93,7 @@ intrinsics! 
{ if cfg!(target_arch = "x86_64") { i as f32 } else { - int_to_float!(i, i64, f32) + int_to_float(i) } } @@ -107,181 +105,172 @@ intrinsics! { if cfg!(target_arch = "x86_64") { i as f64 } else { - int_to_float!(i, i64, f64) + int_to_float(i) } } #[unadjusted_on_win64] pub extern "C" fn __floattisf(i: i128) -> f32 { - int_to_float!(i, i128, f32) + int_to_float(i) } #[unadjusted_on_win64] pub extern "C" fn __floattidf(i: i128) -> f64 { - int_to_float!(i, i128, f64) + int_to_float(i) } #[arm_aeabi_alias = __aeabi_ui2f] pub extern "C" fn __floatunsisf(i: u32) -> f32 { - int_to_float!(i, u32, f32) + int_to_float(i) } #[arm_aeabi_alias = __aeabi_ui2d] pub extern "C" fn __floatunsidf(i: u32) -> f64 { - int_to_float!(i, u32, f64) + int_to_float(i) } #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_ul2f] pub extern "C" fn __floatundisf(i: u64) -> f32 { - int_to_float!(i, u64, f32) + int_to_float(i) } #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_ul2d] pub extern "C" fn __floatundidf(i: u64) -> f64 { - int_to_float!(i, u64, f64) + int_to_float(i) } #[unadjusted_on_win64] pub extern "C" fn __floatuntisf(i: u128) -> f32 { - int_to_float!(i, u128, f32) + int_to_float(i) } #[unadjusted_on_win64] pub extern "C" fn __floatuntidf(i: u128) -> f64 { - int_to_float!(i, u128, f64) + int_to_float(i) } } -#[derive(PartialEq)] -enum Sign { - Positive, - Negative, -} +fn float_to_int(f: F) -> I +where + F::ExpInt: CastInto, + u32: CastInto, + F::Int: CastInto, +{ + // converting NaNs is UB, so we don't consider them + + let sign = f.sign(); + let mut exp = f.exp(); -macro_rules! 
float_to_int { - ($f:expr, $fty:ty, $ity:ty) => {{ - let f = $f; - let fixint_min = <$ity>::min_value(); - let fixint_max = <$ity>::max_value(); - let fixint_bits = <$ity as Int>::BITS as usize; - let fixint_unsigned = fixint_min == 0; - - let sign_bit = <$fty>::SIGN_MASK; - let significand_bits = <$fty>::SIGNIFICAND_BITS as usize; - let exponent_bias = <$fty>::EXPONENT_BIAS as usize; - //let exponent_max = <$fty>::exponent_max() as usize; - - // Break a into sign, exponent, significand - let a_rep = <$fty>::repr(f); - let a_abs = a_rep & !sign_bit; - - // this is used to work around -1 not being available for unsigned - let sign = if (a_rep & sign_bit) == 0 { - Sign::Positive + // if less than one or unsigned & negative + if (exp < F::EXPONENT_BIAS.cast()) || (!I::SIGNED && sign) { + return I::ZERO; + } + exp -= F::EXPONENT_BIAS.cast(); + + // If the value is too large for `I`, saturate. + let bits: F::ExpInt = I::BITS.cast(); + let max = if I::SIGNED { + bits - F::ExpInt::ONE + } else { + bits + }; + if max <= exp { + return if sign { + // It happens that I::MIN is handled correctly + I::MIN } else { - Sign::Negative + I::MAX }; - let mut exponent = (a_abs >> significand_bits) as usize; - let significand = (a_abs & <$fty>::SIGNIFICAND_MASK) | <$fty>::IMPLICIT_BIT; + }; - // if < 1 or unsigned & negative - if exponent < exponent_bias || fixint_unsigned && sign == Sign::Negative { - return 0; - } - exponent -= exponent_bias; - - // If the value is infinity, saturate. - // If the value is too large for the integer type, 0. - if exponent - >= (if fixint_unsigned { - fixint_bits - } else { - fixint_bits - 1 - }) - { - return if sign == Sign::Positive { - fixint_max - } else { - fixint_min - }; - } - // If 0 <= exponent < significand_bits, right shift to get the result. - // Otherwise, shift left. 
- // (sign - 1) will never overflow as negative signs are already returned as 0 for unsigned - let r = if exponent < significand_bits { - (significand >> (significand_bits - exponent)) as $ity + // `0 <= exp < max` + + // If 0 <= exponent < F::SIGNIFICAND_BITS, right shift to get the result. Otherwise, shift left. + let sig_bits: F::ExpInt = F::SIGNIFICAND_BITS.cast(); + // The larger integer has to be casted into, or else the shift overflows + let r: I = if F::Int::BITS < I::BITS { + let tmp: I = if exp < sig_bits { + f.imp_frac().cast() >> (sig_bits - exp).cast() } else { - (significand as $ity) << (exponent - significand_bits) + f.imp_frac().cast() << (exp - sig_bits).cast() }; - - if sign == Sign::Negative { - (!r).wrapping_add(1) + tmp + } else { + let tmp: F::Int = if exp < sig_bits { + f.imp_frac() >> (sig_bits - exp).cast() } else { - r - } - }}; + f.imp_frac() << (exp - sig_bits).cast() + }; + tmp.cast() + }; + + if sign { + r.wrapping_neg() + } else { + r + } } intrinsics! { #[arm_aeabi_alias = __aeabi_f2iz] pub extern "C" fn __fixsfsi(f: f32) -> i32 { - float_to_int!(f, f32, i32) + float_to_int(f) } #[arm_aeabi_alias = __aeabi_f2lz] pub extern "C" fn __fixsfdi(f: f32) -> i64 { - float_to_int!(f, f32, i64) + float_to_int(f) } #[unadjusted_on_win64] pub extern "C" fn __fixsfti(f: f32) -> i128 { - float_to_int!(f, f32, i128) + float_to_int(f) } #[arm_aeabi_alias = __aeabi_d2iz] pub extern "C" fn __fixdfsi(f: f64) -> i32 { - float_to_int!(f, f64, i32) + float_to_int(f) } #[arm_aeabi_alias = __aeabi_d2lz] pub extern "C" fn __fixdfdi(f: f64) -> i64 { - float_to_int!(f, f64, i64) + float_to_int(f) } #[unadjusted_on_win64] pub extern "C" fn __fixdfti(f: f64) -> i128 { - float_to_int!(f, f64, i128) + float_to_int(f) } #[arm_aeabi_alias = __aeabi_f2uiz] pub extern "C" fn __fixunssfsi(f: f32) -> u32 { - float_to_int!(f, f32, u32) + float_to_int(f) } #[arm_aeabi_alias = __aeabi_f2ulz] pub extern "C" fn __fixunssfdi(f: f32) -> u64 { - float_to_int!(f, f32, u64) + 
float_to_int(f) } #[unadjusted_on_win64] pub extern "C" fn __fixunssfti(f: f32) -> u128 { - float_to_int!(f, f32, u128) + float_to_int(f) } #[arm_aeabi_alias = __aeabi_d2uiz] pub extern "C" fn __fixunsdfsi(f: f64) -> u32 { - float_to_int!(f, f64, u32) + float_to_int(f) } #[arm_aeabi_alias = __aeabi_d2ulz] pub extern "C" fn __fixunsdfdi(f: f64) -> u64 { - float_to_int!(f, f64, u64) + float_to_int(f) } #[unadjusted_on_win64] pub extern "C" fn __fixunsdfti(f: f64) -> u128 { - float_to_int!(f, f64, u128) + float_to_int(f) } } diff --git a/src/float/mod.rs b/src/float/mod.rs index c4b690161..5a0d37a7d 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -30,6 +30,9 @@ pub trait Float: /// A int of the same with as the float type SignedInt: Int; + /// An int capable of containing the exponent bits plus a sign bit. This is signed. + type ExpInt: Int; + const ZERO: Self; const ONE: Self; @@ -71,6 +74,18 @@ pub trait Float: /// compared. fn eq_repr(self, rhs: Self) -> bool; + /// Returns the sign bit + fn sign(self) -> bool; + + /// Returns the exponent with bias + fn exp(self) -> Self::ExpInt; + + /// Returns the significand with no implicit bit (or the "fractional" part) + fn frac(self) -> Self::Int; + + /// Returns the significand with implicit bit + fn imp_frac(self) -> Self::Int; + /// Returns a `Self::Int` transmuted back to `Self` fn from_repr(a: Self::Int) -> Self; @@ -81,14 +96,16 @@ pub trait Float: fn normalize(significand: Self::Int) -> (i32, Self::Int); /// Returns if `self` is subnormal - fn is_subnormal(&self) -> bool; + fn is_subnormal(self) -> bool; } macro_rules! float_impl { - ($ty:ident, $ity:ident, $sity:ident, $bits:expr, $significand_bits:expr) => { + ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { impl Float for $ty { type Int = $ity; type SignedInt = $sity; + type ExpInt = $expty; + const ZERO: Self = 0.0; const ONE: Self = 1.0; @@ -113,6 +130,18 @@ macro_rules! 
float_impl { self.repr() == rhs.repr() } } + fn sign(self) -> bool { + self.signed_repr() < Self::SignedInt::ZERO + } + fn exp(self) -> Self::ExpInt { + ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt + } + fn frac(self) -> Self::Int { + self.to_bits() & Self::SIGNIFICAND_MASK + } + fn imp_frac(self) -> Self::Int { + self.frac() | Self::IMPLICIT_BIT + } fn from_repr(a: Self::Int) -> Self { Self::from_bits(a) } @@ -132,12 +161,12 @@ macro_rules! float_impl { significand << shift as Self::Int, ) } - fn is_subnormal(&self) -> bool { + fn is_subnormal(self) -> bool { (self.repr() & Self::EXPONENT_MASK) == Self::Int::ZERO } } }; } -float_impl!(f32, u32, i32, 32, 23); -float_impl!(f64, u64, i64, 64, 52); +float_impl!(f32, u32, i32, i16, 32, 23); +float_impl!(f64, u64, i64, i16, 64, 52); diff --git a/src/float/pow.rs b/src/float/pow.rs index 2eedf6758..7d7f75972 100644 --- a/src/float/pow.rs +++ b/src/float/pow.rs @@ -1,5 +1,4 @@ use float::Float; -use int::Int; trait Pow: Float { /// Returns `a` raised to the power `b` @@ -11,7 +10,7 @@ trait Pow: Float { if (b & 1) != 0 { r *= a; } - b = b.aborting_div(2); + b = ((b as u32) >> 1) as i32; if b == 0 { break; } diff --git a/src/int/mod.rs b/src/int/mod.rs index a186a95aa..d8524a58a 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -15,9 +15,11 @@ pub use self::leading_zeros::__clzsi2; #[doc(hidden)] pub trait Int: Copy + + core::fmt::Debug + PartialEq + PartialOrd + ops::AddAssign + + ops::SubAssign + ops::BitAndAssign + ops::BitOrAssign + ops::BitXorAssign @@ -38,12 +40,16 @@ pub trait Int: /// Unsigned version of Self type UnsignedInt: Int; + /// If `Self` is a signed integer + const SIGNED: bool; + /// The bitwidth of the int type const BITS: u32; const ZERO: Self; const ONE: Self; const MIN: Self; + const MAX: Self; /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing /// in `testcrate`. 
For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, @@ -52,18 +58,6 @@ pub trait Int: /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. const FUZZ_NUM: usize; - /// Extracts the sign from self and returns a tuple. - /// - /// # Examples - /// - /// ```rust,ignore - /// let i = -25_i32; - /// let (sign, u) = i.extract_sign(); - /// assert_eq!(sign, true); - /// assert_eq!(u, 25_u32); - /// ``` - fn extract_sign(self) -> (bool, Self::UnsignedInt); - fn unsigned(self) -> Self::UnsignedInt; fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; @@ -77,8 +71,6 @@ pub trait Int: // copied from primitive integers, but put in a trait fn is_zero(self) -> bool; - fn max_value() -> Self; - fn min_value() -> Self; fn wrapping_neg(self) -> Self; fn wrapping_add(self, other: Self) -> Self; fn wrapping_mul(self, other: Self) -> Self; @@ -87,25 +79,18 @@ pub trait Int: fn wrapping_shr(self, other: u32) -> Self; fn rotate_left(self, other: u32) -> Self; fn overflowing_add(self, other: Self) -> (Self, bool); - fn aborting_div(self, other: Self) -> Self; - fn aborting_rem(self, other: Self) -> Self; fn leading_zeros(self) -> u32; } -fn unwrap(t: Option) -> T { - match t { - Some(t) => t, - None => ::abort(), - } -} - macro_rules! int_impl_common { ($ty:ty) => { const BITS: u32 = ::BITS; + const SIGNED: bool = Self::MIN != Self::ZERO; const ZERO: Self = 0; const ONE: Self = 1; const MIN: Self = ::MIN; + const MAX: Self = ::MAX; const FUZZ_LENGTHS: [u8; 20] = { let bits = ::BITS; @@ -177,14 +162,6 @@ macro_rules! int_impl_common { self == Self::ZERO } - fn max_value() -> Self { - ::max_value() - } - - fn min_value() -> Self { - ::min_value() - } - fn wrapping_neg(self) -> Self { ::wrapping_neg(self) } @@ -217,14 +194,6 @@ macro_rules! 
int_impl_common { ::overflowing_add(self, other) } - fn aborting_div(self, other: Self) -> Self { - unwrap(::checked_div(self, other)) - } - - fn aborting_rem(self, other: Self) -> Self { - unwrap(::checked_rem(self, other)) - } - fn leading_zeros(self) -> u32 { ::leading_zeros(self) } @@ -237,10 +206,6 @@ macro_rules! int_impl { type OtherSign = $ity; type UnsignedInt = $uty; - fn extract_sign(self) -> (bool, $uty) { - (false, self) - } - fn unsigned(self) -> $uty { self } @@ -264,14 +229,6 @@ macro_rules! int_impl { type OtherSign = $uty; type UnsignedInt = $uty; - fn extract_sign(self) -> (bool, $uty) { - if self < 0 { - (true, (!(self as $uty)).wrapping_add(1)) - } else { - (false, self as $uty) - } - } - fn unsigned(self) -> $uty { self as $uty } @@ -395,13 +352,14 @@ impl_h_int!( ); /// Trait to express (possibly lossy) casting of integers -pub(crate) trait CastInto: Copy { +#[doc(hidden)] +pub trait CastInto: Copy { fn cast(self) -> T; } macro_rules! cast_into { ($ty:ty) => { - cast_into!($ty; usize, isize, u32, i32, u64, i64, u128, i128); + cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); }; ($ty:ty; $($into:ty),*) => {$( impl CastInto<$into> for $ty { @@ -412,6 +370,12 @@ macro_rules! cast_into { )*}; } +cast_into!(usize); +cast_into!(isize); +cast_into!(u8); +cast_into!(i8); +cast_into!(u16); +cast_into!(i16); cast_into!(u32); cast_into!(i32); cast_into!(u64); diff --git a/src/lib.rs b/src/lib.rs index 4a7c746a2..9190c4251 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,11 +31,6 @@ #[cfg(test)] extern crate core; -#[allow(unused_unsafe)] -fn abort() -> ! { - unsafe { core::intrinsics::abort() } -} - #[macro_use] mod macros; diff --git a/testcrate/tests/div_rem.rs b/testcrate/tests/div_rem.rs index 0007c15ae..bb4a08e42 100644 --- a/testcrate/tests/div_rem.rs +++ b/testcrate/tests/div_rem.rs @@ -108,7 +108,7 @@ macro_rules! 
float { let quo0 = x / y; let quo1: $i = $fn(x, y); // division of subnormals is not currently handled - if !(Float::is_subnormal(&quo0) || Float::is_subnormal(&quo1)) { + if !(Float::is_subnormal(quo0) || Float::is_subnormal(quo1)) { if !Float::eq_repr(quo0, quo1) { panic!( "{}({}, {}): std: {}, builtins: {}", diff --git a/testcrate/tests/mul.rs b/testcrate/tests/mul.rs index 8b97ea46c..272bfa068 100644 --- a/testcrate/tests/mul.rs +++ b/testcrate/tests/mul.rs @@ -86,7 +86,7 @@ macro_rules! float_mul { let mul0 = x * y; let mul1: $f = $fn(x, y); // multiplication of subnormals is not currently handled - if !(Float::is_subnormal(&mul0) || Float::is_subnormal(&mul1)) { + if !(Float::is_subnormal(mul0) || Float::is_subnormal(mul1)) { if !Float::eq_repr(mul0, mul1) { panic!( "{}({}, {}): std: {}, builtins: {}", From 4a405df3d91a2aa55cc5cb88e67560beae1002eb Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Thu, 10 Dec 2020 19:56:36 -0600 Subject: [PATCH 0419/1459] add remaining floating point tests --- testcrate/tests/addsub.rs | 17 ++++++++++ testcrate/tests/cmp.rs | 60 ++++++++++++++++++++++++++++++++++ testcrate/tests/div_rem.rs | 16 +++++++++ testcrate/tests/misc.rs | 66 +++++++++++++++++++++++++++++--------- testcrate/tests/mul.rs | 16 +++++++++ 5 files changed, 160 insertions(+), 15 deletions(-) diff --git a/testcrate/tests/addsub.rs b/testcrate/tests/addsub.rs index ff56668b7..da7684ec9 100644 --- a/testcrate/tests/addsub.rs +++ b/testcrate/tests/addsub.rs @@ -1,3 +1,5 @@ +#![allow(unused_macros)] + use testcrate::*; macro_rules! 
sum { @@ -107,3 +109,18 @@ fn float_addsub() { f64, __adddf3, __subdf3; ); } + +#[cfg(target_arch = "arm")] +#[test] +fn float_addsub_arm() { + use compiler_builtins::float::{ + add::{__adddf3vfp, __addsf3vfp}, + sub::{__subdf3vfp, __subsf3vfp}, + Float, + }; + + float_sum!( + f32, __addsf3vfp, __subsf3vfp; + f64, __adddf3vfp, __subdf3vfp; + ); +} diff --git a/testcrate/tests/cmp.rs b/testcrate/tests/cmp.rs index d359b65d7..a49779ad0 100644 --- a/testcrate/tests/cmp.rs +++ b/testcrate/tests/cmp.rs @@ -1,3 +1,5 @@ +#![allow(unused_macros)] + use testcrate::*; macro_rules! cmp { @@ -50,3 +52,61 @@ fn float_comparisons() { ); }); } + +macro_rules! cmp2 { + ($x:ident, $y:ident, $($unordered_val:expr, $fn_std:expr, $fn_builtins:ident);*;) => { + $( + let cmp0: i32 = if $x.is_nan() || $y.is_nan() { + $unordered_val + } else { + $fn_std as i32 + }; + let cmp1: i32 = $fn_builtins($x, $y); + if cmp0 != cmp1 { + panic!("{}({}, {}): std: {}, builtins: {}", stringify!($fn_builtins), $x, $y, cmp0, cmp1); + } + )* + }; +} + +#[cfg(target_arch = "arm")] +#[test] +fn float_comparisons_arm() { + use compiler_builtins::float::cmp::{ + __aeabi_dcmpeq, __aeabi_dcmpge, __aeabi_dcmpgt, __aeabi_dcmple, __aeabi_dcmplt, + __aeabi_fcmpeq, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmple, __aeabi_fcmplt, __eqdf2vfp, + __eqsf2vfp, __gedf2vfp, __gesf2vfp, __gtdf2vfp, __gtsf2vfp, __ledf2vfp, __lesf2vfp, + __ltdf2vfp, __ltsf2vfp, __nedf2vfp, __nesf2vfp, + }; + + fuzz_float_2(N, |x: f32, y: f32| { + cmp2!(x, y, + 0, x < y, __aeabi_fcmplt; + 0, x <= y, __aeabi_fcmple; + 0, x == y, __aeabi_fcmpeq; + 0, x >= y, __aeabi_fcmpge; + 0, x > y, __aeabi_fcmpgt; + 0, x < y, __ltsf2vfp; + 0, x <= y, __lesf2vfp; + 0, x == y, __eqsf2vfp; + 0, x >= y, __gesf2vfp; + 0, x > y, __gtsf2vfp; + 1, x != y, __nesf2vfp; + ); + }); + fuzz_float_2(N, |x: f64, y: f64| { + cmp2!(x, y, + 0, x < y, __aeabi_dcmplt; + 0, x <= y, __aeabi_dcmple; + 0, x == y, __aeabi_dcmpeq; + 0, x >= y, __aeabi_dcmpge; + 0, x > y, __aeabi_dcmpgt; 
+ 0, x < y, __ltdf2vfp; + 0, x <= y, __ledf2vfp; + 0, x == y, __eqdf2vfp; + 0, x >= y, __gedf2vfp; + 0, x > y, __gtdf2vfp; + 1, x != y, __nedf2vfp; + ); + }); +} diff --git a/testcrate/tests/div_rem.rs b/testcrate/tests/div_rem.rs index bb4a08e42..c3f067640 100644 --- a/testcrate/tests/div_rem.rs +++ b/testcrate/tests/div_rem.rs @@ -1,3 +1,5 @@ +#![allow(unused_macros)] + use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4}; use compiler_builtins::int::udiv::{__udivmoddi4, __udivmodsi4, __udivmodti4, u128_divide_sparc}; use testcrate::*; @@ -134,3 +136,17 @@ fn float_div() { f64, __divdf3; ); } + +#[cfg(target_arch = "arm")] +#[test] +fn float_div_arm() { + use compiler_builtins::float::{ + div::{__divdf3vfp, __divsf3vfp}, + Float, + }; + + float!( + f32, __divsf3vfp; + f64, __divdf3vfp; + ); +} diff --git a/testcrate/tests/misc.rs b/testcrate/tests/misc.rs index d31e3e6b6..ec3e9d96d 100644 --- a/testcrate/tests/misc.rs +++ b/testcrate/tests/misc.rs @@ -1,3 +1,7 @@ +// makes configuration easier +#![allow(unused_macros)] + +use compiler_builtins::float::Float; use testcrate::*; /// Make sure that the the edge case tester and randomized tester don't break, and list examples of @@ -89,15 +93,37 @@ fn leading_zeros() { }) } +macro_rules! 
extend { + ($fX:ident, $fD:ident, $fn:ident) => { + fuzz_float(N, |x: $fX| { + let tmp0 = x as $fD; + let tmp1: $fD = $fn(x); + if !Float::eq_repr(tmp0, tmp1) { + panic!( + "{}({}): std: {}, builtins: {}", + stringify!($fn), + x, + tmp0, + tmp1 + ); + } + }); + }; +} + #[test] fn float_extend() { - fuzz_float(N, |x: f32| { - let tmp0 = x as f64; - let tmp1: f64 = compiler_builtins::float::extend::__extendsfdf2(x); - if !compiler_builtins::float::Float::eq_repr(tmp0, tmp1) { - panic!("__extendsfdf2({}): std: {}, builtins: {}", x, tmp0, tmp1); - } - }); + use compiler_builtins::float::extend::__extendsfdf2; + + extend!(f32, f64, __extendsfdf2); +} + +#[cfg(target_arch = "arm")] +#[test] +fn float_extend_arm() { + use compiler_builtins::float::extend::__extendsfdf2vfp; + + extend!(f32, f64, __extendsfdf2vfp); } // This doesn't quite work because of issues related to @@ -108,14 +134,16 @@ macro_rules! pow { ($($f:ty, $fn:ident);*;) => { $( fuzz_float_2(N, |x: $f, y: $f| { - let n = y as i32; - let tmp0: $f = x.powi(n); - let tmp1: $f = $fn(x, n); - if tmp0 != tmp1 { - panic!( - "{}({}, {}): std: {}, builtins: {}", - stringify!($fn), x, y, tmp0, tmp1 - ); + if !(Float::is_subnormal(x) || Float::is_subnormal(y) || x < 0. || y < 0.) { + let n = y as i32; + let tmp0: $f = x.powi(n); + let tmp1: $f = $fn(x, n); + if tmp0 != tmp1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn), x, y, tmp0, tmp1 + ); + } } }); )* @@ -132,3 +160,11 @@ fn float_pow() { ); } */ + +// placeholder test to make sure basic functionality works +#[test] +fn float_pow() { + use compiler_builtins::float::pow::{__powidf2, __powisf2}; + assert_eq!(__powisf2(-3.0, 3), -27.0); + assert_eq!(__powidf2(-3.0, 3), -27.0); +} diff --git a/testcrate/tests/mul.rs b/testcrate/tests/mul.rs index 272bfa068..819f06ca9 100644 --- a/testcrate/tests/mul.rs +++ b/testcrate/tests/mul.rs @@ -1,3 +1,5 @@ +#![allow(unused_macros)] + use testcrate::*; macro_rules! 
mul { @@ -112,3 +114,17 @@ fn float_mul() { f64, __muldf3; ); } + +#[cfg(target_arch = "arm")] +#[test] +fn float_mul_arm() { + use compiler_builtins::float::{ + mul::{__muldf3vfp, __mulsf3vfp}, + Float, + }; + + float_mul!( + f32, __mulsf3vfp; + f64, __muldf3vfp; + ); +} From adb7096157c1d4815a9a06ac92eaaa17d85c0a22 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Thu, 10 Dec 2020 20:30:48 -0600 Subject: [PATCH 0420/1459] fix `powi` --- src/float/mod.rs | 1 + src/float/pow.rs | 47 +++++++++++++++++++---------------------- testcrate/tests/misc.rs | 47 ++++++++++++++++++++++++----------------- 3 files changed, 51 insertions(+), 44 deletions(-) diff --git a/src/float/mod.rs b/src/float/mod.rs index 5a0d37a7d..69e4dc635 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -15,6 +15,7 @@ pub mod sub; #[doc(hidden)] pub trait Float: Copy + + core::fmt::Debug + PartialEq + PartialOrd + ops::AddAssign diff --git a/src/float/pow.rs b/src/float/pow.rs index 7d7f75972..5ab5e4201 100644 --- a/src/float/pow.rs +++ b/src/float/pow.rs @@ -1,39 +1,36 @@ use float::Float; +use int::Int; -trait Pow: Float { - /// Returns `a` raised to the power `b` - fn pow(self, mut b: i32) -> Self { - let mut a = self; - let recip = b < 0; - let mut r = Self::ONE; - loop { - if (b & 1) != 0 { - r *= a; - } - b = ((b as u32) >> 1) as i32; - if b == 0 { - break; - } - a *= a; +/// Returns `a` raised to the power `b` +fn pow(a: F, b: i32) -> F { + let mut a = a; + let recip = b < 0; + let mut pow = i32::abs_diff(b, 0); + let mut mul = F::ONE; + loop { + if (pow & 1) != 0 { + mul *= a; } - - if recip { - Self::ONE / r - } else { - r + pow >>= 1; + if pow == 0 { + break; } + a *= a; } -} -impl Pow for f32 {} -impl Pow for f64 {} + if recip { + F::ONE / mul + } else { + mul + } +} intrinsics! 
{ pub extern "C" fn __powisf2(a: f32, b: i32) -> f32 { - a.pow(b) + pow(a, b) } pub extern "C" fn __powidf2(a: f64, b: i32) -> f64 { - a.pow(b) + pow(a, b) } } diff --git a/testcrate/tests/misc.rs b/testcrate/tests/misc.rs index ec3e9d96d..82a1ea27b 100644 --- a/testcrate/tests/misc.rs +++ b/testcrate/tests/misc.rs @@ -126,22 +126,39 @@ fn float_extend_arm() { extend!(f32, f64, __extendsfdf2vfp); } -// This doesn't quite work because of issues related to +// This is approximate because of issues related to // https://github.com/rust-lang/rust/issues/73920. -// TODO how do we resolve this? -/* +// TODO how do we resolve this indeterminacy? macro_rules! pow { - ($($f:ty, $fn:ident);*;) => { + ($($f:ty, $tolerance:expr, $fn:ident);*;) => { $( fuzz_float_2(N, |x: $f, y: $f| { - if !(Float::is_subnormal(x) || Float::is_subnormal(y) || x < 0. || y < 0.) { - let n = y as i32; + if !(Float::is_subnormal(x) || Float::is_subnormal(y) || x.is_nan()) { + let n = y.to_bits() & !<$f as Float>::SIGNIFICAND_MASK; + let n = (n as <$f as Float>::SignedInt) >> <$f as Float>::SIGNIFICAND_BITS; + let n = n as i32; let tmp0: $f = x.powi(n); let tmp1: $f = $fn(x, n); - if tmp0 != tmp1 { + let (a, b) = if tmp0 < tmp1 { + (tmp0, tmp1) + } else { + (tmp1, tmp0) + }; + let good = { + if a == b { + // handles infinity equality + true + } else if a < $tolerance { + b < $tolerance + } else { + let quo = b / a; + (quo < (1. + $tolerance)) && (quo > (1. - $tolerance)) + } + }; + if !good { panic!( "{}({}, {}): std: {}, builtins: {}", - stringify!($fn), x, y, tmp0, tmp1 + stringify!($fn), x, n, tmp0, tmp1 ); } } @@ -150,21 +167,13 @@ macro_rules! 
pow { }; } +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] #[test] fn float_pow() { use compiler_builtins::float::pow::{__powidf2, __powisf2}; pow!( - f32, __powisf2; - f64, __powidf2; + f32, 1e-4, __powisf2; + f64, 1e-12, __powidf2; ); } -*/ - -// placeholder test to make sure basic functionality works -#[test] -fn float_pow() { - use compiler_builtins::float::pow::{__powidf2, __powisf2}; - assert_eq!(__powisf2(-3.0, 3), -27.0); - assert_eq!(__powidf2(-3.0, 3), -27.0); -} From def56b7225a2964364ea7507a8a4ae4126584b0a Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Fri, 11 Dec 2020 13:26:41 -0600 Subject: [PATCH 0421/1459] Delete redundant tests The old tests were hacky and did not cover nearly as many cases as the new tests do. --- testcrate/build.rs | 1443 ---------------------------------- testcrate/tests/generated.rs | 37 - 2 files changed, 1480 deletions(-) delete mode 100644 testcrate/build.rs delete mode 100644 testcrate/tests/generated.rs diff --git a/testcrate/build.rs b/testcrate/build.rs deleted file mode 100644 index 39c2486c6..000000000 --- a/testcrate/build.rs +++ /dev/null @@ -1,1443 +0,0 @@ -use rand::seq::SliceRandom; -use rand::Rng; -use std::collections::HashMap; -use std::fmt; -use std::fmt::Write as FmtWrite; -use std::fs::{self, OpenOptions}; -use std::hash::{Hash, Hasher}; -use std::io::Write; -use std::path::PathBuf; -use std::{env, mem}; - -const NTESTS: usize = 1_000; - -fn main() { - let out_dir = PathBuf::from(env::var_os("OUT_DIR").unwrap()); - let out_file = out_dir.join("generated.rs"); - drop(fs::remove_file(&out_file)); - - let target = env::var("TARGET").unwrap(); - let target_arch_arm = target.contains("arm") || target.contains("thumb"); - let target_arch_mips = target.contains("mips"); - - // TODO accept NaNs. 
We don't do that right now because we can't check - // for NaN-ness on the thumb targets (due to missing intrinsics) - - // float/add.rs - gen( - |(a, b): (MyF64, MyF64)| { - let c = a.0 + b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() { - None - } else { - Some(c) - } - }, - "builtins::float::add::__adddf3(a, b)", - ); - gen( - |(a, b): (MyF32, MyF32)| { - let c = a.0 + b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() { - None - } else { - Some(c) - } - }, - "builtins::float::add::__addsf3(a, b)", - ); - - if target_arch_arm { - gen( - |(a, b): (MyF64, MyF64)| { - let c = a.0 + b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() { - None - } else { - Some(c) - } - }, - "builtins::float::add::__adddf3vfp(a, b)", - ); - gen( - |(a, b): (LargeF32, LargeF32)| { - let c = a.0 + b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() { - None - } else { - Some(c) - } - }, - "builtins::float::add::__addsf3vfp(a, b)", - ); - } - - // float/cmp.rs - gen( - |(a, b): (MyF64, MyF64)| { - let (a, b) = (a.0, b.0); - if a.is_nan() || b.is_nan() { - return None; - } - - if a.is_nan() || b.is_nan() { - Some(-1) - } else if a < b { - Some(-1) - } else if a > b { - Some(1) - } else { - Some(0) - } - }, - "builtins::float::cmp::__gedf2(a, b)", - ); - gen( - |(a, b): (MyF32, MyF32)| { - let (a, b) = (a.0, b.0); - if a.is_nan() || b.is_nan() { - return None; - } - - if a.is_nan() || b.is_nan() { - Some(-1) - } else if a < b { - Some(-1) - } else if a > b { - Some(1) - } else { - Some(0) - } - }, - "builtins::float::cmp::__gesf2(a, b)", - ); - gen( - |(a, b): (MyF64, MyF64)| { - let (a, b) = (a.0, b.0); - if a.is_nan() || b.is_nan() { - return None; - } - - if a.is_nan() || b.is_nan() { - Some(1) - } else if a < b { - Some(-1) - } else if a > b { - Some(1) - } else { - Some(0) - } - }, - "builtins::float::cmp::__ledf2(a, b)", - ); - gen( - |(a, b): (MyF32, MyF32)| { - let (a, b) = (a.0, b.0); - if a.is_nan() || b.is_nan() { - return None; - } - - if a.is_nan() || b.is_nan() { - 
Some(1) - } else if a < b { - Some(-1) - } else if a > b { - Some(1) - } else { - Some(0) - } - }, - "builtins::float::cmp::__lesf2(a, b)", - ); - - gen( - |(a, b): (MyF32, MyF32)| { - let c = a.0.is_nan() || b.0.is_nan(); - Some(c as i32) - }, - "builtins::float::cmp::__unordsf2(a, b)", - ); - - gen( - |(a, b): (MyF64, MyF64)| { - let c = a.0.is_nan() || b.0.is_nan(); - Some(c as i32) - }, - "builtins::float::cmp::__unorddf2(a, b)", - ); - - if target_arch_arm { - gen( - |(a, b): (MyF32, MyF32)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - let c = (a.0 <= b.0) as i32; - Some(c) - }, - "builtins::float::cmp::__aeabi_fcmple(a, b)", - ); - - gen( - |(a, b): (MyF32, MyF32)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - let c = (a.0 >= b.0) as i32; - Some(c) - }, - "builtins::float::cmp::__aeabi_fcmpge(a, b)", - ); - - gen( - |(a, b): (MyF32, MyF32)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - let c = (a.0 == b.0) as i32; - Some(c) - }, - "builtins::float::cmp::__aeabi_fcmpeq(a, b)", - ); - - gen( - |(a, b): (MyF32, MyF32)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - let c = (a.0 < b.0) as i32; - Some(c) - }, - "builtins::float::cmp::__aeabi_fcmplt(a, b)", - ); - - gen( - |(a, b): (MyF32, MyF32)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - let c = (a.0 > b.0) as i32; - Some(c) - }, - "builtins::float::cmp::__aeabi_fcmpgt(a, b)", - ); - - gen( - |(a, b): (MyF64, MyF64)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - let c = (a.0 <= b.0) as i32; - Some(c) - }, - "builtins::float::cmp::__aeabi_dcmple(a, b)", - ); - - gen( - |(a, b): (MyF64, MyF64)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - let c = (a.0 >= b.0) as i32; - Some(c) - }, - "builtins::float::cmp::__aeabi_dcmpge(a, b)", - ); - - gen( - |(a, b): (MyF64, MyF64)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - let c = (a.0 == b.0) as i32; - Some(c) - }, - "builtins::float::cmp::__aeabi_dcmpeq(a, b)", - ); 
- - gen( - |(a, b): (MyF64, MyF64)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - let c = (a.0 < b.0) as i32; - Some(c) - }, - "builtins::float::cmp::__aeabi_dcmplt(a, b)", - ); - - gen( - |(a, b): (MyF64, MyF64)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - let c = (a.0 > b.0) as i32; - Some(c) - }, - "builtins::float::cmp::__aeabi_dcmpgt(a, b)", - ); - - gen( - |(a, b): (LargeF32, LargeF32)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - Some((a.0 >= b.0) as i32) - }, - "builtins::float::cmp::__gesf2vfp(a, b)", - ); - gen( - |(a, b): (MyF64, MyF64)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - Some((a.0 >= b.0) as i32) - }, - "builtins::float::cmp::__gedf2vfp(a, b)", - ); - gen( - |(a, b): (LargeF32, LargeF32)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - Some((a.0 > b.0) as i32) - }, - "builtins::float::cmp::__gtsf2vfp(a, b)", - ); - gen( - |(a, b): (MyF64, MyF64)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - Some((a.0 > b.0) as i32) - }, - "builtins::float::cmp::__gtdf2vfp(a, b)", - ); - gen( - |(a, b): (LargeF32, LargeF32)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - Some((a.0 < b.0) as i32) - }, - "builtins::float::cmp::__ltsf2vfp(a, b)", - ); - gen( - |(a, b): (MyF64, MyF64)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - Some((a.0 < b.0) as i32) - }, - "builtins::float::cmp::__ltdf2vfp(a, b)", - ); - gen( - |(a, b): (LargeF32, LargeF32)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - Some((a.0 <= b.0) as i32) - }, - "builtins::float::cmp::__lesf2vfp(a, b)", - ); - gen( - |(a, b): (MyF64, MyF64)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - Some((a.0 <= b.0) as i32) - }, - "builtins::float::cmp::__ledf2vfp(a, b)", - ); - gen( - |(a, b): (LargeF32, LargeF32)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - Some((a.0 != b.0) as i32) - }, - "builtins::float::cmp::__nesf2vfp(a, b)", - ); - gen( - |(a, b): (MyF64, 
MyF64)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - Some((a.0 != b.0) as i32) - }, - "builtins::float::cmp::__nedf2vfp(a, b)", - ); - gen( - |(a, b): (LargeF32, LargeF32)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - Some((a.0 == b.0) as i32) - }, - "builtins::float::cmp::__eqsf2vfp(a, b)", - ); - gen( - |(a, b): (MyF64, MyF64)| { - if a.0.is_nan() || b.0.is_nan() { - return None; - } - Some((a.0 == b.0) as i32) - }, - "builtins::float::cmp::__eqdf2vfp(a, b)", - ); - } - - // float/extend.rs - gen( - |a: MyF32| { - if a.0.is_nan() { - return None; - } - Some(f64::from(a.0)) - }, - "builtins::float::extend::__extendsfdf2(a)", - ); - if target_arch_arm { - gen( - |a: LargeF32| { - if a.0.is_nan() { - return None; - } - Some(f64::from(a.0)) - }, - "builtins::float::extend::__extendsfdf2vfp(a)", - ); - } - - // float/conv.rs - gen( - |a: MyF64| i64::cast(a.0), - "builtins::float::conv::__fixdfdi(a)", - ); - gen( - |a: MyF64| i32::cast(a.0), - "builtins::float::conv::__fixdfsi(a)", - ); - gen( - |a: MyF32| i64::cast(a.0), - "builtins::float::conv::__fixsfdi(a)", - ); - gen( - |a: MyF32| i32::cast(a.0), - "builtins::float::conv::__fixsfsi(a)", - ); - gen( - |a: MyF32| i128::cast(a.0), - "builtins::float::conv::__fixsfti(a)", - ); - gen( - |a: MyF64| i128::cast(a.0), - "builtins::float::conv::__fixdfti(a)", - ); - gen( - |a: MyF64| u64::cast(a.0), - "builtins::float::conv::__fixunsdfdi(a)", - ); - gen( - |a: MyF64| u32::cast(a.0), - "builtins::float::conv::__fixunsdfsi(a)", - ); - gen( - |a: MyF32| u64::cast(a.0), - "builtins::float::conv::__fixunssfdi(a)", - ); - gen( - |a: MyF32| u32::cast(a.0), - "builtins::float::conv::__fixunssfsi(a)", - ); - gen( - |a: MyF32| u128::cast(a.0), - "builtins::float::conv::__fixunssfti(a)", - ); - gen( - |a: MyF64| u128::cast(a.0), - "builtins::float::conv::__fixunsdfti(a)", - ); - gen( - |a: MyI64| Some(a.0 as f64), - "builtins::float::conv::__floatdidf(a)", - ); - gen( - |a: MyI32| Some(a.0 as f64), - 
"builtins::float::conv::__floatsidf(a)", - ); - gen( - |a: MyI32| Some(a.0 as f32), - "builtins::float::conv::__floatsisf(a)", - ); - gen( - |a: MyU64| Some(a.0 as f64), - "builtins::float::conv::__floatundidf(a)", - ); - gen( - |a: MyU32| Some(a.0 as f64), - "builtins::float::conv::__floatunsidf(a)", - ); - gen( - |a: MyU32| Some(a.0 as f32), - "builtins::float::conv::__floatunsisf(a)", - ); - gen( - |a: MyU128| Some(a.0 as f32), - "builtins::float::conv::__floatuntisf(a)", - ); - if !target_arch_mips { - gen( - |a: MyI128| Some(a.0 as f32), - "builtins::float::conv::__floattisf(a)", - ); - gen( - |a: MyI128| Some(a.0 as f64), - "builtins::float::conv::__floattidf(a)", - ); - gen( - |a: MyU128| Some(a.0 as f64), - "builtins::float::conv::__floatuntidf(a)", - ); - } - - // float/pow.rs - gen( - |(a, b): (MyF64, MyI32)| { - let c = a.0.powi(b.0); - if a.0.is_nan() || c.is_nan() { - None - } else { - Some(c) - } - }, - "builtins::float::pow::__powidf2(a, b)", - ); - gen( - |(a, b): (MyF32, MyI32)| { - let c = a.0.powi(b.0); - if a.0.is_nan() || c.is_nan() { - None - } else { - Some(c) - } - }, - "builtins::float::pow::__powisf2(a, b)", - ); - - // float/sub.rs - gen( - |(a, b): (MyF64, MyF64)| { - let c = a.0 - b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() { - None - } else { - Some(c) - } - }, - "builtins::float::sub::__subdf3(a, b)", - ); - gen( - |(a, b): (MyF32, MyF32)| { - let c = a.0 - b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() { - None - } else { - Some(c) - } - }, - "builtins::float::sub::__subsf3(a, b)", - ); - - if target_arch_arm { - gen( - |(a, b): (MyF64, MyF64)| { - let c = a.0 - b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() { - None - } else { - Some(c) - } - }, - "builtins::float::sub::__subdf3vfp(a, b)", - ); - gen( - |(a, b): (LargeF32, LargeF32)| { - let c = a.0 - b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() { - None - } else { - Some(c) - } - }, - "builtins::float::sub::__subsf3vfp(a, b)", - ); - } - - // 
float/mul.rs - gen( - |(a, b): (MyF64, MyF64)| { - let c = a.0 * b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() { - None - } else { - Some(c) - } - }, - "builtins::float::mul::__muldf3(a, b)", - ); - gen( - |(a, b): (LargeF32, LargeF32)| { - let c = a.0 * b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() { - None - } else { - Some(c) - } - }, - "builtins::float::mul::__mulsf3(a, b)", - ); - - if target_arch_arm { - gen( - |(a, b): (MyF64, MyF64)| { - let c = a.0 * b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() { - None - } else { - Some(c) - } - }, - "builtins::float::mul::__muldf3vfp(a, b)", - ); - gen( - |(a, b): (LargeF32, LargeF32)| { - let c = a.0 * b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() { - None - } else { - Some(c) - } - }, - "builtins::float::mul::__mulsf3vfp(a, b)", - ); - } - - // float/div.rs - gen( - |(a, b): (MyF64, MyF64)| { - if b.0 == 0.0 { - return None; - } - let c = a.0 / b.0; - if a.0.is_nan() - || b.0.is_nan() - || c.is_nan() - || c.abs() <= f64::from_bits(4503599627370495u64) - { - None - } else { - Some(c) - } - }, - "builtins::float::div::__divdf3(a, b)", - ); - gen( - |(a, b): (LargeF32, LargeF32)| { - if b.0 == 0.0 { - return None; - } - let c = a.0 / b.0; - if a.0.is_nan() || b.0.is_nan() || c.is_nan() || c.abs() <= f32::from_bits(16777215u32) - { - None - } else { - Some(c) - } - }, - "builtins::float::div::__divsf3(a, b)", - ); - - if target_arch_arm { - gen( - |(a, b): (MyF64, MyF64)| { - if b.0 == 0.0 { - return None; - } - let c = a.0 / b.0; - if a.0.is_nan() - || b.0.is_nan() - || c.is_nan() - || c.abs() <= f64::from_bits(4503599627370495u64) - { - None - } else { - Some(c) - } - }, - "builtins::float::div::__divdf3vfp(a, b)", - ); - gen( - |(a, b): (LargeF32, LargeF32)| { - if b.0 == 0.0 { - return None; - } - let c = a.0 / b.0; - if a.0.is_nan() - || b.0.is_nan() - || c.is_nan() - || c.abs() <= f32::from_bits(16777215u32) - { - None - } else { - Some(c) - } - }, - "builtins::float::div::__divsf3vfp(a, 
b)", - ); - } - - // int/addsub.rs - gen( - |(a, b): (MyU128, MyU128)| Some(a.0.wrapping_add(b.0)), - "builtins::int::addsub::__rust_u128_add(a, b)", - ); - gen( - |(a, b): (MyI128, MyI128)| Some(a.0.wrapping_add(b.0)), - "builtins::int::addsub::__rust_i128_add(a, b)", - ); - gen( - |(a, b): (MyU128, MyU128)| Some(a.0.overflowing_add(b.0)), - "builtins::int::addsub::__rust_u128_addo(a, b)", - ); - gen( - |(a, b): (MyI128, MyI128)| Some(a.0.overflowing_add(b.0)), - "builtins::int::addsub::__rust_i128_addo(a, b)", - ); - gen( - |(a, b): (MyU128, MyU128)| Some(a.0.wrapping_sub(b.0)), - "builtins::int::addsub::__rust_u128_sub(a, b)", - ); - gen( - |(a, b): (MyI128, MyI128)| Some(a.0.wrapping_sub(b.0)), - "builtins::int::addsub::__rust_i128_sub(a, b)", - ); - gen( - |(a, b): (MyU128, MyU128)| Some(a.0.overflowing_sub(b.0)), - "builtins::int::addsub::__rust_u128_subo(a, b)", - ); - gen( - |(a, b): (MyI128, MyI128)| Some(a.0.overflowing_sub(b.0)), - "builtins::int::addsub::__rust_i128_subo(a, b)", - ); - - // int/mul.rs - gen( - |(a, b): (MyU64, MyU64)| Some(a.0.wrapping_mul(b.0)), - "builtins::int::mul::__muldi3(a, b)", - ); - gen( - |(a, b): (MyI64, MyI64)| Some(a.0.overflowing_mul(b.0)), - "{ - let mut o = 2; - let c = builtins::int::mul::__mulodi4(a, b, &mut o); - (c, match o { 0 => false, 1 => true, _ => panic!() }) - }", - ); - gen( - |(a, b): (MyI32, MyI32)| Some(a.0.overflowing_mul(b.0)), - "{ - let mut o = 2; - let c = builtins::int::mul::__mulosi4(a, b, &mut o); - (c, match o { 0 => false, 1 => true, _ => panic!() }) - }", - ); - gen( - |(a, b): (MyI128, MyI128)| Some(a.0.wrapping_mul(b.0)), - "builtins::int::mul::__multi3(a, b)", - ); - gen( - |(a, b): (MyI128, MyI128)| Some(a.0.overflowing_mul(b.0)), - "{ - let mut o = 2; - let c = builtins::int::mul::__muloti4(a, b, &mut o); - (c, match o { 0 => false, 1 => true, _ => panic!() }) - }", - ); - - // int/sdiv.rs - gen( - |(a, b): (MyI64, MyI64)| { - if b.0 == 0 { - None - } else { - Some(a.0 / b.0) - } - }, - 
"builtins::int::sdiv::__divdi3(a, b)", - ); - gen( - |(a, b): (MyI64, MyI64)| { - if b.0 == 0 { - None - } else { - Some((a.0 / b.0, a.0 % b.0)) - } - }, - "{ - let mut r = 0; - (builtins::int::sdiv::__divmoddi4(a, b, &mut r), r) - }", - ); - gen( - |(a, b): (MyI32, MyI32)| { - if b.0 == 0 { - None - } else { - Some((a.0 / b.0, a.0 % b.0)) - } - }, - "{ - let mut r = 0; - (builtins::int::sdiv::__divmodsi4(a, b, &mut r), r) - }", - ); - gen( - |(a, b): (MyI128, MyI128)| { - if b.0 == 0 { - None - } else { - Some((a.0 / b.0, a.0 % b.0)) - } - }, - "{ - let mut r = 0; - (builtins::int::sdiv::__divmodti4(a, b, &mut r), r) - }", - ); - gen( - |(a, b): (MyI32, MyI32)| { - if b.0 == 0 { - None - } else { - Some(a.0 / b.0) - } - }, - "builtins::int::sdiv::__divsi3(a, b)", - ); - gen( - |(a, b): (MyI32, MyI32)| { - if b.0 == 0 { - None - } else { - Some(a.0 % b.0) - } - }, - "builtins::int::sdiv::__modsi3(a, b)", - ); - gen( - |(a, b): (MyI64, MyI64)| { - if b.0 == 0 { - None - } else { - Some(a.0 % b.0) - } - }, - "builtins::int::sdiv::__moddi3(a, b)", - ); - gen( - |(a, b): (MyI128, MyI128)| { - if b.0 == 0 { - None - } else { - Some(a.0 / b.0) - } - }, - "builtins::int::sdiv::__divti3(a, b)", - ); - gen( - |(a, b): (MyI128, MyI128)| { - if b.0 == 0 { - None - } else { - Some(a.0 % b.0) - } - }, - "builtins::int::sdiv::__modti3(a, b)", - ); - - // int/shift.rs - gen( - |(a, b): (MyU32, MyU32)| Some(a.0 << (b.0 % 32)), - "builtins::int::shift::__ashlsi3(a, b % 32)", - ); - gen( - |(a, b): (MyU64, MyU32)| Some(a.0 << (b.0 % 64)), - "builtins::int::shift::__ashldi3(a, b % 64)", - ); - gen( - |(a, b): (MyU128, MyU32)| Some(a.0 << (b.0 % 128)), - "builtins::int::shift::__ashlti3(a, b % 128)", - ); - gen( - |(a, b): (MyI32, MyU32)| Some(a.0 >> (b.0 % 32)), - "builtins::int::shift::__ashrsi3(a, b % 32)", - ); - gen( - |(a, b): (MyI64, MyU32)| Some(a.0 >> (b.0 % 64)), - "builtins::int::shift::__ashrdi3(a, b % 64)", - ); - gen( - |(a, b): (MyI128, MyU32)| Some(a.0 >> (b.0 % 128)), 
- "builtins::int::shift::__ashrti3(a, b % 128)", - ); - gen( - |(a, b): (MyU32, MyU32)| Some(a.0 >> (b.0 % 32)), - "builtins::int::shift::__lshrsi3(a, b % 32)", - ); - gen( - |(a, b): (MyU64, MyU32)| Some(a.0 >> (b.0 % 64)), - "builtins::int::shift::__lshrdi3(a, b % 64)", - ); - gen( - |(a, b): (MyU128, MyU32)| Some(a.0 >> (b.0 % 128)), - "builtins::int::shift::__lshrti3(a, b % 128)", - ); - - // int/udiv.rs - gen( - |(a, b): (MyU64, MyU64)| { - if b.0 == 0 { - None - } else { - Some(a.0 / b.0) - } - }, - "builtins::int::udiv::__udivdi3(a, b)", - ); - gen( - |(a, b): (MyU64, MyU64)| { - if b.0 == 0 { - None - } else { - Some((a.0 / b.0, a.0 % b.0)) - } - }, - "{ - let mut r = 0; - (builtins::int::udiv::__udivmoddi4(a, b, Some(&mut r)), r) - }", - ); - gen( - |(a, b): (MyU32, MyU32)| { - if b.0 == 0 { - None - } else { - Some((a.0 / b.0, a.0 % b.0)) - } - }, - "{ - let mut r = 0; - (builtins::int::udiv::__udivmodsi4(a, b, Some(&mut r)), r) - }", - ); - gen( - |(a, b): (MyU32, MyU32)| { - if b.0 == 0 { - None - } else { - Some(a.0 / b.0) - } - }, - "builtins::int::udiv::__udivsi3(a, b)", - ); - gen( - |(a, b): (MyU32, MyU32)| { - if b.0 == 0 { - None - } else { - Some(a.0 % b.0) - } - }, - "builtins::int::udiv::__umodsi3(a, b)", - ); - gen( - |(a, b): (MyU64, MyU64)| { - if b.0 == 0 { - None - } else { - Some(a.0 % b.0) - } - }, - "builtins::int::udiv::__umoddi3(a, b)", - ); - gen( - |(a, b): (MyU128, MyU128)| { - if b.0 == 0 { - None - } else { - Some(a.0 / b.0) - } - }, - "builtins::int::udiv::__udivti3(a, b)", - ); - gen( - |(a, b): (MyU128, MyU128)| { - if b.0 == 0 { - None - } else { - Some(a.0 % b.0) - } - }, - "builtins::int::udiv::__umodti3(a, b)", - ); - gen( - |(a, b): (MyU128, MyU128)| { - if b.0 == 0 { - None - } else { - Some((a.0 / b.0, a.0 % b.0)) - } - }, - "{ - let mut r = 0; - (builtins::int::udiv::__udivmodti4(a, b, Some(&mut r)), r) - }", - ); -} - -macro_rules! 
gen_float { - ($name:ident, - $fty:ident, - $uty:ident, - $bits:expr, - $significand_bits:expr) => { - pub fn $name(rng: &mut R) -> $fty - where - R: Rng + ?Sized, - { - const BITS: u8 = $bits; - const SIGNIFICAND_BITS: u8 = $significand_bits; - - const SIGNIFICAND_MASK: $uty = (1 << SIGNIFICAND_BITS) - 1; - const SIGN_MASK: $uty = (1 << (BITS - 1)); - const EXPONENT_MASK: $uty = !(SIGN_MASK | SIGNIFICAND_MASK); - - fn mk_f32(sign: bool, exponent: $uty, significand: $uty) -> $fty { - unsafe { - mem::transmute( - ((sign as $uty) << (BITS - 1)) - | ((exponent & EXPONENT_MASK) << SIGNIFICAND_BITS) - | (significand & SIGNIFICAND_MASK), - ) - } - } - - if rng.gen_range(0, 10) == 1 { - // Special values - *[ - -0.0, - 0.0, - ::std::$fty::MIN, - ::std::$fty::MIN_POSITIVE, - ::std::$fty::MAX, - ::std::$fty::NAN, - ::std::$fty::INFINITY, - -::std::$fty::INFINITY, - ] - .choose(rng) - .unwrap() - } else if rng.gen_range(0, 10) == 1 { - // NaN patterns - mk_f32(rng.gen(), rng.gen(), 0) - } else if rng.gen() { - // Denormalized - mk_f32(rng.gen(), 0, rng.gen()) - } else { - // Random anything - mk_f32(rng.gen(), rng.gen(), rng.gen()) - } - } - }; -} - -gen_float!(gen_f32, f32, u32, 32, 23); -gen_float!(gen_f64, f64, u64, 64, 52); - -macro_rules! 
gen_large_float { - ($name:ident, - $fty:ident, - $uty:ident, - $bits:expr, - $significand_bits:expr) => { - pub fn $name(rng: &mut R) -> $fty - where - R: Rng + ?Sized, - { - const BITS: u8 = $bits; - const SIGNIFICAND_BITS: u8 = $significand_bits; - - const SIGNIFICAND_MASK: $uty = (1 << SIGNIFICAND_BITS) - 1; - const SIGN_MASK: $uty = (1 << (BITS - 1)); - const EXPONENT_MASK: $uty = !(SIGN_MASK | SIGNIFICAND_MASK); - - fn mk_f32(sign: bool, exponent: $uty, significand: $uty) -> $fty { - unsafe { - mem::transmute( - ((sign as $uty) << (BITS - 1)) - | ((exponent & EXPONENT_MASK) << SIGNIFICAND_BITS) - | (significand & SIGNIFICAND_MASK), - ) - } - } - - if rng.gen_range(0, 10) == 1 { - // Special values - *[ - -0.0, - 0.0, - ::std::$fty::MIN, - ::std::$fty::MIN_POSITIVE, - ::std::$fty::MAX, - ::std::$fty::NAN, - ::std::$fty::INFINITY, - -::std::$fty::INFINITY, - ] - .choose(rng) - .unwrap() - } else if rng.gen_range(0, 10) == 1 { - // NaN patterns - mk_f32(rng.gen(), rng.gen(), 0) - } else if rng.gen() { - // Denormalized - mk_f32(rng.gen(), 0, rng.gen()) - } else { - // Random anything - rng.gen::<$fty>() - } - } - }; -} - -gen_large_float!(gen_large_f32, f32, u32, 32, 23); -gen_large_float!(gen_large_f64, f64, u64, 64, 52); - -trait TestInput: Hash + Eq + fmt::Debug { - fn ty_name() -> String; - fn generate_lets(container: &str, cnt: &mut u8) -> String; - fn generate_static(&self, dst: &mut String); -} - -trait TestOutput { - fn ty_name() -> String; - fn generate_static(&self, dst: &mut String); - fn generate_expr(container: &str) -> String; -} - -fn gen(mut generate: F, test: &str) -where - F: FnMut(A) -> Option, - A: TestInput + Copy, - R: TestOutput, - rand::distributions::Standard: rand::distributions::Distribution, -{ - let rng = &mut rand::thread_rng(); - let testname = test.split("::").last().unwrap().split("(").next().unwrap(); - let out_dir = PathBuf::from(env::var_os("OUT_DIR").unwrap()); - let out_file = out_dir.join("generated.rs"); - - let mut 
testcases = HashMap::new(); - let mut n = NTESTS; - while n > 0 { - let input: A = rng.gen(); - if testcases.contains_key(&input) { - continue; - } - let output = match generate(input) { - Some(o) => o, - None => continue, - }; - testcases.insert(input, output); - n -= 1; - } - - let mut contents = String::new(); - contents.push_str(&format!("mod {} {{\nuse super::*;\n", testname)); - contents.push_str("#[test]\n"); - contents.push_str("fn test() {\n"); - contents.push_str(&format!( - "static TESTS: [({}, {}); {}] = [\n", - A::ty_name(), - R::ty_name(), - NTESTS - )); - for (input, output) in testcases { - contents.push_str(" ("); - input.generate_static(&mut contents); - contents.push_str(", "); - output.generate_static(&mut contents); - contents.push_str("),\n"); - } - contents.push_str("];\n"); - - contents.push_str(&format!( - r#" - for &(inputs, output) in TESTS.iter() {{ - {} - assert_eq!({}, {}, "inputs {{:?}}", inputs) - }} - "#, - A::generate_lets("inputs", &mut 0), - R::generate_expr("output"), - test, - )); - contents.push_str("\n}\n"); - contents.push_str("\n}\n"); - - OpenOptions::new() - .write(true) - .append(true) - .create(true) - .open(out_file) - .unwrap() - .write_all(contents.as_bytes()) - .unwrap(); -} - -macro_rules! 
my_float { - ($(struct $name:ident($inner:ident) = $gen:ident;)*) => ($( - #[derive(Debug, Clone, Copy)] - struct $name($inner); - - impl TestInput for $name { - fn ty_name() -> String { - format!("u{}", &stringify!($inner)[1..]) - } - - fn generate_lets(container: &str, cnt: &mut u8) -> String { - let me = *cnt; - *cnt += 1; - format!("let {} = {}::from_bits({});\n", - (b'a' + me) as char, - stringify!($inner), - container) - } - - fn generate_static(&self, dst: &mut String) { - write!(dst, "{}", self.0.to_bits()).unwrap(); - } - } - - impl rand::distributions::Distribution<$name> for rand::distributions::Standard { - fn sample(&self, r: &mut R) -> $name { - $name($gen(r)) - } - } - - impl Hash for $name { - fn hash(&self, h: &mut H) { - self.0.to_bits().hash(h) - } - } - - impl PartialEq for $name { - fn eq(&self, other: &$name) -> bool { - self.0.to_bits() == other.0.to_bits() - } - } - - impl Eq for $name {} - - )*) -} - -my_float! { - struct MyF64(f64) = gen_f64; - struct LargeF64(f64) = gen_large_f64; - struct MyF32(f32) = gen_f32; - struct LargeF32(f32) = gen_large_f32; -} - -macro_rules! 
my_integer { - ($(struct $name:ident($inner:ident);)*) => ($( - #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] - struct $name($inner); - - impl TestInput for $name { - fn ty_name() -> String { - stringify!($inner).to_string() - } - - fn generate_lets(container: &str, cnt: &mut u8) -> String { - let me = *cnt; - *cnt += 1; - format!("let {} = {};\n", - (b'a' + me) as char, - container) - } - - fn generate_static(&self, dst: &mut String) { - write!(dst, "{}", self.0).unwrap(); - } - } - - impl rand::distributions::Distribution<$name> for rand::distributions::Standard { - fn sample(&self, r: &mut R) -> $name { - let bits = (0 as $inner).count_zeros(); - let mut mk = || { - if r.gen_range(0, 10) == 1 { - *[ - ::std::$inner::MAX >> (bits / 2), - 0, - ::std::$inner::MIN >> (bits / 2), - ].choose(r).unwrap() - } else { - r.gen::<$inner>() - } - }; - let a = mk(); - let b = mk(); - $name((a << (bits / 2)) | (b & (!0 << (bits / 2)))) - } - } - )*) -} - -my_integer! { - struct MyI32(i32); - struct MyI64(i64); - struct MyI128(i128); - struct MyU16(u16); - struct MyU32(u32); - struct MyU64(u64); - struct MyU128(u128); -} - -impl TestInput for (A, B) -where - A: TestInput, - B: TestInput, -{ - fn ty_name() -> String { - format!("({}, {})", A::ty_name(), B::ty_name()) - } - - fn generate_lets(container: &str, cnt: &mut u8) -> String { - format!( - "{}{}", - A::generate_lets(&format!("{}.0", container), cnt), - B::generate_lets(&format!("{}.1", container), cnt) - ) - } - - fn generate_static(&self, dst: &mut String) { - dst.push_str("("); - self.0.generate_static(dst); - dst.push_str(", "); - self.1.generate_static(dst); - dst.push_str(")"); - } -} - -impl TestOutput for f64 { - fn ty_name() -> String { - "u64".to_string() - } - - fn generate_static(&self, dst: &mut String) { - write!(dst, "{}", self.to_bits()).unwrap(); - } - - fn generate_expr(container: &str) -> String { - format!("f64::from_bits({})", container) - } -} - -impl TestOutput for f32 { - fn ty_name() -> String 
{ - "u32".to_string() - } - - fn generate_static(&self, dst: &mut String) { - write!(dst, "{}", self.to_bits()).unwrap(); - } - - fn generate_expr(container: &str) -> String { - format!("f32::from_bits({})", container) - } -} - -macro_rules! plain_test_output { - ($($i:tt)*) => ($( - impl TestOutput for $i { - fn ty_name() -> String { - stringify!($i).to_string() - } - - fn generate_static(&self, dst: &mut String) { - write!(dst, "{}", self).unwrap(); - } - - fn generate_expr(container: &str) -> String { - container.to_string() - } - } - )*) -} - -plain_test_output!(i32 i64 i128 u32 u64 u128 bool); - -impl TestOutput for (A, B) -where - A: TestOutput, - B: TestOutput, -{ - fn ty_name() -> String { - format!("({}, {})", A::ty_name(), B::ty_name()) - } - - fn generate_static(&self, dst: &mut String) { - dst.push_str("("); - self.0.generate_static(dst); - dst.push_str(", "); - self.1.generate_static(dst); - dst.push_str(")"); - } - - fn generate_expr(container: &str) -> String { - container.to_string() - } -} - -trait FromFloat: Sized { - fn cast(src: T) -> Option; -} - -macro_rules! from_float { - ($($src:ident => $($dst:ident),+);+;) => { - $( - $( - impl FromFloat<$src> for $dst { - fn cast(src: $src) -> Option<$dst> { - use std::{$dst, $src}; - - if src.is_nan() || - src.is_infinite() || - src < std::$dst::MIN as $src || - src > std::$dst::MAX as $src - { - None - } else { - Some(src as $dst) - } - } - } - )+ - )+ - } -} - -from_float! 
{ - f32 => i32, i64, i128, u32, u64, u128; - f64 => i32, i64, i128, u32, u64, u128; -} diff --git a/testcrate/tests/generated.rs b/testcrate/tests/generated.rs deleted file mode 100644 index a296db22d..000000000 --- a/testcrate/tests/generated.rs +++ /dev/null @@ -1,37 +0,0 @@ -#![feature(lang_items)] -#![allow(bad_style)] -#![allow(unused_imports)] -#![no_std] - -extern crate compiler_builtins as builtins; - -#[cfg(all( - target_arch = "arm", - not(any(target_env = "gnu", target_env = "musl")), - target_os = "linux", - test -))] -extern crate utest_cortex_m_qemu; - -#[cfg(all( - target_arch = "arm", - not(any(target_env = "gnu", target_env = "musl")), - target_os = "linux", - test -))] -#[macro_use] -extern crate utest_macros; - -#[cfg(all( - target_arch = "arm", - not(any(target_env = "gnu", target_env = "musl")), - target_os = "linux", - test -))] -macro_rules! panic { // overrides `panic!` - ($($tt:tt)*) => { - upanic!($($tt)*); - }; -} - -include!(concat!(env!("OUT_DIR"), "/generated.rs")); From 084fbd2b0b1dc0766475d73a37912edad9be144b Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Fri, 11 Dec 2020 14:20:51 -0600 Subject: [PATCH 0422/1459] Fix panic-handler documentation rust-lang/rust#51647 is fixed but panic-handler is still needed --- crates/panic-handler/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/panic-handler/src/lib.rs b/crates/panic-handler/src/lib.rs index a75999a4b..673e00522 100644 --- a/crates/panic-handler/src/lib.rs +++ b/crates/panic-handler/src/lib.rs @@ -1,4 +1,4 @@ -// Hack of a crate until rust-lang/rust#51647 is fixed +//! 
This is needed for tests on targets that require a `#[panic_handler]` function #![feature(no_core)] #![no_core] From 5e2b3c7b27cf0c7515125b61b68896e8c2be2894 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Fri, 11 Dec 2020 14:45:35 -0600 Subject: [PATCH 0423/1459] Fix all clippy warnings --- src/float/add.rs | 5 ++--- src/float/cmp.rs | 21 +++++++++------------ src/float/div.rs | 4 ++++ src/float/mul.rs | 2 +- src/int/specialized_div_rem/mod.rs | 7 +++++++ src/lib.rs | 2 ++ src/mem/mod.rs | 3 +++ 7 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/float/add.rs b/src/float/add.rs index e8b9f9e77..67f6c2c14 100644 --- a/src/float/add.rs +++ b/src/float/add.rs @@ -137,9 +137,8 @@ where a_significand <<= shift; a_exponent -= shift; } - } else - /* addition */ - { + } else { + // addition a_significand += b_significand; // If the addition carried up, we need to right-shift the result and diff --git a/src/float/cmp.rs b/src/float/cmp.rs index 79c26b099..1d4e38433 100644 --- a/src/float/cmp.rs +++ b/src/float/cmp.rs @@ -63,25 +63,22 @@ fn cmp(a: F, b: F) -> Result { // a and b as signed integers as we would with a fp_ting-point compare. if a_srep & b_srep >= szero { if a_srep < b_srep { - return Result::Less; + Result::Less } else if a_srep == b_srep { - return Result::Equal; + Result::Equal } else { - return Result::Greater; + Result::Greater } - } // Otherwise, both are negative, so we need to flip the sense of the // comparison to get the correct result. (This assumes a twos- or ones- // complement integer representation; if integers are represented in a // sign-magnitude representation, then this flip is incorrect). 
- else { - if a_srep > b_srep { - return Result::Less; - } else if a_srep == b_srep { - return Result::Equal; - } else { - return Result::Greater; - } + } else if a_srep > b_srep { + Result::Less + } else if a_srep == b_srep { + Result::Equal + } else { + Result::Greater } } diff --git a/src/float/div.rs b/src/float/div.rs index dd6467f88..9ac1e87b4 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -1,3 +1,7 @@ +// The functions are complex with many branches, and explicit +// `return`s makes it clear where function exit points are +#![allow(clippy::needless_return)] + use float::Float; use int::{CastInto, DInt, HInt, Int}; diff --git a/src/float/mul.rs b/src/float/mul.rs index 540e7bdcf..c89f22756 100644 --- a/src/float/mul.rs +++ b/src/float/mul.rs @@ -181,7 +181,7 @@ where product_high += product_high & one; } - return F::from_repr(product_high); + F::from_repr(product_high) } intrinsics! { diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs index 14e758fc5..391db765d 100644 --- a/src/int/specialized_div_rem/mod.rs +++ b/src/int/specialized_div_rem/mod.rs @@ -1,5 +1,12 @@ // TODO: when `unsafe_block_in_unsafe_fn` is stabilized, remove this #![allow(unused_unsafe)] +// The functions are complex with many branches, and explicit +// `return`s makes it clear where function exit points are +#![allow(clippy::needless_return)] +#![allow(clippy::comparison_chain)] +// Clippy is confused by the complex configuration +#![allow(clippy::if_same_then_else)] +#![allow(clippy::needless_bool)] //! This `specialized_div_rem` module is originally from version 1.0.0 of the //! `specialized-div-rem` crate. Note that `for` loops with ranges are not used in this diff --git a/src/lib.rs b/src/lib.rs index 9190c4251..efd63ff77 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,6 +17,8 @@ // compiler on ABIs and such, so we should be "good enough" for now and changes // to the `u128` ABI will be reflected here. 
#![allow(improper_ctypes, improper_ctypes_definitions)] +// `mem::swap` cannot be used because it may generate references to memcpy in unoptimized code. +#![allow(clippy::manual_swap)] // We disable #[no_mangle] for tests so that we can verify the test results // against the native compiler-rt implementations of the builtins. diff --git a/src/mem/mod.rs b/src/mem/mod.rs index 107762c43..03dc965ca 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -1,3 +1,6 @@ +// Trying to satisfy clippy here is hopeless +#![allow(clippy::style)] + #[allow(warnings)] #[cfg(target_pointer_width = "16")] type c_int = i16; From 3231cee4ade124dcde07954e565061ac278b2d24 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Fri, 2 Apr 2021 09:24:00 -0500 Subject: [PATCH 0424/1459] fix CTFE cycle --- src/int/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/int/mod.rs b/src/int/mod.rs index d8524a58a..2190a69b9 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -84,7 +84,7 @@ pub trait Int: macro_rules! int_impl_common { ($ty:ty) => { - const BITS: u32 = ::BITS; + const BITS: u32 = ::ZERO.count_zeros(); const SIGNED: bool = Self::MIN != Self::ZERO; const ZERO: Self = 0; From ddf6e3cf2158e75a0028b52c00cc80a8122b6068 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Fri, 2 Apr 2021 09:35:01 -0500 Subject: [PATCH 0425/1459] Remove `rand` dependency, update `rand_xoshiro` --- testcrate/Cargo.toml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index ff9a6a453..a066a1134 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -8,14 +8,11 @@ edition = "2018" test = false doctest = false -[build-dependencies] -rand = "0.7" - [dependencies] # For fuzzing tests we want a deterministic seedable RNG. We also eliminate potential # problems with system RNGs on the variety of platforms this crate is tested on. # `xoshiro128**` is used for its quality, size, and speed at generating `u32` shift amounts. 
-rand_xoshiro = "0.4" +rand_xoshiro = "0.6" [dependencies.compiler_builtins] path = ".." From f06e6d1a1127f2979a71d4af98b674a704e6157e Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 2 Apr 2021 20:31:08 +0100 Subject: [PATCH 0426/1459] Replace llvm_asm! with asm! --- examples/intrinsics.rs | 9 +- src/arm.rs | 188 ++++++++++++++++++++--------------------- src/lib.rs | 1 - src/x86.rs | 96 ++++++++++----------- src/x86_64.rs | 99 +++++++++++----------- 5 files changed, 194 insertions(+), 199 deletions(-) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 519cea2ae..ddbb976a2 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -6,7 +6,7 @@ #![allow(unused_features)] #![cfg_attr(thumb, no_main)] #![deny(dead_code)] -#![feature(llvm_asm)] +#![feature(test)] #![feature(lang_items)] #![feature(start)] #![feature(allocator_api)] @@ -276,14 +276,9 @@ mod intrinsics { } fn run() { + use core::hint::black_box as bb; use intrinsics::*; - // A copy of "test::black_box". 
Used to prevent LLVM from optimizing away the intrinsics during LTO - fn bb(dummy: T) -> T { - unsafe { llvm_asm!("" : : "r"(&dummy)) } - dummy - } - bb(aeabi_d2f(bb(2.))); bb(aeabi_d2i(bb(2.))); bb(aeabi_d2l(bb(2.))); diff --git a/src/arm.rs b/src/arm.rs index 2b17b4f96..545d6a17a 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -9,135 +9,135 @@ use core::intrinsics; #[cfg(not(any(target_os = "ios", target_env = "msvc")))] #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe fn __aeabi_uidivmod() { - llvm_asm!(" - push {lr} - sub sp, sp, #4 - mov r2, sp - bl __udivmodsi4 - ldr r1, [sp] - add sp, sp, #4 - pop {pc} - " ::: "memory" : "volatile"); - intrinsics::unreachable(); +pub unsafe extern "C" fn __aeabi_uidivmod() { + asm!( + "push {{lr}}", + "sub sp, sp, #4", + "mov r2, sp", + "bl __udivmodsi4", + "ldr r1, [sp]", + "add sp, sp, #4", + "pop {{pc}}", + options(noreturn) + ); } #[cfg(target_os = "ios")] #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe fn __aeabi_uidivmod() { - llvm_asm!(" - push {lr} - sub sp, sp, #4 - mov r2, sp - bl ___udivmodsi4 - ldr r1, [sp] - add sp, sp, #4 - pop {pc} - " ::: "memory" : "volatile"); - intrinsics::unreachable(); +pub unsafe extern "C" fn __aeabi_uidivmod() { + asm!( + "push {{lr}}", + "sub sp, sp, #4", + "mov r2, sp", + "bl ___udivmodsi4", + "ldr r1, [sp]", + "add sp, sp, #4", + "pop {{pc}}", + options(noreturn) + ); } #[cfg(not(target_os = "ios"))] #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe fn __aeabi_uldivmod() { - llvm_asm!(" - push {r4, lr} - sub sp, sp, #16 - add r4, sp, #8 - str r4, [sp] - bl __udivmoddi4 - ldr r2, [sp, #8] - ldr r3, [sp, #12] - add sp, sp, #16 - pop {r4, pc} - " ::: "memory" : "volatile"); - intrinsics::unreachable(); +pub unsafe extern "C" fn __aeabi_uldivmod() { + asm!( + "push {{r4, lr}}", + "sub sp, sp, #16", + "add r4, sp, #8", + "str r4, [sp]", + "bl __udivmoddi4", + "ldr r2, [sp, #8]", + "ldr r3, [sp, #12]", + "add 
sp, sp, #16", + "pop {{r4, pc}}", + options(noreturn) + ); } #[cfg(target_os = "ios")] #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe fn __aeabi_uldivmod() { - llvm_asm!(" - push {r4, lr} - sub sp, sp, #16 - add r4, sp, #8 - str r4, [sp] - bl ___udivmoddi4 - ldr r2, [sp, #8] - ldr r3, [sp, #12] - add sp, sp, #16 - pop {r4, pc} - " ::: "memory" : "volatile"); - intrinsics::unreachable(); +pub unsafe extern "C" fn __aeabi_uldivmod() { + asm!( + "push {{r4, lr}}", + "sub sp, sp, #16", + "add r4, sp, #8", + "str r4, [sp]", + "bl ___udivmoddi4", + "ldr r2, [sp, #8]", + "ldr r3, [sp, #12]", + "add sp, sp, #16", + "pop {{r4, pc}}", + options(noreturn) + ); } #[cfg(not(target_os = "ios"))] #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe fn __aeabi_idivmod() { - llvm_asm!(" - push {r0, r1, r4, lr} - bl __aeabi_idiv - pop {r1, r2} - muls r2, r2, r0 - subs r1, r1, r2 - pop {r4, pc} - " ::: "memory" : "volatile"); - intrinsics::unreachable(); +pub unsafe extern "C" fn __aeabi_idivmod() { + asm!( + "push {{r0, r1, r4, lr}}", + "bl __aeabi_idiv", + "pop {{r1, r2}}", + "muls r2, r2, r0", + "subs r1, r1, r2", + "pop {{r4, pc}}", + options(noreturn) + ); } #[cfg(target_os = "ios")] #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe fn __aeabi_idivmod() { - llvm_asm!(" - push {r0, r1, r4, lr} - bl ___aeabi_idiv - pop {r1, r2} - muls r2, r2, r0 - subs r1, r1, r2 - pop {r4, pc} - " ::: "memory" : "volatile"); - intrinsics::unreachable(); +pub unsafe extern "C" fn __aeabi_idivmod() { + asm!( + "push {{r0, r1, r4, lr}}", + "bl ___aeabi_idiv", + "pop {{r1, r2}}", + "muls r2, r2, r0", + "subs r1, r1, r2", + "pop {{r4, pc}}", + options(noreturn) + ); } #[cfg(not(target_os = "ios"))] #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe fn __aeabi_ldivmod() { - llvm_asm!(" - push {r4, lr} - sub sp, sp, #16 - add r4, sp, #8 - str r4, [sp] - bl __divmoddi4 - ldr r2, [sp, #8] - ldr r3, [sp, 
#12] - add sp, sp, #16 - pop {r4, pc} - " ::: "memory" : "volatile"); - intrinsics::unreachable(); +pub unsafe extern "C" fn __aeabi_ldivmod() { + asm!( + "push {{r4, lr}}", + "sub sp, sp, #16", + "add r4, sp, #8", + "str r4, [sp]", + "bl __divmoddi4", + "ldr r2, [sp, #8]", + "ldr r3, [sp, #12]", + "add sp, sp, #16", + "pop {{r4, pc}}", + options(noreturn) + ); } #[cfg(target_os = "ios")] #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe fn __aeabi_ldivmod() { - llvm_asm!(" - push {r4, lr} - sub sp, sp, #16 - add r4, sp, #8 - str r4, [sp] - bl ___divmoddi4 - ldr r2, [sp, #8] - ldr r3, [sp, #12] - add sp, sp, #16 - pop {r4, pc} - " ::: "memory" : "volatile"); - intrinsics::unreachable(); +pub unsafe extern "C" fn __aeabi_ldivmod() { + asm!( + "push {{r4, lr}}", + "sub sp, sp, #16", + "add r4, sp, #8", + "str r4, [sp]", + "bl ___divmoddi4", + "ldr r2, [sp, #8]", + "ldr r3, [sp, #12]", + "add sp, sp, #16", + "pop {{r4, pc}}", + options(noreturn) + ); } // The following functions use weak linkage to allow users to override diff --git a/src/lib.rs b/src/lib.rs index 4a7c746a2..1f93042e4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,6 @@ #![cfg_attr(feature = "compiler-builtins", compiler_builtins)] #![cfg_attr(not(feature = "no-asm"), feature(asm))] #![feature(abi_unadjusted)] -#![cfg_attr(not(feature = "no-asm"), feature(llvm_asm))] #![cfg_attr(not(feature = "no-asm"), feature(global_asm))] #![feature(cfg_target_has_atomic)] #![feature(compiler_builtins)] diff --git a/src/x86.rs b/src/x86.rs index e038231bb..4992de9da 100644 --- a/src/x86.rs +++ b/src/x86.rs @@ -16,26 +16,27 @@ use core::intrinsics; ))] #[naked] #[no_mangle] -pub unsafe fn ___chkstk_ms() { - llvm_asm!(" - push %ecx - push %eax - cmp $$0x1000,%eax - lea 12(%esp),%ecx - jb 1f - 2: - sub $$0x1000,%ecx - test %ecx,(%ecx) - sub $$0x1000,%eax - cmp $$0x1000,%eax - ja 2b - 1: - sub %eax,%ecx - test %ecx,(%ecx) - pop %eax - pop %ecx - ret" ::: "memory" : "volatile"); - 
intrinsics::unreachable(); +pub unsafe extern "C" fn ___chkstk_ms() { + asm!( + "push %ecx", + "push %eax", + "cmp $0x1000,%eax", + "lea 12(%esp),%ecx", + "jb 1f", + "2:", + "sub $0x1000,%ecx", + "test %ecx,(%ecx)", + "sub $0x1000,%eax", + "cmp $0x1000,%eax", + "ja 2b", + "1:", + "sub %eax,%ecx", + "test %ecx,(%ecx)", + "pop %eax", + "pop %ecx", + "ret", + options(noreturn, att_syntax) + ); } // FIXME: __alloca should be an alias to __chkstk @@ -47,10 +48,11 @@ pub unsafe fn ___chkstk_ms() { ))] #[naked] #[no_mangle] -pub unsafe fn __alloca() { - llvm_asm!("jmp ___chkstk // Jump to ___chkstk since fallthrough may be unreliable" - ::: "memory" : "volatile"); - intrinsics::unreachable(); +pub unsafe extern "C" fn __alloca() { + asm!( + "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable" + options(noreturn, att_syntax) + ); } #[cfg(all( @@ -61,27 +63,27 @@ pub unsafe fn __alloca() { ))] #[naked] #[no_mangle] -pub unsafe fn ___chkstk() { - llvm_asm!(" - push %ecx - cmp $$0x1000,%eax - lea 8(%esp),%ecx // esp before calling this routine -> ecx - jb 1f - 2: - sub $$0x1000,%ecx - test %ecx,(%ecx) - sub $$0x1000,%eax - cmp $$0x1000,%eax - ja 2b - 1: - sub %eax,%ecx - test %ecx,(%ecx) - - lea 4(%esp),%eax // load pointer to the return address into eax - mov %ecx,%esp // install the new top of stack pointer into esp - mov -4(%eax),%ecx // restore ecx - push (%eax) // push return address onto the stack - sub %esp,%eax // restore the original value in eax - ret" ::: "memory" : "volatile"); - intrinsics::unreachable(); +pub unsafe extern "C" fn ___chkstk() { + asm!( + "push %ecx", + "cmp $0x1000,%eax", + "lea 8(%esp),%ecx", // esp before calling this routine -> ecx + "jb 1f", + "2:", + "sub $0x1000,%ecx", + "test %ecx,(%ecx)", + "sub $0x1000,%eax", + "cmp $0x1000,%eax", + "ja 2b", + "1:", + "sub %eax,%ecx", + "test %ecx,(%ecx)", + "lea 4(%esp),%eax", // load pointer to the return address into eax + "mov %ecx,%esp", // install the new top of stack pointer 
into esp + "mov -4(%eax),%ecx", // restore ecx + "push (%eax)", // push return address onto the stack + "sub %esp,%eax", // restore the original value in eax + "ret", + options(noreturn, att_syntax) + ); } diff --git a/src/x86_64.rs b/src/x86_64.rs index 91c0f24fc..b382b886c 100644 --- a/src/x86_64.rs +++ b/src/x86_64.rs @@ -16,26 +16,27 @@ use core::intrinsics; ))] #[naked] #[no_mangle] -pub unsafe fn ___chkstk_ms() { - llvm_asm!(" - push %rcx - push %rax - cmp $$0x1000,%rax - lea 24(%rsp),%rcx - jb 1f - 2: - sub $$0x1000,%rcx - test %rcx,(%rcx) - sub $$0x1000,%rax - cmp $$0x1000,%rax - ja 2b - 1: - sub %rax,%rcx - test %rcx,(%rcx) - pop %rax - pop %rcx - ret" ::: "memory" : "volatile"); - intrinsics::unreachable(); +pub unsafe extern "C" fn ___chkstk_ms() { + asm!( + "push %rcx", + "push %rax", + "cmp $0x1000,%rax", + "lea 24(%rsp),%rcx", + "jb 1f", + "2:", + "sub $0x1000,%rcx", + "test %rcx,(%rcx)", + "sub $0x1000,%rax", + "cmp $0x1000,%rax", + "ja 2b", + "1:", + "sub %rax,%rcx", + "test %rcx,(%rcx)", + "pop %rax", + "pop %rcx", + "ret", + options(noreturn, att_syntax) + ); } #[cfg(all( @@ -46,11 +47,12 @@ pub unsafe fn ___chkstk_ms() { ))] #[naked] #[no_mangle] -pub unsafe fn __alloca() { - llvm_asm!("mov %rcx,%rax // x64 _alloca is a normal function with parameter in rcx - jmp ___chkstk // Jump to ___chkstk since fallthrough may be unreliable" - ::: "memory" : "volatile"); - intrinsics::unreachable(); +pub unsafe extern "C" fn __alloca() { + asm!( + "mov %rcx,%rax", // x64 _alloca is a normal function with parameter in rcx + "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable" + options(noreturn, att_syntax) + ); } #[cfg(all( @@ -61,32 +63,29 @@ pub unsafe fn __alloca() { ))] #[naked] #[no_mangle] -pub unsafe fn ___chkstk() { - llvm_asm!( - " - push %rcx - cmp $$0x1000,%rax - lea 16(%rsp),%rcx // rsp before calling this routine -> rcx - jb 1f - 2: - sub $$0x1000,%rcx - test %rcx,(%rcx) - sub $$0x1000,%rax - cmp $$0x1000,%rax - ja 2b - 1: 
- sub %rax,%rcx - test %rcx,(%rcx) - - lea 8(%rsp),%rax // load pointer to the return address into rax - mov %rcx,%rsp // install the new top of stack pointer into rsp - mov -8(%rax),%rcx // restore rcx - push (%rax) // push return address onto the stack - sub %rsp,%rax // restore the original value in rax - ret" - ::: "memory" : "volatile" +pub unsafe extern "C" fn ___chkstk() { + asm!( + "push %rcx", + "cmp $0x1000,%rax", + "lea 16(%rsp),%rcx", // rsp before calling this routine -> rcx + "jb 1f", + "2:", + "sub $0x1000,%rcx", + "test %rcx,(%rcx)", + "sub $0x1000,%rax", + "cmp $0x1000,%rax", + "ja 2b", + "1:", + "sub %rax,%rcx", + "test %rcx,(%rcx)", + "lea 8(%rsp),%rax", // load pointer to the return address into rax + "mov %rcx,%rsp", // install the new top of stack pointer into rsp + "mov -8(%rax),%rcx", // restore rcx + "push (%rax)", // push return address onto the stack + "sub %rsp,%rax", // restore the original value in rax + "ret", + options(noreturn, att_syntax) ); - intrinsics::unreachable(); } // HACK(https://github.com/rust-lang/rust/issues/62785): x86_64-unknown-uefi needs special LLVM From d662521625e903ccd624401cc638b2b0b770d498 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Fri, 2 Apr 2021 09:44:10 -0500 Subject: [PATCH 0427/1459] add clippy to CI --- .github/workflows/main.yml | 18 ++++++++++++++++-- src/int/mod.rs | 2 ++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 01dffb845..6e2d83879 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -109,6 +109,20 @@ jobs: - uses: actions/checkout@v1 with: submodules: true - - name: Install Rust - run: rustup update stable && rustup default stable && rustup component add rustfmt + - name: Install stable `rustfmt` + run: rustup set profile minimal && rustup default stable && rustup component add rustfmt - run: cargo fmt -- --check + + clippy: + name: Clippy + runs-on: ubuntu-latest + steps: + - uses: 
actions/checkout@v1 + with: + submodules: true + # Unlike rustfmt, stable clippy does not work on code with nightly features. + # This acquires the most recent nightly with a clippy component. + - name: Install nightly `clippy` + run: | + rustup set profile minimal && rustup default "nightly-$(curl -s https://rust-lang.github.io/rustup-components-history/x86_64-unknown-linux-gnu/clippy)" && rustup component add clippy + - run: cargo clippy -- -D clippy::all diff --git a/src/int/mod.rs b/src/int/mod.rs index 2190a69b9..080a415de 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -210,6 +210,8 @@ macro_rules! int_impl { self } + // It makes writing macros easier if this is implemented for both signed and unsigned + #[allow(clippy::wrong_self_convention)] fn from_unsigned(me: $uty) -> Self { me } From 07c97e38bf39df1facca33d59f6a25892ad2b259 Mon Sep 17 00:00:00 2001 From: messense Date: Wed, 24 Mar 2021 13:49:12 +0800 Subject: [PATCH 0428/1459] Add compiler-rt fallbacks on mips64-musl --- build.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build.rs b/build.rs index f948edba9..701c4d495 100644 --- a/build.rs +++ b/build.rs @@ -449,6 +449,8 @@ mod c { ("__fixunstfsi", "fixunstfsi.c"), ("__floatunsitf", "floatunsitf.c"), ("__fe_getround", "fp_mode.c"), + ("__divtf3", "divtf3.c"), + ("__trunctfdf2", "trunctfdf2.c"), ]); } From ba9e00c49e6764da9bc6bd29b58cda1a993fbcff Mon Sep 17 00:00:00 2001 From: Scott Mabin Date: Sat, 3 Apr 2021 18:57:16 +0100 Subject: [PATCH 0429/1459] Add `#[linkage = "weak"]` attribute to all `mem` intrinsics.
--- src/mem/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/mem/mod.rs b/src/mem/mod.rs index 03dc965ca..3d7372c82 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -20,12 +20,14 @@ use core::ops::{BitOr, Shl}; mod impls; #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] +#[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { impls::copy_forward(dest, src, n); dest } #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] +#[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { let delta = (dest as usize).wrapping_sub(src as usize); if delta >= n { @@ -39,12 +41,14 @@ pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mu } #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] +#[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 { impls::set_bytes(s, c as u8, n); s } #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] +#[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { let mut i = 0; while i < n { @@ -59,6 +63,7 @@ pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { } #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] +#[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { memcmp(s1, s2, n) } From 5e7374bbf6ad959f6175af0085cb26e953e43db4 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 10 Apr 2021 16:03:19 +0200 Subject: [PATCH 
0430/1459] Fix typo --- src/arm.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arm.rs b/src/arm.rs index 545d6a17a..82c41fc5a 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -2,8 +2,8 @@ use core::intrinsics; -// NOTE This function and the ones below are implemented using assembly because they using a custom -// calling convention which can't be implemented using a normal Rust function. +// NOTE This function and the ones below are implemented using assembly because they are using a +// custom calling convention which can't be implemented using a normal Rust function. // NOTE The only difference between the iOS and non-iOS versions of those functions is that the iOS // versions use 3 leading underscores in the names of called functions instead of 2. #[cfg(not(any(target_os = "ios", target_env = "msvc")))] From 72bad8fa668d6506881291d93a1667cd1aa6372a Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sat, 10 Apr 2021 19:13:02 +0100 Subject: [PATCH 0431/1459] Mark global_asm! code with .att_syntax global_asm! will soon change to use Intel syntax by default. --- src/probestack.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/probestack.rs b/src/probestack.rs index ac3ae1ebb..6892ab2d3 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -48,6 +48,8 @@ #![cfg(not(feature = "no-asm"))] // We only define stack probing for these architectures today. #![cfg(any(target_arch = "x86_64", target_arch = "x86"))] +// We need to add .att_syntax for bootstraping the new global_asm! +#![allow(unknown_lints, bad_asm_style)] extern "C" { pub fn __rust_probestack(); @@ -63,6 +65,7 @@ macro_rules! 
define_rust_probestack { ($body: expr) => { concat!( " + .att_syntax .pushsection .text.__rust_probestack .globl __rust_probestack .type __rust_probestack, @function From 6e4ee67f95d45c7ba2704afe9dbfabb12c6b9009 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 11 Apr 2021 20:56:38 +0100 Subject: [PATCH 0432/1459] Bump to 0.1.40 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f1052d59d..8d0c05084 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.39" +version = "0.1.40" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From b6ace6ee2c74e1412813c37b686b62e7f83de307 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 23 Apr 2021 18:54:12 +0100 Subject: [PATCH 0433/1459] Revert "Disable AArch64 FP-to-int tests" This reverts commit 9e76b9115fa9571501d378b6329ce557266908b7. --- testcrate/tests/conv.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 17c31a8a0..7cdbf9fbb 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -95,8 +95,6 @@ macro_rules! 
f_to_i { }; } -// AArch64 tests are currently broken due to https://github.com/rust-lang/rust/issues/83467 -#[cfg(not(target_arch = "aarch64"))] #[test] fn float_to_int() { use compiler_builtins::float::conv::{ From 76b24f99d45975ee15cace629f88a582c942684a Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 30 Apr 2021 19:15:53 +0100 Subject: [PATCH 0434/1459] Fix CI on latest nightly --- examples/intrinsics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index ddbb976a2..dfa46b1b8 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -6,7 +6,7 @@ #![allow(unused_features)] #![cfg_attr(thumb, no_main)] #![deny(dead_code)] -#![feature(test)] +#![feature(bench_black_box)] #![feature(lang_items)] #![feature(start)] #![feature(allocator_api)] From 0a3bd800246eea7c0c6b7d24d8eefff5aef48547 Mon Sep 17 00:00:00 2001 From: George Burgess IV Date: Tue, 2 Mar 2021 15:50:09 -0800 Subject: [PATCH 0435/1459] add support for building outlined aarch64 intrinsics llvm/llvm-project@a4ac434c47434d80bca54bab96f295ed4e972cc6 saw the addition of out-of-line aarch64 atomic intrinsics. LLVM will sometimes emit these, so we need to ensure they're included in Rust's compiler-rt. --- build.rs | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/build.rs b/build.rs index 701c4d495..2ff6443d7 100644 --- a/build.rs +++ b/build.rs @@ -81,7 +81,7 @@ mod c { use std::collections::BTreeMap; use std::env; - use std::path::PathBuf; + use std::path::{Path, PathBuf}; struct Sources { // SYMBOL -> PATH TO SOURCE @@ -489,7 +489,20 @@ mod c { // use of that macro in lib/builtins/int_util.h in compiler-rt. cfg.flag_if_supported(&format!("-ffile-prefix-map={}=.", root.display())); + // Include out-of-line atomics for aarch64, which are all generated by supplying different + // sets of flags to the same source file. 
let src_dir = root.join("lib/builtins"); + if target_arch == "aarch64" { + let atomics_libs = build_aarch64_out_of_line_atomics_libraries(&src_dir, cfg); + if !atomics_libs.is_empty() { + for library in atomics_libs { + cfg.object(library); + } + // Some run-time CPU feature detection is necessary, as well. + sources.extend(&[("__aarch64_have_lse_atomics", "cpu_model.c")]); + } + } + for (sym, src) in sources.map.iter() { let src = src_dir.join(src); cfg.file(&src); @@ -499,4 +512,60 @@ mod c { cfg.compile("libcompiler-rt.a"); } + + fn build_aarch64_out_of_line_atomics_libraries( + builtins_dir: &Path, + cfg: &cc::Build, + ) -> Vec { + // NOTE: because we're recompiling the same source file in N different ways, building + // serially is necessary. If we want to lift this restriction, we can either: + // - create symlinks to lse.S and build those_(though we'd still need to pass special + // #define-like flags to each of these), or + // - synthesizing tiny .S files in out/ with the proper #defines, which ultimately #include + // lse.S. + // That said, it's unclear how useful this added complexity will be, so just do the simple + // thing for now. + let outlined_atomics_file = builtins_dir.join("aarch64/lse.S"); + + // A stable release hasn't been made with lse.S yet. Until we pick that up, do nothing. + if !outlined_atomics_file.exists() { + return vec![]; + } + + println!("cargo:rerun-if-changed={}", outlined_atomics_file.display()); + let out_dir: PathBuf = env::var("OUT_DIR").unwrap().into(); + + // Ideally, this would be a Vec of object files, but cc doesn't make it *entirely* + // trivial to build an individual object. 
+ let mut atomics_libraries = Vec::new(); + for instruction_type in &["cas", "cwp", "ldadd", "ldclr", "ldeor", "ldset"] { + for size in &[1, 2, 4, 8, 16] { + if *size == 16 && *instruction_type != "cas" { + continue; + } + + for (model_number, model_name) in + &[(1, "relax"), (2, "acq"), (3, "rel"), (4, "acq_rel")] + { + let library_name = format!( + "liboutline_atomic_helper_{}_{}_{}.a", + instruction_type, size, model_name + ); + let sym = format!("__aarch64_{}{}_{}", instruction_type, size, model_name); + let mut cfg = cfg.clone(); + + cfg.include(&builtins_dir) + .define(&format!("L_{}", instruction_type), None) + .define("SIZE", size.to_string().as_str()) + .define("MODEL", model_number.to_string().as_str()) + .file(&outlined_atomics_file); + cfg.compile(&library_name); + + atomics_libraries.push(out_dir.join(library_name)); + println!("cargo:rustc-cfg={}=\"optimized-c\"", sym); + } + } + } + atomics_libraries + } } From 8f4003bd671c98b50d915f77ba3c7a7803e76548 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Fri, 30 Apr 2021 02:38:39 -0700 Subject: [PATCH 0436/1459] Fix typo in instruction name: s/cwp/swp/ --- build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.rs b/build.rs index 2ff6443d7..26296db0c 100644 --- a/build.rs +++ b/build.rs @@ -538,7 +538,7 @@ mod c { // Ideally, this would be a Vec of object files, but cc doesn't make it *entirely* // trivial to build an individual object. 
let mut atomics_libraries = Vec::new(); - for instruction_type in &["cas", "cwp", "ldadd", "ldclr", "ldeor", "ldset"] { + for instruction_type in &["cas", "swp", "ldadd", "ldclr", "ldeor", "ldset"] { for size in &[1, 2, 4, 8, 16] { if *size == 16 && *instruction_type != "cas" { continue; From 84f19722d3a144b14f1348c71b32a9b739d1bfe2 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Fri, 30 Apr 2021 02:39:07 -0700 Subject: [PATCH 0437/1459] Require lse.S (now available in current LLVM) --- build.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/build.rs b/build.rs index 26296db0c..2178dc935 100644 --- a/build.rs +++ b/build.rs @@ -526,13 +526,8 @@ mod c { // That said, it's unclear how useful this added complexity will be, so just do the simple // thing for now. let outlined_atomics_file = builtins_dir.join("aarch64/lse.S"); - - // A stable release hasn't been made with lse.S yet. Until we pick that up, do nothing. - if !outlined_atomics_file.exists() { - return vec![]; - } - println!("cargo:rerun-if-changed={}", outlined_atomics_file.display()); + let out_dir: PathBuf = env::var("OUT_DIR").unwrap().into(); // Ideally, this would be a Vec of object files, but cc doesn't make it *entirely* From 9ed6cf26f5dc35ecc90f8a3266f30bc2882ba9c6 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Fri, 30 Apr 2021 02:41:44 -0700 Subject: [PATCH 0438/1459] Make the name of the intermediate library more closely match the intrinsic --- build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.rs b/build.rs index 2178dc935..3ba6d5883 100644 --- a/build.rs +++ b/build.rs @@ -543,7 +543,7 @@ mod c { &[(1, "relax"), (2, "acq"), (3, "rel"), (4, "acq_rel")] { let library_name = format!( - "liboutline_atomic_helper_{}_{}_{}.a", + "liboutline_atomic_helper_{}{}_{}.a", instruction_type, size, model_name ); let sym = format!("__aarch64_{}{}_{}", instruction_type, size, model_name); From e54b78aa6496be699948da404915836c74d39b62 Mon Sep 17 
00:00:00 2001 From: Josh Triplett Date: Fri, 30 Apr 2021 12:24:12 -0700 Subject: [PATCH 0439/1459] Update the version of compiler-rt in CI --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6e2d83879..3d63bc070 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -88,8 +88,8 @@ jobs: - run: rustup component add llvm-tools-preview - name: Download compiler-rt reference sources run: | - curl -L -o code.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/10.0-2020-05-05.tar.gz - tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-10.0-2020-05-05/compiler-rt + curl -L -o code.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/12.0-2021-04-15.tar.gz + tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-12.0-2021-04-15/compiler-rt echo RUST_COMPILER_RT_ROOT=./compiler-rt >> $GITHUB_ENV shell: bash From cfc2ed8f8cd618f67959e80f799779ff2194206a Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 30 Apr 2021 21:03:06 +0100 Subject: [PATCH 0440/1459] Bump to 0.1.41 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8d0c05084..c59f78728 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.40" +version = "0.1.41" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 9e36e7266f9acdd9e7fa73c81723cddb4a852688 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 2 May 2021 21:29:00 +0100 Subject: [PATCH 0441/1459] Add missing .att_syntax from #414 --- src/probestack.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/probestack.rs b/src/probestack.rs index 6892ab2d3..4d6cd6949 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -86,6 +86,7 @@ macro_rules! 
define_rust_probestack { ($body: expr) => { concat!( " + .att_syntax .globl __rust_probestack __rust_probestack: ", @@ -101,6 +102,7 @@ macro_rules! define_rust_probestack { ($body: expr) => { concat!( " + .att_syntax .globl ___rust_probestack ___rust_probestack: ", @@ -115,6 +117,7 @@ macro_rules! define_rust_probestack { ($body: expr) => { concat!( " + .att_syntax .globl ___rust_probestack ___rust_probestack: ", From 449098749e340763bc3b88f123bc3f8836f05dae Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 2 May 2021 22:12:49 +0100 Subject: [PATCH 0442/1459] Bump to 0.1.42 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c59f78728..0f5faebac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.41" +version = "0.1.42" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 67e97d8f894960f707b28cee65d7c558a4290428 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 13 May 2021 21:35:34 +0100 Subject: [PATCH 0443/1459] Don't embed lse_*.a inside another static library --- build.rs | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/build.rs b/build.rs index 3ba6d5883..297a0adff 100644 --- a/build.rs +++ b/build.rs @@ -493,14 +493,11 @@ mod c { // sets of flags to the same source file. let src_dir = root.join("lib/builtins"); if target_arch == "aarch64" { - let atomics_libs = build_aarch64_out_of_line_atomics_libraries(&src_dir, cfg); - if !atomics_libs.is_empty() { - for library in atomics_libs { - cfg.object(library); - } - // Some run-time CPU feature detection is necessary, as well. - sources.extend(&[("__aarch64_have_lse_atomics", "cpu_model.c")]); - } + // See below for why we're building these as separate libraries. 
+ build_aarch64_out_of_line_atomics_libraries(&src_dir, cfg); + + // Some run-time CPU feature detection is necessary, as well. + sources.extend(&[("__aarch64_have_lse_atomics", "cpu_model.c")]); } for (sym, src) in sources.map.iter() { @@ -513,10 +510,7 @@ mod c { cfg.compile("libcompiler-rt.a"); } - fn build_aarch64_out_of_line_atomics_libraries( - builtins_dir: &Path, - cfg: &cc::Build, - ) -> Vec { + fn build_aarch64_out_of_line_atomics_libraries(builtins_dir: &Path, cfg: &cc::Build) { // NOTE: because we're recompiling the same source file in N different ways, building // serially is necessary. If we want to lift this restriction, we can either: // - create symlinks to lse.S and build those_(though we'd still need to pass special @@ -528,11 +522,8 @@ mod c { let outlined_atomics_file = builtins_dir.join("aarch64/lse.S"); println!("cargo:rerun-if-changed={}", outlined_atomics_file.display()); - let out_dir: PathBuf = env::var("OUT_DIR").unwrap().into(); - // Ideally, this would be a Vec of object files, but cc doesn't make it *entirely* // trivial to build an individual object. 
- let mut atomics_libraries = Vec::new(); for instruction_type in &["cas", "swp", "ldadd", "ldclr", "ldeor", "ldset"] { for size in &[1, 2, 4, 8, 16] { if *size == 16 && *instruction_type != "cas" { @@ -556,11 +547,9 @@ mod c { .file(&outlined_atomics_file); cfg.compile(&library_name); - atomics_libraries.push(out_dir.join(library_name)); println!("cargo:rustc-cfg={}=\"optimized-c\"", sym); } } } - atomics_libraries } } From eb9a1bd767a2b08d63f0472b2c9ec1b664413ed1 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 13 May 2021 21:41:46 +0100 Subject: [PATCH 0444/1459] Bump to 0.1.43 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0f5faebac..f5ba632a0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.42" +version = "0.1.43" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From acf4f894851f155a6734ad5b4a1b01597d98b528 Mon Sep 17 00:00:00 2001 From: Yuki Okushi Date: Wed, 26 May 2021 12:22:52 +0900 Subject: [PATCH 0445/1459] Suppress some warnings --- src/arm.rs | 1 + src/mem/mod.rs | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/arm.rs b/src/arm.rs index 82c41fc5a..7203d91e4 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -1,4 +1,5 @@ #![cfg(not(feature = "no-asm"))] +#![allow(unused_imports)] use core::intrinsics; diff --git a/src/mem/mod.rs b/src/mem/mod.rs index 3d7372c82..2f9a9fd94 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -69,6 +69,7 @@ pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { } // `bytes` must be a multiple of `mem::size_of::()` +#[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))] fn memcpy_element_unordered_atomic(dest: *mut T, src: *const T, bytes: usize) { unsafe { let n = exact_div(bytes, mem::size_of::()); @@ -81,6 +82,7 @@ fn memcpy_element_unordered_atomic(dest: 
*mut T, src: *const T, bytes: } // `bytes` must be a multiple of `mem::size_of::()` +#[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))] fn memmove_element_unordered_atomic(dest: *mut T, src: *const T, bytes: usize) { unsafe { let n = exact_div(bytes, mem::size_of::()); @@ -103,6 +105,7 @@ fn memmove_element_unordered_atomic(dest: *mut T, src: *const T, bytes: } // `T` must be a primitive integer type, and `bytes` must be a multiple of `mem::size_of::()` +#[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))] fn memset_element_unordered_atomic(s: *mut T, c: u8, bytes: usize) where T: Copy + From + Shl + BitOr, From 186517b3266a7bb2b2310927f7342ea7f41790c3 Mon Sep 17 00:00:00 2001 From: Tilmann Meyer Date: Mon, 31 May 2021 16:32:46 +0200 Subject: [PATCH 0446/1459] Include Linux atomic emulation on androideabi The old androideabi uses armv5 and thus also needs the atomic emulation and because Android is basically Linux it can use the same implementation. --- build.rs | 7 +++++-- src/lib.rs | 6 +++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/build.rs b/build.rs index 297a0adff..0fb1e17be 100644 --- a/build.rs +++ b/build.rs @@ -69,8 +69,11 @@ fn main() { println!("cargo:rustc-cfg=thumb_1") } - // Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures. - if llvm_target[0] == "armv4t" || llvm_target[0] == "armv5te" { + // Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures. This + // includes the old androideabi. It is deprecated but it is available as a + // rustc target (arm-linux-androideabi). 
+ if llvm_target[0] == "armv4t" || llvm_target[0] == "armv5te" || llvm_target[2] == "androideabi" + { println!("cargo:rustc-cfg=kernel_user_helpers") } } diff --git a/src/lib.rs b/src/lib.rs index facdf946f..21796ca55 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -49,7 +49,11 @@ pub mod mem; #[cfg(target_arch = "arm")] pub mod arm; -#[cfg(all(kernel_user_helpers, target_os = "linux", target_arch = "arm"))] +#[cfg(all( + kernel_user_helpers, + any(target_os = "linux", target_os = "android"), + target_arch = "arm" +))] pub mod arm_linux; #[cfg(any(target_arch = "riscv32"))] From f9257b7f0b9e4b3a0acc336109ac6490d7212697 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Tue, 1 Jun 2021 19:08:32 +0100 Subject: [PATCH 0447/1459] Bump to 0.1.44 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f5ba632a0..5c917fc68 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.43" +version = "0.1.44" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 2ad41ef64b870e32f38e13a0fd05c4d3bceb287a Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Wed, 2 Jun 2021 13:11:28 -0500 Subject: [PATCH 0448/1459] Add public-test-deps feature for better visibility control --- Cargo.toml | 4 ++++ src/float/mod.rs | 5 +++-- src/int/leading_zeros.rs | 10 ++++++---- src/int/mod.rs | 20 ++++++++++++-------- src/int/specialized_div_rem/asymmetric.rs | 3 +-- src/int/specialized_div_rem/binary_long.rs | 3 +-- src/int/specialized_div_rem/delegate.rs | 10 ++++++---- src/int/specialized_div_rem/mod.rs | 7 +++++++ src/int/specialized_div_rem/norm_shift.rs | 3 +-- src/int/specialized_div_rem/trifecta.rs | 3 +-- src/int/udiv.rs | 7 +++++-- src/macros.rs | 16 ++++++++++++++++ testcrate/Cargo.toml | 2 +- 13 files changed, 64 insertions(+), 29 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 
f5ba632a0..e1f5cbf8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,6 +64,10 @@ no-lang-items = [] # Only used in the compiler's build system rustc-dep-of-std = ['compiler-builtins', 'core'] +# This makes certain traits and function specializations public that +# are not normally public but are required by the `testcrate` +public-test-deps = [] + [[example]] name = "intrinsics" required-features = ["compiler-builtins"] diff --git a/src/float/mod.rs b/src/float/mod.rs index 69e4dc635..11680e7a9 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -11,9 +11,9 @@ pub mod mul; pub mod pow; pub mod sub; +public_test_dep! { /// Trait for some basic operations on floats -#[doc(hidden)] -pub trait Float: +pub(crate) trait Float: Copy + core::fmt::Debug + PartialEq @@ -99,6 +99,7 @@ pub trait Float: /// Returns if `self` is subnormal fn is_subnormal(self) -> bool; } +} macro_rules! float_impl { ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { diff --git a/src/int/leading_zeros.rs b/src/int/leading_zeros.rs index e4a9e5eb2..0265b9a9d 100644 --- a/src/int/leading_zeros.rs +++ b/src/int/leading_zeros.rs @@ -3,9 +3,9 @@ // adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`. // Compilers will insert the check for zero in cases where it is needed. +public_test_dep! { /// Returns the number of leading binary zeros in `x`. -#[doc(hidden)] -pub fn usize_leading_zeros_default(x: usize) -> usize { +pub(crate) fn usize_leading_zeros_default(x: usize) -> usize { // The basic idea is to test if the higher bits of `x` are zero and bisect the number // of leading zeros. It is possible for all branches of the bisection to use the same // code path by conditionally shifting the higher parts down to let the next bisection @@ -69,15 +69,16 @@ pub fn usize_leading_zeros_default(x: usize) -> usize { // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO // execution effects. 
Changing to using a LUT and branching is risky for smaller cores. } +} // The above method does not compile well on RISC-V (because of the lack of predicated // instructions), producing code with many branches or using an excessively long // branchless solution. This method takes advantage of the set-if-less-than instruction on // RISC-V that allows `(x >= power-of-two) as usize` to be branchless. +public_test_dep! { /// Returns the number of leading binary zeros in `x`. -#[doc(hidden)] -pub fn usize_leading_zeros_riscv(x: usize) -> usize { +pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize { let mut x = x; // the number of potential leading zeros let mut z = usize::MAX.count_ones() as usize; @@ -126,6 +127,7 @@ pub fn usize_leading_zeros_riscv(x: usize) -> usize { // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. z - x } +} intrinsics! { #[maybe_use_optimized_c_shim] diff --git a/src/int/mod.rs b/src/int/mod.rs index 080a415de..509f9fdae 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -11,9 +11,9 @@ pub mod udiv; pub use self::leading_zeros::__clzsi2; +public_test_dep! { /// Trait for some basic operations on integers -#[doc(hidden)] -pub trait Int: +pub(crate) trait Int: Copy + core::fmt::Debug + PartialEq @@ -81,6 +81,7 @@ pub trait Int: fn overflowing_add(self, other: Self) -> (Self, bool); fn leading_zeros(self) -> u32; } +} macro_rules! int_impl_common { ($ty:ty) => { @@ -255,10 +256,10 @@ int_impl!(i32, u32); int_impl!(i64, u64); int_impl!(i128, u128); +public_test_dep! { /// Trait for integers twice the bit width of another integer. This is implemented for all /// primitives except for `u8`, because there is not a smaller primitive. 
-#[doc(hidden)] -pub trait DInt: Int { +pub(crate) trait DInt: Int { /// Integer that is half the bit width of the integer this trait is implemented for type H: HInt + Int; @@ -271,11 +272,12 @@ pub trait DInt: Int { /// Constructs an integer using lower and higher half parts fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self; } +} +public_test_dep! { /// Trait for integers half the bit width of another integer. This is implemented for all /// primitives except for `u128`, because it there is not a larger primitive. -#[doc(hidden)] -pub trait HInt: Int { +pub(crate) trait HInt: Int { /// Integer that is double the bit width of the integer this trait is implemented for type D: DInt + Int; @@ -291,6 +293,7 @@ pub trait HInt: Int { /// Widening multiplication. This cannot overflow. fn widen_mul(self, rhs: Self) -> Self::D; } +} macro_rules! impl_d_int { ($($X:ident $D:ident),*) => { @@ -353,11 +356,12 @@ impl_h_int!( i64 u64 i128 ); +public_test_dep! { /// Trait to express (possibly lossy) casting of integers -#[doc(hidden)] -pub trait CastInto: Copy { +pub(crate) trait CastInto: Copy { fn cast(self) -> T; } +} macro_rules! cast_into { ($ty:ty) => { diff --git a/src/int/specialized_div_rem/asymmetric.rs b/src/int/specialized_div_rem/asymmetric.rs index 45da657e9..56ce188a3 100644 --- a/src/int/specialized_div_rem/asymmetric.rs +++ b/src/int/specialized_div_rem/asymmetric.rs @@ -3,8 +3,7 @@ /// assembly instruction that can divide a 128 bit integer by a 64 bit integer if the quotient fits /// in 64 bits. The 128 bit version of this algorithm would use that fast hardware division to /// construct a full 128 bit by 128 bit division. -#[doc(hidden)] -#[macro_export] +#[allow(unused_macros)] macro_rules! 
impl_asymmetric { ( $fn:ident, // name of the unsigned division function diff --git a/src/int/specialized_div_rem/binary_long.rs b/src/int/specialized_div_rem/binary_long.rs index 7de10e852..0d7822882 100644 --- a/src/int/specialized_div_rem/binary_long.rs +++ b/src/int/specialized_div_rem/binary_long.rs @@ -4,8 +4,7 @@ /// predicate instructions. For architectures with predicated instructions, one of the algorithms /// described in the documentation of these functions probably has higher performance, and a custom /// assembly routine should be used instead. -#[doc(hidden)] -#[macro_export] +#[allow(unused_macros)] macro_rules! impl_binary_long { ( $fn:ident, // name of the unsigned division function diff --git a/src/int/specialized_div_rem/delegate.rs b/src/int/specialized_div_rem/delegate.rs index 135d3402a..330c6e4f8 100644 --- a/src/int/specialized_div_rem/delegate.rs +++ b/src/int/specialized_div_rem/delegate.rs @@ -2,8 +2,7 @@ /// binary long division to divide integers larger than what hardware division by itself can do. This /// function is intended for microarchitectures that have division hardware, but not fast enough /// multiplication hardware for `impl_trifecta` to be faster. -#[doc(hidden)] -#[macro_export] +#[allow(unused_macros)] macro_rules! impl_delegate { ( $fn:ident, // name of the unsigned division function @@ -186,6 +185,7 @@ macro_rules! impl_delegate { }; } +public_test_dep! { /// Returns `n / d` and sets `*rem = n % d`. /// /// This specialization exists because: @@ -193,8 +193,9 @@ macro_rules! impl_delegate { /// so we have to use an old fashioned `&mut u128` argument to return the remainder. /// - 64-bit SPARC does not have u64 * u64 => u128 widening multiplication, which makes the /// delegate algorithm strategy the only reasonably fast way to perform `u128` division. 
-#[doc(hidden)] -pub fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { +// used on SPARC +#[allow(dead_code)] +pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { use super::*; let duo_lo = duo as u64; let duo_hi = (duo >> 64) as u64; @@ -315,3 +316,4 @@ pub fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { } } } +} diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs index 391db765d..f5b2af235 100644 --- a/src/int/specialized_div_rem/mod.rs +++ b/src/int/specialized_div_rem/mod.rs @@ -53,6 +53,13 @@ mod binary_long; #[macro_use] mod delegate; + +// used on SPARC +#[allow(unused_imports)] +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use self::delegate::u128_divide_sparc; + +#[cfg(feature = "public-test-deps")] pub use self::delegate::u128_divide_sparc; #[macro_use] diff --git a/src/int/specialized_div_rem/norm_shift.rs b/src/int/specialized_div_rem/norm_shift.rs index be95d1b92..61b67b6bc 100644 --- a/src/int/specialized_div_rem/norm_shift.rs +++ b/src/int/specialized_div_rem/norm_shift.rs @@ -1,6 +1,5 @@ /// Creates a function used by some division algorithms to compute the "normalization shift". -#[doc(hidden)] -#[macro_export] +#[allow(unused_macros)] macro_rules! impl_normalization_shift { ( $name:ident, // name of the normalization shift function diff --git a/src/int/specialized_div_rem/trifecta.rs b/src/int/specialized_div_rem/trifecta.rs index a9ea60301..7e104053b 100644 --- a/src/int/specialized_div_rem/trifecta.rs +++ b/src/int/specialized_div_rem/trifecta.rs @@ -2,8 +2,7 @@ /// larger than the largest hardware integer division supported. These functions use large radix /// division algorithms that require both fast division and very fast widening multiplication on the /// target microarchitecture. Otherwise, `impl_delegate` should be used instead. -#[doc(hidden)] -#[macro_export] +#[allow(unused_macros)] macro_rules! 
impl_trifecta { ( $fn:ident, // name of the unsigned division function diff --git a/src/int/udiv.rs b/src/int/udiv.rs index d97178078..2f236346d 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -1,5 +1,8 @@ -pub use int::specialized_div_rem::u128_divide_sparc; -use int::specialized_div_rem::*; +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use int::specialized_div_rem::*; + +#[cfg(feature = "public-test-deps")] +pub use int::specialized_div_rem::*; intrinsics! { #[maybe_use_optimized_c_shim] diff --git a/src/macros.rs b/src/macros.rs index 56f27164a..214f0795f 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -1,5 +1,21 @@ //! Macros shared throughout the compiler-builtins implementation +/// Changes the visibility to `pub` if feature "public-test-deps" is set +#[cfg(not(feature = "public-test-deps"))] +macro_rules! public_test_dep { + ($(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*) => { + $(#[$($meta)*])* pub(crate) $ident $($tokens)* + }; +} + +/// Changes the visibility to `pub` if feature "public-test-deps" is set +#[cfg(feature = "public-test-deps")] +macro_rules! public_test_dep { + {$(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*} => { + $(#[$($meta)*])* pub $ident $($tokens)* + }; +} + /// The "main macro" used for defining intrinsics. /// /// The compiler-builtins library is super platform-specific with tons of crazy diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index a066a1134..1f77b2554 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -17,7 +17,7 @@ rand_xoshiro = "0.6" [dependencies.compiler_builtins] path = ".." 
default-features = false -features = ["no-lang-items"] +features = ["no-lang-items", "public-test-deps"] [target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies] test = { git = "https://github.com/japaric/utest" } From 2571b1a99168988f1b78c3fe418efa02c7c2a6de Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 3 Jun 2021 22:59:34 +0100 Subject: [PATCH 0449/1459] Fix build on targets with fewer than 3 components in their name --- build.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/build.rs b/build.rs index 0fb1e17be..f0617b6e9 100644 --- a/build.rs +++ b/build.rs @@ -72,7 +72,9 @@ fn main() { // Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures. This // includes the old androideabi. It is deprecated but it is available as a // rustc target (arm-linux-androideabi). - if llvm_target[0] == "armv4t" || llvm_target[0] == "armv5te" || llvm_target[2] == "androideabi" + if llvm_target[0] == "armv4t" + || llvm_target[0] == "armv5te" + || llvm_target.get(2) == Some(&"androideabi") { println!("cargo:rustc-cfg=kernel_user_helpers") } From 8ec7648739c03d4cf7f7a2cfeb42fefaa0972359 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 4 Jun 2021 00:20:57 +0100 Subject: [PATCH 0450/1459] Bump to 0.1.45 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f5d450938..2dd5a61eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.44" +version = "0.1.45" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 18334248d5b6e378d4ecdbef44de712bd9653382 Mon Sep 17 00:00:00 2001 From: Jethro Beekman Date: Thu, 24 Jun 2021 15:58:36 +0200 Subject: [PATCH 0451/1459] Fix build failure with latest nightly --- libm/src/math/pow.rs | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index c7fd0dfa1..f79680a05 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -604,7 +604,7 @@ mod tests { // Factoring -1 out: // (negative anything ^ integer should be (-1 ^ integer) * (positive anything ^ integer)) - &[POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS] + (&[POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS]) .iter() .for_each(|int_set| { int_set.iter().for_each(|int| { @@ -616,7 +616,7 @@ mod tests { // Negative base (imaginary results): // (-anything except 0 and Infinity ^ non-integer should be NAN) - &NEG[1..(NEG.len() - 1)].iter().for_each(|set| { + (&NEG[1..(NEG.len() - 1)]).iter().for_each(|set| { set.iter().for_each(|val| { test_sets(&ALL[3..7], &|v: f64| pow(*val, v), &|_| NAN); }) From c3d72c2f25c7100335d2fec005c9b701b789e755 Mon Sep 17 00:00:00 2001 From: Jethro Beekman Date: Tue, 22 Jun 2021 18:06:17 +0200 Subject: [PATCH 0452/1459] Fix substract with borrow in FMA Fixes rust-lang/libm#242 --- libm/src/math/fma.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index 3219dbcce..85d842119 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -122,9 +122,9 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { rhi += zhi + (rlo < zlo) as u64; } else { /* r -= z */ - let t = rlo; - rlo = rlo.wrapping_sub(zlo); - rhi = rhi.wrapping_sub(zhi.wrapping_sub((t < rlo) as u64)); + let (res, borrow) = rlo.overflowing_sub(zlo); + rlo = res; + rhi = rhi.wrapping_sub(zhi.wrapping_add(borrow as u64)); if (rhi >> 63) != 0 { rlo = (-(rlo as i64)) as u64; rhi = (-(rhi as i64)) as u64 - (rlo != 0) as u64; @@ -218,6 +218,14 @@ mod tests { -0.00000000000000022204460492503126, ); - assert_eq!(fma(-0.992, -0.992, -0.992), -0.00793599999988632,); + assert_eq!(fma(-0.992, -0.992, -0.992), -0.007936000000000007,); + } + + #[test] + fn fma_sbb() { + assert_eq!( + 
fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), + -3991680619069439e277 + ); } } From ef9ec63ee8d5ceb9a33bdd9e6c61246e29a0b365 Mon Sep 17 00:00:00 2001 From: Jethro Beekman Date: Fri, 25 Jun 2021 11:52:14 +0200 Subject: [PATCH 0453/1459] Update libm --- Cargo.toml | 2 +- libm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2dd5a61eb..6f579f57b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.45" +version = "0.1.46" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" diff --git a/libm b/libm index fe396e00b..c2d22bf95 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit fe396e00b7e47821a81c4c87a481ddc6af1d2cdf +Subproject commit c2d22bf95e2f032ae6b237e8e4c336bc795a151c From 827d438f54114b420dcf61e72120b1f9c3c0820e Mon Sep 17 00:00:00 2001 From: Brian Vincent Date: Wed, 30 Jun 2021 22:35:47 -0500 Subject: [PATCH 0454/1459] Optimize round and roundf --- libm/src/math/round.rs | 46 +++++++++++++---------------------------- libm/src/math/roundf.rs | 44 +++++++++++++-------------------------- 2 files changed, 28 insertions(+), 62 deletions(-) diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs index bf72f5b94..46fabc90f 100644 --- a/libm/src/math/round.rs +++ b/libm/src/math/round.rs @@ -1,38 +1,10 @@ +use super::copysign; +use super::trunc; use core::f64; -const TOINT: f64 = 1.0 / f64::EPSILON; - #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn round(mut x: f64) -> f64 { - let i = x.to_bits(); - let e: u64 = i >> 52 & 0x7ff; - let mut y: f64; - - if e >= 0x3ff + 52 { - return x; - } - if e < 0x3ff - 1 { - // raise inexact if x!=0 - force_eval!(x + TOINT); - return 0.0 * x; - } - if i >> 63 != 0 { - x = -x; - } - y = x + TOINT - TOINT - x; - if y > 0.5 { - y = y + x - 1.0; - } else if y <= -0.5 { - y = y + x + 1.0; - } else { - y = y + x; - 
} - - if i >> 63 != 0 { - -y - } else { - y - } +pub fn round(x: f64) -> f64 { + trunc(x + copysign(0.5 - 0.25 * f64::EPSILON, x)) } #[cfg(test)] @@ -43,4 +15,14 @@ mod tests { fn negative_zero() { assert_eq!(round(-0.0_f64).to_bits(), (-0.0_f64).to_bits()); } + + #[test] + fn sanity_check() { + assert_eq!(round(-1.0), -1.0); + assert_eq!(round(2.8), 3.0); + assert_eq!(round(-0.5), -1.0); + assert_eq!(round(0.5), 1.0); + assert_eq!(round(-1.5), -2.0); + assert_eq!(round(1.5), 2.0); + } } diff --git a/libm/src/math/roundf.rs b/libm/src/math/roundf.rs index 497e88d62..c0872a782 100644 --- a/libm/src/math/roundf.rs +++ b/libm/src/math/roundf.rs @@ -1,36 +1,10 @@ +use super::copysignf; +use super::truncf; use core::f32; -const TOINT: f32 = 1.0 / f32::EPSILON; - #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn roundf(mut x: f32) -> f32 { - let i = x.to_bits(); - let e: u32 = i >> 23 & 0xff; - let mut y: f32; - - if e >= 0x7f + 23 { - return x; - } - if e < 0x7f - 1 { - force_eval!(x + TOINT); - return 0.0 * x; - } - if i >> 31 != 0 { - x = -x; - } - y = x + TOINT - TOINT - x; - if y > 0.5f32 { - y = y + x - 1.0; - } else if y <= -0.5f32 { - y = y + x + 1.0; - } else { - y = y + x; - } - if i >> 31 != 0 { - -y - } else { - y - } +pub fn roundf(x: f32) -> f32 { + truncf(x + copysignf(0.5 - 0.25 * f32::EPSILON, x)) } #[cfg(test)] @@ -41,4 +15,14 @@ mod tests { fn negative_zero() { assert_eq!(roundf(-0.0_f32).to_bits(), (-0.0_f32).to_bits()); } + + #[test] + fn sanity_check() { + assert_eq!(roundf(-1.0), -1.0); + assert_eq!(roundf(2.8), 3.0); + assert_eq!(roundf(-0.5), -1.0); + assert_eq!(roundf(0.5), 1.0); + assert_eq!(roundf(-1.5), -2.0); + assert_eq!(roundf(1.5), 2.0); + } } From 3c3bdfbe201c397457c5252605b8de7942959b57 Mon Sep 17 00:00:00 2001 From: SuKiN-a Date: Fri, 9 Jul 2021 09:52:08 +0530 Subject: [PATCH 0455/1459] updated link to musl website --- libm/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/README.md 
b/libm/README.md index 5d9e9bddb..b864b5df8 100644 --- a/libm/README.md +++ b/libm/README.md @@ -2,7 +2,7 @@ A port of [MUSL]'s libm to Rust. -[MUSL]: https://www.musl-libc.org/ +[MUSL]: https://musl.libc.org/ ## Goals From 96bc62e669403a82a1becfc7863bdc17f7317d68 Mon Sep 17 00:00:00 2001 From: Rich Kadel Date: Tue, 13 Jul 2021 21:11:12 -0700 Subject: [PATCH 0456/1459] Support `long double` intrinsics in any aarch64 linux Expands the support added in #377 from just musl to any linux. Also checks for and avoids adding duplicate sources. Fixes #428 --- build.rs | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/build.rs b/build.rs index f0617b6e9..730ea61df 100644 --- a/build.rs +++ b/build.rs @@ -84,7 +84,7 @@ fn main() { mod c { extern crate cc; - use std::collections::BTreeMap; + use std::collections::{BTreeMap, HashSet}; use std::env; use std::path::{Path, PathBuf}; @@ -419,23 +419,18 @@ mod c { ("__floatunsitf", "floatunsitf.c"), ("__trunctfdf2", "trunctfdf2.c"), ("__trunctfsf2", "trunctfsf2.c"), + ("__addtf3", "addtf3.c"), + ("__multf3", "multf3.c"), + ("__subtf3", "subtf3.c"), + ("__divtf3", "divtf3.c"), + ("__powitf2", "powitf2.c"), + ("__fe_getround", "fp_mode.c"), + ("__fe_raise_inexact", "fp_mode.c"), ]); if target_os != "windows" { sources.extend(&[("__multc3", "multc3.c")]); } - - if target_env == "musl" { - sources.extend(&[ - ("__addtf3", "addtf3.c"), - ("__multf3", "multf3.c"), - ("__subtf3", "subtf3.c"), - ("__divtf3", "divtf3.c"), - ("__powitf2", "powitf2.c"), - ("__fe_getround", "fp_mode.c"), - ("__fe_raise_inexact", "fp_mode.c"), - ]); - } } if target_arch == "mips" { @@ -505,10 +500,13 @@ mod c { sources.extend(&[("__aarch64_have_lse_atomics", "cpu_model.c")]); } + let mut added_sources = HashSet::new(); for (sym, src) in sources.map.iter() { let src = src_dir.join(src); - cfg.file(&src); - println!("cargo:rerun-if-changed={}", src.display()); + if added_sources.insert(src.clone()) { + cfg.file(&src); 
+ println!("cargo:rerun-if-changed={}", src.display()); + } println!("cargo:rustc-cfg={}=\"optimized-c\"", sym); } From 31cf80ba26665c660cb2424d2b569c9b6011bac0 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 14 Jul 2021 21:40:46 +0200 Subject: [PATCH 0457/1459] Bump to 0.1.47 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6f579f57b..604e54d30 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.46" +version = "0.1.47" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From c03b91ff9975b2c793ae999c4585f322da77ce04 Mon Sep 17 00:00:00 2001 From: Wesley Wiser Date: Wed, 28 Jul 2021 13:44:49 -0400 Subject: [PATCH 0458/1459] Don't try to build out-of-line aarch64 atomics with the msvc toolchain The msvc toolchain does not support building `.s` files, clang only supports generating out-of-line atomics on Linux and gcc does not support aarch64 for Windows at all. Therefore, we don't need to compile `lse.s` on `aarch64-pc-windows-msvc`. --- build.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build.rs b/build.rs index 730ea61df..d4cfe0e1c 100644 --- a/build.rs +++ b/build.rs @@ -491,8 +491,9 @@ mod c { // Include out-of-line atomics for aarch64, which are all generated by supplying different // sets of flags to the same source file. + // Note: Out-of-line aarch64 atomics are not supported by the msvc toolchain (#430). let src_dir = root.join("lib/builtins"); - if target_arch == "aarch64" { + if target_arch == "aarch64" && target_env != "msvc" { // See below for why we're building these as separate libraries. 
build_aarch64_out_of_line_atomics_libraries(&src_dir, cfg); From 2ae1a63c75b5894a87af440178e7b7e48c4b7f8d Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 28 Jul 2021 22:30:59 +0200 Subject: [PATCH 0459/1459] Bump to 0.1.48 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 604e54d30..b1dcd11c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.47" +version = "0.1.48" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From e81581d40e65c46a9074d51d5caaf64914694425 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 30 Jul 2021 00:23:15 +0200 Subject: [PATCH 0460/1459] Don't panic if the shift intrinsics receive out-of-range shifts LLVM sometimes emits calls with out-of-range shifts but then discards the results. We should avoid panics in these cases. --- src/int/shift.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/int/shift.rs b/src/int/shift.rs index 59909929e..908e619e1 100644 --- a/src/int/shift.rs +++ b/src/int/shift.rs @@ -6,13 +6,13 @@ trait Ashl: DInt { let n_h = Self::H::BITS; if shl & n_h != 0 { // we only need `self.lo()` because `self.hi()` will be shifted out entirely - (self.lo() << (shl - n_h)).widen_hi() + self.lo().wrapping_shl(shl - n_h).widen_hi() } else if shl == 0 { self } else { Self::from_lo_hi( - self.lo() << shl, - self.lo().logical_shr(n_h - shl) | (self.hi() << shl), + self.lo().wrapping_shl(shl), + self.lo().logical_shr(n_h - shl) | self.hi().wrapping_shl(shl), ) } } @@ -28,16 +28,16 @@ trait Ashr: DInt { let n_h = Self::H::BITS; if shr & n_h != 0 { Self::from_lo_hi( - self.hi() >> (shr - n_h), + self.hi().wrapping_shr(shr - n_h), // smear the sign bit - self.hi() >> (n_h - 1), + self.hi().wrapping_shr(n_h - 1), ) } else if shr == 0 { self } else { Self::from_lo_hi( - 
self.lo().logical_shr(shr) | (self.hi() << (n_h - shr)), - self.hi() >> shr, + self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h - shr), + self.hi().wrapping_shr(shr), ) } } @@ -57,7 +57,7 @@ trait Lshr: DInt { self } else { Self::from_lo_hi( - self.lo().logical_shr(shr) | (self.hi() << (n_h - shr)), + self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h - shr), self.hi().logical_shr(shr), ) } From 826d9e9d41dca79c9893118fdbb600fc9a837764 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 30 Jul 2021 18:49:46 +0200 Subject: [PATCH 0461/1459] Bump to 0.1.49 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b1dcd11c4..7979032d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.48" +version = "0.1.49" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 0c41d6085ae8279cfd53558e541a40df6e79438c Mon Sep 17 00:00:00 2001 From: Gary Guo Date: Tue, 9 Feb 2021 05:07:58 +0000 Subject: [PATCH 0462/1459] Add test cases for memcpy, memmove and memset for different alignment --- testcrate/tests/mem.rs | 129 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/testcrate/tests/mem.rs b/testcrate/tests/mem.rs index a5596b281..3f20e72a0 100644 --- a/testcrate/tests/mem.rs +++ b/testcrate/tests/mem.rs @@ -1,6 +1,8 @@ extern crate compiler_builtins; use compiler_builtins::mem::{memcmp, memcpy, memmove, memset}; +const WORD_SIZE: usize = core::mem::size_of::(); + #[test] fn memcpy_3() { let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; @@ -131,3 +133,130 @@ fn memcmp_ne() { assert!(memcmp(arr2.as_ptr(), arr1.as_ptr(), 8) > 0); } } + +#[derive(Clone, Copy)] +struct AlignedStorage([u8; N], [usize; 0]); + +fn gen_arr() -> AlignedStorage { + let mut ret = AlignedStorage::([0; N], []); + for i in 0..N { + ret.0[i] = 
i as u8; + } + ret +} + +#[test] +fn memmove_forward_misaligned_nonaligned_start() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let src = arr.0.as_ptr().offset(6); + let dst = arr.0.as_mut_ptr().offset(3); + assert_eq!(memmove(dst, src, 17), dst); + reference.0.copy_within(6..6 + 17, 3); + assert_eq!(arr.0, reference.0); + } +} + +#[test] +fn memmove_forward_misaligned_aligned_start() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let src = arr.0.as_ptr().offset(6); + let dst = arr.0.as_mut_ptr().add(0); + assert_eq!(memmove(dst, src, 17), dst); + reference.0.copy_within(6..6 + 17, 0); + assert_eq!(arr.0, reference.0); + } +} + +#[test] +fn memmove_forward_aligned() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let src = arr.0.as_ptr().add(3 + WORD_SIZE); + let dst = arr.0.as_mut_ptr().add(3); + assert_eq!(memmove(dst, src, 17), dst); + reference + .0 + .copy_within(3 + WORD_SIZE..3 + WORD_SIZE + 17, 3); + assert_eq!(arr.0, reference.0); + } +} + +#[test] +fn memmove_backward_misaligned_nonaligned_start() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let src = arr.0.as_ptr().offset(3); + let dst = arr.0.as_mut_ptr().offset(6); + assert_eq!(memmove(dst, src, 17), dst); + reference.0.copy_within(3..3 + 17, 6); + assert_eq!(arr.0, reference.0); + } +} + +#[test] +fn memmove_backward_misaligned_aligned_start() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let src = arr.0.as_ptr().offset(3); + let dst = arr.0.as_mut_ptr().add(WORD_SIZE); + assert_eq!(memmove(dst, src, 17), dst); + reference.0.copy_within(3..3 + 17, WORD_SIZE); + assert_eq!(arr.0, reference.0); + } +} + +#[test] +fn memmove_backward_aligned() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let src = arr.0.as_ptr().add(3); + let dst = arr.0.as_mut_ptr().add(3 + WORD_SIZE); + assert_eq!(memmove(dst, src, 17), dst); + 
reference.0.copy_within(3..3 + 17, 3 + WORD_SIZE); + assert_eq!(arr.0, reference.0); + } +} + +#[test] +fn memset_backward_misaligned_nonaligned_start() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let ptr = arr.0.as_mut_ptr().offset(6); + assert_eq!(memset(ptr, 0xCC, 17), ptr); + core::ptr::write_bytes(reference.0.as_mut_ptr().add(6), 0xCC, 17); + assert_eq!(arr.0, reference.0); + } +} + +#[test] +fn memset_backward_misaligned_aligned_start() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let ptr = arr.0.as_mut_ptr().add(WORD_SIZE); + assert_eq!(memset(ptr, 0xCC, 17), ptr); + core::ptr::write_bytes(reference.0.as_mut_ptr().add(WORD_SIZE), 0xCC, 17); + assert_eq!(arr.0, reference.0); + } +} + +#[test] +fn memset_backward_aligned() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let ptr = arr.0.as_mut_ptr().add(3 + WORD_SIZE); + assert_eq!(memset(ptr, 0xCC, 17), ptr); + core::ptr::write_bytes(reference.0.as_mut_ptr().add(3 + WORD_SIZE), 0xCC, 17); + assert_eq!(arr.0, reference.0); + } +} From fcfecc1857af1ee83a62fc9f9e6a03e0a0e5c7d7 Mon Sep 17 00:00:00 2001 From: Gary Guo Date: Tue, 9 Feb 2021 05:10:39 +0000 Subject: [PATCH 0463/1459] Implement word-sized copy --- src/mem/impls.rs | 224 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 208 insertions(+), 16 deletions(-) diff --git a/src/mem/impls.rs b/src/mem/impls.rs index b3eef9901..e60b160fd 100644 --- a/src/mem/impls.rs +++ b/src/mem/impls.rs @@ -1,27 +1,219 @@ +use core::intrinsics::likely; + +const WORD_SIZE: usize = core::mem::size_of::(); +const WORD_MASK: usize = WORD_SIZE - 1; + +// If the number of bytes involved exceed this threshold we will opt in word-wise copy. +// The value here selected is max(2 * WORD_SIZE, 16): +// * We need at least 2 * WORD_SIZE bytes to guarantee that at least 1 word will be copied through +// word-wise copy. 
+// * The word-wise copy logic needs to perform some checks so it has some small overhead. +// ensures that even on 32-bit platforms we have copied at least 8 bytes through +// word-wise copy so the saving of word-wise copy outweights the fixed overhead. +const WORD_COPY_THRESHOLD: usize = if 2 * WORD_SIZE > 16 { + 2 * WORD_SIZE +} else { + 16 +}; + #[inline(always)] -pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, n: usize) { - let mut i = 0; - while i < n { - *dest.add(i) = *src.add(i); - i += 1; +pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) { + #[inline(always)] + unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) { + let dest_end = dest.add(n); + while dest < dest_end { + *dest = *src; + dest = dest.add(1); + src = src.add(1); + } + } + + #[inline(always)] + unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let mut src_usize = src as *mut usize; + let dest_end = dest.add(n) as *mut usize; + + while dest_usize < dest_end { + *dest_usize = *src_usize; + dest_usize = dest_usize.add(1); + src_usize = src_usize.add(1); + } + } + + #[inline(always)] + unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let dest_end = dest.add(n) as *mut usize; + + // Calculate the misalignment offset and shift needed to reassemble value. + let offset = src as usize & WORD_MASK; + let shift = offset * 8; + + // Realign src + let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize; + // XXX: Could this possibly be UB? 
+ let mut prev_word = *src_aligned; + + while dest_usize < dest_end { + src_aligned = src_aligned.add(1); + let cur_word = *src_aligned; + #[cfg(target_endian = "little")] + let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift); + #[cfg(target_endian = "big")] + let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift); + prev_word = cur_word; + + *dest_usize = resembled; + dest_usize = dest_usize.add(1); + } } + + if n >= WORD_COPY_THRESHOLD { + // Align dest + // Because of n >= 2 * WORD_SIZE, dst_misalignment < n + let dest_misalignment = (dest as usize).wrapping_neg() & WORD_MASK; + copy_forward_bytes(dest, src, dest_misalignment); + dest = dest.add(dest_misalignment); + src = src.add(dest_misalignment); + n -= dest_misalignment; + + let n_words = n & !WORD_MASK; + let src_misalignment = src as usize & WORD_MASK; + if likely(src_misalignment == 0) { + copy_forward_aligned_words(dest, src, n_words); + } else { + copy_forward_misaligned_words(dest, src, n_words); + } + dest = dest.add(n_words); + src = src.add(n_words); + n -= n_words; + } + copy_forward_bytes(dest, src, n); } #[inline(always)] -pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, n: usize) { - // copy from end - let mut i = n; - while i != 0 { - i -= 1; - *dest.add(i) = *src.add(i); +pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { + // The following backward copy helper functions uses the pointers past the end + // as their inputs instead of pointers to the start! 
+ #[inline(always)] + unsafe fn copy_backward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) { + let dest_start = dest.sub(n); + while dest_start < dest { + dest = dest.sub(1); + src = src.sub(1); + *dest = *src; + } + } + + #[inline(always)] + unsafe fn copy_backward_aligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let mut src_usize = src as *mut usize; + let dest_start = dest.sub(n) as *mut usize; + + while dest_start < dest_usize { + dest_usize = dest_usize.sub(1); + src_usize = src_usize.sub(1); + *dest_usize = *src_usize; + } } + + #[inline(always)] + unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let dest_start = dest.sub(n) as *mut usize; + + // Calculate the misalignment offset and shift needed to reassemble value. + let offset = src as usize & WORD_MASK; + let shift = offset * 8; + + // Realign src_aligned + let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize; + // XXX: Could this possibly be UB? 
+ let mut prev_word = *src_aligned; + + while dest_start < dest_usize { + src_aligned = src_aligned.sub(1); + let cur_word = *src_aligned; + #[cfg(target_endian = "little")] + let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift; + #[cfg(target_endian = "big")] + let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift; + prev_word = cur_word; + + dest_usize = dest_usize.sub(1); + *dest_usize = resembled; + } + } + + let mut dest = dest.add(n); + let mut src = src.add(n); + + if n >= WORD_COPY_THRESHOLD { + // Align dest + // Because of n >= 2 * WORD_SIZE, dst_misalignment < n + let dest_misalignment = dest as usize & WORD_MASK; + copy_backward_bytes(dest, src, dest_misalignment); + dest = dest.sub(dest_misalignment); + src = src.sub(dest_misalignment); + n -= dest_misalignment; + + let n_words = n & !WORD_MASK; + let src_misalignment = src as usize & WORD_MASK; + if likely(src_misalignment == 0) { + copy_backward_aligned_words(dest, src, n_words); + } else { + copy_backward_misaligned_words(dest, src, n_words); + } + dest = dest.sub(n_words); + src = src.sub(n_words); + n -= n_words; + } + copy_backward_bytes(dest, src, n); } #[inline(always)] -pub unsafe fn set_bytes(s: *mut u8, c: u8, n: usize) { - let mut i = 0; - while i < n { - *s.add(i) = c; - i += 1; +pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) { + #[inline(always)] + pub unsafe fn set_bytes_bytes(mut s: *mut u8, c: u8, n: usize) { + let end = s.add(n); + while s < end { + *s = c; + s = s.add(1); + } + } + + #[inline(always)] + pub unsafe fn set_bytes_words(s: *mut u8, c: u8, n: usize) { + let mut broadcast = c as usize; + let mut bits = 8; + while bits < WORD_SIZE * 8 { + broadcast |= broadcast << bits; + bits *= 2; + } + + let mut s_usize = s as *mut usize; + let end = s.add(n) as *mut usize; + + while s_usize < end { + *s_usize = broadcast; + s_usize = s_usize.add(1); + } + } + + if likely(n >= WORD_COPY_THRESHOLD) { + // Align s + // Because of n >= 2 
* WORD_SIZE, dst_misalignment < n + let misalignment = (s as usize).wrapping_neg() & WORD_MASK; + set_bytes_bytes(s, c, misalignment); + s = s.add(misalignment); + n -= misalignment; + + let n_words = n & !WORD_MASK; + set_bytes_words(s, c, n_words); + s = s.add(n_words); + n -= n_words; } + set_bytes_bytes(s, c, n); } From 3ad5fa90a2351e10430a5ebea6ad365b202e2378 Mon Sep 17 00:00:00 2001 From: Gary Guo Date: Wed, 10 Feb 2021 23:35:54 +0000 Subject: [PATCH 0464/1459] Add misaligned benchmarks --- testcrate/benches/mem.rs | 134 +++++++++++++++++++++++++++++---------- 1 file changed, 100 insertions(+), 34 deletions(-) diff --git a/testcrate/benches/mem.rs b/testcrate/benches/mem.rs index cee64ae4d..b6883a93b 100644 --- a/testcrate/benches/mem.rs +++ b/testcrate/benches/mem.rs @@ -6,30 +6,64 @@ use test::{black_box, Bencher}; extern crate compiler_builtins; use compiler_builtins::mem::{memcmp, memcpy, memmove, memset}; -fn memcpy_builtin(b: &mut Bencher, n: usize, offset: usize) { - let v1 = vec![1u8; n + offset]; - let mut v2 = vec![0u8; n + offset]; +const WORD_SIZE: usize = core::mem::size_of::(); + +struct AlignedVec { + vec: Vec, + size: usize, +} + +impl AlignedVec { + fn new(fill: u8, size: usize) -> Self { + let mut broadcast = fill as usize; + let mut bits = 8; + while bits < WORD_SIZE * 8 { + broadcast |= broadcast << bits; + bits *= 2; + } + + let vec = vec![broadcast; (size + WORD_SIZE - 1) & !WORD_SIZE]; + AlignedVec { vec, size } + } +} + +impl core::ops::Deref for AlignedVec { + type Target = [u8]; + fn deref(&self) -> &[u8] { + unsafe { core::slice::from_raw_parts(self.vec.as_ptr() as *const u8, self.size) } + } +} + +impl core::ops::DerefMut for AlignedVec { + fn deref_mut(&mut self) -> &mut [u8] { + unsafe { core::slice::from_raw_parts_mut(self.vec.as_mut_ptr() as *mut u8, self.size) } + } +} + +fn memcpy_builtin(b: &mut Bencher, n: usize, offset1: usize, offset2: usize) { + let v1 = AlignedVec::new(1, n + offset1); + let mut v2 = AlignedVec::new(0, 
n + offset2); b.bytes = n as u64; b.iter(|| { - let src: &[u8] = black_box(&v1[offset..]); - let dst: &mut [u8] = black_box(&mut v2[offset..]); + let src: &[u8] = black_box(&v1[offset1..]); + let dst: &mut [u8] = black_box(&mut v2[offset2..]); dst.copy_from_slice(src); }) } -fn memcpy_rust(b: &mut Bencher, n: usize, offset: usize) { - let v1 = vec![1u8; n + offset]; - let mut v2 = vec![0u8; n + offset]; +fn memcpy_rust(b: &mut Bencher, n: usize, offset1: usize, offset2: usize) { + let v1 = AlignedVec::new(1, n + offset1); + let mut v2 = AlignedVec::new(0, n + offset2); b.bytes = n as u64; b.iter(|| { - let src: &[u8] = black_box(&v1[offset..]); - let dst: &mut [u8] = black_box(&mut v2[offset..]); + let src: &[u8] = black_box(&v1[offset1..]); + let dst: &mut [u8] = black_box(&mut v2[offset2..]); unsafe { memcpy(dst.as_mut_ptr(), src.as_ptr(), n) } }) } fn memset_builtin(b: &mut Bencher, n: usize, offset: usize) { - let mut v1 = vec![0u8; n + offset]; + let mut v1 = AlignedVec::new(0, n + offset); b.bytes = n as u64; b.iter(|| { let dst: &mut [u8] = black_box(&mut v1[offset..]); @@ -41,7 +75,7 @@ fn memset_builtin(b: &mut Bencher, n: usize, offset: usize) { } fn memset_rust(b: &mut Bencher, n: usize, offset: usize) { - let mut v1 = vec![0u8; n + offset]; + let mut v1 = AlignedVec::new(0, n + offset); b.bytes = n as u64; b.iter(|| { let dst: &mut [u8] = black_box(&mut v1[offset..]); @@ -51,8 +85,8 @@ fn memset_rust(b: &mut Bencher, n: usize, offset: usize) { } fn memcmp_builtin(b: &mut Bencher, n: usize) { - let v1 = vec![0u8; n]; - let mut v2 = vec![0u8; n]; + let v1 = AlignedVec::new(0, n); + let mut v2 = AlignedVec::new(0, n); v2[n - 1] = 1; b.bytes = n as u64; b.iter(|| { @@ -63,8 +97,8 @@ fn memcmp_builtin(b: &mut Bencher, n: usize) { } fn memcmp_rust(b: &mut Bencher, n: usize) { - let v1 = vec![0u8; n]; - let mut v2 = vec![0u8; n]; + let v1 = AlignedVec::new(0, n); + let mut v2 = AlignedVec::new(0, n); v2[n - 1] = 1; b.bytes = n as u64; b.iter(|| { @@ -74,20 
+108,20 @@ fn memcmp_rust(b: &mut Bencher, n: usize) { }) } -fn memmove_builtin(b: &mut Bencher, n: usize) { - let mut v = vec![0u8; n + n / 2]; +fn memmove_builtin(b: &mut Bencher, n: usize, offset: usize) { + let mut v = AlignedVec::new(0, n + n / 2 + offset); b.bytes = n as u64; b.iter(|| { let s: &mut [u8] = black_box(&mut v); - s.copy_within(0..n, n / 2); + s.copy_within(0..n, n / 2 + offset); }) } -fn memmove_rust(b: &mut Bencher, n: usize) { - let mut v = vec![0u8; n + n / 2]; +fn memmove_rust(b: &mut Bencher, n: usize, offset: usize) { + let mut v = AlignedVec::new(0, n + n / 2 + offset); b.bytes = n as u64; b.iter(|| { - let dst: *mut u8 = black_box(&mut v[n / 2..]).as_mut_ptr(); + let dst: *mut u8 = black_box(&mut v[n / 2 + offset..]).as_mut_ptr(); let src: *const u8 = black_box(&v).as_ptr(); unsafe { memmove(dst, src, n) }; }) @@ -95,35 +129,51 @@ fn memmove_rust(b: &mut Bencher, n: usize) { #[bench] fn memcpy_builtin_4096(b: &mut Bencher) { - memcpy_builtin(b, 4096, 0) + memcpy_builtin(b, 4096, 0, 0) } #[bench] fn memcpy_rust_4096(b: &mut Bencher) { - memcpy_rust(b, 4096, 0) + memcpy_rust(b, 4096, 0, 0) } #[bench] fn memcpy_builtin_1048576(b: &mut Bencher) { - memcpy_builtin(b, 1048576, 0) + memcpy_builtin(b, 1048576, 0, 0) } #[bench] fn memcpy_rust_1048576(b: &mut Bencher) { - memcpy_rust(b, 1048576, 0) + memcpy_rust(b, 1048576, 0, 0) } #[bench] fn memcpy_builtin_4096_offset(b: &mut Bencher) { - memcpy_builtin(b, 4096, 65) + memcpy_builtin(b, 4096, 65, 65) } #[bench] fn memcpy_rust_4096_offset(b: &mut Bencher) { - memcpy_rust(b, 4096, 65) + memcpy_rust(b, 4096, 65, 65) } #[bench] fn memcpy_builtin_1048576_offset(b: &mut Bencher) { - memcpy_builtin(b, 1048576, 65) + memcpy_builtin(b, 1048576, 65, 65) } #[bench] fn memcpy_rust_1048576_offset(b: &mut Bencher) { - memcpy_rust(b, 1048576, 65) + memcpy_rust(b, 1048576, 65, 65) +} +#[bench] +fn memcpy_builtin_4096_misalign(b: &mut Bencher) { + memcpy_builtin(b, 4096, 65, 66) +} +#[bench] +fn 
memcpy_rust_4096_misalign(b: &mut Bencher) { + memcpy_rust(b, 4096, 65, 66) +} +#[bench] +fn memcpy_builtin_1048576_misalign(b: &mut Bencher) { + memcpy_builtin(b, 1048576, 65, 66) +} +#[bench] +fn memcpy_rust_1048576_misalign(b: &mut Bencher) { + memcpy_rust(b, 1048576, 65, 66) } #[bench] @@ -178,17 +228,33 @@ fn memcmp_rust_1048576(b: &mut Bencher) { #[bench] fn memmove_builtin_4096(b: &mut Bencher) { - memmove_builtin(b, 4096) + memmove_builtin(b, 4096, 0) } #[bench] fn memmove_rust_4096(b: &mut Bencher) { - memmove_rust(b, 4096) + memmove_rust(b, 4096, 0) } #[bench] fn memmove_builtin_1048576(b: &mut Bencher) { - memmove_builtin(b, 1048576) + memmove_builtin(b, 1048576, 0) } #[bench] fn memmove_rust_1048576(b: &mut Bencher) { - memmove_rust(b, 1048576) + memmove_rust(b, 1048576, 0) +} +#[bench] +fn memmove_builtin_4096_misalign(b: &mut Bencher) { + memmove_builtin(b, 4096, 1) +} +#[bench] +fn memmove_rust_4096_misalign(b: &mut Bencher) { + memmove_rust(b, 4096, 1) +} +#[bench] +fn memmove_builtin_1048576_misalign(b: &mut Bencher) { + memmove_builtin(b, 1048576, 1) +} +#[bench] +fn memmove_rust_1048576_misalign(b: &mut Bencher) { + memmove_rust(b, 1048576, 1) } From 2d28f4d19af9385328c4a35a938f45a44b189bcc Mon Sep 17 00:00:00 2001 From: Gary Guo Date: Sat, 21 Aug 2021 03:45:38 +0100 Subject: [PATCH 0465/1459] Add different misaligned path for archs with unaligned support --- build.rs | 5 +++++ src/mem/impls.rs | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/build.rs b/build.rs index d4cfe0e1c..fb3dc373a 100644 --- a/build.rs +++ b/build.rs @@ -33,6 +33,11 @@ fn main() { println!("cargo:rustc-cfg=feature=\"mem\""); } + // These targets have hardware unaligned access support. 
+ if target.contains("x86_64") || target.contains("i686") || target.contains("aarch64") { + println!("cargo:rustc-cfg=feature=\"mem-unaligned\""); + } + // NOTE we are going to assume that llvm-target, what determines our codegen option, matches the // target triple. This is usually correct for our built-in targets but can break in presence of // custom targets, which can have arbitrary names. diff --git a/src/mem/impls.rs b/src/mem/impls.rs index e60b160fd..7022d6257 100644 --- a/src/mem/impls.rs +++ b/src/mem/impls.rs @@ -16,6 +16,14 @@ const WORD_COPY_THRESHOLD: usize = if 2 * WORD_SIZE > 16 { 16 }; +#[cfg(feature = "mem-unaligned")] +unsafe fn read_usize_unaligned(x: *const usize) -> usize { + // Do not use `core::ptr::read_unaligned` here, since it calls `copy_nonoverlapping` which + // is translated to memcpy in LLVM. + let x_read = (x as *const [u8; core::mem::size_of::()]).read(); + core::mem::transmute(x_read) +} + #[inline(always)] pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) { #[inline(always)] @@ -41,6 +49,7 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) } } + #[cfg(not(feature = "mem-unaligned"))] #[inline(always)] unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { let mut dest_usize = dest as *mut usize; @@ -69,6 +78,20 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) } } + #[cfg(feature = "mem-unaligned")] + #[inline(always)] + unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let mut src_usize = src as *mut usize; + let dest_end = dest.add(n) as *mut usize; + + while dest_usize < dest_end { + *dest_usize = read_usize_unaligned(src_usize); + dest_usize = dest_usize.add(1); + src_usize = src_usize.add(1); + } + } + if n >= WORD_COPY_THRESHOLD { // Align dest // Because of n >= 2 * WORD_SIZE, dst_misalignment < n @@ -119,6 +142,7 @@ pub 
unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { } } + #[cfg(not(feature = "mem-unaligned"))] #[inline(always)] unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { let mut dest_usize = dest as *mut usize; @@ -147,6 +171,20 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { } } + #[cfg(feature = "mem-unaligned")] + #[inline(always)] + unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let mut src_usize = src as *mut usize; + let dest_start = dest.sub(n) as *mut usize; + + while dest_start < dest_usize { + dest_usize = dest_usize.sub(1); + src_usize = src_usize.sub(1); + *dest_usize = read_usize_unaligned(src_usize); + } + } + let mut dest = dest.add(n); let mut src = src.add(n); From ce86d41b4f95e4d1917460ee7a05fcd7fe6831fc Mon Sep 17 00:00:00 2001 From: Gary Guo Date: Tue, 31 Aug 2021 00:22:43 +0100 Subject: [PATCH 0466/1459] Use atomic_load_unordered for first word load in misaligned case --- src/mem/impls.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mem/impls.rs b/src/mem/impls.rs index 7022d6257..65887a338 100644 --- a/src/mem/impls.rs +++ b/src/mem/impls.rs @@ -61,8 +61,8 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) // Realign src let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize; - // XXX: Could this possibly be UB? - let mut prev_word = *src_aligned; + // This will read (but won't use) bytes out of bound. + let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned); while dest_usize < dest_end { src_aligned = src_aligned.add(1); @@ -154,8 +154,8 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { // Realign src_aligned let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize; - // XXX: Could this possibly be UB? 
- let mut prev_word = *src_aligned; + // This will read (but won't use) bytes out of bound. + let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned); while dest_start < dest_usize { src_aligned = src_aligned.sub(1); From 2be2bc086bd9b3c0fc8eb8d2dc7df025e6ffd318 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 1 Sep 2021 00:22:59 +0200 Subject: [PATCH 0467/1459] Bump to 0.1.50 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7979032d7..8c06d9621 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.49" +version = "0.1.50" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 55f6ecb6de9e2e10d9187b287b9e87b202d07d1e Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 15 Oct 2021 01:56:08 +0200 Subject: [PATCH 0468/1459] Disable broken powerpc64 test due to https://github.com/rust-lang/rust/issues/88520 --- testcrate/tests/cmp.rs | 2 ++ testcrate/tests/conv.rs | 2 ++ testcrate/tests/misc.rs | 2 ++ 3 files changed, 6 insertions(+) diff --git a/testcrate/tests/cmp.rs b/testcrate/tests/cmp.rs index a49779ad0..5c10a5601 100644 --- a/testcrate/tests/cmp.rs +++ b/testcrate/tests/cmp.rs @@ -22,6 +22,8 @@ macro_rules! cmp { }; } +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] #[test] fn float_comparisons() { use compiler_builtins::float::cmp::{ diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 7cdbf9fbb..8c4b1946c 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -95,6 +95,8 @@ macro_rules! 
f_to_i { }; } +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] #[test] fn float_to_int() { use compiler_builtins::float::conv::{ diff --git a/testcrate/tests/misc.rs b/testcrate/tests/misc.rs index 82a1ea27b..5f74e0063 100644 --- a/testcrate/tests/misc.rs +++ b/testcrate/tests/misc.rs @@ -111,6 +111,8 @@ macro_rules! extend { }; } +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] #[test] fn float_extend() { use compiler_builtins::float::extend::__extendsfdf2; From 03d02e219cbd2e910a26e386cf3c7622e2f44a03 Mon Sep 17 00:00:00 2001 From: Georgy Shepelev Date: Wed, 29 Sep 2021 16:02:46 +0400 Subject: [PATCH 0469/1459] expose some math to UEFI envs --- src/lib.rs | 1 + src/math.rs | 46 +++++++++++++++++++++++++++++----------------- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 21796ca55..2bf6e00a8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,6 +40,7 @@ pub mod int; #[cfg(any( all(target_arch = "wasm32", target_os = "unknown"), + all(target_arch = "x86_64", target_os = "uefi"), all(target_arch = "arm", target_os = "none"), all(target_vendor = "fortanix", target_env = "sgx") ))] diff --git a/src/math.rs b/src/math.rs index 94f91124b..6bc1e9076 100644 --- a/src/math.rs +++ b/src/math.rs @@ -20,24 +20,17 @@ macro_rules! no_mangle { target_os = "unknown", not(target_env = "wasi") ), + all(target_arch = "x86_64", target_os = "uefi"), all(target_vendor = "fortanix", target_env = "sgx") ))] no_mangle! 
{ fn acos(x: f64) -> f64; fn asin(x: f64) -> f64; - fn atan(x: f64) -> f64; - fn atan2(x: f64, y: f64) -> f64; fn cbrt(x: f64) -> f64; - fn cosh(x: f64) -> f64; fn expm1(x: f64) -> f64; fn hypot(x: f64, y: f64) -> f64; - fn log1p(x: f64) -> f64; - fn sinh(x: f64) -> f64; fn tan(x: f64) -> f64; - fn tanh(x: f64) -> f64; fn cos(x: f64) -> f64; - fn cosf(x: f32) -> f32; - fn exp(x: f64) -> f64; fn expf(x: f32) -> f32; fn log2(x: f64) -> f64; fn log2f(x: f32) -> f32; @@ -52,33 +45,52 @@ no_mangle! { fn round(x: f64) -> f64; fn roundf(x: f32) -> f32; fn sin(x: f64) -> f64; - fn sinf(x: f32) -> f32; fn pow(x: f64, y: f64) -> f64; fn powf(x: f32, y: f32) -> f32; - fn exp2(x: f64) -> f64; - fn exp2f(x: f32) -> f32; fn fmod(x: f64, y: f64) -> f64; fn fmodf(x: f32, y: f32) -> f32; - fn fma(x: f64, y: f64, z: f64) -> f64; - fn fmaf(x: f32, y: f32, z: f32) -> f32; fn acosf(n: f32) -> f32; - fn asinf(n: f32) -> f32; fn atan2f(a: f32, b: f32) -> f32; fn atanf(n: f32) -> f32; - fn cbrtf(n: f32) -> f32; fn coshf(n: f32) -> f32; fn expm1f(n: f32) -> f32; fn fdim(a: f64, b: f64) -> f64; fn fdimf(a: f32, b: f32) -> f32; - fn hypotf(x: f32, y: f32) -> f32; fn log1pf(n: f32) -> f32; fn sinhf(n: f32) -> f32; - fn tanf(n: f32) -> f32; fn tanhf(n: f32) -> f32; fn ldexp(f: f64, n: i32) -> f64; fn ldexpf(f: f32, n: i32) -> f32; } +#[cfg(any( + all( + target_arch = "wasm32", + target_os = "unknown", + not(target_env = "wasi") + ), + all(target_vendor = "fortanix", target_env = "sgx") +))] +no_mangle! 
{ + fn atan(x: f64) -> f64; + fn atan2(x: f64, y: f64) -> f64; + fn cosh(x: f64) -> f64; + fn log1p(x: f64) -> f64; + fn sinh(x: f64) -> f64; + fn tanh(x: f64) -> f64; + fn cosf(x: f32) -> f32; + fn exp(x: f64) -> f64; + fn sinf(x: f32) -> f32; + fn exp2(x: f64) -> f64; + fn exp2f(x: f32) -> f32; + fn fma(x: f64, y: f64, z: f64) -> f64; + fn fmaf(x: f32, y: f32, z: f32) -> f32; + fn asinf(n: f32) -> f32; + fn cbrtf(n: f32) -> f32; + fn hypotf(x: f32, y: f32) -> f32; + fn tanf(n: f32) -> f32; +} + #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] no_mangle! { fn ceil(x: f64) -> f64; From 4833f5fc8ee07c9c61d3573ac5801af4f2a6b7b6 Mon Sep 17 00:00:00 2001 From: Scott Mabin Date: Fri, 8 Oct 2021 13:36:30 +0100 Subject: [PATCH 0470/1459] Add xtensa to list of soft math targets. --- src/math.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/math.rs b/src/math.rs index 6bc1e9076..6f024e7b3 100644 --- a/src/math.rs +++ b/src/math.rs @@ -21,6 +21,7 @@ macro_rules! no_mangle { not(target_env = "wasi") ), all(target_arch = "x86_64", target_os = "uefi"), + all(target_arch = "xtensa", target_os = "none"), all(target_vendor = "fortanix", target_env = "sgx") ))] no_mangle! { @@ -69,6 +70,7 @@ no_mangle! { target_os = "unknown", not(target_env = "wasi") ), + all(target_arch = "xtensa", target_os = "none"), all(target_vendor = "fortanix", target_env = "sgx") ))] no_mangle! 
{ From e7427313f34f545e62b28d0f31d4da028ca221fd Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 17 Oct 2021 01:41:40 +0200 Subject: [PATCH 0471/1459] Bump to 0.1.51 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8c06d9621..7874b99fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.50" +version = "0.1.51" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 8ebb85a080d3f8695661b8cfb6c0370c3f1d5843 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 28 Oct 2021 10:29:37 -0700 Subject: [PATCH 0472/1459] Adjust some build directives for wasm64 This is still an experimental target but this should get the wasm64 target to behave more like wasm32. --- build.rs | 8 ++++---- src/lib.rs | 1 + src/math.rs | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/build.rs b/build.rs index fb3dc373a..60feb0619 100644 --- a/build.rs +++ b/build.rs @@ -23,9 +23,9 @@ fn main() { return; } - // Forcibly enable memory intrinsics on wasm32 & SGX as we don't have a libc to + // Forcibly enable memory intrinsics on wasm & SGX as we don't have a libc to // provide them. - if (target.contains("wasm32") && !target.contains("wasi")) + if (target.contains("wasm") && !target.contains("wasi")) || (target.contains("sgx") && target.contains("fortanix")) || target.contains("-none") || target.contains("nvptx") @@ -50,13 +50,13 @@ fn main() { if !cfg!(feature = "mangled-names") && cfg!(feature = "c") { // Don't use a C compiler for these targets: // - // * wasm32 - clang 8 for wasm is somewhat hard to come by and it's + // * wasm - clang for wasm is somewhat hard to come by and it's // unlikely that the C is really that much better than our own Rust. 
// * nvptx - everything is bitcode, not compatible with mixed C/Rust // * riscv - the rust-lang/rust distribution container doesn't have a C // compiler nor is cc-rs ready for compilation to riscv (at this // time). This can probably be removed in the future - if !target.contains("wasm32") && !target.contains("nvptx") && !target.starts_with("riscv") { + if !target.contains("wasm") && !target.contains("nvptx") && !target.starts_with("riscv") { #[cfg(feature = "c")] c::compile(&llvm_target, &target); } diff --git a/src/lib.rs b/src/lib.rs index 2bf6e00a8..b021a6864 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,6 +40,7 @@ pub mod int; #[cfg(any( all(target_arch = "wasm32", target_os = "unknown"), + all(target_arch = "wasm64", target_os = "unknown"), all(target_arch = "x86_64", target_os = "uefi"), all(target_arch = "arm", target_os = "none"), all(target_vendor = "fortanix", target_env = "sgx") diff --git a/src/math.rs b/src/math.rs index 6f024e7b3..c6d47b803 100644 --- a/src/math.rs +++ b/src/math.rs @@ -16,7 +16,7 @@ macro_rules! no_mangle { #[cfg(any( all( - target_arch = "wasm32", + any(target_arch = "wasm32", target_arch = "wasm64"), target_os = "unknown", not(target_env = "wasi") ), @@ -66,7 +66,7 @@ no_mangle! 
{ #[cfg(any( all( - target_arch = "wasm32", + any(target_arch = "wasm32", target_arch = "wasm64"), target_os = "unknown", not(target_env = "wasi") ), From cb06f58efcf484132977bac8f1075bd3d5a9ac38 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 28 Oct 2021 16:32:30 -0700 Subject: [PATCH 0473/1459] Use more concise directives --- src/lib.rs | 3 +-- src/math.rs | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b021a6864..fcafb8978 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -39,8 +39,7 @@ pub mod float; pub mod int; #[cfg(any( - all(target_arch = "wasm32", target_os = "unknown"), - all(target_arch = "wasm64", target_os = "unknown"), + all(target_family = "wasm", target_os = "unknown"), all(target_arch = "x86_64", target_os = "uefi"), all(target_arch = "arm", target_os = "none"), all(target_vendor = "fortanix", target_env = "sgx") diff --git a/src/math.rs b/src/math.rs index c6d47b803..fa59753f8 100644 --- a/src/math.rs +++ b/src/math.rs @@ -16,7 +16,7 @@ macro_rules! no_mangle { #[cfg(any( all( - any(target_arch = "wasm32", target_arch = "wasm64"), + target_family = "wasm", target_os = "unknown", not(target_env = "wasi") ), @@ -66,7 +66,7 @@ no_mangle! 
{ #[cfg(any( all( - any(target_arch = "wasm32", target_arch = "wasm64"), + target_family = "wasm", target_os = "unknown", not(target_env = "wasi") ), From 86aa192ec89f18a63163d3372fabfe1c88382f23 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Mon, 1 Nov 2021 23:02:56 +0000 Subject: [PATCH 0474/1459] Bump to 0.1.52 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7874b99fd..2caf6fed4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.51" +version = "0.1.52" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 1e3ed7714b1b245f498dd405a500a6b7a0a01acd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alessandro=C2=A0Decina?= Date: Sun, 21 Nov 2021 06:54:00 +0000 Subject: [PATCH 0475/1459] Turn on the mem-unaligned feature for bpf targets Fixes the following LLVM segfault: Error: e: 05:02:06 [ERROR] fatal error: "Cannot select: 0x55e970a357d0: i64,ch = AtomicLoad<(load unordered (s64) from %ir.45)> 0x55e970410be8, 0x55e970a358a0\n 0x55e970a358a0: i64,ch = CopyFromReg 0x55e970410be8, Register:i64 %19\n 0x55e970a35490: i64 = Register %19\nIn function: memcpy" PLEASE submit a bug report to https://bugs.llvm.org/ and include the crash backtrace. Stack dump: 0. Running pass 'Function Pass Manager' on module 'unroll-loop'. 1. Running pass 'BPF DAG->DAG Pattern Instruction Selection' on function '@memcpy' --- build.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/build.rs b/build.rs index 60feb0619..b930f1d8d 100644 --- a/build.rs +++ b/build.rs @@ -34,7 +34,11 @@ fn main() { } // These targets have hardware unaligned access support. 
- if target.contains("x86_64") || target.contains("i686") || target.contains("aarch64") { + if target.contains("x86_64") + || target.contains("i686") + || target.contains("aarch64") + || target.contains("bpf") + { println!("cargo:rustc-cfg=feature=\"mem-unaligned\""); } From b81db2f9fef0281829937be45f856fdd99efcf35 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 25 Nov 2021 00:01:25 +0000 Subject: [PATCH 0476/1459] Bump to 0.1.53 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2caf6fed4..d4348fb4f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.52" +version = "0.1.53" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From e0187f17dbcbf9dc026d379b2af8d866300596a5 Mon Sep 17 00:00:00 2001 From: "William D. Jones" Date: Wed, 24 Nov 2021 21:51:50 -0500 Subject: [PATCH 0477/1459] Do not use atomic reads on platforms without atomic support in LLVM. --- src/mem/impls.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/mem/impls.rs b/src/mem/impls.rs index 65887a338..815132425 100644 --- a/src/mem/impls.rs +++ b/src/mem/impls.rs @@ -62,7 +62,12 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) // Realign src let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize; // This will read (but won't use) bytes out of bound. + // cfg needed because not all targets will have atomic loads that can be lowered + // (e.g. BPF, MSP430), or provided by an external library (e.g. 
RV32I) + #[cfg(target_has_atomic_load_store = "ptr")] let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned); + #[cfg(not(target_has_atomic_load_store = "ptr"))] + let mut prev_word = core::ptr::read_volatile(src_aligned); while dest_usize < dest_end { src_aligned = src_aligned.add(1); @@ -155,7 +160,12 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { // Realign src_aligned let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize; // This will read (but won't use) bytes out of bound. + // cfg needed because not all targets will have atomic loads that can be lowered + // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I) + #[cfg(target_has_atomic_load_store = "ptr")] let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned); + #[cfg(not(target_has_atomic_load_store = "ptr"))] + let mut prev_word = core::ptr::read_volatile(src_aligned); while dest_start < dest_usize { src_aligned = src_aligned.sub(1); From ba870b2568547470dab3a0dbf9fc8a19417cc069 Mon Sep 17 00:00:00 2001 From: "William D. Jones" Date: Sat, 27 Nov 2021 19:38:43 -0500 Subject: [PATCH 0478/1459] Use fully-qualified syntax for abs_diff to avoid warning, which can trigger a compiler error. 
--- src/float/pow.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/float/pow.rs b/src/float/pow.rs index 5ab5e4201..a75340c30 100644 --- a/src/float/pow.rs +++ b/src/float/pow.rs @@ -5,7 +5,7 @@ use int::Int; fn pow(a: F, b: i32) -> F { let mut a = a; let recip = b < 0; - let mut pow = i32::abs_diff(b, 0); + let mut pow = Int::abs_diff(b, 0); let mut mul = F::ONE; loop { if (pow & 1) != 0 { From 95b52112671757c24d8500928a042bd46d25c761 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 28 Nov 2021 11:19:54 +0000 Subject: [PATCH 0479/1459] Bump to 0.1.54 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index d4348fb4f..8beb767d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.53" +version = "0.1.54" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From eaab9d29ecbf538369d7f26953425eb78dae8229 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 28 Nov 2021 12:50:21 +0000 Subject: [PATCH 0480/1459] Ensure AArch64 LSE object files have distinct names in an archive This is needed by libtool which rejects archives that contain object files with the same name multiple times. 
Fixes #443 --- build.rs | 51 ++++++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/build.rs b/build.rs index b930f1d8d..90b8a04fa 100644 --- a/build.rs +++ b/build.rs @@ -95,6 +95,8 @@ mod c { use std::collections::{BTreeMap, HashSet}; use std::env; + use std::fs::File; + use std::io::Write; use std::path::{Path, PathBuf}; struct Sources { @@ -523,20 +525,13 @@ mod c { cfg.compile("libcompiler-rt.a"); } - fn build_aarch64_out_of_line_atomics_libraries(builtins_dir: &Path, cfg: &cc::Build) { - // NOTE: because we're recompiling the same source file in N different ways, building - // serially is necessary. If we want to lift this restriction, we can either: - // - create symlinks to lse.S and build those_(though we'd still need to pass special - // #define-like flags to each of these), or - // - synthesizing tiny .S files in out/ with the proper #defines, which ultimately #include - // lse.S. - // That said, it's unclear how useful this added complexity will be, so just do the simple - // thing for now. + fn build_aarch64_out_of_line_atomics_libraries(builtins_dir: &Path, cfg: &mut cc::Build) { + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); let outlined_atomics_file = builtins_dir.join("aarch64/lse.S"); println!("cargo:rerun-if-changed={}", outlined_atomics_file.display()); - // Ideally, this would be a Vec of object files, but cc doesn't make it *entirely* - // trivial to build an individual object. 
+ cfg.include(&builtins_dir); + for instruction_type in &["cas", "swp", "ldadd", "ldclr", "ldeor", "ldset"] { for size in &[1, 2, 4, 8, 16] { if *size == 16 && *instruction_type != "cas" { @@ -546,20 +541,30 @@ mod c { for (model_number, model_name) in &[(1, "relax"), (2, "acq"), (3, "rel"), (4, "acq_rel")] { - let library_name = format!( - "liboutline_atomic_helper_{}{}_{}.a", - instruction_type, size, model_name + // The original compiler-rt build system compiles the same + // source file multiple times with different compiler + // options. Here we do something slightly different: we + // create multiple .S files with the proper #defines and + // then include the original file. + // + // This is needed because the cc crate doesn't allow us to + // override the name of object files and libtool requires + // all objects in an archive to have unique names. + let path = + out_dir.join(format!("lse_{}{}_{}.S", instruction_type, size, model_name)); + let mut file = File::create(&path).unwrap(); + writeln!(file, "#define L_{}", instruction_type).unwrap(); + writeln!(file, "#define SIZE {}", size).unwrap(); + writeln!(file, "#define MODEL {}", model_number).unwrap(); + writeln!( + file, + "#include \"{}\"", + outlined_atomics_file.canonicalize().unwrap().display() ); - let sym = format!("__aarch64_{}{}_{}", instruction_type, size, model_name); - let mut cfg = cfg.clone(); - - cfg.include(&builtins_dir) - .define(&format!("L_{}", instruction_type), None) - .define("SIZE", size.to_string().as_str()) - .define("MODEL", model_number.to_string().as_str()) - .file(&outlined_atomics_file); - cfg.compile(&library_name); + drop(file); + cfg.file(path); + let sym = format!("__aarch64_{}{}_{}", instruction_type, size, model_name); println!("cargo:rustc-cfg={}=\"optimized-c\"", sym); } } From 4d7a5306681f253b5b3f4280aef1be654040c4d1 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 28 Nov 2021 15:30:04 +0000 Subject: [PATCH 0481/1459] Bump to 0.1.55 --- Cargo.toml | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8beb767d4..7a894b958 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.54" +version = "0.1.55" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 6e2b9461b91c2e3b5ba8a0b0993855d9698c38c7 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 9 Dec 2021 23:51:18 +0000 Subject: [PATCH 0482/1459] Import the asm! macro from core::arch It is going to be removed from the prelude due to the decision in https://github.com/rust-lang/rust/issues/87228 --- build.rs | 3 ++- src/arm.rs | 16 ++++++------ src/int/leading_zeros.rs | 2 ++ src/int/specialized_div_rem/mod.rs | 4 +-- src/lib.rs | 2 ++ src/mem/x86_64.rs | 10 +++---- src/probestack.rs | 42 +++++++++++++++++------------- src/x86.rs | 6 ++--- src/x86_64.rs | 6 ++--- 9 files changed, 51 insertions(+), 40 deletions(-) diff --git a/build.rs b/build.rs index 90b8a04fa..922cd07a0 100644 --- a/build.rs +++ b/build.rs @@ -560,7 +560,8 @@ mod c { file, "#include \"{}\"", outlined_atomics_file.canonicalize().unwrap().display() - ); + ) + .unwrap(); drop(file); cfg.file(path); diff --git a/src/arm.rs b/src/arm.rs index 7203d91e4..3660825aa 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -11,7 +11,7 @@ use core::intrinsics; #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe extern "C" fn __aeabi_uidivmod() { - asm!( + core::arch::asm!( "push {{lr}}", "sub sp, sp, #4", "mov r2, sp", @@ -27,7 +27,7 @@ pub unsafe extern "C" fn __aeabi_uidivmod() { #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe extern "C" fn __aeabi_uidivmod() { - asm!( + core::arch::asm!( "push {{lr}}", "sub sp, sp, #4", "mov r2, sp", @@ -43,7 +43,7 @@ pub unsafe extern "C" fn __aeabi_uidivmod() { #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe extern "C" 
fn __aeabi_uldivmod() { - asm!( + core::arch::asm!( "push {{r4, lr}}", "sub sp, sp, #16", "add r4, sp, #8", @@ -61,7 +61,7 @@ pub unsafe extern "C" fn __aeabi_uldivmod() { #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe extern "C" fn __aeabi_uldivmod() { - asm!( + core::arch::asm!( "push {{r4, lr}}", "sub sp, sp, #16", "add r4, sp, #8", @@ -79,7 +79,7 @@ pub unsafe extern "C" fn __aeabi_uldivmod() { #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe extern "C" fn __aeabi_idivmod() { - asm!( + core::arch::asm!( "push {{r0, r1, r4, lr}}", "bl __aeabi_idiv", "pop {{r1, r2}}", @@ -94,7 +94,7 @@ pub unsafe extern "C" fn __aeabi_idivmod() { #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe extern "C" fn __aeabi_idivmod() { - asm!( + core::arch::asm!( "push {{r0, r1, r4, lr}}", "bl ___aeabi_idiv", "pop {{r1, r2}}", @@ -109,7 +109,7 @@ pub unsafe extern "C" fn __aeabi_idivmod() { #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe extern "C" fn __aeabi_ldivmod() { - asm!( + core::arch::asm!( "push {{r4, lr}}", "sub sp, sp, #16", "add r4, sp, #8", @@ -127,7 +127,7 @@ pub unsafe extern "C" fn __aeabi_ldivmod() { #[naked] #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe extern "C" fn __aeabi_ldivmod() { - asm!( + core::arch::asm!( "push {{r4, lr}}", "sub sp, sp, #16", "add r4, sp, #8", diff --git a/src/int/leading_zeros.rs b/src/int/leading_zeros.rs index 0265b9a9d..9e60ab0d7 100644 --- a/src/int/leading_zeros.rs +++ b/src/int/leading_zeros.rs @@ -5,6 +5,7 @@ public_test_dep! { /// Returns the number of leading binary zeros in `x`. +#[allow(dead_code)] pub(crate) fn usize_leading_zeros_default(x: usize) -> usize { // The basic idea is to test if the higher bits of `x` are zero and bisect the number // of leading zeros. 
It is possible for all branches of the bisection to use the same @@ -78,6 +79,7 @@ pub(crate) fn usize_leading_zeros_default(x: usize) -> usize { public_test_dep! { /// Returns the number of leading binary zeros in `x`. +#[allow(dead_code)] pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize { let mut x = x; // the number of potential leading zeros diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs index f5b2af235..6ec4675df 100644 --- a/src/int/specialized_div_rem/mod.rs +++ b/src/int/specialized_div_rem/mod.rs @@ -184,7 +184,7 @@ unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) { // divides the combined registers rdx:rax (`duo` is split into two 64 bit parts to do this) // by `div`. The quotient is stored in rax and the remainder in rdx. // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. - asm!( + core::arch::asm!( "div {0}", in(reg) div, inlateout("rax") duo_lo => quo, @@ -271,7 +271,7 @@ unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) { // divides the combined registers rdx:rax (`duo` is split into two 32 bit parts to do this) // by `div`. The quotient is stored in rax and the remainder in rdx. // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. - asm!( + core::arch::asm!( "div {0}", in(reg) div, inlateout("rax") duo_lo => quo, diff --git a/src/lib.rs b/src/lib.rs index fcafb8978..c3eefbc03 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,6 +18,8 @@ #![allow(improper_ctypes, improper_ctypes_definitions)] // `mem::swap` cannot be used because it may generate references to memcpy in unoptimized code. #![allow(clippy::manual_swap)] +// Support compiling on both stage0 and stage1 which may differ in supported stable features. +#![allow(stable_features)] // We disable #[no_mangle] for tests so that we can verify the test results // against the native compiler-rt implementations of the builtins. 
diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index abdb8eb67..a7ab6f605 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -20,7 +20,7 @@ #[cfg(target_feature = "ermsb")] pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. - asm!( + core::arch::asm!( "repe movsb (%rsi), (%rdi)", inout("rcx") count => _, inout("rdi") dest => _, @@ -35,7 +35,7 @@ pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { let qword_count = count >> 3; let byte_count = count & 0b111; // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. - asm!( + core::arch::asm!( "repe movsq (%rsi), (%rdi)", "mov {byte_count:e}, %ecx", "repe movsb (%rsi), (%rdi)", @@ -52,7 +52,7 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { let qword_count = count >> 3; let byte_count = count & 0b111; // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. - asm!( + core::arch::asm!( "std", "repe movsq (%rsi), (%rdi)", "movl {byte_count:e}, %ecx", @@ -72,7 +72,7 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { #[cfg(target_feature = "ermsb")] pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. - asm!( + core::arch::asm!( "repe stosb %al, (%rdi)", inout("rcx") count => _, inout("rdi") dest => _, @@ -87,7 +87,7 @@ pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { let qword_count = count >> 3; let byte_count = count & 0b111; // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. 
- asm!( + core::arch::asm!( "repe stosq %rax, (%rdi)", "mov {byte_count:e}, %ecx", "repe stosb %al, (%rdi)", diff --git a/src/probestack.rs b/src/probestack.rs index 4d6cd6949..0c30384db 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -48,8 +48,6 @@ #![cfg(not(feature = "no-asm"))] // We only define stack probing for these architectures today. #![cfg(any(target_arch = "x86_64", target_arch = "x86"))] -// We need to add .att_syntax for bootstraping the new global_asm! -#![allow(unknown_lints, bad_asm_style)] extern "C" { pub fn __rust_probestack(); @@ -65,7 +63,6 @@ macro_rules! define_rust_probestack { ($body: expr) => { concat!( " - .att_syntax .pushsection .text.__rust_probestack .globl __rust_probestack .type __rust_probestack, @function @@ -86,7 +83,6 @@ macro_rules! define_rust_probestack { ($body: expr) => { concat!( " - .att_syntax .globl __rust_probestack __rust_probestack: ", @@ -102,7 +98,6 @@ macro_rules! define_rust_probestack { ($body: expr) => { concat!( " - .att_syntax .globl ___rust_probestack ___rust_probestack: ", @@ -117,7 +112,6 @@ macro_rules! define_rust_probestack { ($body: expr) => { concat!( " - .att_syntax .globl ___rust_probestack ___rust_probestack: ", @@ -137,8 +131,9 @@ macro_rules! define_rust_probestack { target_arch = "x86_64", not(all(target_env = "sgx", target_vendor = "fortanix")) ))] -global_asm!(define_rust_probestack!( - " +core::arch::global_asm!( + define_rust_probestack!( + " .cfi_startproc pushq %rbp .cfi_adjust_cfa_offset 8 @@ -188,7 +183,9 @@ global_asm!(define_rust_probestack!( ret .cfi_endproc " -)); + ), + options(att_syntax) +); // This function is the same as above, except that some instructions are // [manually patched for LVI]. 
@@ -198,8 +195,9 @@ global_asm!(define_rust_probestack!( target_arch = "x86_64", all(target_env = "sgx", target_vendor = "fortanix") ))] -global_asm!(define_rust_probestack!( - " +core::arch::global_asm!( + define_rust_probestack!( + " .cfi_startproc pushq %rbp .cfi_adjust_cfa_offset 8 @@ -251,7 +249,9 @@ global_asm!(define_rust_probestack!( jmp *%r11 .cfi_endproc " -)); + ), + options(att_syntax) +); #[cfg(all(target_arch = "x86", not(target_os = "uefi")))] // This is the same as x86_64 above, only translated for 32-bit sizes. Note @@ -259,8 +259,9 @@ global_asm!(define_rust_probestack!( // function basically can't tamper with anything. // // The ABI here is the same as x86_64, except everything is 32-bits large. -global_asm!(define_rust_probestack!( - " +core::arch::global_asm!( + define_rust_probestack!( + " .cfi_startproc push %ebp .cfi_adjust_cfa_offset 4 @@ -291,7 +292,9 @@ global_asm!(define_rust_probestack!( ret .cfi_endproc " -)); + ), + options(att_syntax) +); #[cfg(all(target_arch = "x86", target_os = "uefi"))] // UEFI target is windows like target. LLVM will do _chkstk things like windows. @@ -304,8 +307,9 @@ global_asm!(define_rust_probestack!( // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp // themselves. 
-global_asm!(define_rust_probestack!( - " +core::arch::global_asm!( + define_rust_probestack!( + " .cfi_startproc push %ebp .cfi_adjust_cfa_offset 4 @@ -341,4 +345,6 @@ global_asm!(define_rust_probestack!( ret .cfi_endproc " -)); + ), + options(att_syntax) +); diff --git a/src/x86.rs b/src/x86.rs index 4992de9da..abcc2bdb3 100644 --- a/src/x86.rs +++ b/src/x86.rs @@ -17,7 +17,7 @@ use core::intrinsics; #[naked] #[no_mangle] pub unsafe extern "C" fn ___chkstk_ms() { - asm!( + core::arch::asm!( "push %ecx", "push %eax", "cmp $0x1000,%eax", @@ -49,7 +49,7 @@ pub unsafe extern "C" fn ___chkstk_ms() { #[naked] #[no_mangle] pub unsafe extern "C" fn __alloca() { - asm!( + core::arch::asm!( "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable" options(noreturn, att_syntax) ); @@ -64,7 +64,7 @@ pub unsafe extern "C" fn __alloca() { #[naked] #[no_mangle] pub unsafe extern "C" fn ___chkstk() { - asm!( + core::arch::asm!( "push %ecx", "cmp $0x1000,%eax", "lea 8(%esp),%ecx", // esp before calling this routine -> ecx diff --git a/src/x86_64.rs b/src/x86_64.rs index b382b886c..ea3c99497 100644 --- a/src/x86_64.rs +++ b/src/x86_64.rs @@ -17,7 +17,7 @@ use core::intrinsics; #[naked] #[no_mangle] pub unsafe extern "C" fn ___chkstk_ms() { - asm!( + core::arch::asm!( "push %rcx", "push %rax", "cmp $0x1000,%rax", @@ -48,7 +48,7 @@ pub unsafe extern "C" fn ___chkstk_ms() { #[naked] #[no_mangle] pub unsafe extern "C" fn __alloca() { - asm!( + core::arch::asm!( "mov %rcx,%rax", // x64 _alloca is a normal function with parameter in rcx "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable" options(noreturn, att_syntax) @@ -64,7 +64,7 @@ pub unsafe extern "C" fn __alloca() { #[naked] #[no_mangle] pub unsafe extern "C" fn ___chkstk() { - asm!( + core::arch::asm!( "push %rcx", "cmp $0x1000,%rax", "lea 16(%rsp),%rcx", // rsp before calling this routine -> rcx From 102b7fa673442abdd50ce31cc8aba334dcc5ff79 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras 
Date: Fri, 10 Dec 2021 00:04:25 +0000 Subject: [PATCH 0483/1459] Fix clippy lints --- src/float/div.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/float/div.rs b/src/float/div.rs index 9ac1e87b4..528a8368d 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -132,8 +132,9 @@ where // This doubles the number of correct binary digits in the approximation // with each iteration, so after three iterations, we have about 28 binary // digits of accuracy. - let mut correction: u32; - correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); + + let mut correction: u32 = + negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32; correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32; @@ -342,8 +343,9 @@ where // This doubles the number of correct binary digits in the approximation // with each iteration, so after three iterations, we have about 28 binary // digits of accuracy. - let mut correction32: u32; - correction32 = negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32); + + let mut correction32: u32 = + negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32); recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32; correction32 = negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32); recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32; @@ -359,16 +361,15 @@ where // We need to perform one more iteration to get us to 56 binary digits; // The last iteration needs to happen with extra precision. 
let q63blo = CastInto::::cast(b_significand << 11.cast()); - let correction: u64; - let mut reciprocal: u64; - correction = negate_u64( + + let correction: u64 = negate_u64( (recip32 as u64) .wrapping_mul(q31b as u64) .wrapping_add((recip32 as u64).wrapping_mul(q63blo as u64) >> 32), ); let c_hi = (correction >> 32) as u32; let c_lo = correction as u32; - reciprocal = (recip32 as u64) + let mut reciprocal: u64 = (recip32 as u64) .wrapping_mul(c_hi as u64) .wrapping_add((recip32 as u64).wrapping_mul(c_lo as u64) >> 32); From 4111890053512d66871d318a6c7e9e7623210a3c Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 10 Dec 2021 00:11:25 +0000 Subject: [PATCH 0484/1459] Bump to 0.1.56 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7a894b958..4b2ed2de1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.55" +version = "0.1.56" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 67c763f4e9a2f1600612953ff38c68661367954a Mon Sep 17 00:00:00 2001 From: Ayrton Date: Sat, 11 Dec 2021 23:03:01 -0500 Subject: [PATCH 0485/1459] Add `__truncdfsf2` intrinsic This adds the truncdfsf2 intrinsic and a corresponding fuzz test case. The implementation of trunc is generic to make it easy to add truncdfhf2 and truncsfhf2 if rust ever gets `f16` support.
--- README.md | 2 +- src/float/mod.rs | 1 + src/float/trunc.rs | 118 ++++++++++++++++++++++++++++++++++++++++ testcrate/tests/misc.rs | 25 +++++++++ 4 files changed, 145 insertions(+), 1 deletion(-) create mode 100644 src/float/trunc.rs diff --git a/README.md b/README.md index a20d038f8..108b2679d 100644 --- a/README.md +++ b/README.md @@ -183,7 +183,7 @@ features = ["c"] - [x] subdf3.c - [x] subsf3.c - [ ] truncdfhf2.c -- [ ] truncdfsf2.c +- [x] truncdfsf2.c - [ ] truncsfhf2.c - [x] udivdi3.c - [x] udivmoddi4.c diff --git a/src/float/mod.rs b/src/float/mod.rs index 11680e7a9..01a5504d5 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -10,6 +10,7 @@ pub mod extend; pub mod mul; pub mod pow; pub mod sub; +pub mod trunc; public_test_dep! { /// Trait for some basic operations on floats diff --git a/src/float/trunc.rs b/src/float/trunc.rs new file mode 100644 index 000000000..a00994c76 --- /dev/null +++ b/src/float/trunc.rs @@ -0,0 +1,118 @@ +use float::Float; +use int::{CastInto, Int}; + +fn trunc(a: F) -> R +where + F::Int: CastInto, + F::Int: CastInto, + u64: CastInto, + u32: CastInto, + + R::Int: CastInto, + u32: CastInto, + F::Int: CastInto, +{ + let src_zero = F::Int::ZERO; + let src_one = F::Int::ONE; + let src_bits = F::BITS; + let src_exp_bias = F::EXPONENT_BIAS; + + let src_min_normal = F::IMPLICIT_BIT; + let src_significand_mask = F::SIGNIFICAND_MASK; + let src_infinity = F::EXPONENT_MASK; + let src_sign_mask = F::SIGN_MASK; + let src_abs_mask = src_sign_mask - src_one; + let round_mask = (src_one << (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)) - src_one; + let halfway = src_one << (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS - 1); + let src_qnan = src_one << (F::SIGNIFICAND_BITS - 1); + let src_nan_code = src_qnan - src_one; + + let dst_zero = R::Int::ZERO; + let dst_one = R::Int::ONE; + let dst_bits = R::BITS; + let dst_inf_exp = R::EXPONENT_MAX; + let dst_exp_bias = R::EXPONENT_BIAS; + + let underflow_exponent: F::Int = (src_exp_bias + 1 - 
dst_exp_bias).cast(); + let overflow_exponent: F::Int = (src_exp_bias + dst_inf_exp - dst_exp_bias).cast(); + let underflow: F::Int = underflow_exponent << F::SIGNIFICAND_BITS; + let overflow: F::Int = overflow_exponent << F::SIGNIFICAND_BITS; + + let dst_qnan = R::Int::ONE << (R::SIGNIFICAND_BITS - 1); + let dst_nan_code = dst_qnan - dst_one; + + let sign_bits_delta = F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS; + // Break a into a sign and representation of the absolute value. + let a_abs = a.repr() & src_abs_mask; + let sign = a.repr() & src_sign_mask; + let mut abs_result: R::Int; + + if a_abs.wrapping_sub(underflow) < a_abs.wrapping_sub(overflow) { + // The exponent of a is within the range of normal numbers in the + // destination format. We can convert by simply right-shifting with + // rounding and adjusting the exponent. + abs_result = (a_abs >> sign_bits_delta).cast(); + let tmp = src_exp_bias.wrapping_sub(dst_exp_bias) << R::SIGNIFICAND_BITS; + abs_result = abs_result.wrapping_sub(tmp.cast()); + + let round_bits = a_abs & round_mask; + if round_bits > halfway { + // Round to nearest. + abs_result += dst_one; + } else if round_bits == halfway { + // Tie to even. + abs_result += abs_result & dst_one; + }; + } else if a_abs > src_infinity { + // a is NaN. + // Conjure the result by beginning with infinity, setting the qNaN + // bit and inserting the (truncated) trailing NaN field. + abs_result = (dst_inf_exp << R::SIGNIFICAND_BITS).cast(); + abs_result |= dst_qnan; + abs_result |= dst_nan_code + & ((a_abs & src_nan_code) >> (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)).cast(); + } else if a_abs >= overflow { + // a overflows to infinity. + abs_result = (dst_inf_exp << R::SIGNIFICAND_BITS).cast(); + } else { + // a underflows on conversion to the destination type or is an exact + // zero. The result may be a denormal or zero. Extract the exponent + // to get the shift amount for the denormalization. 
+ let a_exp: u32 = (a_abs >> F::SIGNIFICAND_BITS).cast(); + let shift = src_exp_bias - dst_exp_bias - a_exp + 1; + + let significand = (a.repr() & src_significand_mask) | src_min_normal; + + // Right shift by the denormalization amount with sticky. + if shift > F::SIGNIFICAND_BITS { + abs_result = dst_zero; + } else { + let sticky = if (significand << (src_bits - shift)) != src_zero { + src_one + } else { + src_zero + }; + let denormalized_significand: F::Int = significand >> shift | sticky; + abs_result = + (denormalized_significand >> (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)).cast(); + let round_bits = denormalized_significand & round_mask; + // Round to nearest + if round_bits > halfway { + abs_result += dst_one; + } + // Ties to even + else if round_bits == halfway { + abs_result += abs_result & dst_one; + }; + } + } + + // Apply the signbit to the absolute value. + R::from_repr(abs_result | sign.wrapping_shr(src_bits - dst_bits).cast()) +} + +intrinsics! { + pub extern "C" fn __truncdfsf2(a: f64) -> f32 { + trunc(a) + } +} diff --git a/testcrate/tests/misc.rs b/testcrate/tests/misc.rs index 5f74e0063..773153c43 100644 --- a/testcrate/tests/misc.rs +++ b/testcrate/tests/misc.rs @@ -179,3 +179,28 @@ fn float_pow() { f64, 1e-12, __powidf2; ); } + +macro_rules! 
trunc { + ($fX:ident, $fD:ident, $fn:ident) => { + fuzz_float(N, |x: $fX| { + let tmp0 = x as $fD; + let tmp1: $fD = $fn(x); + if !Float::eq_repr(tmp0, tmp1) { + panic!( + "{}({}): std: {}, builtins: {}", + stringify!($fn), + x, + tmp0, + tmp1 + ); + } + }); + }; +} + +#[test] +fn float_trunc() { + use compiler_builtins::float::trunc::__truncdfsf2; + + trunc!(f64, f32, __truncdfsf2); +} From 080f1fa4779fafd93351d2dd5044a366cba4b6df Mon Sep 17 00:00:00 2001 From: Ayrton Date: Sun, 12 Dec 2021 15:19:05 -0500 Subject: [PATCH 0486/1459] Add attribute for ARM alias --- src/float/trunc.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/float/trunc.rs b/src/float/trunc.rs index a00994c76..d3890e040 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -112,6 +112,7 @@ where } intrinsics! { + #[arm_aeabi_alias = __aeabi_d2f] pub extern "C" fn __truncdfsf2(a: f64) -> f32 { trunc(a) } From 13051ed71ee7f718a6193e5c7830265a3e519076 Mon Sep 17 00:00:00 2001 From: Ayrton Date: Sun, 12 Dec 2021 15:36:09 -0500 Subject: [PATCH 0487/1459] Add `__truncdfsf2vfp` for ARM --- src/float/trunc.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/float/trunc.rs b/src/float/trunc.rs index d3890e040..5f846c669 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -116,4 +116,9 @@ intrinsics! 
{ pub extern "C" fn __truncdfsf2(a: f64) -> f32 { trunc(a) } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __truncdfsf2vfp(a: f64) -> f32 { + a as f32 + } } From f654edbaee919e337a473aaf6d03744d4de100f3 Mon Sep 17 00:00:00 2001 From: Ayrton Date: Sun, 12 Dec 2021 21:12:42 -0500 Subject: [PATCH 0488/1459] Remove truncdfsf2.c from sources in build.rs and add test for __truncdfsf2vfp Also fixed the calling convention for truncdfsf2 on ARM --- README.md | 2 +- build.rs | 1 - examples/intrinsics.rs | 11 ++--------- src/float/trunc.rs | 1 + testcrate/tests/misc.rs | 8 ++++++++ 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 108b2679d..8b25558a8 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ features = ["c"] - [x] arm/softfloat-alias.list - [x] arm/subdf3vfp.S - [x] arm/subsf3vfp.S -- [ ] arm/truncdfsf2vfp.S +- [x] arm/truncdfsf2vfp.S - [ ] arm/udivmodsi4.S (generic version is done) - [ ] arm/udivsi3.S (generic version is done) - [ ] arm/umodsi3.S (generic version is done) diff --git a/build.rs b/build.rs index 922cd07a0..dc1cd1d23 100644 --- a/build.rs +++ b/build.rs @@ -227,7 +227,6 @@ mod c { ("__negsf2", "negsf2.c"), ("__powixf2", "powixf2.c"), ("__truncdfhf2", "truncdfhf2.c"), - ("__truncdfsf2", "truncdfsf2.c"), ("__truncsfhf2", "truncsfhf2.c"), ]); } diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index dfa46b1b8..0ca30c215 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -24,16 +24,9 @@ extern "C" {} // have an additional comment: the function name is the ARM name for the intrinsic and the comment // in the non-ARM name for the intrinsic. mod intrinsics { - // trunccdfsf2 + // truncdfsf2 pub fn aeabi_d2f(x: f64) -> f32 { - // This is only implemented in C currently, so only test it there. 
- #[cfg(feature = "c")] - return x as f32; - #[cfg(not(feature = "c"))] - { - drop(x); - 0.0 - } + x as f32 } // fixdfsi diff --git a/src/float/trunc.rs b/src/float/trunc.rs index 5f846c669..d73713084 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -112,6 +112,7 @@ where } intrinsics! { + #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_d2f] pub extern "C" fn __truncdfsf2(a: f64) -> f32 { trunc(a) diff --git a/testcrate/tests/misc.rs b/testcrate/tests/misc.rs index 773153c43..537ba1e60 100644 --- a/testcrate/tests/misc.rs +++ b/testcrate/tests/misc.rs @@ -204,3 +204,11 @@ fn float_trunc() { trunc!(f64, f32, __truncdfsf2); } + +#[cfg(target_arch = "arm")] +#[test] +fn float_trunc_arm() { + use compiler_builtins::float::trunc::__truncdfsf2vfp; + + trunc!(f64, f32, __truncdfsf2vfp); +} From ea0cb5b589cc498d629c545e9bae600301ba6aed Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Mon, 13 Dec 2021 02:25:49 +0000 Subject: [PATCH 0489/1459] Bump to 0.1.66 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4b2ed2de1..3f4c96745 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.65" +version = "0.1.66" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 7dae141f103b91ba86a199773db1fa44662267b9 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Mon, 3 Jan 2022 19:00:31 +0100 Subject: [PATCH 0490/1459] Disable powerpc64 tests which were broken by the LLVM 13 upgrade --- libm/ci/run-docker.sh | 2 +- libm/src/math/ceilf.rs | 2 ++ libm/src/math/fabsf.rs | 2 ++ libm/src/math/floorf.rs | 2 ++ libm/src/math/j1f.rs | 2 ++ libm/src/math/roundf.rs | 2 ++ libm/src/math/sincosf.rs | 2 ++ libm/src/math/sqrtf.rs | 2 ++ libm/src/math/truncf.rs | 2 ++ 9 files changed, 17 insertions(+), 1 deletion(-) diff --git a/libm/ci/run-docker.sh b/libm/ci/run-docker.sh index 
e7b80c719..c7ad60fd4 100755 --- a/libm/ci/run-docker.sh +++ b/libm/ci/run-docker.sh @@ -18,7 +18,7 @@ run() { --user $(id -u):$(id -g) \ -e CARGO_HOME=/cargo \ -e CARGO_TARGET_DIR=/target \ - -v $(dirname $(dirname `which cargo`)):/cargo \ + -v "${HOME}/.cargo":/cargo \ -v `pwd`/target:/target \ -v `pwd`:/checkout:ro \ -v `rustc --print sysroot`:/rust:ro \ diff --git a/libm/src/math/ceilf.rs b/libm/src/math/ceilf.rs index f1edbd061..7bcc647ca 100644 --- a/libm/src/math/ceilf.rs +++ b/libm/src/math/ceilf.rs @@ -40,6 +40,8 @@ pub fn ceilf(x: f32) -> f32 { f32::from_bits(ui) } +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::*; diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs index 6655c4c3c..23f3646dc 100644 --- a/libm/src/math/fabsf.rs +++ b/libm/src/math/fabsf.rs @@ -14,6 +14,8 @@ pub fn fabsf(x: f32) -> f32 { f32::from_bits(x.to_bits() & 0x7fffffff) } +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::*; diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs index 287f08642..dfdab91a0 100644 --- a/libm/src/math/floorf.rs +++ b/libm/src/math/floorf.rs @@ -40,6 +40,8 @@ pub fn floorf(x: f32) -> f32 { f32::from_bits(ui) } +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::*; diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs index 5095894d7..33694908c 100644 --- a/libm/src/math/j1f.rs +++ b/libm/src/math/j1f.rs @@ -357,6 +357,8 @@ fn qonef(x: f32) -> f32 { return (0.375 + r / s) / x; } +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::{j1f, y1f}; diff --git a/libm/src/math/roundf.rs 
b/libm/src/math/roundf.rs index c0872a782..becdb5620 100644 --- a/libm/src/math/roundf.rs +++ b/libm/src/math/roundf.rs @@ -7,6 +7,8 @@ pub fn roundf(x: f32) -> f32 { truncf(x + copysignf(0.5 - 0.25 * f32::EPSILON, x)) } +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::roundf; diff --git a/libm/src/math/sincosf.rs b/libm/src/math/sincosf.rs index 2725caad6..83c9f40ee 100644 --- a/libm/src/math/sincosf.rs +++ b/libm/src/math/sincosf.rs @@ -122,6 +122,8 @@ pub fn sincosf(x: f32) -> (f32, f32) { } } +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::sincosf; diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index ee868c8c8..00b20e578 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -128,6 +128,8 @@ pub fn sqrtf(x: f32) -> f32 { } } +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::*; diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs index a4c001629..20d5b73bd 100644 --- a/libm/src/math/truncf.rs +++ b/libm/src/math/truncf.rs @@ -31,6 +31,8 @@ pub fn truncf(x: f32) -> f32 { f32::from_bits(i) } +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { #[test] From 826ea3208bdda038dab283369133d38be099282f Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Mon, 3 Jan 2022 19:06:54 +0100 Subject: [PATCH 0491/1459] Disable i686-unknown-linux-gnu tests for now --- libm/.github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 80ce4ebd5..decd71f5c 100644 --- a/libm/.github/workflows/main.yml 
+++ b/libm/.github/workflows/main.yml @@ -12,7 +12,7 @@ jobs: - arm-unknown-linux-gnueabi - arm-unknown-linux-gnueabihf - armv7-unknown-linux-gnueabihf - - i686-unknown-linux-gnu + # - i686-unknown-linux-gnu - mips-unknown-linux-gnu - mips64-unknown-linux-gnuabi64 - mips64el-unknown-linux-gnuabi64 From ea14905611f2d614d4ed0b904c5c5e47a21bd319 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Mon, 3 Jan 2022 19:52:31 +0100 Subject: [PATCH 0492/1459] Ignore some functions which don't match musl --- libm/build.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/libm/build.rs b/libm/build.rs index 9af6dec93..13c3fa1cc 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -26,7 +26,19 @@ mod musl_reference_tests { // These files are all internal functions or otherwise miscellaneous, not // defining a function we want to test. - const IGNORED_FILES: &[&str] = &["fenv.rs"]; + const IGNORED_FILES: &[&str] = &[ + "fenv.rs", + // These are giving slightly different results compared to musl + "lgamma.rs", + "lgammaf.rs", + "tgamma.rs", + "j0.rs", + "j0f.rs", + "jn.rs", + "jnf.rs", + "j1.rs", + "j1f.rs", + ]; struct Function { name: String, From 66b1ccaaedc9e10895f38d05602d774c961a5864 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Mon, 3 Jan 2022 21:32:08 +0100 Subject: [PATCH 0493/1459] Fix no-panic --- libm/Cargo.toml | 4 ++++ libm/ci/run.sh | 3 +++ 2 files changed, 7 insertions(+) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 106de51df..3a6c5851b 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -33,3 +33,7 @@ no-panic = "0.1.8" [build-dependencies] rand = { version = "0.6.5", optional = true } + +# This is needed for no-panic to correctly detect the lack of panics +[profile.release] +lto = "fat" diff --git a/libm/ci/run.sh b/libm/ci/run.sh index ed253ab0d..d0cd42a8d 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -5,6 +5,9 @@ TARGET=$1 CMD="cargo test --all --target $TARGET" +# Needed for no-panic to correct detect a 
lack of panics +export RUSTFLAGS="$RUSTFLAGS -Ccodegen-units=1" + # stable by default $CMD $CMD --release From 9e96776e94995899ce754f91bcf06156ae00adf7 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Tue, 4 Jan 2022 00:09:07 +0100 Subject: [PATCH 0494/1459] Disable musl tests on powerpc64 --- libm/build.rs | 7 +++++++ libm/src/lib.rs | 2 ++ 2 files changed, 9 insertions(+) diff --git a/libm/build.rs b/libm/build.rs index 13c3fa1cc..80145a9cc 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -18,6 +18,7 @@ fn main() { mod musl_reference_tests { use rand::seq::SliceRandom; use rand::Rng; + use std::env; use std::fs; use std::process::Command; @@ -60,6 +61,12 @@ mod musl_reference_tests { } pub fn generate() { + // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 + let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); + if target_arch == "powerpc64" { + return; + } + let files = fs::read_dir("src/math") .unwrap() .map(|f| f.unwrap().path()) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index bbc79ecad..29742b451 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -51,5 +51,7 @@ pub fn _eq(a: f64, b: f64) -> Result<(), u64> { } } +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] #[cfg(all(test, feature = "musl-reference-tests"))] include!(concat!(env!("OUT_DIR"), "/musl-tests.rs")); From dc7d27434ba47e7b3bdecf8227a3775251a86c89 Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Wed, 22 Dec 2021 00:56:18 +0000 Subject: [PATCH 0495/1459] force test_near_pi in rem_pio2.rs to be evaluated at runtime not compiletime. 
--- libm/src/math/rem_pio2.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 46f7c38ff..f58fa359b 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -190,20 +190,28 @@ mod tests { #[test] fn test_near_pi() { + let arg = 3.141592025756836; + force_eval!(arg); assert_eq!( - rem_pio2(3.141592025756836), + rem_pio2(arg), (2, -6.278329573009626e-7, -2.1125998133974653e-23) ); + let arg = 3.141592033207416; + force_eval!(arg); assert_eq!( - rem_pio2(3.141592033207416), + rem_pio2(arg), (2, -6.20382377148128e-7, -2.1125998133974653e-23) ); + let arg = 3.141592144966125; + force_eval!(arg); assert_eq!( - rem_pio2(3.141592144966125), + rem_pio2(arg), (2, -5.086236681942706e-7, -2.1125998133974653e-23) ); + let arg = 3.141592979431152; + force_eval!(arg); assert_eq!( - rem_pio2(3.141592979431152), + rem_pio2(arg), (2, 3.2584135866119817e-7, -2.1125998133974653e-23) ); } From d84d93e3282a6369c9cf69261876ffc338ecc6ab Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Tue, 5 Jan 2021 17:32:30 +0000 Subject: [PATCH 0496/1459] Fix testcases on x87 --- libm/src/math/ceil.rs | 19 +++++++++++++++++++ libm/src/math/floor.rs | 19 +++++++++++++++++++ libm/src/math/j1f.rs | 6 +++++- libm/src/math/rem_pio2f.rs | 4 +++- libm/src/math/sincosf.rs | 36 ++++++++++++++++++++++++++++++++---- 5 files changed, 78 insertions(+), 6 deletions(-) diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index eda28b9a0..22d892971 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -1,3 +1,4 @@ +#![allow(unreachable_code)] use core::f64; const TOINT: f64 = 1. 
/ f64::EPSILON; @@ -15,6 +16,24 @@ pub fn ceil(x: f64) -> f64 { return unsafe { ::core::intrinsics::ceilf64(x) } } } + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + { + //use an alternative implementation on x86, because the + //main implementation fails with the x87 FPU used by + //debian i386, probablly due to excess precision issues. + //basic implementation taken from https://github.com/rust-lang/libm/issues/219 + use super::fabs; + if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { + let truncated = x as i64 as f64; + if truncated < x { + return truncated + 1.0; + } else { + return truncated; + } + } else { + return x; + } + } let u: u64 = x.to_bits(); let e: i64 = (u >> 52 & 0x7ff) as i64; let y: f64; diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index b2b760570..d09f9a1a1 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -1,3 +1,4 @@ +#![allow(unreachable_code)] use core::f64; const TOINT: f64 = 1. / f64::EPSILON; @@ -15,6 +16,24 @@ pub fn floor(x: f64) -> f64 { return unsafe { ::core::intrinsics::floorf64(x) } } } + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + { + //use an alternative implementation on x86, because the + //main implementation fails with the x87 FPU used by + //debian i386, probablly due to excess precision issues. 
+ //basic implementation taken from https://github.com/rust-lang/libm/issues/219 + use super::fabs; + if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { + let truncated = x as i64 as f64; + if truncated > x { + return truncated - 1.0; + } else { + return truncated; + } + } else { + return x; + } + } let ui = x.to_bits(); let e = ((ui >> 52) & 0x7ff) as i32; diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs index 33694908c..225b719bf 100644 --- a/libm/src/math/j1f.rs +++ b/libm/src/math/j1f.rs @@ -369,6 +369,10 @@ mod tests { } #[test] fn test_y1f_2002() { - assert_eq!(y1f(2.0000002_f32), -0.10703229_f32); + //allow slightly different result on x87 + let res = y1f(2.0000002_f32); + if res != -0.10703231_f32 { + assert_eq!(res, -0.10703229_f32); + } } } diff --git a/libm/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs index 5d392ba2d..4d4499b98 100644 --- a/libm/src/math/rem_pio2f.rs +++ b/libm/src/math/rem_pio2f.rs @@ -43,7 +43,9 @@ pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) { if ix < 0x4dc90fdb { /* |x| ~< 2^28*(pi/2), medium size */ /* Use a specialized rint() to get fn. Assume round-to-nearest. */ - let f_n = x64 * INV_PIO2 + TOINT - TOINT; + // use to_bits and from_bits to force rounding to storage format on + // x87. + let f_n = f64::from_bits((x64 * INV_PIO2 + TOINT).to_bits()) - TOINT; return (f_n as i32, x64 - f_n * PIO2_1 - f_n * PIO2_1T); } if ix >= 0x7f800000 { diff --git a/libm/src/math/sincosf.rs b/libm/src/math/sincosf.rs index 83c9f40ee..5304e8ca0 100644 --- a/libm/src/math/sincosf.rs +++ b/libm/src/math/sincosf.rs @@ -147,10 +147,38 @@ mod tests { let (s_minus, c_minus) = sincosf(theta - 2. 
* PI); const TOLERANCE: f32 = 1e-6; - assert!((s - s_plus).abs() < TOLERANCE); - assert!((s - s_minus).abs() < TOLERANCE); - assert!((c - c_plus).abs() < TOLERANCE); - assert!((c - c_minus).abs() < TOLERANCE); + assert!( + (s - s_plus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + s_plus, + (s - s_plus).abs(), + TOLERANCE + ); + assert!( + (s - s_minus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + s_minus, + (s - s_minus).abs(), + TOLERANCE + ); + assert!( + (c - c_plus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + c, + c_plus, + (c - c_plus).abs(), + TOLERANCE + ); + assert!( + (c - c_minus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + c, + c_minus, + (c - c_minus).abs(), + TOLERANCE + ); } } } From 1b210191ed569a5ebd92c89f0fd09623ece8af49 Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Tue, 21 Dec 2021 22:41:29 +0000 Subject: [PATCH 0497/1459] Use force_eval instead of to_bits/from_bits combination, Using to_bits/from_bits to force conversion to storage format apparently doesn't work in release mode. Also add an architecture conditional to avoid pessimising other architectures. --- libm/src/math/rem_pio2f.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/libm/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs index 4d4499b98..3ce8f9ab1 100644 --- a/libm/src/math/rem_pio2f.rs +++ b/libm/src/math/rem_pio2f.rs @@ -43,9 +43,11 @@ pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) { if ix < 0x4dc90fdb { /* |x| ~< 2^28*(pi/2), medium size */ /* Use a specialized rint() to get fn. Assume round-to-nearest. */ - // use to_bits and from_bits to force rounding to storage format on - // x87. - let f_n = f64::from_bits((x64 * INV_PIO2 + TOINT).to_bits()) - TOINT; + let tmp = x64 * INV_PIO2 + TOINT; + // force rounding of tmp to it's storage format on x87 to avoid + // excess precision issues. 
+ #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(tmp); + let f_n = tmp - TOINT; return (f_n as i32, x64 - f_n * PIO2_1 - f_n * PIO2_1T); } if ix >= 0x7f800000 { From 705ce5f2d4847bacb6eb02ea1973a24045c29e17 Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Tue, 21 Dec 2021 23:53:06 +0000 Subject: [PATCH 0498/1459] Add forced rounding to storage format for x87 to rem_pio2.rs as well. --- libm/src/math/rem_pio2.rs | 6 +++- libm/src/math/sincos.rs | 74 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index f58fa359b..4ac9415b5 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -50,7 +50,11 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { fn medium(x: f64, ix: u32) -> (i32, f64, f64) { /* rint(x/(pi/2)), Assume round-to-nearest. */ - let f_n = x as f64 * INV_PIO2 + TO_INT - TO_INT; + let tmp = x as f64 * INV_PIO2 + TO_INT; + // force rounding of tmp to it's storage format on x87 to avoid + // excess precision issues. 
+ #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(tmp); + let f_n = tmp - TO_INT; let n = f_n as i32; let mut r = x - f_n * PIO2_1; let mut w = f_n * PIO2_1T; /* 1st round, good to 85 bits */ diff --git a/libm/src/math/sincos.rs b/libm/src/math/sincos.rs index d49f65c97..bfc4561f8 100644 --- a/libm/src/math/sincos.rs +++ b/libm/src/math/sincos.rs @@ -57,3 +57,77 @@ pub fn sincos(x: f64) -> (f64, f64) { _ => (0.0, 1.0), } } + +// These tests are based on those from sincosf.rs +#[cfg(test)] +mod tests { + use super::sincos; + + const TOLERANCE: f64 = 1e-6; + + #[test] + fn with_pi() { + let (s, c) = sincos(core::f64::consts::PI); + assert!( + (s - 0.0).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + 0.0, + (s - 0.0).abs(), + TOLERANCE + ); + assert!( + (c + 1.0).abs() < TOLERANCE, + "|{} + {}| = {} >= {}", + c, + 1.0, + (s + 1.0).abs(), + TOLERANCE + ); + } + + #[test] + fn rotational_symmetry() { + use core::f64::consts::PI; + const N: usize = 24; + for n in 0..N { + let theta = 2. * PI * (n as f64) / (N as f64); + let (s, c) = sincos(theta); + let (s_plus, c_plus) = sincos(theta + 2. * PI); + let (s_minus, c_minus) = sincos(theta - 2. * PI); + + assert!( + (s - s_plus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + s_plus, + (s - s_plus).abs(), + TOLERANCE + ); + assert!( + (s - s_minus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + s_minus, + (s - s_minus).abs(), + TOLERANCE + ); + assert!( + (c - c_plus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + c, + c_plus, + (c - c_plus).abs(), + TOLERANCE + ); + assert!( + (c - c_minus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + c, + c_minus, + (c - c_minus).abs(), + TOLERANCE + ); + } + } +} From e1762c48b8cdf3f101f27e0a9844f1284ad24904 Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Wed, 22 Dec 2021 00:21:25 +0000 Subject: [PATCH 0499/1459] round to storage format in some tests before comparison to prevent spurious errors on x87. 
--- libm/src/math/fma.rs | 5 ++++- libm/src/math/pow.rs | 2 ++ libm/src/math/sin.rs | 4 +++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index 85d842119..c20de94a5 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -218,7 +218,10 @@ mod tests { -0.00000000000000022204460492503126, ); - assert_eq!(fma(-0.992, -0.992, -0.992), -0.007936000000000007,); + let result = fma(-0.992, -0.992, -0.992); + //force rounding to storage format on x87 to prevent superious errors. + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(result); + assert_eq!(result, -0.007936000000000007,); } #[test] diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index f79680a05..3249e7eea 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -484,6 +484,8 @@ mod tests { let exp = expected(*val); let res = computed(*val); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(exp); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(res); assert!( if exp.is_nan() { res.is_nan() diff --git a/libm/src/math/sin.rs b/libm/src/math/sin.rs index 1329b41a9..a562aa6e4 100644 --- a/libm/src/math/sin.rs +++ b/libm/src/math/sin.rs @@ -81,5 +81,7 @@ pub fn sin(x: f64) -> f64 { fn test_near_pi() { let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707 let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7 - assert_eq!(sin(x), sx); + let result = sin(x); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(result); + assert_eq!(result, sx); } From 3ba9cd5070777063c4d7bfde12e87f89167f6a6d Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Tue, 4 Jan 2022 20:38:09 +0000 Subject: [PATCH 0500/1459] only allow x87-specific result in j1f.rs test on x87 --- libm/src/math/j1f.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs index 
225b719bf..775ff2b2e 100644 --- a/libm/src/math/j1f.rs +++ b/libm/src/math/j1f.rs @@ -371,8 +371,7 @@ mod tests { fn test_y1f_2002() { //allow slightly different result on x87 let res = y1f(2.0000002_f32); - if res != -0.10703231_f32 { - assert_eq!(res, -0.10703229_f32); - } + if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && (res == -0.10703231_f32) { return }; + assert_eq!(res, -0.10703229_f32); } } From d20af36f94a33ce66ebe8a73db7706e0fed0e7cf Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Tue, 4 Jan 2022 20:51:40 +0000 Subject: [PATCH 0501/1459] allow force_eval! to produce a result and use that result to more explicitly force rounding on x87. --- libm/src/math/fma.rs | 3 ++- libm/src/math/mod.rs | 2 +- libm/src/math/pow.rs | 6 ++++-- libm/src/math/rem_pio2.rs | 11 ++++++----- libm/src/math/rem_pio2f.rs | 3 ++- libm/src/math/sin.rs | 3 ++- 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index c20de94a5..516f9ad3a 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -220,7 +220,8 @@ mod tests { let result = fma(-0.992, -0.992, -0.992); //force rounding to storage format on x87 to prevent superious errors. - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(result); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let result = force_eval!(result); assert_eq!(result, -0.007936000000000007,); } diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index ceeee0b31..7f4c8bcf4 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -1,7 +1,7 @@ macro_rules! 
force_eval { ($e:expr) => { unsafe { - ::core::ptr::read_volatile(&$e); + ::core::ptr::read_volatile(&$e) } }; } diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 3249e7eea..6a19ae601 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -484,8 +484,10 @@ mod tests { let exp = expected(*val); let res = computed(*val); - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(exp); - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(res); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let exp = force_eval!(exp); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let res = force_eval!(res); assert!( if exp.is_nan() { res.is_nan() diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 4ac9415b5..644616f2d 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -53,7 +53,8 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { let tmp = x as f64 * INV_PIO2 + TO_INT; // force rounding of tmp to it's storage format on x87 to avoid // excess precision issues. 
- #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(tmp); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let tmp = force_eval!(tmp); let f_n = tmp - TO_INT; let n = f_n as i32; let mut r = x - f_n * PIO2_1; @@ -195,25 +196,25 @@ mod tests { #[test] fn test_near_pi() { let arg = 3.141592025756836; - force_eval!(arg); + let arg = force_eval!(arg); assert_eq!( rem_pio2(arg), (2, -6.278329573009626e-7, -2.1125998133974653e-23) ); let arg = 3.141592033207416; - force_eval!(arg); + let arg = force_eval!(arg); assert_eq!( rem_pio2(arg), (2, -6.20382377148128e-7, -2.1125998133974653e-23) ); let arg = 3.141592144966125; - force_eval!(arg); + let arg = force_eval!(arg); assert_eq!( rem_pio2(arg), (2, -5.086236681942706e-7, -2.1125998133974653e-23) ); let arg = 3.141592979431152; - force_eval!(arg); + let arg = force_eval!(arg); assert_eq!( rem_pio2(arg), (2, 3.2584135866119817e-7, -2.1125998133974653e-23) diff --git a/libm/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs index 3ce8f9ab1..775f5d750 100644 --- a/libm/src/math/rem_pio2f.rs +++ b/libm/src/math/rem_pio2f.rs @@ -46,7 +46,8 @@ pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) { let tmp = x64 * INV_PIO2 + TOINT; // force rounding of tmp to it's storage format on x87 to avoid // excess precision issues. 
- #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(tmp); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let tmp = force_eval!(tmp); let f_n = tmp - TOINT; return (f_n as i32, x64 - f_n * PIO2_1 - f_n * PIO2_1T); } diff --git a/libm/src/math/sin.rs b/libm/src/math/sin.rs index a562aa6e4..a53843dcd 100644 --- a/libm/src/math/sin.rs +++ b/libm/src/math/sin.rs @@ -82,6 +82,7 @@ fn test_near_pi() { let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707 let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7 let result = sin(x); - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(result); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let result = force_eval!(result); assert_eq!(result, sx); } From 786289572f44eabdcd93aa4f77d7a49d05be5e6e Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Wed, 22 Dec 2021 01:50:25 +0000 Subject: [PATCH 0502/1459] Apply formatting fixes from CI --- libm/src/math/j1f.rs | 5 ++++- libm/src/math/mod.rs | 4 +--- libm/src/math/sincos.rs | 28 ++++++++++++++-------------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs index 775ff2b2e..c39f8ff7e 100644 --- a/libm/src/math/j1f.rs +++ b/libm/src/math/j1f.rs @@ -371,7 +371,10 @@ mod tests { fn test_y1f_2002() { //allow slightly different result on x87 let res = y1f(2.0000002_f32); - if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && (res == -0.10703231_f32) { return }; + if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && (res == -0.10703231_f32) + { + return; + } assert_eq!(res, -0.10703229_f32); } } diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 7f4c8bcf4..81bfc53ed 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -1,8 +1,6 @@ macro_rules! 
force_eval { ($e:expr) => { - unsafe { - ::core::ptr::read_volatile(&$e) - } + unsafe { ::core::ptr::read_volatile(&$e) } }; } diff --git a/libm/src/math/sincos.rs b/libm/src/math/sincos.rs index bfc4561f8..4ab588412 100644 --- a/libm/src/math/sincos.rs +++ b/libm/src/math/sincos.rs @@ -69,21 +69,21 @@ mod tests { fn with_pi() { let (s, c) = sincos(core::f64::consts::PI); assert!( - (s - 0.0).abs() < TOLERANCE, - "|{} - {}| = {} >= {}", - s, - 0.0, - (s - 0.0).abs(), - TOLERANCE - ); + (s - 0.0).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + 0.0, + (s - 0.0).abs(), + TOLERANCE + ); assert!( - (c + 1.0).abs() < TOLERANCE, - "|{} + {}| = {} >= {}", - c, - 1.0, - (s + 1.0).abs(), - TOLERANCE - ); + (c + 1.0).abs() < TOLERANCE, + "|{} + {}| = {} >= {}", + c, + 1.0, + (s + 1.0).abs(), + TOLERANCE + ); } #[test] From 1aa071c9d92c83a9d2323e86478e11a9709dd5ab Mon Sep 17 00:00:00 2001 From: Daniel Sommermann Date: Thu, 27 Jan 2022 11:53:50 -0800 Subject: [PATCH 0503/1459] Stop emitting duplicate symbols for `armv7-linux-androideabi` The change in 186517b3266a7bb2b2310927f7342ea7f41790c3 was intended to affect only `arm-linux-androideabi` but also affected `armv7-linux-androideabi` which is not a pre-ARMv6 architecture. Fixes #449 --- build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.rs b/build.rs index dc1cd1d23..2b34540dc 100644 --- a/build.rs +++ b/build.rs @@ -83,7 +83,7 @@ fn main() { // rustc target (arm-linux-androideabi). 
if llvm_target[0] == "armv4t" || llvm_target[0] == "armv5te" - || llvm_target.get(2) == Some(&"androideabi") + || target == "arm-linux-androideabi" { println!("cargo:rustc-cfg=kernel_user_helpers") } From fa80502bcdd25405db90fb8ade1dcdc6f669db9e Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 28 Jan 2022 12:47:27 +0000 Subject: [PATCH 0504/1459] Bump to 0.1.67 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 3f4c96745..a8278f0b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.66" +version = "0.1.67" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From c3325dc909153da4c5499515159490335be05231 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 6 Feb 2022 09:20:19 +0000 Subject: [PATCH 0505/1459] Fix run-docker.sh so it can be run locally --- ci/run-docker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run-docker.sh b/ci/run-docker.sh index 4bb2a78d9..8c4af0eff 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -19,7 +19,7 @@ run() { -e CARGO_HOME=/cargo \ -e CARGO_TARGET_DIR=/target \ -e RUST_COMPILER_RT_ROOT \ - -v $(dirname $(dirname `which cargo`)):/cargo \ + -v "${HOME}/.cargo":/cargo \ -v `pwd`/target:/target \ -v `pwd`:/checkout:ro \ -v `rustc --print sysroot`:/rust:ro \ From fe04b8d4df2329d7ab8c7366f33e1f8572d51d64 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 6 Feb 2022 09:20:43 +0000 Subject: [PATCH 0506/1459] Wrap all intrinsics in the intrinsics! macro This ensures that each intrinsic ends up in a separate module, which in turn (because rustc treats compiler_builtins specially) will result in each intrinsic ending up in its own object file. This allows the linker to only pick up object files for intrinsics that are missing and avoids duplicate symbol definition errors. 
--- src/arm.rs | 376 +++++++++++++++++++---------------------------- src/arm_linux.rs | 21 +-- src/macros.rs | 119 ++++++++++++--- src/mem/mod.rs | 112 +++++++------- src/x86.rs | 148 +++++++++---------- src/x86_64.rs | 158 ++++++++++---------- 6 files changed, 469 insertions(+), 465 deletions(-) diff --git a/src/arm.rs b/src/arm.rs index 3660825aa..95bde5116 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -3,251 +3,181 @@ use core::intrinsics; -// NOTE This function and the ones below are implemented using assembly because they are using a -// custom calling convention which can't be implemented using a normal Rust function. -// NOTE The only difference between the iOS and non-iOS versions of those functions is that the iOS -// versions use 3 leading underscores in the names of called functions instead of 2. -#[cfg(not(any(target_os = "ios", target_env = "msvc")))] -#[naked] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe extern "C" fn __aeabi_uidivmod() { - core::arch::asm!( - "push {{lr}}", - "sub sp, sp, #4", - "mov r2, sp", - "bl __udivmodsi4", - "ldr r1, [sp]", - "add sp, sp, #4", - "pop {{pc}}", - options(noreturn) - ); -} - +// iOS symbols have a leading underscore. #[cfg(target_os = "ios")] -#[naked] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe extern "C" fn __aeabi_uidivmod() { - core::arch::asm!( - "push {{lr}}", - "sub sp, sp, #4", - "mov r2, sp", - "bl ___udivmodsi4", - "ldr r1, [sp]", - "add sp, sp, #4", - "pop {{pc}}", - options(noreturn) - ); +macro_rules! 
bl { + ($func:literal) => { + concat!("bl _", $func) + }; } - #[cfg(not(target_os = "ios"))] -#[naked] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe extern "C" fn __aeabi_uldivmod() { - core::arch::asm!( - "push {{r4, lr}}", - "sub sp, sp, #16", - "add r4, sp, #8", - "str r4, [sp]", - "bl __udivmoddi4", - "ldr r2, [sp, #8]", - "ldr r3, [sp, #12]", - "add sp, sp, #16", - "pop {{r4, pc}}", - options(noreturn) - ); -} - -#[cfg(target_os = "ios")] -#[naked] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe extern "C" fn __aeabi_uldivmod() { - core::arch::asm!( - "push {{r4, lr}}", - "sub sp, sp, #16", - "add r4, sp, #8", - "str r4, [sp]", - "bl ___udivmoddi4", - "ldr r2, [sp, #8]", - "ldr r3, [sp, #12]", - "add sp, sp, #16", - "pop {{r4, pc}}", - options(noreturn) - ); -} - -#[cfg(not(target_os = "ios"))] -#[naked] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe extern "C" fn __aeabi_idivmod() { - core::arch::asm!( - "push {{r0, r1, r4, lr}}", - "bl __aeabi_idiv", - "pop {{r1, r2}}", - "muls r2, r2, r0", - "subs r1, r1, r2", - "pop {{r4, pc}}", - options(noreturn) - ); -} - -#[cfg(target_os = "ios")] -#[naked] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe extern "C" fn __aeabi_idivmod() { - core::arch::asm!( - "push {{r0, r1, r4, lr}}", - "bl ___aeabi_idiv", - "pop {{r1, r2}}", - "muls r2, r2, r0", - "subs r1, r1, r2", - "pop {{r4, pc}}", - options(noreturn) - ); -} +macro_rules! bl { + ($func:literal) => { + concat!("bl ", $func) + }; +} + +intrinsics! { + // NOTE This function and the ones below are implemented using assembly because they are using a + // custom calling convention which can't be implemented using a normal Rust function. 
+ #[naked] + #[cfg(not(target_env = "msvc"))] + pub unsafe extern "C" fn __aeabi_uidivmod() { + core::arch::asm!( + "push {{lr}}", + "sub sp, sp, #4", + "mov r2, sp", + bl!("__udivmodsi4"), + "ldr r1, [sp]", + "add sp, sp, #4", + "pop {{pc}}", + options(noreturn) + ); + } -#[cfg(not(target_os = "ios"))] -#[naked] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe extern "C" fn __aeabi_ldivmod() { - core::arch::asm!( - "push {{r4, lr}}", - "sub sp, sp, #16", - "add r4, sp, #8", - "str r4, [sp]", - "bl __divmoddi4", - "ldr r2, [sp, #8]", - "ldr r3, [sp, #12]", - "add sp, sp, #16", - "pop {{r4, pc}}", - options(noreturn) - ); -} + #[naked] + pub unsafe extern "C" fn __aeabi_uldivmod() { + core::arch::asm!( + "push {{r4, lr}}", + "sub sp, sp, #16", + "add r4, sp, #8", + "str r4, [sp]", + bl!("__udivmoddi4"), + "ldr r2, [sp, #8]", + "ldr r3, [sp, #12]", + "add sp, sp, #16", + "pop {{r4, pc}}", + options(noreturn) + ); + } -#[cfg(target_os = "ios")] -#[naked] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe extern "C" fn __aeabi_ldivmod() { - core::arch::asm!( - "push {{r4, lr}}", - "sub sp, sp, #16", - "add r4, sp, #8", - "str r4, [sp]", - "bl ___divmoddi4", - "ldr r2, [sp, #8]", - "ldr r3, [sp, #12]", - "add sp, sp, #16", - "pop {{r4, pc}}", - options(noreturn) - ); -} + #[naked] + pub unsafe extern "C" fn __aeabi_idivmod() { + core::arch::asm!( + "push {{r0, r1, r4, lr}}", + bl!("__aeabi_idiv"), + "pop {{r1, r2}}", + "muls r2, r2, r0", + "subs r1, r1, r2", + "pop {{r4, pc}}", + options(noreturn) + ); + } -// The following functions use weak linkage to allow users to override -// with custom implementation. -// FIXME: The `*4` and `*8` variants should be defined as aliases. 
+ #[naked] + pub unsafe extern "C" fn __aeabi_ldivmod() { + core::arch::asm!( + "push {{r4, lr}}", + "sub sp, sp, #16", + "add r4, sp, #8", + "str r4, [sp]", + bl!("__divmoddi4"), + "ldr r2, [sp, #8]", + "ldr r3, [sp, #12]", + "add sp, sp, #16", + "pop {{r4, pc}}", + options(noreturn) + ); + } -#[cfg(not(target_os = "ios"))] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[linkage = "weak"] -pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) { - ::mem::memcpy(dest, src, n); -} + // The following functions use weak linkage to allow users to override + // with custom implementation. + // FIXME: The `*4` and `*8` variants should be defined as aliases. -#[cfg(not(target_os = "ios"))] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[linkage = "weak"] -pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, mut n: usize) { - // We are guaranteed 4-alignment, so accessing at u32 is okay. - let mut dest = dest as *mut u32; - let mut src = src as *mut u32; + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) { + ::mem::memcpy(dest, src, n); + } - while n >= 4 { - *dest = *src; - dest = dest.offset(1); - src = src.offset(1); - n -= 4; + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) { + // We are guaranteed 4-alignment, so accessing at u32 is okay. 
+ let mut dest = dest as *mut u32; + let mut src = src as *mut u32; + let mut n = n; + + while n >= 4 { + *dest = *src; + dest = dest.offset(1); + src = src.offset(1); + n -= 4; + } + + __aeabi_memcpy(dest as *mut u8, src as *const u8, n); } - __aeabi_memcpy(dest as *mut u8, src as *const u8, n); -} + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) { + __aeabi_memcpy4(dest, src, n); + } -#[cfg(not(target_os = "ios"))] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[linkage = "weak"] -pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) { - __aeabi_memcpy4(dest, src, n); -} + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) { + ::mem::memmove(dest, src, n); + } -#[cfg(not(target_os = "ios"))] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[linkage = "weak"] -pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) { - ::mem::memmove(dest, src, n); -} + #[cfg(not(any(target_os = "ios", target_env = "msvc")))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) { + __aeabi_memmove(dest, src, n); + } -#[cfg(not(any(target_os = "ios", target_env = "msvc")))] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[linkage = "weak"] -pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) { - __aeabi_memmove(dest, src, n); -} + #[cfg(not(any(target_os = "ios", target_env = "msvc")))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) { + __aeabi_memmove(dest, src, n); + } -#[cfg(not(any(target_os = "ios", target_env = "msvc")))] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[linkage = "weak"] -pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: 
*mut u8, src: *const u8, n: usize) { - __aeabi_memmove(dest, src, n); -} + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { + // Note the different argument order + ::mem::memset(dest, c, n); + } -#[cfg(not(target_os = "ios"))] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[linkage = "weak"] -pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { - // Note the different argument order - ::mem::memset(dest, c, n); -} + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) { + let mut dest = dest as *mut u32; + let mut n = n; -#[cfg(not(target_os = "ios"))] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[linkage = "weak"] -pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, mut n: usize, c: i32) { - let mut dest = dest as *mut u32; + let byte = (c as u32) & 0xff; + let c = (byte << 24) | (byte << 16) | (byte << 8) | byte; - let byte = (c as u32) & 0xff; - let c = (byte << 24) | (byte << 16) | (byte << 8) | byte; + while n >= 4 { + *dest = c; + dest = dest.offset(1); + n -= 4; + } - while n >= 4 { - *dest = c; - dest = dest.offset(1); - n -= 4; + __aeabi_memset(dest as *mut u8, n, byte as i32); } - __aeabi_memset(dest as *mut u8, n, byte as i32); -} - -#[cfg(not(target_os = "ios"))] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[linkage = "weak"] -pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) { - __aeabi_memset4(dest, n, c); -} + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) { + __aeabi_memset4(dest, n, c); + } -#[cfg(not(target_os = "ios"))] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[linkage = "weak"] -pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) { - __aeabi_memset(dest, n, 0); -} + 
#[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) { + __aeabi_memset(dest, n, 0); + } -#[cfg(not(any(target_os = "ios", target_env = "msvc")))] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[linkage = "weak"] -pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) { - __aeabi_memset4(dest, n, 0); -} + #[cfg(not(any(target_os = "ios", target_env = "msvc")))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) { + __aeabi_memset4(dest, n, 0); + } -#[cfg(not(any(target_os = "ios", target_env = "msvc")))] -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -#[linkage = "weak"] -pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { - __aeabi_memset4(dest, n, 0); + #[cfg(not(any(target_os = "ios", target_env = "msvc")))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { + __aeabi_memset4(dest, n, 0); + } } diff --git a/src/arm_linux.rs b/src/arm_linux.rs index e710c1ab9..df1723d99 100644 --- a/src/arm_linux.rs +++ b/src/arm_linux.rs @@ -90,17 +90,19 @@ unsafe fn atomic_cmpxchg(ptr: *mut T, oldval: u32, newval: u32) -> u32 { macro_rules! atomic_rmw { ($name:ident, $ty:ty, $op:expr) => { - #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - pub unsafe extern "C" fn $name(ptr: *mut $ty, val: $ty) -> $ty { - atomic_rmw(ptr, |x| $op(x as $ty, val) as u32) as $ty + intrinsics! { + pub unsafe extern "C" fn $name(ptr: *mut $ty, val: $ty) -> $ty { + atomic_rmw(ptr, |x| $op(x as $ty, val) as u32) as $ty + } } }; } macro_rules! atomic_cmpxchg { ($name:ident, $ty:ty) => { - #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - pub unsafe extern "C" fn $name(ptr: *mut $ty, oldval: $ty, newval: $ty) -> $ty { - atomic_cmpxchg(ptr, oldval as u32, newval as u32) as $ty + intrinsics! 
{ + pub unsafe extern "C" fn $name(ptr: *mut $ty, oldval: $ty, newval: $ty) -> $ty { + atomic_cmpxchg(ptr, oldval as u32, newval as u32) as $ty + } } }; } @@ -205,7 +207,8 @@ atomic_cmpxchg!(__sync_val_compare_and_swap_1, u8); atomic_cmpxchg!(__sync_val_compare_and_swap_2, u16); atomic_cmpxchg!(__sync_val_compare_and_swap_4, u32); -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe extern "C" fn __sync_synchronize() { - __kuser_memory_barrier(); +intrinsics! { + pub unsafe extern "C" fn __sync_synchronize() { + __kuser_memory_barrier(); + } } diff --git a/src/macros.rs b/src/macros.rs index 214f0795f..6926feac0 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -76,7 +76,7 @@ macro_rules! intrinsics { ( #[maybe_use_optimized_c_shim] $(#[$($attr:tt)*])* - pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } @@ -84,9 +84,9 @@ macro_rules! intrinsics { ) => ( #[cfg($name = "optimized-c")] - pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { extern $abi { - fn $name($($argname: $ty),*) -> $ret; + fn $name($($argname: $ty),*) $(-> $ret)?; } unsafe { $name($($argname),*) @@ -96,7 +96,7 @@ macro_rules! intrinsics { #[cfg(not($name = "optimized-c"))] intrinsics! { $(#[$($attr)*])* - pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } @@ -110,7 +110,7 @@ macro_rules! intrinsics { ( #[aapcs_on_arm] $(#[$($attr:tt)*])* - pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } @@ -119,7 +119,7 @@ macro_rules! intrinsics { #[cfg(target_arch = "arm")] intrinsics! 
{ $(#[$($attr)*])* - pub extern "aapcs" fn $name( $($argname: $ty),* ) -> $ret { + pub extern "aapcs" fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } @@ -127,7 +127,7 @@ macro_rules! intrinsics { #[cfg(not(target_arch = "arm"))] intrinsics! { $(#[$($attr)*])* - pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } @@ -140,7 +140,7 @@ macro_rules! intrinsics { ( #[unadjusted_on_win64] $(#[$($attr:tt)*])* - pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } @@ -149,7 +149,7 @@ macro_rules! intrinsics { #[cfg(all(windows, target_pointer_width = "64"))] intrinsics! { $(#[$($attr)*])* - pub extern "unadjusted" fn $name( $($argname: $ty),* ) -> $ret { + pub extern "unadjusted" fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } @@ -157,7 +157,7 @@ macro_rules! intrinsics { #[cfg(not(all(windows, target_pointer_width = "64")))] intrinsics! { $(#[$($attr)*])* - pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } @@ -175,7 +175,7 @@ macro_rules! intrinsics { ( #[win64_128bit_abi_hack] $(#[$($attr:tt)*])* - pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } @@ -183,7 +183,7 @@ macro_rules! intrinsics { ) => ( #[cfg(all(windows, target_arch = "x86_64"))] $(#[$($attr)*])* - pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } @@ -193,7 +193,7 @@ macro_rules! intrinsics { pub extern $abi fn $name( $($argname: $ty),* ) -> ::macros::win64_128bit_abi_hack::U64x2 { - let e: $ret = super::$name($($argname),*); + let e: $($ret)? 
= super::$name($($argname),*); ::macros::win64_128bit_abi_hack::U64x2::from(e) } } @@ -201,7 +201,7 @@ macro_rules! intrinsics { #[cfg(not(all(windows, target_arch = "x86_64")))] intrinsics! { $(#[$($attr)*])* - pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } @@ -216,21 +216,21 @@ macro_rules! intrinsics { ( #[arm_aeabi_alias = $alias:ident] $(#[$($attr:tt)*])* - pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } $($rest:tt)* ) => ( #[cfg(target_arch = "arm")] - pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } #[cfg(target_arch = "arm")] pub mod $name { #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } } @@ -238,7 +238,7 @@ macro_rules! intrinsics { #[cfg(target_arch = "arm")] pub mod $alias { #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - pub extern "aapcs" fn $alias( $($argname: $ty),* ) -> $ret { + pub extern "aapcs" fn $alias( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } } @@ -246,7 +246,57 @@ macro_rules! intrinsics { #[cfg(not(target_arch = "arm"))] intrinsics! { $(#[$($attr)*])* - pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // C mem* functions are only generated when the "mem" feature is enabled. + ( + #[mem_builtin] + $(#[$($attr:tt)*])* + pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? 
{ + $($body:tt)* + } + + $($rest:tt)* + ) => ( + $(#[$($attr)*])* + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + #[cfg(feature = "mem")] + pub mod $name { + $(#[$($attr)*])* + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + super::$name($($argname),*) + } + } + + intrinsics!($($rest)*); + ); + + // Naked functions are special: we can't generate wrappers for them since + // they use a custom calling convention. + ( + #[naked] + $(#[$($attr:tt)*])* + pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + pub mod $name { + #[naked] + $(#[$($attr)*])* + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } @@ -268,21 +318,46 @@ macro_rules! intrinsics { // input we were given. ( $(#[$($attr:tt)*])* - pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + pub mod $name { + $(#[$($attr)*])* + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + super::$name($($argname),*) + } + } + + intrinsics!($($rest)*); + ); + + // Same as the above for unsafe functions. + ( + $(#[$($attr:tt)*])* + pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } $($rest:tt)* ) => ( $(#[$($attr)*])* - pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? 
{ $($body)* } pub mod $name { $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } } diff --git a/src/mem/mod.rs b/src/mem/mod.rs index 2f9a9fd94..dce4d87e0 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -19,53 +19,55 @@ use core::ops::{BitOr, Shl}; )] mod impls; -#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] -#[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] -pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { - impls::copy_forward(dest, src, n); - dest -} - -#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] -#[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] -pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { - let delta = (dest as usize).wrapping_sub(src as usize); - if delta >= n { - // We can copy forwards because either dest is far enough ahead of src, - // or src is ahead of dest (and delta overflowed). +intrinsics! 
{ + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { impls::copy_forward(dest, src, n); - } else { - impls::copy_backward(dest, src, n); + dest } - dest -} -#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] -#[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] -pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 { - impls::set_bytes(s, c as u8, n); - s -} + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { + let delta = (dest as usize).wrapping_sub(src as usize); + if delta >= n { + // We can copy forwards because either dest is far enough ahead of src, + // or src is ahead of dest (and delta overflowed). + impls::copy_forward(dest, src, n); + } else { + impls::copy_backward(dest, src, n); + } + dest + } -#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] -#[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] -pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { - let mut i = 0; - while i < n { - let a = *s1.add(i); - let b = *s2.add(i); - if a != b { - return a as i32 - b as i32; + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn memset(s: *mut u8, c: crate::mem::c_int, n: usize) -> *mut u8 { + impls::set_bytes(s, c as u8, n); + s + } + + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { + let mut i = 0; + while i < n { + let a = *s1.add(i); + let b = *s2.add(i); + if a != b { + return a as i32 - b as i32; + } + i += 1; } - 
i += 1; + 0 } - 0 -} -#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] -#[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] -pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { - memcmp(s1, s2, n) + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { + memcmp(s1, s2, n) + } } // `bytes` must be a multiple of `mem::size_of::()` @@ -133,65 +135,65 @@ where intrinsics! { #[cfg(target_has_atomic_load_store = "8")] - pub extern "C" fn __llvm_memcpy_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () { + pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () { memcpy_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "16")] - pub extern "C" fn __llvm_memcpy_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () { + pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () { memcpy_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "32")] - pub extern "C" fn __llvm_memcpy_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () { + pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () { memcpy_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "64")] - pub extern "C" fn __llvm_memcpy_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () { + pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () { memcpy_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "128")] - pub extern "C" fn 
__llvm_memcpy_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () { + pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () { memcpy_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "8")] - pub extern "C" fn __llvm_memmove_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () { + pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () { memmove_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "16")] - pub extern "C" fn __llvm_memmove_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () { + pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () { memmove_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "32")] - pub extern "C" fn __llvm_memmove_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () { + pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () { memmove_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "64")] - pub extern "C" fn __llvm_memmove_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () { + pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () { memmove_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "128")] - pub extern "C" fn __llvm_memmove_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () { + pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () { memmove_element_unordered_atomic(dest, src, bytes); } 
#[cfg(target_has_atomic_load_store = "8")] - pub extern "C" fn __llvm_memset_element_unordered_atomic_1(s: *mut u8, c: u8, bytes: usize) -> () { + pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_1(s: *mut u8, c: u8, bytes: usize) -> () { memset_element_unordered_atomic(s, c, bytes); } #[cfg(target_has_atomic_load_store = "16")] - pub extern "C" fn __llvm_memset_element_unordered_atomic_2(s: *mut u16, c: u8, bytes: usize) -> () { + pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_2(s: *mut u16, c: u8, bytes: usize) -> () { memset_element_unordered_atomic(s, c, bytes); } #[cfg(target_has_atomic_load_store = "32")] - pub extern "C" fn __llvm_memset_element_unordered_atomic_4(s: *mut u32, c: u8, bytes: usize) -> () { + pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_4(s: *mut u32, c: u8, bytes: usize) -> () { memset_element_unordered_atomic(s, c, bytes); } #[cfg(target_has_atomic_load_store = "64")] - pub extern "C" fn __llvm_memset_element_unordered_atomic_8(s: *mut u64, c: u8, bytes: usize) -> () { + pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_8(s: *mut u64, c: u8, bytes: usize) -> () { memset_element_unordered_atomic(s, c, bytes); } #[cfg(target_has_atomic_load_store = "128")] - pub extern "C" fn __llvm_memset_element_unordered_atomic_16(s: *mut u128, c: u8, bytes: usize) -> () { + pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_16(s: *mut u128, c: u8, bytes: usize) -> () { memset_element_unordered_atomic(s, c, bytes); } } diff --git a/src/x86.rs b/src/x86.rs index abcc2bdb3..fd1f32e3a 100644 --- a/src/x86.rs +++ b/src/x86.rs @@ -8,82 +8,78 @@ use core::intrinsics; // NOTE These functions are never mangled as they are not tested against compiler-rt // and mangling ___chkstk would break the `jmp ___chkstk` instruction in __alloca -#[cfg(all( - windows, - target_env = "gnu", - not(feature = "no-asm"), - not(feature = "mangled-names") -))] -#[naked] -#[no_mangle] -pub unsafe extern "C" 
fn ___chkstk_ms() { - core::arch::asm!( - "push %ecx", - "push %eax", - "cmp $0x1000,%eax", - "lea 12(%esp),%ecx", - "jb 1f", - "2:", - "sub $0x1000,%ecx", - "test %ecx,(%ecx)", - "sub $0x1000,%eax", - "cmp $0x1000,%eax", - "ja 2b", - "1:", - "sub %eax,%ecx", - "test %ecx,(%ecx)", - "pop %eax", - "pop %ecx", - "ret", - options(noreturn, att_syntax) - ); -} +intrinsics! { + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn ___chkstk_ms() { + core::arch::asm!( + "push %ecx", + "push %eax", + "cmp $0x1000,%eax", + "lea 12(%esp),%ecx", + "jb 1f", + "2:", + "sub $0x1000,%ecx", + "test %ecx,(%ecx)", + "sub $0x1000,%eax", + "cmp $0x1000,%eax", + "ja 2b", + "1:", + "sub %eax,%ecx", + "test %ecx,(%ecx)", + "pop %eax", + "pop %ecx", + "ret", + options(noreturn, att_syntax) + ); + } -// FIXME: __alloca should be an alias to __chkstk -#[cfg(all( - windows, - target_env = "gnu", - not(feature = "no-asm"), - not(feature = "mangled-names") -))] -#[naked] -#[no_mangle] -pub unsafe extern "C" fn __alloca() { - core::arch::asm!( - "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable" - options(noreturn, att_syntax) - ); -} + // FIXME: __alloca should be an alias to __chkstk + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn __alloca() { + core::arch::asm!( + "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable" + options(noreturn, att_syntax) + ); + } -#[cfg(all( - windows, - target_env = "gnu", - not(feature = "no-asm"), - not(feature = "mangled-names") -))] -#[naked] -#[no_mangle] -pub unsafe extern "C" fn ___chkstk() { - core::arch::asm!( - "push %ecx", - "cmp $0x1000,%eax", - "lea 8(%esp),%ecx", // esp before calling this routine -> ecx - "jb 1f", - "2:", - "sub $0x1000,%ecx", - "test %ecx,(%ecx)", - "sub $0x1000,%eax", - "cmp $0x1000,%eax", - "ja 2b", - "1:", - "sub %eax,%ecx", - "test %ecx,(%ecx)", - "lea 
4(%esp),%eax", // load pointer to the return address into eax - "mov %ecx,%esp", // install the new top of stack pointer into esp - "mov -4(%eax),%ecx", // restore ecx - "push (%eax)", // push return address onto the stack - "sub %esp,%eax", // restore the original value in eax - "ret", - options(noreturn, att_syntax) - ); + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn ___chkstk() { + core::arch::asm!( + "push %ecx", + "cmp $0x1000,%eax", + "lea 8(%esp),%ecx", // esp before calling this routine -> ecx + "jb 1f", + "2:", + "sub $0x1000,%ecx", + "test %ecx,(%ecx)", + "sub $0x1000,%eax", + "cmp $0x1000,%eax", + "ja 2b", + "1:", + "sub %eax,%ecx", + "test %ecx,(%ecx)", + "lea 4(%esp),%eax", // load pointer to the return address into eax + "mov %ecx,%esp", // install the new top of stack pointer into esp + "mov -4(%eax),%ecx", // restore ecx + "push (%eax)", // push return address onto the stack + "sub %esp,%eax", // restore the original value in eax + "ret", + options(noreturn, att_syntax) + ); + } } diff --git a/src/x86_64.rs b/src/x86_64.rs index ea3c99497..393eeddd8 100644 --- a/src/x86_64.rs +++ b/src/x86_64.rs @@ -8,89 +8,87 @@ use core::intrinsics; // NOTE These functions are never mangled as they are not tested against compiler-rt // and mangling ___chkstk would break the `jmp ___chkstk` instruction in __alloca -#[cfg(all( - windows, - target_env = "gnu", - not(feature = "no-asm"), - not(feature = "mangled-names") -))] -#[naked] -#[no_mangle] -pub unsafe extern "C" fn ___chkstk_ms() { - core::arch::asm!( - "push %rcx", - "push %rax", - "cmp $0x1000,%rax", - "lea 24(%rsp),%rcx", - "jb 1f", - "2:", - "sub $0x1000,%rcx", - "test %rcx,(%rcx)", - "sub $0x1000,%rax", - "cmp $0x1000,%rax", - "ja 2b", - "1:", - "sub %rax,%rcx", - "test %rcx,(%rcx)", - "pop %rax", - "pop %rcx", - "ret", - options(noreturn, att_syntax) - ); -} +intrinsics! 
{ + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn ___chkstk_ms() { + core::arch::asm!( + "push %rcx", + "push %rax", + "cmp $0x1000,%rax", + "lea 24(%rsp),%rcx", + "jb 1f", + "2:", + "sub $0x1000,%rcx", + "test %rcx,(%rcx)", + "sub $0x1000,%rax", + "cmp $0x1000,%rax", + "ja 2b", + "1:", + "sub %rax,%rcx", + "test %rcx,(%rcx)", + "pop %rax", + "pop %rcx", + "ret", + options(noreturn, att_syntax) + ); + } -#[cfg(all( - windows, - target_env = "gnu", - not(feature = "no-asm"), - not(feature = "mangled-names") -))] -#[naked] -#[no_mangle] -pub unsafe extern "C" fn __alloca() { - core::arch::asm!( - "mov %rcx,%rax", // x64 _alloca is a normal function with parameter in rcx - "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable" - options(noreturn, att_syntax) - ); -} + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn __alloca() { + core::arch::asm!( + "mov %rcx,%rax", // x64 _alloca is a normal function with parameter in rcx + "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable" + options(noreturn, att_syntax) + ); + } -#[cfg(all( - windows, - target_env = "gnu", - not(feature = "no-asm"), - not(feature = "mangled-names") -))] -#[naked] -#[no_mangle] -pub unsafe extern "C" fn ___chkstk() { - core::arch::asm!( - "push %rcx", - "cmp $0x1000,%rax", - "lea 16(%rsp),%rcx", // rsp before calling this routine -> rcx - "jb 1f", - "2:", - "sub $0x1000,%rcx", - "test %rcx,(%rcx)", - "sub $0x1000,%rax", - "cmp $0x1000,%rax", - "ja 2b", - "1:", - "sub %rax,%rcx", - "test %rcx,(%rcx)", - "lea 8(%rsp),%rax", // load pointer to the return address into rax - "mov %rcx,%rsp", // install the new top of stack pointer into rsp - "mov -8(%rax),%rcx", // restore rcx - "push (%rax)", // push return address onto the stack - "sub %rsp,%rax", // restore the original value in rax - "ret", - options(noreturn, att_syntax) - 
); + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn ___chkstk() { + core::arch::asm!( + "push %rcx", + "cmp $0x1000,%rax", + "lea 16(%rsp),%rcx", // rsp before calling this routine -> rcx + "jb 1f", + "2:", + "sub $0x1000,%rcx", + "test %rcx,(%rcx)", + "sub $0x1000,%rax", + "cmp $0x1000,%rax", + "ja 2b", + "1:", + "sub %rax,%rcx", + "test %rcx,(%rcx)", + "lea 8(%rsp),%rax", // load pointer to the return address into rax + "mov %rcx,%rsp", // install the new top of stack pointer into rsp + "mov -8(%rax),%rcx", // restore rcx + "push (%rax)", // push return address onto the stack + "sub %rsp,%rax", // restore the original value in rax + "ret", + options(noreturn, att_syntax) + ); + } } // HACK(https://github.com/rust-lang/rust/issues/62785): x86_64-unknown-uefi needs special LLVM // support unless we emit the _fltused -#[no_mangle] -#[used] -#[cfg(target_os = "uefi")] -static _fltused: i32 = 0; +mod _fltused { + #[no_mangle] + #[used] + #[cfg(target_os = "uefi")] + static _fltused: i32 = 0; +} From e1a278b8a2c5edfc0d1e0dd0ba4561cc5918fa1a Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 6 Feb 2022 09:52:15 +0000 Subject: [PATCH 0507/1459] Bump to 0.2.2 --- libm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 3a6c5851b..99055ad47 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0" name = "libm" readme = "README.md" repository = "https://github.com/rust-lang/libm" -version = "0.2.1" +version = "0.2.2" edition = "2018" [features] From 0f933a8c48c82def4d629134fe8624420385cdad Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 6 Feb 2022 09:53:16 +0000 Subject: [PATCH 0508/1459] Update libm submodule to 0.2.2 --- libm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm b/libm index c2d22bf95..1f7b8eb61 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject 
commit c2d22bf95e2f032ae6b237e8e4c336bc795a151c +Subproject commit 1f7b8eb61cab5f62ec93d2343432bebd1ada30f2 From 9c4118f197fe8cb2b579555a497f5df6b3b3afc3 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 6 Feb 2022 09:59:11 +0000 Subject: [PATCH 0509/1459] Bump to 0.1.68 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a8278f0b8..2ac4a6ba1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.67" +version = "0.1.68" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From d64e815bef89c29957517651f92e1e4cf2ff550d Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 9 Feb 2022 20:54:39 +0000 Subject: [PATCH 0510/1459] Fix typo in __aeabi_uldivmod Accidentally introduced in #452 --- src/arm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arm.rs b/src/arm.rs index 95bde5116..9c1b6ad12 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -42,7 +42,7 @@ intrinsics! 
{ "sub sp, sp, #16", "add r4, sp, #8", "str r4, [sp]", - bl!("__udivmodsi4"), + bl!("__udivmoddi4"), "ldr r2, [sp, #8]", "ldr r3, [sp, #12]", "add sp, sp, #16", From d5e097e21130c693438bec621ab79e71249c3510 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 9 Feb 2022 21:02:02 +0000 Subject: [PATCH 0511/1459] Bump to 0.1.69 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2ac4a6ba1..491cc5a82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.68" +version = "0.1.69" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From f47e0c7615edded23ade34c348fbfa4bcfe8bfd7 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 13 Feb 2022 17:54:35 +0100 Subject: [PATCH 0512/1459] Handle Win64 builtins ABI change in LLVM 14 As of https://reviews.llvm.org/D110413, these no longer use the unadjusted ABI (and use normal C ABI instead, passing i128 indirectly and returning it as <2 x i64>). To support both LLVM 14 and older versions, rustc will expose a "llvm14-builtins-abi" target feature, based on which compiler-builtins can choose the appropriate ABI. This is needed for rust-lang/rust#93577. --- src/float/conv.rs | 116 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 81 insertions(+), 35 deletions(-) diff --git a/src/float/conv.rs b/src/float/conv.rs index 8c46e4d2e..c0cee4373 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -109,16 +109,6 @@ intrinsics! { } } - #[unadjusted_on_win64] - pub extern "C" fn __floattisf(i: i128) -> f32 { - int_to_float(i) - } - - #[unadjusted_on_win64] - pub extern "C" fn __floattidf(i: i128) -> f64 { - int_to_float(i) - } - #[arm_aeabi_alias = __aeabi_ui2f] pub extern "C" fn __floatunsisf(i: u32) -> f32 { int_to_float(i) @@ -140,16 +130,6 @@ intrinsics!
{ pub extern "C" fn __floatundidf(i: u64) -> f64 { int_to_float(i) } - - #[unadjusted_on_win64] - pub extern "C" fn __floatuntisf(i: u128) -> f32 { - int_to_float(i) - } - - #[unadjusted_on_win64] - pub extern "C" fn __floatuntidf(i: u128) -> f64 { - int_to_float(i) - } } fn float_to_int(f: F) -> I @@ -224,11 +204,6 @@ intrinsics! { float_to_int(f) } - #[unadjusted_on_win64] - pub extern "C" fn __fixsfti(f: f32) -> i128 { - float_to_int(f) - } - #[arm_aeabi_alias = __aeabi_d2iz] pub extern "C" fn __fixdfsi(f: f64) -> i32 { float_to_int(f) @@ -239,11 +214,6 @@ intrinsics! { float_to_int(f) } - #[unadjusted_on_win64] - pub extern "C" fn __fixdfti(f: f64) -> i128 { - float_to_int(f) - } - #[arm_aeabi_alias = __aeabi_f2uiz] pub extern "C" fn __fixunssfsi(f: f32) -> u32 { float_to_int(f) @@ -254,11 +224,6 @@ intrinsics! { float_to_int(f) } - #[unadjusted_on_win64] - pub extern "C" fn __fixunssfti(f: f32) -> u128 { - float_to_int(f) - } - #[arm_aeabi_alias = __aeabi_d2uiz] pub extern "C" fn __fixunsdfsi(f: f64) -> u32 { float_to_int(f) @@ -268,6 +233,87 @@ intrinsics! { pub extern "C" fn __fixunsdfdi(f: f64) -> u64 { float_to_int(f) } +} + +// The ABI for the following intrinsics changed in LLVM 14. On Win64, they now +// use Win64 ABI rather than unadjusted ABI. Pick the correct ABI based on the +// llvm14-builtins-abi target feature. + +#[cfg(target_feature = "llvm14-builtins-abi")] +intrinsics! 
{ + pub extern "C" fn __floattisf(i: i128) -> f32 { + int_to_float(i) + } + + pub extern "C" fn __floattidf(i: i128) -> f64 { + int_to_float(i) + } + + pub extern "C" fn __floatuntisf(i: u128) -> f32 { + int_to_float(i) + } + + pub extern "C" fn __floatuntidf(i: u128) -> f64 { + int_to_float(i) + } + + #[win64_128bit_abi_hack] + pub extern "C" fn __fixsfti(f: f32) -> i128 { + float_to_int(f) + } + + #[win64_128bit_abi_hack] + pub extern "C" fn __fixdfti(f: f64) -> i128 { + float_to_int(f) + } + + #[win64_128bit_abi_hack] + pub extern "C" fn __fixunssfti(f: f32) -> u128 { + float_to_int(f) + } + + #[win64_128bit_abi_hack] + pub extern "C" fn __fixunsdfti(f: f64) -> u128 { + float_to_int(f) + } +} + +#[cfg(not(target_feature = "llvm14-builtins-abi"))] +intrinsics! { + #[unadjusted_on_win64] + pub extern "C" fn __floattisf(i: i128) -> f32 { + int_to_float(i) + } + + #[unadjusted_on_win64] + pub extern "C" fn __floattidf(i: i128) -> f64 { + int_to_float(i) + } + + #[unadjusted_on_win64] + pub extern "C" fn __floatuntisf(i: u128) -> f32 { + int_to_float(i) + } + + #[unadjusted_on_win64] + pub extern "C" fn __floatuntidf(i: u128) -> f64 { + int_to_float(i) + } + + #[unadjusted_on_win64] + pub extern "C" fn __fixsfti(f: f32) -> i128 { + float_to_int(f) + } + + #[unadjusted_on_win64] + pub extern "C" fn __fixdfti(f: f64) -> i128 { + float_to_int(f) + } + + #[unadjusted_on_win64] + pub extern "C" fn __fixunssfti(f: f32) -> u128 { + float_to_int(f) + } #[unadjusted_on_win64] pub extern "C" fn __fixunsdfti(f: f64) -> u128 { From 4f7ca0a343314e7c534be194a06ef78734243884 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 16 Feb 2022 01:18:05 +0000 Subject: [PATCH 0513/1459] Bump to 0.1.70 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 491cc5a82..80e118a94 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.69" +version = 
"0.1.70" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From d11f1ed687c74b3c79af709bc37e8958a2c46c73 Mon Sep 17 00:00:00 2001 From: Vladimir Michael Eatwell Date: Thu, 17 Jun 2021 17:40:45 +0100 Subject: [PATCH 0514/1459] [watch_os] add watchOS --- build.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.rs b/build.rs index 2b34540dc..c8b770011 100644 --- a/build.rs +++ b/build.rs @@ -241,7 +241,7 @@ mod c { // On iOS and 32-bit OSX these are all just empty intrinsics, no need to // include them. - if target_os != "ios" && (target_vendor != "apple" || target_arch != "x86") { + if target_os != "ios" && target_os != "watchos" && (target_vendor != "apple" || target_arch != "x86") { sources.extend(&[ ("__absvti2", "absvti2.c"), ("__addvti3", "addvti3.c"), @@ -318,7 +318,7 @@ mod c { } } - if target_arch == "arm" && target_os != "ios" && target_env != "msvc" { + if target_arch == "arm" && target_os != "ios" && target_os != "watchos" && target_env != "msvc" { sources.extend(&[ ("__aeabi_div0", "arm/aeabi_div0.c"), ("__aeabi_drsub", "arm/aeabi_drsub.c"), From eb68e3a2f2a0bd4ea625e4287c2eb2069d1c4fa2 Mon Sep 17 00:00:00 2001 From: Vladimir Michael Eatwell Date: Tue, 8 Mar 2022 10:18:07 +0000 Subject: [PATCH 0515/1459] [watch_os] Fix formatting --- build.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/build.rs b/build.rs index c8b770011..d811fc5cb 100644 --- a/build.rs +++ b/build.rs @@ -241,7 +241,10 @@ mod c { // On iOS and 32-bit OSX these are all just empty intrinsics, no need to // include them. 
- if target_os != "ios" && target_os != "watchos" && (target_vendor != "apple" || target_arch != "x86") { + if target_os != "ios" + && target_os != "watchos" + && (target_vendor != "apple" || target_arch != "x86") + { sources.extend(&[ ("__absvti2", "absvti2.c"), ("__addvti3", "addvti3.c"), @@ -318,7 +321,11 @@ mod c { } } - if target_arch == "arm" && target_os != "ios" && target_os != "watchos" && target_env != "msvc" { + if target_arch == "arm" + && target_os != "ios" + && target_os != "watchos" + && target_env != "msvc" + { sources.extend(&[ ("__aeabi_div0", "arm/aeabi_div0.c"), ("__aeabi_drsub", "arm/aeabi_drsub.c"), From 6d7eeff141030fa73632728afa887a61a1e793cb Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Fri, 18 Mar 2022 18:18:28 +0300 Subject: [PATCH 0516/1459] Provide an implementation of `strlen` to be used as a fallback --- src/lib.rs | 1 + src/mem/mod.rs | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index c3eefbc03..9ca72bc20 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ #![cfg_attr(not(feature = "no-asm"), feature(global_asm))] #![feature(cfg_target_has_atomic)] #![feature(compiler_builtins)] +#![feature(core_ffi_c)] #![feature(core_intrinsics)] #![feature(lang_items)] #![feature(linkage)] diff --git a/src/mem/mod.rs b/src/mem/mod.rs index dce4d87e0..a55113861 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -68,6 +68,18 @@ intrinsics! 
{ pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { memcmp(s1, s2, n) } + + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn strlen(s: *const core::ffi::c_char) -> usize { + let mut n = 0; + let mut s = s; + while *s != 0 { + n += 1; + s = s.offset(1); + } + n + } } // `bytes` must be a multiple of `mem::size_of::()` From 0ce02f35c484593d8c7a0605bdd201f2bb787292 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 18 Mar 2022 20:12:07 +0000 Subject: [PATCH 0517/1459] Bump to 0.1.71 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 80e118a94..8c49b8c9e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.70" +version = "0.1.71" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 4da42c392d5c9aafaaf64729f77e00362fe2aafc Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 22 Mar 2022 19:06:50 -0700 Subject: [PATCH 0518/1459] Build emutls.c on Android. Android uses emulated TLS so we need a runtime support function from this source file. --- build.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/build.rs b/build.rs index d811fc5cb..e199124b2 100644 --- a/build.rs +++ b/build.rs @@ -489,6 +489,16 @@ mod c { sources.remove(&["__aeabi_cdcmp", "__aeabi_cfcmp"]); } + // Android uses emulated TLS so we need a runtime support function. + if target_os == "android" { + sources.extend(&[("__emutls_get_address", "emutls.c")]); + + // Work around a bug in the NDK headers (fixed in + // https://r.android.com/2038949 which will be released in a future + // NDK version) by providing a definition of LONG_BIT. 
+ cfg.define("LONG_BIT", "(8 * sizeof(long))"); + } + // When compiling the C code we require the user to tell us where the // source code is, and this is largely done so when we're compiling as // part of rust-lang/rust we can use the same llvm-project repository as From 19d53ba6d86fe64b89f28dc8dba02eeffb15c8f8 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 30 Mar 2022 19:39:47 +0100 Subject: [PATCH 0519/1459] Bump to 0.1.72 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8c49b8c9e..99f294724 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.71" +version = "0.1.72" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From b945767d46fdc26a64ac8bc7f4dcd3bfb50ce2e2 Mon Sep 17 00:00:00 2001 From: Johannes Stoelp Date: Mon, 2 May 2022 23:00:12 +0200 Subject: [PATCH 0520/1459] rv64 implement muldi3 intrinsic Implement the __muldi3 intrinsic to prevent infinite recursion during multiplication on rv64 without the 'm' extension. --- src/int/mul.rs | 1 + src/lib.rs | 4 ++-- src/{riscv32.rs => riscv.rs} | 17 +++++++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) rename src/{riscv32.rs => riscv.rs} (54%) diff --git a/src/int/mul.rs b/src/int/mul.rs index a5238eeac..37eb03ec0 100644 --- a/src/int/mul.rs +++ b/src/int/mul.rs @@ -100,6 +100,7 @@ impl_signed_mulo!(i128_overflowing_mul, i128, u128); intrinsics! 
{ #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_lmul] + #[cfg(not(target_arch = "riscv64"))] pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 { a.mul(b) } diff --git a/src/lib.rs b/src/lib.rs index 9ca72bc20..009923d27 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -60,8 +60,8 @@ pub mod arm; ))] pub mod arm_linux; -#[cfg(any(target_arch = "riscv32"))] -pub mod riscv32; +#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +pub mod riscv; #[cfg(target_arch = "x86")] pub mod x86; diff --git a/src/riscv32.rs b/src/riscv.rs similarity index 54% rename from src/riscv32.rs rename to src/riscv.rs index 9a3c1714c..d9a65e200 100644 --- a/src/riscv32.rs +++ b/src/riscv.rs @@ -1,6 +1,7 @@ intrinsics! { // Implementation from gcc // https://raw.githubusercontent.com/gcc-mirror/gcc/master/libgcc/config/epiphany/mulsi3.c + #[cfg(target_arch = "riscv32")] pub extern "C" fn __mulsi3(a: u32, b: u32) -> u32 { let (mut a, mut b) = (a, b); let mut r = 0; @@ -15,4 +16,20 @@ intrinsics! { r } + + #[cfg(target_arch = "riscv64")] + pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 { + let (mut a, mut b) = (a, b); + let mut r = 0; + + while a > 0 { + if a & 1 > 0 { + r += b; + } + a >>= 1; + b <<= 1; + } + + r + } } From 597bfefd3480c3ae612da427e5a53091698b27ab Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Mon, 2 May 2022 16:05:03 -0700 Subject: [PATCH 0521/1459] Remove apple_versioning.c inclusion According to the README this file isn't used by rust, but it's currently included which leads to these linker warnings in some cases: ``` ld: warning: linker symbol '$ld$hide$os10.5$___udivti3' hides a non-existent symbol '___udivti3' ld: warning: linker symbol '$ld$hide$os10.4$___umoddi3' hides a non-existent symbol '___umoddi3' ld: warning: linker symbol '$ld$hide$os10.5$___umoddi3' hides a non-existent symbol '___umoddi3' ld: warning: linker symbol '$ld$hide$os10.4$___umodti3' hides a non-existent symbol '___umodti3' ld: warning: linker symbol 
'$ld$hide$os10.5$___umodti3' hides a non-existent symbol '___umodti3' ``` This file exclusively contains macros which hide old symbols on Apple OS versions where they don't exist. https://github.com/rust-lang/llvm-project/blob/fc10370ef7d91babf512c10505f8f2176bc8519d/compiler-rt/lib/builtins/apple_versioning.c --- build.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/build.rs b/build.rs index e199124b2..a1bcf2ea1 100644 --- a/build.rs +++ b/build.rs @@ -193,7 +193,6 @@ mod c { ("__absvsi2", "absvsi2.c"), ("__addvdi3", "addvdi3.c"), ("__addvsi3", "addvsi3.c"), - ("apple_versioning", "apple_versioning.c"), ("__clzdi2", "clzdi2.c"), ("__clzsi2", "clzsi2.c"), ("__cmpdi2", "cmpdi2.c"), From 4baa36e2d49178fc4be7ae64c3a92d15f99585fa Mon Sep 17 00:00:00 2001 From: Johannes Stoelp Date: Thu, 12 May 2022 00:34:49 +0200 Subject: [PATCH 0522/1459] rv32 rv64: adapt conditional compilation Adapt conditional compilation as: rv32i : riscv:__mulsi3, riscv:__muldi3 rv32im: riscv:__mulsi3, int/mul:__muldi3 rv64i : riscv:__mulsi3, riscv:__muldi3 rv64im: riscv:__mulsi3, int/mul:__muldi3 --- src/int/mul.rs | 2 +- src/riscv.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/int/mul.rs b/src/int/mul.rs index 37eb03ec0..07ce061c9 100644 --- a/src/int/mul.rs +++ b/src/int/mul.rs @@ -100,7 +100,7 @@ impl_signed_mulo!(i128_overflowing_mul, i128, u128); intrinsics! { #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_lmul] - #[cfg(not(target_arch = "riscv64"))] + #[cfg(any(not(any(target_arch = "riscv32", target_arch = "riscv64")), target_feature = "m"))] pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 { a.mul(b) } diff --git a/src/riscv.rs b/src/riscv.rs index d9a65e200..ee78b9dba 100644 --- a/src/riscv.rs +++ b/src/riscv.rs @@ -1,7 +1,6 @@ intrinsics!
{ // Implementation from gcc // https://raw.githubusercontent.com/gcc-mirror/gcc/master/libgcc/config/epiphany/mulsi3.c - #[cfg(target_arch = "riscv32")] pub extern "C" fn __mulsi3(a: u32, b: u32) -> u32 { let (mut a, mut b) = (a, b); let mut r = 0; @@ -17,7 +16,7 @@ intrinsics! { r } - #[cfg(target_arch = "riscv64")] + #[cfg(not(target_feature = "m"))] pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 { let (mut a, mut b) = (a, b); let mut r = 0; From 3f715fd20223f427421f7031058e541113395c13 Mon Sep 17 00:00:00 2001 From: Patryk Wychowaniec Date: Sun, 15 May 2022 12:58:38 +0200 Subject: [PATCH 0523/1459] Fix division on AVRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For division and modulo, AVR uses a custom calling convention that does not match compiler_builtins' expectations, leading to non-working code¹. Ideally we'd just use hand-written naked functions (as, afair, ARM does), but that's a lot of code to port², so hopefully we'll be able to do it gradually later. For the time being, I'd suggest not compiling problematic functions for AVR target - this causes avr-gcc (which is a mandatory part of Rust+AVR toolchain anyway) to link hand-written assembly from libgcc, which is confirmed to work. I've tested the code locally on simavr and the patch seems to be working correctly :-) ¹ https://github.com/rust-lang/rust/issues/82242, https://github.com/rust-lang/rust/issues/83281 ² https://github.com/gcc-mirror/gcc/blob/31048012db98f5ec9c2ba537bfd850374bdd771f/libgcc/config/avr/lib1funcs.S Closes https://github.com/rust-lang/rust/issues/82242 Closes https://github.com/rust-lang/rust/issues/83281 --- src/int/sdiv.rs | 3 +++ src/int/udiv.rs | 5 +++++ src/macros.rs | 31 ++++++++++++++++++++++++++++++- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index e1e3f33bb..f1822f0f8 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -9,6 +9,7 @@ macro_rules! 
sdivmod { $($attr:tt),* // attributes ) => { intrinsics! { + #[avr_skip] $( #[$attr] )* @@ -50,6 +51,7 @@ macro_rules! sdiv { $($attr:tt),* // attributes ) => { intrinsics! { + #[avr_skip] $( #[$attr] )* @@ -85,6 +87,7 @@ macro_rules! smod { $($attr:tt),* // attributes ) => { intrinsics! { + #[avr_skip] $( #[$attr] )* diff --git a/src/int/udiv.rs b/src/int/udiv.rs index 2f236346d..c5ef4a6b1 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -18,6 +18,7 @@ intrinsics! { u32_div_rem(n, d).1 } + #[avr_skip] #[maybe_use_optimized_c_shim] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 { @@ -28,18 +29,21 @@ intrinsics! { quo_rem.0 } + #[avr_skip] #[maybe_use_optimized_c_shim] /// Returns `n / d` pub extern "C" fn __udivdi3(n: u64, d: u64) -> u64 { u64_div_rem(n, d).0 } + #[avr_skip] #[maybe_use_optimized_c_shim] /// Returns `n % d` pub extern "C" fn __umoddi3(n: u64, d: u64) -> u64 { u64_div_rem(n, d).1 } + #[avr_skip] #[maybe_use_optimized_c_shim] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmoddi4(n: u64, d: u64, rem: Option<&mut u64>) -> u64 { @@ -77,6 +81,7 @@ intrinsics! { } } + #[avr_skip] #[win64_128bit_abi_hack] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmodti4(n: u128, d: u128, rem: Option<&mut u128>) -> u128 { diff --git a/src/macros.rs b/src/macros.rs index 6926feac0..4c1d8af62 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -82,7 +82,6 @@ macro_rules! intrinsics { $($rest:tt)* ) => ( - #[cfg($name = "optimized-c")] pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { extern $abi { @@ -304,6 +303,36 @@ macro_rules! intrinsics { intrinsics!($($rest)*); ); + // For division and modulo, AVR uses a custom calling convention¹ that does + // not match our definitions here. 
Ideally we would just use hand-written + // naked functions, but that's quite a lot of code to port² - so for the + // time being we are just ignoring the problematic functions, letting + // avr-gcc (which is required to compile to AVR anyway) link them from + // libgcc. + // + // ¹ https://gcc.gnu.org/wiki/avr-gcc (see "Exceptions to the Calling + // Convention") + // ² https://github.com/gcc-mirror/gcc/blob/31048012db98f5ec9c2ba537bfd850374bdd771f/libgcc/config/avr/lib1funcs.S + ( + #[avr_skip] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(not(target_arch = "avr"))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + // This is the final catch-all rule. At this point we generate an // intrinsic with a conditional `#[no_mangle]` directive to avoid // interfering with duplicate symbols and whatnot during testing. From 2cd255d8baf8165a5943e2c00b6b51f8bccbcbb8 Mon Sep 17 00:00:00 2001 From: Mara Bos Date: Fri, 20 May 2022 14:43:16 +0200 Subject: [PATCH 0524/1459] Support cfg_attr attributes in intrinsics!() macro. --- src/macros.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/macros.rs b/src/macros.rs index 6926feac0..22d82e3fa 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -61,6 +61,35 @@ macro_rules! public_test_dep { macro_rules! intrinsics { () => (); + // Support cfg_attr: + ( + #[cfg_attr($e:meta, $($attr:tt)*)] + $(#[$($attrs:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + $($rest:tt)* + ) => ( + #[cfg($e)] + intrinsics! { + #[$($attr)*] + $(#[$($attrs)*])* + pub extern $abi fn $name($($argname: $ty),*) $(-> $ret)? { + $($body)* + } + } + + #[cfg(not($e))] + intrinsics! 
{ + $(#[$($attrs)*])* + pub extern $abi fn $name($($argname: $ty),*) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + // Right now there's a bunch of architecture-optimized intrinsics in the // stock compiler-rt implementation. Not all of these have been ported over // to Rust yet so when the `c` feature of this crate is enabled we fall back From 019f347d09b1de821e7ceb05ce160d9f139369da Mon Sep 17 00:00:00 2001 From: Mara Bos Date: Fri, 20 May 2022 14:43:34 +0200 Subject: [PATCH 0525/1459] De-duplicate 128 bit float conv intrinsics using cfg_attr. --- src/float/conv.rs | 60 ++++++++++------------------------------------- 1 file changed, 12 insertions(+), 48 deletions(-) diff --git a/src/float/conv.rs b/src/float/conv.rs index c0cee4373..0e21a1ed3 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -239,83 +239,47 @@ intrinsics! { // use Win64 ABI rather than unadjusted ABI. Pick the correct ABI based on the // llvm14-builtins-abi target feature. -#[cfg(target_feature = "llvm14-builtins-abi")] intrinsics! 
{ + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] pub extern "C" fn __floattisf(i: i128) -> f32 { int_to_float(i) } + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] pub extern "C" fn __floattidf(i: i128) -> f64 { int_to_float(i) } + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] pub extern "C" fn __floatuntisf(i: u128) -> f32 { int_to_float(i) } + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] pub extern "C" fn __floatuntidf(i: u128) -> f64 { int_to_float(i) } - #[win64_128bit_abi_hack] + #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] pub extern "C" fn __fixsfti(f: f32) -> i128 { float_to_int(f) } - #[win64_128bit_abi_hack] + #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] pub extern "C" fn __fixdfti(f: f64) -> i128 { float_to_int(f) } - #[win64_128bit_abi_hack] + #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] pub extern "C" fn __fixunssfti(f: f32) -> u128 { float_to_int(f) } - #[win64_128bit_abi_hack] - pub extern "C" fn __fixunsdfti(f: f64) -> u128 { - float_to_int(f) - } -} - -#[cfg(not(target_feature = "llvm14-builtins-abi"))] -intrinsics! 
{ - #[unadjusted_on_win64] - pub extern "C" fn __floattisf(i: i128) -> f32 { - int_to_float(i) - } - - #[unadjusted_on_win64] - pub extern "C" fn __floattidf(i: i128) -> f64 { - int_to_float(i) - } - - #[unadjusted_on_win64] - pub extern "C" fn __floatuntisf(i: u128) -> f32 { - int_to_float(i) - } - - #[unadjusted_on_win64] - pub extern "C" fn __floatuntidf(i: u128) -> f64 { - int_to_float(i) - } - - #[unadjusted_on_win64] - pub extern "C" fn __fixsfti(f: f32) -> i128 { - float_to_int(f) - } - - #[unadjusted_on_win64] - pub extern "C" fn __fixdfti(f: f64) -> i128 { - float_to_int(f) - } - - #[unadjusted_on_win64] - pub extern "C" fn __fixunssfti(f: f32) -> u128 { - float_to_int(f) - } - - #[unadjusted_on_win64] + #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] pub extern "C" fn __fixunsdfti(f: f64) -> u128 { float_to_int(f) } From 7e5768be6ce42cafb51b90f1cb8efd078c1f3380 Mon Sep 17 00:00:00 2001 From: Mara Bos Date: Fri, 20 May 2022 16:25:18 +0200 Subject: [PATCH 0526/1459] Faster int<->float conversions. 
--- build.rs | 12 +- src/float/conv.rs | 481 ++++++++++++++++++++++++++-------------------- 2 files changed, 274 insertions(+), 219 deletions(-) diff --git a/build.rs b/build.rs index e199124b2..3868510be 100644 --- a/build.rs +++ b/build.rs @@ -281,10 +281,7 @@ mod c { if target_env == "msvc" { if target_arch == "x86_64" { - sources.extend(&[ - ("__floatdisf", "x86_64/floatdisf.c"), - ("__floatdixf", "x86_64/floatdixf.c"), - ]); + sources.extend(&[("__floatdixf", "x86_64/floatdixf.c")]); } } else { // None of these seem to be used on x86_64 windows, and they've all @@ -292,10 +289,7 @@ mod c { if target_os != "windows" { if target_arch == "x86_64" { sources.extend(&[ - ("__floatdisf", "x86_64/floatdisf.c"), ("__floatdixf", "x86_64/floatdixf.c"), - ("__floatundidf", "x86_64/floatundidf.S"), - ("__floatundisf", "x86_64/floatundisf.S"), ("__floatundixf", "x86_64/floatundixf.S"), ]); } @@ -306,11 +300,7 @@ mod c { ("__ashldi3", "i386/ashldi3.S"), ("__ashrdi3", "i386/ashrdi3.S"), ("__divdi3", "i386/divdi3.S"), - ("__floatdidf", "i386/floatdidf.S"), - ("__floatdisf", "i386/floatdisf.S"), ("__floatdixf", "i386/floatdixf.S"), - ("__floatundidf", "i386/floatundidf.S"), - ("__floatundisf", "i386/floatundisf.S"), ("__floatundixf", "i386/floatundixf.S"), ("__lshrdi3", "i386/lshrdi3.S"), ("__moddi3", "i386/moddi3.S"), diff --git a/src/float/conv.rs b/src/float/conv.rs index 0e21a1ed3..86847ed6c 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -1,286 +1,351 @@ -use float::Float; -use int::{CastInto, Int}; - -fn int_to_float(i: I) -> F -where - F::Int: CastInto, - F::Int: CastInto, - I::UnsignedInt: CastInto, - u32: CastInto, -{ - if i == I::ZERO { - return F::ZERO; - } - - let two = I::UnsignedInt::ONE + I::UnsignedInt::ONE; - let four = two + two; - let sign = i < I::ZERO; - let mut x = Int::abs_diff(i, I::ZERO); - - // number of significant digits in the integer - let i_sd = I::BITS - x.leading_zeros(); - // significant digits for the float, including implicit bit - 
let f_sd = F::SIGNIFICAND_BITS + 1; - - // exponent - let mut exp = i_sd - 1; - - if I::BITS < f_sd { - return F::from_parts( - sign, - (exp + F::EXPONENT_BIAS).cast(), - x.cast() << (f_sd - exp - 1), - ); +/// Conversions from integers to floats. +/// +/// These are hand-optimized bit twiddling code, +/// which unfortunately isn't the easiest kind of code to read. +/// +/// The algorithm is explained here: https://blog.m-ou.se/floats/ +mod int_to_float { + pub fn u32_to_f32_bits(i: u32) -> u32 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + let a = i << n >> 8; // Significant bits, with bit 24 still in tact. + let b = i << n << 24; // Insignificant bits, only relevant for rounding. + let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. + let e = 157 - n as u32; // Exponent plus 127, minus one. + (e << 23) + m // + not |, so the mantissa can overflow into the exponent. } - x = if i_sd > f_sd { - // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - // 12345678901234567890123456 - // 1 = the implicit bit - // P = bit f_sd-1 bits to the right of 1 - // Q = bit f_sd bits to the right of 1 - // R = "or" of all bits to the right of Q - let f_sd_add2 = f_sd + 2; - x = if i_sd == (f_sd + 1) { - x << 1 - } else if i_sd == f_sd_add2 { - x - } else { - (x >> (i_sd - f_sd_add2)) - | Int::from_bool( - (x & I::UnsignedInt::MAX).wrapping_shl((I::BITS + f_sd_add2) - i_sd) - != Int::ZERO, - ) - }; - - // R |= P - x |= Int::from_bool((x & four) != I::UnsignedInt::ZERO); - // round - this step may add a significant bit - x += Int::ONE; - // dump Q and R - x >>= 2; - - // a is now rounded to f_sd or f_sd+1 bits - if (x & (I::UnsignedInt::ONE << f_sd)) != Int::ZERO { - x >>= 1; - exp += 1; + pub fn u32_to_f64_bits(i: u32) -> u64 { + if i == 0 { + return 0; } - x - } else { - x.wrapping_shl(f_sd - i_sd) - }; - - 
F::from_parts(sign, (exp + F::EXPONENT_BIAS).cast(), x.cast()) -} - -intrinsics! { - #[arm_aeabi_alias = __aeabi_i2f] - pub extern "C" fn __floatsisf(i: i32) -> f32 { - int_to_float(i) + let n = i.leading_zeros(); + let m = (i as u64) << (21 + n); // Significant bits, with bit 53 still in tact. + let e = 1053 - n as u64; // Exponent plus 1023, minus one. + (e << 52) + m // Bit 53 of m will overflow into e. } - #[arm_aeabi_alias = __aeabi_i2d] - pub extern "C" fn __floatsidf(i: i32) -> f64 { - int_to_float(i) + pub fn u64_to_f32_bits(i: u64) -> u32 { + let n = i.leading_zeros(); + let y = i.wrapping_shl(n); + let a = (y >> 40) as u32; // Significant bits, with bit 24 still in tact. + let b = (y >> 8 | y & 0xFFFF) as u32; // Insignificant bits, only relevant for rounding. + let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. + let e = if i == 0 { 0 } else { 189 - n }; // Exponent plus 127, minus one, except for zero. + (e << 23) + m // + not |, so the mantissa can overflow into the exponent. } - #[maybe_use_optimized_c_shim] - #[arm_aeabi_alias = __aeabi_l2f] - pub extern "C" fn __floatdisf(i: i64) -> f32 { - // On x86_64 LLVM will use native instructions for this conversion, we - // can just do it directly - if cfg!(target_arch = "x86_64") { - i as f32 - } else { - int_to_float(i) + pub fn u64_to_f64_bits(i: u64) -> u64 { + if i == 0 { + return 0; } + let n = i.leading_zeros(); + let a = (i << n >> 11) as u64; // Significant bits, with bit 53 still in tact. + let b = (i << n << 53) as u64; // Insignificant bits, only relevant for rounding. + let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even. + let e = 1085 - n as u64; // Exponent plus 1023, minus one. + (e << 52) + m // + not |, so the mantissa can overflow into the exponent. 
} - #[maybe_use_optimized_c_shim] - #[arm_aeabi_alias = __aeabi_l2d] - pub extern "C" fn __floatdidf(i: i64) -> f64 { - // On x86_64 LLVM will use native instructions for this conversion, we - // can just do it directly - if cfg!(target_arch = "x86_64") { - i as f64 - } else { - int_to_float(i) - } + pub fn u128_to_f32_bits(i: u128) -> u32 { + let n = i.leading_zeros(); + let y = i.wrapping_shl(n); + let a = (y >> 104) as u32; // Significant bits, with bit 24 still in tact. + let b = (y >> 72) as u32 | (y << 32 >> 32 != 0) as u32; // Insignificant bits, only relevant for rounding. + let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. + let e = if i == 0 { 0 } else { 253 - n }; // Exponent plus 127, minus one, except for zero. + (e << 23) + m // + not |, so the mantissa can overflow into the exponent. + } + + pub fn u128_to_f64_bits(i: u128) -> u64 { + let n = i.leading_zeros(); + let y = i.wrapping_shl(n); + let a = (y >> 75) as u64; // Significant bits, with bit 53 still in tact. + let b = (y >> 11 | y & 0xFFFF_FFFF) as u64; // Insignificant bits, only relevant for rounding. + let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even. + let e = if i == 0 { 0 } else { 1149 - n as u64 }; // Exponent plus 1023, minus one, except for zero. + (e << 52) + m // + not |, so the mantissa can overflow into the exponent. } +} +// Conversions from unsigned integers to floats. +intrinsics! 
{ #[arm_aeabi_alias = __aeabi_ui2f] pub extern "C" fn __floatunsisf(i: u32) -> f32 { - int_to_float(i) + f32::from_bits(int_to_float::u32_to_f32_bits(i)) } #[arm_aeabi_alias = __aeabi_ui2d] pub extern "C" fn __floatunsidf(i: u32) -> f64 { - int_to_float(i) + f64::from_bits(int_to_float::u32_to_f64_bits(i)) } - #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_ul2f] pub extern "C" fn __floatundisf(i: u64) -> f32 { - int_to_float(i) + f32::from_bits(int_to_float::u64_to_f32_bits(i)) } - #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_ul2d] pub extern "C" fn __floatundidf(i: u64) -> f64 { - int_to_float(i) + f64::from_bits(int_to_float::u64_to_f64_bits(i)) } -} - -fn float_to_int(f: F) -> I -where - F::ExpInt: CastInto, - u32: CastInto, - F::Int: CastInto, -{ - // converting NaNs is UB, so we don't consider them - let sign = f.sign(); - let mut exp = f.exp(); - - // if less than one or unsigned & negative - if (exp < F::EXPONENT_BIAS.cast()) || (!I::SIGNED && sign) { - return I::ZERO; + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __floatuntisf(i: u128) -> f32 { + f32::from_bits(int_to_float::u128_to_f32_bits(i)) } - exp -= F::EXPONENT_BIAS.cast(); - - // If the value is too large for `I`, saturate. - let bits: F::ExpInt = I::BITS.cast(); - let max = if I::SIGNED { - bits - F::ExpInt::ONE - } else { - bits - }; - if max <= exp { - return if sign { - // It happens that I::MIN is handled correctly - I::MIN - } else { - I::MAX - }; - }; - // `0 <= exp < max` - - // If 0 <= exponent < F::SIGNIFICAND_BITS, right shift to get the result. Otherwise, shift left. 
- let sig_bits: F::ExpInt = F::SIGNIFICAND_BITS.cast(); - // The larger integer has to be casted into, or else the shift overflows - let r: I = if F::Int::BITS < I::BITS { - let tmp: I = if exp < sig_bits { - f.imp_frac().cast() >> (sig_bits - exp).cast() - } else { - f.imp_frac().cast() << (exp - sig_bits).cast() - }; - tmp - } else { - let tmp: F::Int = if exp < sig_bits { - f.imp_frac() >> (sig_bits - exp).cast() - } else { - f.imp_frac() << (exp - sig_bits).cast() - }; - tmp.cast() - }; - - if sign { - r.wrapping_neg() - } else { - r + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __floatuntidf(i: u128) -> f64 { + f64::from_bits(int_to_float::u128_to_f64_bits(i)) } } +// Conversions from signed integers to floats. intrinsics! { - #[arm_aeabi_alias = __aeabi_f2iz] - pub extern "C" fn __fixsfsi(f: f32) -> i32 { - float_to_int(f) + #[arm_aeabi_alias = __aeabi_i2f] + pub extern "C" fn __floatsisf(i: i32) -> f32 { + let sign_bit = ((i >> 31) as u32) << 31; + f32::from_bits(int_to_float::u32_to_f32_bits(i.unsigned_abs()) | sign_bit) } - #[arm_aeabi_alias = __aeabi_f2lz] - pub extern "C" fn __fixsfdi(f: f32) -> i64 { - float_to_int(f) + #[arm_aeabi_alias = __aeabi_i2d] + pub extern "C" fn __floatsidf(i: i32) -> f64 { + let sign_bit = ((i >> 31) as u64) << 63; + f64::from_bits(int_to_float::u32_to_f64_bits(i.unsigned_abs()) | sign_bit) } - #[arm_aeabi_alias = __aeabi_d2iz] - pub extern "C" fn __fixdfsi(f: f64) -> i32 { - float_to_int(f) + #[arm_aeabi_alias = __aeabi_l2f] + pub extern "C" fn __floatdisf(i: i64) -> f32 { + let sign_bit = ((i >> 63) as u32) << 31; + f32::from_bits(int_to_float::u64_to_f32_bits(i.unsigned_abs()) | sign_bit) } - #[arm_aeabi_alias = __aeabi_d2lz] - pub extern "C" fn __fixdfdi(f: f64) -> i64 { - float_to_int(f) + #[arm_aeabi_alias = __aeabi_l2d] + pub extern "C" fn __floatdidf(i: i64) -> f64 { + let sign_bit = ((i >> 63) as u64) << 63; + 
f64::from_bits(int_to_float::u64_to_f64_bits(i.unsigned_abs()) | sign_bit) + } + + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __floattisf(i: i128) -> f32 { + let sign_bit = ((i >> 127) as u32) << 31; + f32::from_bits(int_to_float::u128_to_f32_bits(i.unsigned_abs()) | sign_bit) } + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __floattidf(i: i128) -> f64 { + let sign_bit = ((i >> 127) as u64) << 63; + f64::from_bits(int_to_float::u128_to_f64_bits(i.unsigned_abs()) | sign_bit) + } +} + +// Conversions from floats to unsigned integers. +intrinsics! { #[arm_aeabi_alias = __aeabi_f2uiz] pub extern "C" fn __fixunssfsi(f: f32) -> u32 { - float_to_int(f) + let fbits = f.to_bits(); + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 159 << 23 { // >= 1, < max + let m = 1 << 31 | fbits << 8; // Mantissa and the implicit 1-bit. + let s = 158 - (fbits >> 23); // Shift based on the exponent and bias. + m >> s + } else if fbits <= 255 << 23 { // >= max (incl. inf) + u32::MAX + } else { // Negative or NaN + 0 + } } #[arm_aeabi_alias = __aeabi_f2ulz] pub extern "C" fn __fixunssfdi(f: f32) -> u64 { - float_to_int(f) + let fbits = f.to_bits(); + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 191 << 23 { // >= 1, < max + let m = 1 << 63 | (fbits as u64) << 40; // Mantissa and the implicit 1-bit. + let s = 190 - (fbits >> 23); // Shift based on the exponent and bias. + m >> s + } else if fbits <= 255 << 23 { // >= max (incl. 
inf) + u64::MAX + } else { // Negative or NaN + 0 + } + } + + #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __fixunssfti(f: f32) -> u128 { + let fbits = f.to_bits(); + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 255 << 23 { // >= 1, < inf + let m = 1 << 127 | (fbits as u128) << 104; // Mantissa and the implicit 1-bit. + let s = 254 - (fbits >> 23); // Shift based on the exponent and bias. + m >> s + } else if fbits == 255 << 23 { // == inf + u128::MAX + } else { // Negative or NaN + 0 + } } #[arm_aeabi_alias = __aeabi_d2uiz] pub extern "C" fn __fixunsdfsi(f: f64) -> u32 { - float_to_int(f) + let fbits = f.to_bits(); + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1055 << 52 { // >= 1, < max + let m = 1 << 31 | (fbits >> 21) as u32; // Mantissa and the implicit 1-bit. + let s = 1054 - (fbits >> 52); // Shift based on the exponent and bias. + m >> s + } else if fbits <= 2047 << 52 { // >= max (incl. inf) + u32::MAX + } else { // Negative or NaN + 0 + } } #[arm_aeabi_alias = __aeabi_d2ulz] pub extern "C" fn __fixunsdfdi(f: f64) -> u64 { - float_to_int(f) - } -} - -// The ABI for the following intrinsics changed in LLVM 14. On Win64, they now -// use Win64 ABI rather than unadjusted ABI. Pick the correct ABI based on the -// llvm14-builtins-abi target feature. - -intrinsics! { - #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] - pub extern "C" fn __floattisf(i: i128) -> f32 { - int_to_float(i) + let fbits = f.to_bits(); + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1087 << 52 { // >= 1, < max + let m = 1 << 63 | fbits << 11; // Mantissa and the implicit 1-bit. + let s = 1086 - (fbits >> 52); // Shift based on the exponent and bias. + m >> s + } else if fbits <= 2047 << 52 { // >= max (incl. 
inf) + u64::MAX + } else { // Negative or NaN + 0 + } } + #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] - pub extern "C" fn __floattidf(i: i128) -> f64 { - int_to_float(i) + pub extern "C" fn __fixunsdfti(f: f64) -> u128 { + let fbits = f.to_bits(); + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1151 << 52 { // >= 1, < max + let m = 1 << 127 | (fbits as u128) << 75; // Mantissa and the implicit 1-bit. + let s = 1150 - (fbits >> 52); // Shift based on the exponent and bias. + m >> s + } else if fbits <= 2047 << 52 { // >= max (incl. inf) + u128::MAX + } else { // Negative or NaN + 0 + } } +} - #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] - pub extern "C" fn __floatuntisf(i: u128) -> f32 { - int_to_float(i) +// Conversions from floats to signed integers. +intrinsics! { + #[arm_aeabi_alias = __aeabi_f2iz] + pub extern "C" fn __fixsfsi(f: f32) -> i32 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 158 << 23 { // >= 1, < max + let m = 1 << 31 | fbits << 8; // Mantissa and the implicit 1-bit. + let s = 158 - (fbits >> 23); // Shift based on the exponent and bias. + let u = (m >> s) as i32; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 255 << 23 { // >= max (incl. inf) + if f.is_sign_negative() { i32::MIN } else { i32::MAX } + } else { // NaN + 0 + } } - #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] - pub extern "C" fn __floatuntidf(i: u128) -> f64 { - int_to_float(i) + #[arm_aeabi_alias = __aeabi_f2lz] + pub extern "C" fn __fixsfdi(f: f32) -> i64 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 190 << 23 { // >= 1, < max + let m = 1 << 63 | (fbits as u64) << 40; // Mantissa and the implicit 1-bit. 
+ let s = 190 - (fbits >> 23); // Shift based on the exponent and bias. + let u = (m >> s) as i64; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 255 << 23 { // >= max (incl. inf) + if f.is_sign_negative() { i64::MIN } else { i64::MAX } + } else { // NaN + 0 + } } #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] pub extern "C" fn __fixsfti(f: f32) -> i128 { - float_to_int(f) + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 254 << 23 { // >= 1, < max + let m = 1 << 127 | (fbits as u128) << 104; // Mantissa and the implicit 1-bit. + let s = 254 - (fbits >> 23); // Shift based on the exponent and bias. + let u = (m >> s) as i128; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 255 << 23 { // >= max (incl. inf) + if f.is_sign_negative() { i128::MIN } else { i128::MAX } + } else { // NaN + 0 + } } - #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] - #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] - pub extern "C" fn __fixdfti(f: f64) -> i128 { - float_to_int(f) + #[arm_aeabi_alias = __aeabi_d2iz] + pub extern "C" fn __fixdfsi(f: f64) -> i32 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1054 << 52 { // >= 1, < max + let m = 1 << 31 | (fbits >> 21) as u32; // Mantissa and the implicit 1-bit. + let s = 1054 - (fbits >> 52); // Shift based on the exponent and bias. + let u = (m >> s) as i32; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 2047 << 52 { // >= max (incl. 
inf) + if f.is_sign_negative() { i32::MIN } else { i32::MAX } + } else { // NaN + 0 + } } - #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] - #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] - pub extern "C" fn __fixunssfti(f: f32) -> u128 { - float_to_int(f) + #[arm_aeabi_alias = __aeabi_d2lz] + pub extern "C" fn __fixdfdi(f: f64) -> i64 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1086 << 52 { // >= 1, < max + let m = 1 << 63 | fbits << 11; // Mantissa and the implicit 1-bit. + let s = 1086 - (fbits >> 52); // Shift based on the exponent and bias. + let u = (m >> s) as i64; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 2047 << 52 { // >= max (incl. inf) + if f.is_sign_negative() { i64::MIN } else { i64::MAX } + } else { // NaN + 0 + } } #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] - pub extern "C" fn __fixunsdfti(f: f64) -> u128 { - float_to_int(f) + pub extern "C" fn __fixdfti(f: f64) -> i128 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1150 << 52 { // >= 1, < max + let m = 1 << 127 | (fbits as u128) << 75; // Mantissa and the implicit 1-bit. + let s = 1150 - (fbits >> 52); // Shift based on the exponent and bias. + let u = (m >> s) as i128; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 2047 << 52 { // >= max (incl. 
inf) + if f.is_sign_negative() { i128::MIN } else { i128::MAX } + } else { // NaN + 0 + } } } From 1ae1653276a38297e9f7d8d35b4a7d1b4290080d Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Mon, 23 May 2022 23:34:10 -0700 Subject: [PATCH 0527/1459] Avoid int to ptr transmute by casting first --- src/arm_linux.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arm_linux.rs b/src/arm_linux.rs index df1723d99..8fe09485b 100644 --- a/src/arm_linux.rs +++ b/src/arm_linux.rs @@ -4,11 +4,11 @@ use core::mem; // Kernel-provided user-mode helper functions: // https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt unsafe fn __kuser_cmpxchg(oldval: u32, newval: u32, ptr: *mut u32) -> bool { - let f: extern "C" fn(u32, u32, *mut u32) -> u32 = mem::transmute(0xffff0fc0u32); + let f: extern "C" fn(u32, u32, *mut u32) -> u32 = mem::transmute(0xffff0fc0usize as *const ()); f(oldval, newval, ptr) == 0 } unsafe fn __kuser_memory_barrier() { - let f: extern "C" fn() = mem::transmute(0xffff0fa0u32); + let f: extern "C" fn() = mem::transmute(0xffff0fa0usize as *const ()); f(); } From 83b996dcdb07f32198808bb0dcb6ae721b30ccbe Mon Sep 17 00:00:00 2001 From: Patryk Wychowaniec Date: Tue, 24 May 2022 19:49:08 +0200 Subject: [PATCH 0528/1459] Add avr_skip for __udivti3 & __umodti3 --- src/int/udiv.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/int/udiv.rs b/src/int/udiv.rs index c5ef4a6b1..fb09f87d8 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -57,6 +57,7 @@ intrinsics! { // Note: we use block configuration and not `if cfg!(...)`, because we need to entirely disable // the existence of `u128_div_rem` to get 32-bit SPARC to compile, see `u128_divide_sparc` docs. + #[avr_skip] #[win64_128bit_abi_hack] /// Returns `n / d` pub extern "C" fn __udivti3(n: u128, d: u128) -> u128 { @@ -68,6 +69,7 @@ intrinsics! 
{ } } + #[avr_skip] #[win64_128bit_abi_hack] /// Returns `n % d` pub extern "C" fn __umodti3(n: u128, d: u128) -> u128 { From b941399928dd870dabdb5681a01eb9a0d4c4a15f Mon Sep 17 00:00:00 2001 From: Mara Bos Date: Thu, 26 May 2022 17:17:38 +0200 Subject: [PATCH 0529/1459] Explicitly use parentheses for associativity of shift operators. --- src/float/conv.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/float/conv.rs b/src/float/conv.rs index 86847ed6c..07b58f3d2 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -10,8 +10,8 @@ mod int_to_float { return 0; } let n = i.leading_zeros(); - let a = i << n >> 8; // Significant bits, with bit 24 still in tact. - let b = i << n << 24; // Insignificant bits, only relevant for rounding. + let a = (i << n) >> 8; // Significant bits, with bit 24 still in tact. + let b = (i << n) << 24; // Insignificant bits, only relevant for rounding. let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. let e = 157 - n as u32; // Exponent plus 127, minus one. (e << 23) + m // + not |, so the mantissa can overflow into the exponent. @@ -42,8 +42,8 @@ mod int_to_float { return 0; } let n = i.leading_zeros(); - let a = (i << n >> 11) as u64; // Significant bits, with bit 53 still in tact. - let b = (i << n << 53) as u64; // Insignificant bits, only relevant for rounding. + let a = ((i << n) >> 11) as u64; // Significant bits, with bit 53 still in tact. + let b = ((i << n) << 53) as u64; // Insignificant bits, only relevant for rounding. let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even. let e = 1085 - n as u64; // Exponent plus 1023, minus one. (e << 52) + m // + not |, so the mantissa can overflow into the exponent. @@ -53,7 +53,7 @@ mod int_to_float { let n = i.leading_zeros(); let y = i.wrapping_shl(n); let a = (y >> 104) as u32; // Significant bits, with bit 24 still in tact. 
- let b = (y >> 72) as u32 | (y << 32 >> 32 != 0) as u32; // Insignificant bits, only relevant for rounding. + let b = (y >> 72) as u32 | ((y << 32) >> 32 != 0) as u32; // Insignificant bits, only relevant for rounding. let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. let e = if i == 0 { 0 } else { 253 - n }; // Exponent plus 127, minus one, except for zero. (e << 23) + m // + not |, so the mantissa can overflow into the exponent. From b5065a05b3fdd5fc5836b73617aa867e1e395f7f Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 26 May 2022 19:54:32 +0100 Subject: [PATCH 0530/1459] Bump to 0.1.73 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 99f294724..ac1794b18 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.72" +version = "0.1.73" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From f18ce3c73470913ac900a38850586986aeb50f16 Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Tue, 1 Mar 2022 09:50:55 +0100 Subject: [PATCH 0531/1459] Implement faster memcmp for x86_64 x86_64 can load unaligned words in a single cache line as fast as aligned words. Even when crossing cache or page boundaries it is just as fast to do an unaligned word read instead of multiple byte reads. Also add a couple more tests & benchmarks. 
--- src/mem/impls.rs | 14 ++++++ src/mem/mod.rs | 11 +---- src/mem/x86_64.rs | 41 +++++++++++++++ testcrate/benches/mem.rs | 104 +++++++++++++++++++++++++++++++++++++++ testcrate/tests/mem.rs | 38 ++++++++++++-- 5 files changed, 193 insertions(+), 15 deletions(-) diff --git a/src/mem/impls.rs b/src/mem/impls.rs index 815132425..f31366d76 100644 --- a/src/mem/impls.rs +++ b/src/mem/impls.rs @@ -265,3 +265,17 @@ pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) { } set_bytes_bytes(s, c, n); } + +#[inline(always)] +pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) { + let mut i = 0; + while i < n { + let a = *s1.add(i); + let b = *s2.add(i); + if a != b { + return a as i32 - b as i32; + } + i += 1; + } + 0 +} diff --git a/src/mem/mod.rs b/src/mem/mod.rs index a55113861..c5b0ddc16 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -51,16 +51,7 @@ intrinsics! { #[mem_builtin] #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { - let mut i = 0; - while i < n { - let a = *s1.add(i); - let b = *s2.add(i); - if a != b { - return a as i32 - b as i32; - } - i += 1; - } - 0 + impls::compare_bytes(s1, s2, n) } #[mem_builtin] diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index a7ab6f605..0bfacf713 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -16,6 +16,8 @@ // feature is present at compile-time. We don't bother detecting other features. // Note that ERMSB does not enhance the backwards (DF=1) "rep movsb". 
+use core::mem; + #[inline(always)] #[cfg(target_feature = "ermsb")] pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { @@ -98,3 +100,42 @@ pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { options(att_syntax, nostack, preserves_flags) ); } + +#[inline(always)] +pub unsafe fn compare_bytes( + a: *const u8, + b: *const u8, + n: usize, +) -> i32 { + unsafe fn cmp(mut a: *const T, mut b: *const T, n: usize, f: F) -> i32 + where + T: Clone + Copy + Eq, + U: Clone + Copy + Eq, + F: FnOnce(*const U, *const U, usize) -> i32, + { + for _ in 0..n / mem::size_of::() { + if a.read_unaligned() != b.read_unaligned() { + return f(a.cast(), b.cast(), mem::size_of::()); + } + a = a.add(1); + b = b.add(1); + } + f(a.cast(), b.cast(), n % mem::size_of::()) + } + let c1 = |mut a: *const u8, mut b: *const u8, n| { + for _ in 0..n { + if a.read() != b.read() { + return i32::from(a.read()) - i32::from(b.read()); + } + a = a.add(1); + b = b.add(1); + } + 0 + }; + let c2 = |a: *const u16, b, n| cmp(a, b, n, c1); + let c4 = |a: *const u32, b, n| cmp(a, b, n, c2); + let c8 = |a: *const u64, b, n| cmp(a, b, n, c4); + let c16 = |a: *const u128, b, n| cmp(a, b, n, c8); + let c32 = |a: *const [u128; 2], b, n| cmp(a, b, n, c16); + c32(a.cast(), b.cast(), n) +} diff --git a/testcrate/benches/mem.rs b/testcrate/benches/mem.rs index b6883a93b..98a040958 100644 --- a/testcrate/benches/mem.rs +++ b/testcrate/benches/mem.rs @@ -96,6 +96,18 @@ fn memcmp_builtin(b: &mut Bencher, n: usize) { }) } +fn memcmp_builtin_unaligned(b: &mut Bencher, n: usize) { + let v1 = AlignedVec::new(0, n); + let mut v2 = AlignedVec::new(0, n); + v2[n - 1] = 1; + b.bytes = n as u64; + b.iter(|| { + let s1: &[u8] = black_box(&v1[0..]); + let s2: &[u8] = black_box(&v2[1..]); + s1.cmp(s2) + }) +} + fn memcmp_rust(b: &mut Bencher, n: usize) { let v1 = AlignedVec::new(0, n); let mut v2 = AlignedVec::new(0, n); @@ -108,6 +120,18 @@ fn memcmp_rust(b: &mut Bencher, n: usize) { }) } +fn 
memcmp_rust_unaligned(b: &mut Bencher, n: usize) { + let v1 = AlignedVec::new(0, n); + let mut v2 = AlignedVec::new(0, n); + v2[n - 1] = 1; + b.bytes = n as u64; + b.iter(|| { + let s1: &[u8] = black_box(&v1[0..]); + let s2: &[u8] = black_box(&v2[1..]); + unsafe { memcmp(s1.as_ptr(), s2.as_ptr(), n - 1) } + }) +} + fn memmove_builtin(b: &mut Bencher, n: usize, offset: usize) { let mut v = AlignedVec::new(0, n + n / 2 + offset); b.bytes = n as u64; @@ -209,6 +233,38 @@ fn memset_rust_1048576_offset(b: &mut Bencher) { memset_rust(b, 1048576, 65) } +#[bench] +fn memcmp_builtin_8(b: &mut Bencher) { + memcmp_builtin(b, 8) +} +#[bench] +fn memcmp_rust_8(b: &mut Bencher) { + memcmp_rust(b, 8) +} +#[bench] +fn memcmp_builtin_16(b: &mut Bencher) { + memcmp_builtin(b, 16) +} +#[bench] +fn memcmp_rust_16(b: &mut Bencher) { + memcmp_rust(b, 16) +} +#[bench] +fn memcmp_builtin_32(b: &mut Bencher) { + memcmp_builtin(b, 32) +} +#[bench] +fn memcmp_rust_32(b: &mut Bencher) { + memcmp_rust(b, 32) +} +#[bench] +fn memcmp_builtin_64(b: &mut Bencher) { + memcmp_builtin(b, 64) +} +#[bench] +fn memcmp_rust_64(b: &mut Bencher) { + memcmp_rust(b, 64) +} #[bench] fn memcmp_builtin_4096(b: &mut Bencher) { memcmp_builtin(b, 4096) @@ -225,6 +281,54 @@ fn memcmp_builtin_1048576(b: &mut Bencher) { fn memcmp_rust_1048576(b: &mut Bencher) { memcmp_rust(b, 1048576) } +#[bench] +fn memcmp_builtin_unaligned_7(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 8) +} +#[bench] +fn memcmp_rust_unaligned_7(b: &mut Bencher) { + memcmp_rust_unaligned(b, 8) +} +#[bench] +fn memcmp_builtin_unaligned_15(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 16) +} +#[bench] +fn memcmp_rust_unaligned_15(b: &mut Bencher) { + memcmp_rust_unaligned(b, 16) +} +#[bench] +fn memcmp_builtin_unaligned_31(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 32) +} +#[bench] +fn memcmp_rust_unaligned_31(b: &mut Bencher) { + memcmp_rust_unaligned(b, 32) +} +#[bench] +fn memcmp_builtin_unaligned_63(b: &mut Bencher) { + 
memcmp_builtin_unaligned(b, 64) +} +#[bench] +fn memcmp_rust_unaligned_63(b: &mut Bencher) { + memcmp_rust_unaligned(b, 64) +} +#[bench] +fn memcmp_builtin_unaligned_4095(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 4096) +} +#[bench] +fn memcmp_rust_unaligned_4095(b: &mut Bencher) { + memcmp_rust_unaligned(b, 4096) +} +#[bench] +fn memcmp_builtin_unaligned_1048575(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 1048576) +} +#[bench] +fn memcmp_rust_unaligned_1048575(b: &mut Bencher) { + memcmp_rust_unaligned(b, 1048576) +} #[bench] fn memmove_builtin_4096(b: &mut Bencher) { diff --git a/testcrate/tests/mem.rs b/testcrate/tests/mem.rs index 3f20e72a0..69a63e71d 100644 --- a/testcrate/tests/mem.rs +++ b/testcrate/tests/mem.rs @@ -116,11 +116,13 @@ fn memset_nonzero() { #[test] fn memcmp_eq() { - let arr1: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; - let arr2: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; - unsafe { - assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 8), 0); - assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 3), 0); + let arr1: [u8; 32] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]; + let arr2: [u8; 32] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]; + for i in 0..32 { + unsafe { + assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), i), 0); + assert_eq!(memcmp(arr2.as_ptr(), arr1.as_ptr(), i), 0); + } } } @@ -134,6 +136,32 @@ fn memcmp_ne() { } } +#[test] +fn memcmp_ne_16() { + let arr1: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let arr2: [u8; 16] = [0, 1, 2, 3, 4, 5, 7, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + unsafe { + assert!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 16) < 0); + assert!(memcmp(arr2.as_ptr(), arr1.as_ptr(), 16) > 0); + } +} + +#[test] +fn memcmp_ne_32() { + let arr1: [u8; 32] = [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + 
]; + let arr2: [u8; 32] = [ + 0, 1, 2, 3, 4, 5, 7, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + ]; + unsafe { + assert!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 32) < 0); + assert!(memcmp(arr2.as_ptr(), arr1.as_ptr(), 32) > 0); + } +} + #[derive(Clone, Copy)] struct AlignedStorage([u8; N], [usize; 0]); From 83b4edd12b687399c82ad5751cdeaf172bf6c8d3 Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Fri, 27 May 2022 22:37:54 +0200 Subject: [PATCH 0532/1459] Fix formatting --- testcrate/tests/mem.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/testcrate/tests/mem.rs b/testcrate/tests/mem.rs index 69a63e71d..8385dfc2c 100644 --- a/testcrate/tests/mem.rs +++ b/testcrate/tests/mem.rs @@ -116,8 +116,14 @@ fn memset_nonzero() { #[test] fn memcmp_eq() { - let arr1: [u8; 32] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]; - let arr2: [u8; 32] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]; + let arr1: [u8; 32] = [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, + ]; + let arr2: [u8; 32] = [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, + ]; for i in 0..32 { unsafe { assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), i), 0); From 51103386365b7972e1e14a7b4504ff86024b4823 Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Sat, 28 May 2022 00:09:02 +0200 Subject: [PATCH 0533/1459] Fix CI, better memcmp tests --- src/mem/impls.rs | 2 +- src/mem/x86_64.rs | 6 +---- testcrate/tests/mem.rs | 55 ++++++++++-------------------------------- 3 files changed, 15 insertions(+), 48 deletions(-) diff --git a/src/mem/impls.rs b/src/mem/impls.rs index f31366d76..72003a5c4 100644 --- a/src/mem/impls.rs +++ b/src/mem/impls.rs 
@@ -267,7 +267,7 @@ pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) { } #[inline(always)] -pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) { +pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 { let mut i = 0; while i < n { let a = *s1.add(i); diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 0bfacf713..fc89aa768 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -102,11 +102,7 @@ pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { } #[inline(always)] -pub unsafe fn compare_bytes( - a: *const u8, - b: *const u8, - n: usize, -) -> i32 { +pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { unsafe fn cmp(mut a: *const T, mut b: *const T, n: usize, f: F) -> i32 where T: Clone + Copy + Eq, diff --git a/testcrate/tests/mem.rs b/testcrate/tests/mem.rs index 8385dfc2c..48ac95adc 100644 --- a/testcrate/tests/mem.rs +++ b/testcrate/tests/mem.rs @@ -116,55 +116,26 @@ fn memset_nonzero() { #[test] fn memcmp_eq() { - let arr1: [u8; 32] = [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, - ]; - let arr2: [u8; 32] = [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, - ]; - for i in 0..32 { + let arr1 @ arr2 = gen_arr::<256>(); + for i in 0..256 { unsafe { - assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), i), 0); - assert_eq!(memcmp(arr2.as_ptr(), arr1.as_ptr(), i), 0); + assert_eq!(memcmp(arr1.0.as_ptr(), arr2.0.as_ptr(), i), 0); + assert_eq!(memcmp(arr2.0.as_ptr(), arr1.0.as_ptr(), i), 0); } } } #[test] fn memcmp_ne() { - let arr1: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; - let arr2: [u8; 8] = [0, 1, 2, 3, 4, 5, 7, 7]; - unsafe { - assert!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 8) < 0); - assert!(memcmp(arr2.as_ptr(), arr1.as_ptr(), 8) > 0); - } -} - -#[test] -fn memcmp_ne_16() { - let arr1: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 15]; - let arr2: [u8; 16] = [0, 1, 2, 3, 4, 5, 7, 7, 8, 9, 10, 11, 12, 13, 14, 15]; - unsafe { - assert!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 16) < 0); - assert!(memcmp(arr2.as_ptr(), arr1.as_ptr(), 16) > 0); - } -} - -#[test] -fn memcmp_ne_32() { - let arr1: [u8; 32] = [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - ]; - let arr2: [u8; 32] = [ - 0, 1, 2, 3, 4, 5, 7, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - ]; - unsafe { - assert!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 32) < 0); - assert!(memcmp(arr2.as_ptr(), arr1.as_ptr(), 32) > 0); + let arr1 @ arr2 = gen_arr::<256>(); + for i in 0..256 { + let mut diff_arr = arr1; + diff_arr.0[i] = 127; + let expect = diff_arr.0[i].cmp(&arr2.0[i]); + for k in i + 1..256 { + let result = unsafe { memcmp(diff_arr.0.as_ptr(), arr2.0.as_ptr(), k) }; + assert_eq!(expect, result.cmp(&0)); + } } } From 6c1adedba58911ea2c1c922e9a4a7cfa0b3d7304 Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Sat, 28 May 2022 00:50:05 +0200 Subject: [PATCH 0534/1459] Always inline compare_bytes::cmp --- src/mem/x86_64.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index fc89aa768..65b61224d 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -103,6 +103,7 @@ pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { #[inline(always)] pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { + #[inline(always)] unsafe fn cmp(mut a: *const T, mut b: *const T, n: usize, f: F) -> i32 where T: Clone + Copy + Eq, From 03c8bebda424dc183460a30a6091e3c3734b6a9a Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Sat, 28 May 2022 01:23:50 +0200 Subject: [PATCH 0535/1459] Fix panic not being optimized out. I don't know why it isn't being optimized out though, which worries me. 
--- src/lib.rs | 1 + src/mem/x86_64.rs | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 009923d27..acac040be 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,6 +6,7 @@ #![feature(compiler_builtins)] #![feature(core_ffi_c)] #![feature(core_intrinsics)] +#![feature(inline_const)] #![feature(lang_items)] #![feature(linkage)] #![feature(naked_functions)] diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 65b61224d..6eecd5a51 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -110,14 +110,20 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { U: Clone + Copy + Eq, F: FnOnce(*const U, *const U, usize) -> i32, { - for _ in 0..n / mem::size_of::() { + // Just to be sure we're actually working with powers of two... + let _ = const { 1 - mem::size_of::().count_ones() }; // <= 1 + let _ = const { mem::size_of::().count_ones() - 1 }; // >= 1 + // This should be equivalent to division with power-of-two sizes, except the former + // somehow still leaves a call to panic because ?? + for _ in 0..n >> mem::size_of::().trailing_zeros() { if a.read_unaligned() != b.read_unaligned() { return f(a.cast(), b.cast(), mem::size_of::()); } a = a.add(1); b = b.add(1); } - f(a.cast(), b.cast(), n % mem::size_of::()) + // Ditto + f(a.cast(), b.cast(), n & (mem::size_of::() - 1)) } let c1 = |mut a: *const u8, mut b: *const u8, n| { for _ in 0..n { From ae069f1e1f772791a12967802869c82f82233511 Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Sat, 28 May 2022 08:16:46 +0200 Subject: [PATCH 0536/1459] Fix rustfmt sillyness --- src/mem/x86_64.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 6eecd5a51..66b51fedf 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -113,6 +113,7 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { // Just to be sure we're actually working with powers of two... 
let _ = const { 1 - mem::size_of::().count_ones() }; // <= 1 let _ = const { mem::size_of::().count_ones() - 1 }; // >= 1 + // This should be equivalent to division with power-of-two sizes, except the former // somehow still leaves a call to panic because ?? for _ in 0..n >> mem::size_of::().trailing_zeros() { From f15f99f2f5d8aa79bcc90907de853787adee4f4f Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Sat, 28 May 2022 22:46:16 +0200 Subject: [PATCH 0537/1459] Slightly optimize main (32b) memcmp loop It only seems to save a single instruction at first sight yet the effects are significant. --- src/mem/x86_64.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 66b51fedf..2b4875697 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -116,7 +116,8 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { // This should be equivalent to division with power-of-two sizes, except the former // somehow still leaves a call to panic because ?? - for _ in 0..n >> mem::size_of::().trailing_zeros() { + let end = a.add(n >> mem::size_of::().trailing_zeros()); + while a != end { if a.read_unaligned() != b.read_unaligned() { return f(a.cast(), b.cast(), mem::size_of::()); } From 22c06e413b83c6b610b2db7498feb76e6a4dbe26 Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Tue, 31 May 2022 08:20:30 +0200 Subject: [PATCH 0538/1459] Use unchecked_div/rem --- src/mem/x86_64.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 2b4875697..4d2f6e5ee 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -16,6 +16,7 @@ // feature is present at compile-time. We don't bother detecting other features. // Note that ERMSB does not enhance the backwards (DF=1) "rep movsb". 
+use core::intrinsics; use core::mem; #[inline(always)] @@ -110,13 +111,10 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { U: Clone + Copy + Eq, F: FnOnce(*const U, *const U, usize) -> i32, { - // Just to be sure we're actually working with powers of two... - let _ = const { 1 - mem::size_of::().count_ones() }; // <= 1 - let _ = const { mem::size_of::().count_ones() - 1 }; // >= 1 + // Ensure T is not a ZST. + const { assert!(mem::size_of::() != 0) }; - // This should be equivalent to division with power-of-two sizes, except the former - // somehow still leaves a call to panic because ?? - let end = a.add(n >> mem::size_of::().trailing_zeros()); + let end = a.add(intrinsics::unchecked_div(n, mem::size_of::())); while a != end { if a.read_unaligned() != b.read_unaligned() { return f(a.cast(), b.cast(), mem::size_of::()); @@ -124,8 +122,11 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { a = a.add(1); b = b.add(1); } - // Ditto - f(a.cast(), b.cast(), n & (mem::size_of::() - 1)) + f( + a.cast(), + b.cast(), + intrinsics::unchecked_rem(n, mem::size_of::()), + ) } let c1 = |mut a: *const u8, mut b: *const u8, n| { for _ in 0..n { From dde946c52be80fcc17d4cdc4f34174eeb51cbfaa Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Tue, 31 May 2022 17:19:27 +0100 Subject: [PATCH 0539/1459] Bump to 0.1.74 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index ac1794b18..31cf41265 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.73" +version = "0.1.74" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 4fa69f1262fe9b330537b15ba088838104ce4dc9 Mon Sep 17 00:00:00 2001 From: Sean Cross Date: Thu, 9 Jun 2022 08:40:15 +0800 Subject: [PATCH 0540/1459] build: compile C code for "xous" operating system The "xous" 
operating system is entirely Rust-based, meaning it has no libc. Therefore, it relies on `compiler-builtins` for all intrinsics. Unfortunately, there are not yet Rust equivalents for all C functions. For example, trigonometric functions are still missing. In the meantime, enable C replacements for these functions so that Rust programs compiled for Xous can call these functions. Signed-off-by: Sean Cross --- build.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/build.rs b/build.rs index 11deffb9c..876224934 100644 --- a/build.rs +++ b/build.rs @@ -58,9 +58,11 @@ fn main() { // unlikely that the C is really that much better than our own Rust. // * nvptx - everything is bitcode, not compatible with mixed C/Rust // * riscv - the rust-lang/rust distribution container doesn't have a C - // compiler nor is cc-rs ready for compilation to riscv (at this - // time). This can probably be removed in the future - if !target.contains("wasm") && !target.contains("nvptx") && !target.starts_with("riscv") { + // compiler. + if !target.contains("wasm") + && !target.contains("nvptx") + && (!target.starts_with("riscv") || target.contains("xous")) + { #[cfg(feature = "c")] c::compile(&llvm_target, &target); } From b22eef941f08efd67e02f69e974104c43384690e Mon Sep 17 00:00:00 2001 From: Sean Cross Date: Thu, 9 Sep 2021 11:21:33 +0800 Subject: [PATCH 0541/1459] math: compile math functions for Xous This adds support for Xous, enabling users to call math functions on primitives such as `cos()`.
Signed-off-by: Sean Cross --- src/lib.rs | 1 + src/math.rs | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index acac040be..e7bc61e4c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,6 +46,7 @@ pub mod int; all(target_family = "wasm", target_os = "unknown"), all(target_arch = "x86_64", target_os = "uefi"), all(target_arch = "arm", target_os = "none"), + target_os = "xous", all(target_vendor = "fortanix", target_env = "sgx") ))] pub mod math; diff --git a/src/math.rs b/src/math.rs index fa59753f8..14a65395e 100644 --- a/src/math.rs +++ b/src/math.rs @@ -20,6 +20,7 @@ macro_rules! no_mangle { target_os = "unknown", not(target_env = "wasi") ), + target_os = "xous", all(target_arch = "x86_64", target_os = "uefi"), all(target_arch = "xtensa", target_os = "none"), all(target_vendor = "fortanix", target_env = "sgx") @@ -70,6 +71,7 @@ no_mangle! { target_os = "unknown", not(target_env = "wasi") ), + target_os = "xous", all(target_arch = "xtensa", target_os = "none"), all(target_vendor = "fortanix", target_env = "sgx") ))] @@ -93,7 +95,16 @@ no_mangle! { fn tanf(n: f32) -> f32; } -#[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] +#[cfg(target_os = "xous")] +no_mangle! { + fn sqrtf(x: f32) -> f32; + fn sqrt(x: f64) -> f64; +} + +#[cfg(any( + all(target_vendor = "fortanix", target_env = "sgx"), + target_os = "xous" +))] no_mangle! 
{ fn ceil(x: f64) -> f64; fn ceilf(x: f32) -> f32; From 3c67f0463258f58f7b26fe49dfe1d67ca71053db Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Sat, 11 Jun 2022 09:13:28 +0200 Subject: [PATCH 0542/1459] Fix infinite recursion in x86_64 memcmp if SSE2 is not present Fixes #470 --- src/mem/x86_64.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 4d2f6e5ee..3b372d10d 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -143,5 +143,16 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { let c8 = |a: *const u64, b, n| cmp(a, b, n, c4); let c16 = |a: *const u128, b, n| cmp(a, b, n, c8); let c32 = |a: *const [u128; 2], b, n| cmp(a, b, n, c16); - c32(a.cast(), b.cast(), n) + // [u128; 2] internally uses raw_eq for comparisons, which may emit a call to memcmp + // above a certain size threshold. When SSE2 is enabled this threshold does not seem + // to be reached but without SSE2 a call is emitted, leading to infinite recursion. + // + // While replacing [u128; 2] with (u128, u128) fixes the issues it degrades performance + // severely. Likewise, removing c32() has a lesser but still significant impact. Instead the + // [u128; 2] case is only enabled when SSE2 is present. 
+ if cfg!(target_feature = "sse2") { + c32(a.cast(), b.cast(), n) + } else { + c16(a.cast(), b.cast(), n) + } } From f10dbd9bb534b7fa945bff7ff2d7299c38dda389 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 12 Jun 2022 01:19:53 +0100 Subject: [PATCH 0543/1459] Bump to 0.1.75 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 31cf41265..520a1f7dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.74" +version = "0.1.75" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 7eb578cb99e6cab15d84b6bde1a2e33fa6e95624 Mon Sep 17 00:00:00 2001 From: Ayush Singh Date: Tue, 28 Jun 2022 20:16:07 +0530 Subject: [PATCH 0544/1459] Enable mem for UEFI Signed-off-by: Ayush Singh --- build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/build.rs b/build.rs index 11deffb9c..8d482fa21 100644 --- a/build.rs +++ b/build.rs @@ -29,6 +29,7 @@ fn main() { || (target.contains("sgx") && target.contains("fortanix")) || target.contains("-none") || target.contains("nvptx") + || target.contains("uefi") { println!("cargo:rustc-cfg=feature=\"mem\""); } From 3872a7c38c64279374b46bed5c8dec45e0a5b4fd Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 29 Jun 2022 01:08:54 +0100 Subject: [PATCH 0545/1459] Bump to 0.1.76 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 520a1f7dc..22ce34d36 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.75" +version = "0.1.76" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 3d3432916aacf4950f4652cbf0e788576e3ed252 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Fri, 22 Jul 2022 17:14:18 -0600 Subject: [PATCH 0546/1459] add 
weak linkage to the ARM AEABI division functions --- src/arm.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/arm.rs b/src/arm.rs index 9c1b6ad12..f3b22544a 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -22,6 +22,7 @@ intrinsics! { // custom calling convention which can't be implemented using a normal Rust function. #[naked] #[cfg(not(target_env = "msvc"))] + #[linkage = "weak"] pub unsafe extern "C" fn __aeabi_uidivmod() { core::arch::asm!( "push {{lr}}", @@ -36,6 +37,7 @@ intrinsics! { } #[naked] + #[linkage = "weak"] pub unsafe extern "C" fn __aeabi_uldivmod() { core::arch::asm!( "push {{r4, lr}}", @@ -52,6 +54,7 @@ intrinsics! { } #[naked] + #[linkage = "weak"] pub unsafe extern "C" fn __aeabi_idivmod() { core::arch::asm!( "push {{r0, r1, r4, lr}}", @@ -65,6 +68,7 @@ intrinsics! { } #[naked] + #[linkage = "weak"] pub unsafe extern "C" fn __aeabi_ldivmod() { core::arch::asm!( "push {{r4, lr}}", From 265fdacab9b3c63b2c17a42fb17c51996c703ef8 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 28 Jul 2022 16:02:04 +0100 Subject: [PATCH 0547/1459] Disable some PPC64 tests which are failing due to an LLVM(?) 
bug See https://github.com/rust-lang/rust/issues/99853 --- testcrate/tests/mem.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/testcrate/tests/mem.rs b/testcrate/tests/mem.rs index 48ac95adc..5099d69ed 100644 --- a/testcrate/tests/mem.rs +++ b/testcrate/tests/mem.rs @@ -230,6 +230,8 @@ fn memmove_backward_aligned() { } } +// PowerPC tests are failing: https://github.com/rust-lang/rust/issues/99853 +#[cfg(not(target_arch = "powerpc64"))] #[test] fn memset_backward_misaligned_nonaligned_start() { let mut arr = gen_arr::<32>(); @@ -242,6 +244,8 @@ fn memset_backward_misaligned_nonaligned_start() { } } +// PowerPC tests are failing: https://github.com/rust-lang/rust/issues/99853 +#[cfg(not(target_arch = "powerpc64"))] #[test] fn memset_backward_misaligned_aligned_start() { let mut arr = gen_arr::<32>(); @@ -254,6 +258,8 @@ fn memset_backward_misaligned_aligned_start() { } } +// PowerPC tests are failing: https://github.com/rust-lang/rust/issues/99853 +#[cfg(not(target_arch = "powerpc64"))] #[test] fn memset_backward_aligned() { let mut arr = gen_arr::<32>(); From 81b22d4b0493cc0ffc4f9f5df57dd90943a6dd70 Mon Sep 17 00:00:00 2001 From: Ayush Singh Date: Thu, 28 Jul 2022 17:58:16 +0530 Subject: [PATCH 0548/1459] Use all of src/math for UEFI This is needed for libtest --- src/math.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/math.rs b/src/math.rs index 14a65395e..7337dabb9 100644 --- a/src/math.rs +++ b/src/math.rs @@ -72,6 +72,7 @@ no_mangle! { not(target_env = "wasi") ), target_os = "xous", + all(target_arch = "x86_64", target_os = "uefi"), all(target_arch = "xtensa", target_os = "none"), all(target_vendor = "fortanix", target_env = "sgx") ))] @@ -95,7 +96,7 @@ no_mangle! { fn tanf(n: f32) -> f32; } -#[cfg(target_os = "xous")] +#[cfg(any(target_os = "xous", target_os = "uefi"))] no_mangle! { fn sqrtf(x: f32) -> f32; fn sqrt(x: f64) -> f64; @@ -103,7 +104,8 @@ no_mangle! 
{ #[cfg(any( all(target_vendor = "fortanix", target_env = "sgx"), - target_os = "xous" + target_os = "xous", + target_os = "uefi" ))] no_mangle! { fn ceil(x: f64) -> f64; From c2a912422570eb9d6b6cc2db5a8ccb31f5aa0dd6 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Thu, 28 Jul 2022 09:42:18 -0600 Subject: [PATCH 0549/1459] restrict linkage to platforms using ELF binaries on windows and apple (which don't use ELF) we can't apply weak linkage --- src/arm.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/arm.rs b/src/arm.rs index f3b22544a..e517a9ef3 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -22,7 +22,7 @@ intrinsics! { // custom calling convention which can't be implemented using a normal Rust function. #[naked] #[cfg(not(target_env = "msvc"))] - #[linkage = "weak"] + #[cfg_attr(all(not(windows), not(target_vendor="apple")), linkage = "weak")] pub unsafe extern "C" fn __aeabi_uidivmod() { core::arch::asm!( "push {{lr}}", @@ -37,7 +37,7 @@ intrinsics! { } #[naked] - #[linkage = "weak"] + #[cfg_attr(all(not(windows), not(target_vendor="apple")), linkage = "weak")] pub unsafe extern "C" fn __aeabi_uldivmod() { core::arch::asm!( "push {{r4, lr}}", @@ -54,7 +54,7 @@ intrinsics! { } #[naked] - #[linkage = "weak"] + #[cfg_attr(all(not(windows), not(target_vendor="apple")), linkage = "weak")] pub unsafe extern "C" fn __aeabi_idivmod() { core::arch::asm!( "push {{r0, r1, r4, lr}}", @@ -68,7 +68,7 @@ intrinsics! { } #[naked] - #[linkage = "weak"] + #[cfg_attr(all(not(windows), not(target_vendor="apple")), linkage = "weak")] pub unsafe extern "C" fn __aeabi_ldivmod() { core::arch::asm!( "push {{r4, lr}}", From e9f895d5f940a2672e927b1af5a99438022f6f9c Mon Sep 17 00:00:00 2001 From: Nicholas Bishop Date: Sat, 9 Jul 2022 22:41:14 -0400 Subject: [PATCH 0550/1459] Enable win64_128bit_abi_hack for x86_64-unknown-uefi The `x86_64-unknown-uefi` target is Windows-like [1], and requires the same altered ABI for some 128-bit integer intrinsics. 
See also https://github.com/rust-lang/rust/issues/86494. [1]: https://github.com/rust-lang/rust/blob/master/compiler/rustc_target/src/spec/x86_64_unknown_uefi.rs --- src/macros.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/macros.rs b/src/macros.rs index 518a18d4d..6acf1be96 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -209,13 +209,13 @@ macro_rules! intrinsics { $($rest:tt)* ) => ( - #[cfg(all(windows, target_arch = "x86_64"))] + #[cfg(all(any(windows, target_os = "uefi"), target_arch = "x86_64"))] $(#[$($attr)*])* pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } - #[cfg(all(windows, target_arch = "x86_64"))] + #[cfg(all(any(windows, target_os = "uefi"), target_arch = "x86_64"))] pub mod $name { #[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub extern $abi fn $name( $($argname: $ty),* ) @@ -226,7 +226,7 @@ macro_rules! intrinsics { } } - #[cfg(not(all(windows, target_arch = "x86_64")))] + #[cfg(not(all(any(windows, target_os = "uefi"), target_arch = "x86_64")))] intrinsics! { $(#[$($attr)*])* pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { @@ -426,7 +426,7 @@ macro_rules! intrinsics { // Hack for LLVM expectations for ABI on windows. This is used by the // `#[win64_128bit_abi_hack]` attribute recognized above -#[cfg(all(windows, target_pointer_width = "64"))] +#[cfg(all(any(windows, target_os = "uefi"), target_pointer_width = "64"))] pub mod win64_128bit_abi_hack { #[repr(simd)] pub struct U64x2(u64, u64); From c30322aafc9c72b4f5f1ee5ce21ff5340dbb9173 Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Sat, 2 Jul 2022 23:54:30 +0200 Subject: [PATCH 0551/1459] Align destination in mem* instructions. While misaligned reads are generally fast, misaligned writes aren't and can have severe penalties. 
--- src/mem/x86_64.rs | 130 +++++++++++++++++++++++++++++++++------------- 1 file changed, 94 insertions(+), 36 deletions(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 3b372d10d..68ef17f1e 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -16,6 +16,7 @@ // feature is present at compile-time. We don't bother detecting other features. // Note that ERMSB does not enhance the backwards (DF=1) "rep movsb". +use core::arch::asm; use core::intrinsics; use core::mem; @@ -34,40 +35,61 @@ pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { #[inline(always)] #[cfg(not(target_feature = "ermsb"))] -pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { - let qword_count = count >> 3; - let byte_count = count & 0b111; - // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. - core::arch::asm!( - "repe movsq (%rsi), (%rdi)", - "mov {byte_count:e}, %ecx", - "repe movsb (%rsi), (%rdi)", - byte_count = in(reg) byte_count, +pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, count: usize) { + let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count); + // Separating the blocks gives the compiler more freedom to reorder instructions. + // It also allows us to trivially skip the rep movsb, which is faster when memcpying + // aligned data. 
+ if pre_byte_count > 0 { + asm!( + "rep movsb", + inout("ecx") pre_byte_count => _, + inout("rdi") dest => dest, + inout("rsi") src => src, + options(nostack, preserves_flags) + ); + } + asm!( + "rep movsq", inout("rcx") qword_count => _, - inout("rdi") dest => _, - inout("rsi") src => _, - options(att_syntax, nostack, preserves_flags) + inout("rdi") dest => dest, + inout("rsi") src => src, + options(nostack, preserves_flags) ); + if byte_count > 0 { + asm!( + "rep movsb", + inout("ecx") byte_count => _, + inout("rdi") dest => _, + inout("rsi") src => _, + options(nostack, preserves_flags) + ); + } } #[inline(always)] pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { - let qword_count = count >> 3; - let byte_count = count & 0b111; - // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. - core::arch::asm!( + let (pre_byte_count, qword_count, byte_count) = rep_param_rev(dest, count); + // We can't separate this block due to std/cld + asm!( "std", - "repe movsq (%rsi), (%rdi)", - "movl {byte_count:e}, %ecx", - "addq $7, %rdi", - "addq $7, %rsi", - "repe movsb (%rsi), (%rdi)", + "rep movsb", + "sub rsi, 7", + "sub rdi, 7", + "mov rcx, {qword_count}", + "rep movsq", + "add rsi, 7", + "add rdi, 7", + "mov ecx, {byte_count:e}", + "rep movsb", "cld", byte_count = in(reg) byte_count, - inout("rcx") qword_count => _, - inout("rdi") dest.add(count).wrapping_sub(8) => _, - inout("rsi") src.add(count).wrapping_sub(8) => _, - options(att_syntax, nostack) + qword_count = in(reg) qword_count, + inout("ecx") pre_byte_count => _, + inout("rdi") dest.add(count - 1) => _, + inout("rsi") src.add(count - 1) => _, + // We modify flags, but we restore it afterwards + options(nostack, preserves_flags) ); } @@ -86,20 +108,36 @@ pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { #[inline(always)] #[cfg(not(target_feature = "ermsb"))] -pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { - let qword_count = count >> 3; - 
let byte_count = count & 0b111; - // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. - core::arch::asm!( - "repe stosq %rax, (%rdi)", - "mov {byte_count:e}, %ecx", - "repe stosb %al, (%rdi)", - byte_count = in(reg) byte_count, +pub unsafe fn set_bytes(mut dest: *mut u8, c: u8, count: usize) { + let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count); + // Separating the blocks gives the compiler more freedom to reorder instructions. + // It also allows us to trivially skip the rep stosb, which is faster when memcpying + // aligned data. + if pre_byte_count > 0 { + asm!( + "rep stosb", + inout("ecx") pre_byte_count => _, + inout("rdi") dest => dest, + in("al") c, + options(nostack, preserves_flags) + ); + } + asm!( + "rep stosq", inout("rcx") qword_count => _, - inout("rdi") dest => _, + inout("rdi") dest => dest, in("rax") (c as u64) * 0x0101010101010101, - options(att_syntax, nostack, preserves_flags) + options(nostack, preserves_flags) ); + if byte_count > 0 { + asm!( + "rep stosb", + inout("ecx") byte_count => _, + inout("rdi") dest => _, + in("al") c, + options(nostack, preserves_flags) + ); + } } #[inline(always)] @@ -156,3 +194,23 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { c16(a.cast(), b.cast(), n) } } + +/// Determine optimal parameters for a `rep` instruction. +fn rep_param(dest: *mut u8, mut count: usize) -> (usize, usize, usize) { + // Unaligned writes are still slow on modern processors, so align the destination address. + let pre_byte_count = ((8 - (dest as usize & 0b111)) & 0b111).min(count); + count -= pre_byte_count; + let qword_count = count >> 3; + let byte_count = count & 0b111; + (pre_byte_count, qword_count, byte_count) +} + +/// Determine optimal parameters for a reverse `rep` instruction (i.e. direction bit is set). 
+fn rep_param_rev(dest: *mut u8, mut count: usize) -> (usize, usize, usize) { + // Unaligned writes are still slow on modern processors, so align the destination address. + let pre_byte_count = ((dest as usize + count) & 0b111).min(count); + count -= pre_byte_count; + let qword_count = count >> 3; + let byte_count = count & 0b111; + (pre_byte_count, qword_count, byte_count) +} From 314354d2b42460c21273e0ce5ec163491e94a796 Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Thu, 7 Jul 2022 11:53:44 +0200 Subject: [PATCH 0552/1459] Fix suboptimal codegen in memset --- src/mem/x86_64.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 68ef17f1e..3cbbbba53 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -109,6 +109,7 @@ pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { #[inline(always)] #[cfg(not(target_feature = "ermsb"))] pub unsafe fn set_bytes(mut dest: *mut u8, c: u8, count: usize) { + let c = c as u64 * 0x0101_0101_0101_0101; let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count); // Separating the blocks gives the compiler more freedom to reorder instructions. 
// It also allows us to trivially skip the rep stosb, which is faster when memcpying @@ -118,7 +119,7 @@ pub unsafe fn set_bytes(mut dest: *mut u8, c: u8, count: usize) { "rep stosb", inout("ecx") pre_byte_count => _, inout("rdi") dest => dest, - in("al") c, + in("rax") c, options(nostack, preserves_flags) ); } @@ -126,7 +127,7 @@ pub unsafe fn set_bytes(mut dest: *mut u8, c: u8, count: usize) { "rep stosq", inout("rcx") qword_count => _, inout("rdi") dest => dest, - in("rax") (c as u64) * 0x0101010101010101, + in("rax") c, options(nostack, preserves_flags) ); if byte_count > 0 { @@ -134,7 +135,7 @@ pub unsafe fn set_bytes(mut dest: *mut u8, c: u8, count: usize) { "rep stosb", inout("ecx") byte_count => _, inout("rdi") dest => _, - in("al") c, + in("rax") c, options(nostack, preserves_flags) ); } From a1dd5a8946f6019b77e43e792446a0e4ec3e4671 Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Thu, 7 Jul 2022 13:13:19 +0200 Subject: [PATCH 0553/1459] Remove rep_param_rev --- src/mem/x86_64.rs | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 3cbbbba53..483a3e31b 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -69,7 +69,7 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, count: usize) #[inline(always)] pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { - let (pre_byte_count, qword_count, byte_count) = rep_param_rev(dest, count); + let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count); // We can't separate this block due to std/cld asm!( "std", @@ -80,12 +80,12 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { "rep movsq", "add rsi, 7", "add rdi, 7", - "mov ecx, {byte_count:e}", + "mov ecx, {pre_byte_count:e}", "rep movsb", "cld", - byte_count = in(reg) byte_count, + pre_byte_count = in(reg) pre_byte_count, qword_count = in(reg) qword_count, - inout("ecx") pre_byte_count => _, + 
inout("ecx") byte_count => _, inout("rdi") dest.add(count - 1) => _, inout("rsi") src.add(count - 1) => _, // We modify flags, but we restore it afterwards @@ -205,13 +205,3 @@ fn rep_param(dest: *mut u8, mut count: usize) -> (usize, usize, usize) { let byte_count = count & 0b111; (pre_byte_count, qword_count, byte_count) } - -/// Determine optimal parameters for a reverse `rep` instruction (i.e. direction bit is set). -fn rep_param_rev(dest: *mut u8, mut count: usize) -> (usize, usize, usize) { - // Unaligned writes are still slow on modern processors, so align the destination address. - let pre_byte_count = ((dest as usize + count) & 0b111).min(count); - count -= pre_byte_count; - let qword_count = count >> 3; - let byte_count = count & 0b111; - (pre_byte_count, qword_count, byte_count) -} From 387f83ea1d8b91f03fd8d03e8597b3c5a19236f2 Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Thu, 7 Jul 2022 13:19:06 +0200 Subject: [PATCH 0554/1459] Use att_syntax for now --- src/mem/x86_64.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 483a3e31b..a1015f612 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -46,7 +46,7 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, count: usize) inout("ecx") pre_byte_count => _, inout("rdi") dest => dest, inout("rsi") src => src, - options(nostack, preserves_flags) + options(att_syntax, nostack, preserves_flags) ); } asm!( @@ -54,7 +54,7 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, count: usize) inout("rcx") qword_count => _, inout("rdi") dest => dest, inout("rsi") src => src, - options(nostack, preserves_flags) + options(att_syntax, nostack, preserves_flags) ); if byte_count > 0 { asm!( @@ -62,7 +62,7 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, count: usize) inout("ecx") byte_count => _, inout("rdi") dest => _, inout("rsi") src => _, - options(nostack, 
preserves_flags) + options(att_syntax, nostack, preserves_flags) ); } } @@ -74,13 +74,13 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { asm!( "std", "rep movsb", - "sub rsi, 7", - "sub rdi, 7", - "mov rcx, {qword_count}", + "sub $7, %rsi", + "sub $7, %rdi", + "mov {qword_count}, %rcx", "rep movsq", - "add rsi, 7", - "add rdi, 7", - "mov ecx, {pre_byte_count:e}", + "add $7, %rsi", + "add $7, %rdi", + "mov {pre_byte_count:e}, %ecx", "rep movsb", "cld", pre_byte_count = in(reg) pre_byte_count, @@ -89,7 +89,7 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { inout("rdi") dest.add(count - 1) => _, inout("rsi") src.add(count - 1) => _, // We modify flags, but we restore it afterwards - options(nostack, preserves_flags) + options(att_syntax, nostack, preserves_flags) ); } @@ -120,7 +120,7 @@ pub unsafe fn set_bytes(mut dest: *mut u8, c: u8, count: usize) { inout("ecx") pre_byte_count => _, inout("rdi") dest => dest, in("rax") c, - options(nostack, preserves_flags) + options(att_syntax, nostack, preserves_flags) ); } asm!( @@ -128,7 +128,7 @@ pub unsafe fn set_bytes(mut dest: *mut u8, c: u8, count: usize) { inout("rcx") qword_count => _, inout("rdi") dest => dest, in("rax") c, - options(nostack, preserves_flags) + options(att_syntax, nostack, preserves_flags) ); if byte_count > 0 { asm!( @@ -136,7 +136,7 @@ pub unsafe fn set_bytes(mut dest: *mut u8, c: u8, count: usize) { inout("ecx") byte_count => _, inout("rdi") dest => _, in("rax") c, - options(nostack, preserves_flags) + options(att_syntax, nostack, preserves_flags) ); } } From ae557bde4efcd85bf1fc75c488c4370749d969a7 Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Thu, 7 Jul 2022 13:20:41 +0200 Subject: [PATCH 0555/1459] Skip rep movsb in copy_backward if possible There is currently no measurable performance difference in benchmarks but it likely will make a difference in real workloads.
--- src/mem/x86_64.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index a1015f612..e9c1c56d5 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -73,15 +73,21 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { // We can't separate this block due to std/cld asm!( "std", + "test %ecx, %ecx", + "jz 1f", "rep movsb", + "1:", "sub $7, %rsi", "sub $7, %rdi", "mov {qword_count}, %rcx", "rep movsq", + "test {pre_byte_count:e}, {pre_byte_count:e}", + "jz 1f", "add $7, %rsi", "add $7, %rdi", "mov {pre_byte_count:e}, %ecx", "rep movsb", + "1:", "cld", pre_byte_count = in(reg) pre_byte_count, qword_count = in(reg) qword_count, From ef37a23d8417afdb9fb6f215ec4c651e18146366 Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Thu, 28 Jul 2022 18:43:45 +0200 Subject: [PATCH 0556/1459] Remove branches around rep movsb/stosb While it is measurably faster for older CPUs, removing them keeps the code smaller and is likely more beneficial for newer CPUs. --- src/mem/x86_64.rs | 73 ++++++++++++++++++----------------------------- 1 file changed, 28 insertions(+), 45 deletions(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index e9c1c56d5..dd98e37c5 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -38,17 +38,13 @@ pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, count: usize) { let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count); // Separating the blocks gives the compiler more freedom to reorder instructions. - // It also allows us to trivially skip the rep movsb, which is faster when memcpying - // aligned data. 
- if pre_byte_count > 0 { - asm!( - "rep movsb", - inout("ecx") pre_byte_count => _, - inout("rdi") dest => dest, - inout("rsi") src => src, - options(att_syntax, nostack, preserves_flags) - ); - } + asm!( + "rep movsb", + inout("ecx") pre_byte_count => _, + inout("rdi") dest => dest, + inout("rsi") src => src, + options(att_syntax, nostack, preserves_flags) + ); asm!( "rep movsq", inout("rcx") qword_count => _, @@ -56,15 +52,13 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, count: usize) inout("rsi") src => src, options(att_syntax, nostack, preserves_flags) ); - if byte_count > 0 { - asm!( - "rep movsb", - inout("ecx") byte_count => _, - inout("rdi") dest => _, - inout("rsi") src => _, - options(att_syntax, nostack, preserves_flags) - ); - } + asm!( + "rep movsb", + inout("ecx") byte_count => _, + inout("rdi") dest => _, + inout("rsi") src => _, + options(att_syntax, nostack, preserves_flags) + ); } #[inline(always)] @@ -73,21 +67,16 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { // We can't separate this block due to std/cld asm!( "std", - "test %ecx, %ecx", - "jz 1f", "rep movsb", - "1:", "sub $7, %rsi", "sub $7, %rdi", "mov {qword_count}, %rcx", "rep movsq", "test {pre_byte_count:e}, {pre_byte_count:e}", - "jz 1f", "add $7, %rsi", "add $7, %rdi", "mov {pre_byte_count:e}, %ecx", "rep movsb", - "1:", "cld", pre_byte_count = in(reg) pre_byte_count, qword_count = in(reg) qword_count, @@ -118,17 +107,13 @@ pub unsafe fn set_bytes(mut dest: *mut u8, c: u8, count: usize) { let c = c as u64 * 0x0101_0101_0101_0101; let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count); // Separating the blocks gives the compiler more freedom to reorder instructions. - // It also allows us to trivially skip the rep stosb, which is faster when memcpying - // aligned data. 
- if pre_byte_count > 0 { - asm!( - "rep stosb", - inout("ecx") pre_byte_count => _, - inout("rdi") dest => dest, - in("rax") c, - options(att_syntax, nostack, preserves_flags) - ); - } + asm!( + "rep stosb", + inout("ecx") pre_byte_count => _, + inout("rdi") dest => dest, + in("rax") c, + options(att_syntax, nostack, preserves_flags) + ); asm!( "rep stosq", inout("rcx") qword_count => _, @@ -136,15 +121,13 @@ pub unsafe fn set_bytes(mut dest: *mut u8, c: u8, count: usize) { in("rax") c, options(att_syntax, nostack, preserves_flags) ); - if byte_count > 0 { - asm!( - "rep stosb", - inout("ecx") byte_count => _, - inout("rdi") dest => _, - in("rax") c, - options(att_syntax, nostack, preserves_flags) - ); - } + asm!( + "rep stosb", + inout("ecx") byte_count => _, + inout("rdi") dest => _, + in("rax") c, + options(att_syntax, nostack, preserves_flags) + ); } #[inline(always)] From 7d5610a277ef2894f14a986a970927a2fe8729fd Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 28 Jul 2022 20:58:02 +0100 Subject: [PATCH 0557/1459] Bump to 0.1.77 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 22ce34d36..cd4718830 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.76" +version = "0.1.77" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 6f8b7c9e2931fbc36f013f1780b5a55ad3b42904 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 28 Jul 2022 16:21:37 -0700 Subject: [PATCH 0558/1459] Added tgamma and tgammaf --- src/math.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/math.rs b/src/math.rs index 14a65395e..c3b9788e1 100644 --- a/src/math.rs +++ b/src/math.rs @@ -63,6 +63,8 @@ no_mangle! 
{ fn tanhf(n: f32) -> f32; fn ldexp(f: f64, n: i32) -> f64; fn ldexpf(f: f32, n: i32) -> f32; + fn tgamma(x: f64) -> f64; + fn tgammaf(x: f32) -> f32; } #[cfg(any( From ff98d7a62715f6fac10c52854fbac82340f0e103 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Fri, 29 Jul 2022 15:52:23 -0500 Subject: [PATCH 0559/1459] Add compiler-rt fallback for __trunctfsf2 on mips64-musl. --- build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/build.rs b/build.rs index befc5e2e3..73952bb9f 100644 --- a/build.rs +++ b/build.rs @@ -460,6 +460,7 @@ mod c { ("__fe_getround", "fp_mode.c"), ("__divtf3", "divtf3.c"), ("__trunctfdf2", "trunctfdf2.c"), + ("__trunctfsf2", "trunctfsf2.c"), ]); } From 56172fcd8bd045e38bbdf76697d1fcca1e965d6d Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Fri, 29 Jul 2022 16:58:05 -0500 Subject: [PATCH 0560/1459] Add compiler-rt fallbacks for sync builtins on armv5te-musl. --- src/arm_linux.rs | 110 +++++++++++++++++++++++++++++++---------------- 1 file changed, 73 insertions(+), 37 deletions(-) diff --git a/src/arm_linux.rs b/src/arm_linux.rs index 8fe09485b..8f22eb628 100644 --- a/src/arm_linux.rs +++ b/src/arm_linux.rs @@ -55,7 +55,7 @@ fn insert_aligned(aligned: u32, val: u32, shift: u32, mask: u32) -> u32 { } // Generic atomic read-modify-write operation -unsafe fn atomic_rmw u32>(ptr: *mut T, f: F) -> u32 { +unsafe fn atomic_rmw u32, G: Fn(u32, u32) -> u32>(ptr: *mut T, f: F, g: G) -> u32 { let aligned_ptr = align_ptr(ptr); let (shift, mask) = get_shift_mask(ptr); @@ -65,7 +65,7 @@ unsafe fn atomic_rmw u32>(ptr: *mut T, f: F) -> u32 { let newval = f(curval); let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask); if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) { - return curval; + return g(curval, newval); } } } @@ -89,13 +89,21 @@ unsafe fn atomic_cmpxchg(ptr: *mut T, oldval: u32, newval: u32) -> u32 { } macro_rules! 
atomic_rmw { - ($name:ident, $ty:ty, $op:expr) => { + ($name:ident, $ty:ty, $op:expr, $fetch:expr) => { intrinsics! { pub unsafe extern "C" fn $name(ptr: *mut $ty, val: $ty) -> $ty { - atomic_rmw(ptr, |x| $op(x as $ty, val) as u32) as $ty + atomic_rmw(ptr, |x| $op(x as $ty, val) as u32, |old, new| $fetch(old, new)) as $ty } } }; + + (@old $name:ident, $ty:ty, $op:expr) => { + atomic_rmw!($name, $ty, $op, |old, _| old); + }; + + (@new $name:ident, $ty:ty, $op:expr) => { + atomic_rmw!($name, $ty, $op, |_, new| new); + }; } macro_rules! atomic_cmpxchg { ($name:ident, $ty:ty) => { @@ -107,101 +115,129 @@ macro_rules! atomic_cmpxchg { }; } -atomic_rmw!(__sync_fetch_and_add_1, u8, |a: u8, b: u8| a.wrapping_add(b)); -atomic_rmw!(__sync_fetch_and_add_2, u16, |a: u16, b: u16| a +atomic_rmw!(@old __sync_fetch_and_add_1, u8, |a: u8, b: u8| a.wrapping_add(b)); +atomic_rmw!(@old __sync_fetch_and_add_2, u16, |a: u16, b: u16| a + .wrapping_add(b)); +atomic_rmw!(@old __sync_fetch_and_add_4, u32, |a: u32, b: u32| a + .wrapping_add(b)); + +atomic_rmw!(@new __sync_add_and_fetch_1, u8, |a: u8, b: u8| a.wrapping_add(b)); +atomic_rmw!(@new __sync_add_and_fetch_2, u16, |a: u16, b: u16| a .wrapping_add(b)); -atomic_rmw!(__sync_fetch_and_add_4, u32, |a: u32, b: u32| a +atomic_rmw!(@new __sync_add_and_fetch_4, u32, |a: u32, b: u32| a .wrapping_add(b)); -atomic_rmw!(__sync_fetch_and_sub_1, u8, |a: u8, b: u8| a.wrapping_sub(b)); -atomic_rmw!(__sync_fetch_and_sub_2, u16, |a: u16, b: u16| a +atomic_rmw!(@old __sync_fetch_and_sub_1, u8, |a: u8, b: u8| a.wrapping_sub(b)); +atomic_rmw!(@old __sync_fetch_and_sub_2, u16, |a: u16, b: u16| a .wrapping_sub(b)); -atomic_rmw!(__sync_fetch_and_sub_4, u32, |a: u32, b: u32| a +atomic_rmw!(@old __sync_fetch_and_sub_4, u32, |a: u32, b: u32| a .wrapping_sub(b)); -atomic_rmw!(__sync_fetch_and_and_1, u8, |a: u8, b: u8| a & b); -atomic_rmw!(__sync_fetch_and_and_2, u16, |a: u16, b: u16| a & b); -atomic_rmw!(__sync_fetch_and_and_4, u32, |a: u32, b: u32| a & b); 
+atomic_rmw!(@new __sync_sub_and_fetch_1, u8, |a: u8, b: u8| a.wrapping_sub(b)); +atomic_rmw!(@new __sync_sub_and_fetch_2, u16, |a: u16, b: u16| a + .wrapping_sub(b)); +atomic_rmw!(@new __sync_sub_and_fetch_4, u32, |a: u32, b: u32| a + .wrapping_sub(b)); + +atomic_rmw!(@old __sync_fetch_and_and_1, u8, |a: u8, b: u8| a & b); +atomic_rmw!(@old __sync_fetch_and_and_2, u16, |a: u16, b: u16| a & b); +atomic_rmw!(@old __sync_fetch_and_and_4, u32, |a: u32, b: u32| a & b); + +atomic_rmw!(@new __sync_and_and_fetch_1, u8, |a: u8, b: u8| a & b); +atomic_rmw!(@new __sync_and_and_fetch_2, u16, |a: u16, b: u16| a & b); +atomic_rmw!(@new __sync_and_and_fetch_4, u32, |a: u32, b: u32| a & b); + +atomic_rmw!(@old __sync_fetch_and_or_1, u8, |a: u8, b: u8| a | b); +atomic_rmw!(@old __sync_fetch_and_or_2, u16, |a: u16, b: u16| a | b); +atomic_rmw!(@old __sync_fetch_and_or_4, u32, |a: u32, b: u32| a | b); + +atomic_rmw!(@new __sync_or_and_fetch_1, u8, |a: u8, b: u8| a | b); +atomic_rmw!(@new __sync_or_and_fetch_2, u16, |a: u16, b: u16| a | b); +atomic_rmw!(@new __sync_or_and_fetch_4, u32, |a: u32, b: u32| a | b); + +atomic_rmw!(@old __sync_fetch_and_xor_1, u8, |a: u8, b: u8| a ^ b); +atomic_rmw!(@old __sync_fetch_and_xor_2, u16, |a: u16, b: u16| a ^ b); +atomic_rmw!(@old __sync_fetch_and_xor_4, u32, |a: u32, b: u32| a ^ b); -atomic_rmw!(__sync_fetch_and_or_1, u8, |a: u8, b: u8| a | b); -atomic_rmw!(__sync_fetch_and_or_2, u16, |a: u16, b: u16| a | b); -atomic_rmw!(__sync_fetch_and_or_4, u32, |a: u32, b: u32| a | b); +atomic_rmw!(@new __sync_xor_and_fetch_1, u8, |a: u8, b: u8| a ^ b); +atomic_rmw!(@new __sync_xor_and_fetch_2, u16, |a: u16, b: u16| a ^ b); +atomic_rmw!(@new __sync_xor_and_fetch_4, u32, |a: u32, b: u32| a ^ b); -atomic_rmw!(__sync_fetch_and_xor_1, u8, |a: u8, b: u8| a ^ b); -atomic_rmw!(__sync_fetch_and_xor_2, u16, |a: u16, b: u16| a ^ b); -atomic_rmw!(__sync_fetch_and_xor_4, u32, |a: u32, b: u32| a ^ b); +atomic_rmw!(@old __sync_fetch_and_nand_1, u8, |a: u8, b: u8| !(a & 
b)); +atomic_rmw!(@old __sync_fetch_and_nand_2, u16, |a: u16, b: u16| !(a & b)); +atomic_rmw!(@old __sync_fetch_and_nand_4, u32, |a: u32, b: u32| !(a & b)); -atomic_rmw!(__sync_fetch_and_nand_1, u8, |a: u8, b: u8| !(a & b)); -atomic_rmw!(__sync_fetch_and_nand_2, u16, |a: u16, b: u16| !(a & b)); -atomic_rmw!(__sync_fetch_and_nand_4, u32, |a: u32, b: u32| !(a & b)); +atomic_rmw!(@new __sync_nand_and_fetch_1, u8, |a: u8, b: u8| !(a & b)); +atomic_rmw!(@new __sync_nand_and_fetch_2, u16, |a: u16, b: u16| !(a & b)); +atomic_rmw!(@new __sync_nand_and_fetch_4, u32, |a: u32, b: u32| !(a & b)); -atomic_rmw!(__sync_fetch_and_max_1, i8, |a: i8, b: i8| if a > b { +atomic_rmw!(@old __sync_fetch_and_max_1, i8, |a: i8, b: i8| if a > b { a } else { b }); -atomic_rmw!(__sync_fetch_and_max_2, i16, |a: i16, b: i16| if a > b { +atomic_rmw!(@old __sync_fetch_and_max_2, i16, |a: i16, b: i16| if a > b { a } else { b }); -atomic_rmw!(__sync_fetch_and_max_4, i32, |a: i32, b: i32| if a > b { +atomic_rmw!(@old __sync_fetch_and_max_4, i32, |a: i32, b: i32| if a > b { a } else { b }); -atomic_rmw!(__sync_fetch_and_umax_1, u8, |a: u8, b: u8| if a > b { +atomic_rmw!(@old __sync_fetch_and_umax_1, u8, |a: u8, b: u8| if a > b { a } else { b }); -atomic_rmw!(__sync_fetch_and_umax_2, u16, |a: u16, b: u16| if a > b { +atomic_rmw!(@old __sync_fetch_and_umax_2, u16, |a: u16, b: u16| if a > b { a } else { b }); -atomic_rmw!(__sync_fetch_and_umax_4, u32, |a: u32, b: u32| if a > b { +atomic_rmw!(@old __sync_fetch_and_umax_4, u32, |a: u32, b: u32| if a > b { a } else { b }); -atomic_rmw!(__sync_fetch_and_min_1, i8, |a: i8, b: i8| if a < b { +atomic_rmw!(@old __sync_fetch_and_min_1, i8, |a: i8, b: i8| if a < b { a } else { b }); -atomic_rmw!(__sync_fetch_and_min_2, i16, |a: i16, b: i16| if a < b { +atomic_rmw!(@old __sync_fetch_and_min_2, i16, |a: i16, b: i16| if a < b { a } else { b }); -atomic_rmw!(__sync_fetch_and_min_4, i32, |a: i32, b: i32| if a < b { +atomic_rmw!(@old __sync_fetch_and_min_4, i32, |a: 
i32, b: i32| if a < b { a } else { b }); -atomic_rmw!(__sync_fetch_and_umin_1, u8, |a: u8, b: u8| if a < b { +atomic_rmw!(@old __sync_fetch_and_umin_1, u8, |a: u8, b: u8| if a < b { a } else { b }); -atomic_rmw!(__sync_fetch_and_umin_2, u16, |a: u16, b: u16| if a < b { +atomic_rmw!(@old __sync_fetch_and_umin_2, u16, |a: u16, b: u16| if a < b { a } else { b }); -atomic_rmw!(__sync_fetch_and_umin_4, u32, |a: u32, b: u32| if a < b { +atomic_rmw!(@old __sync_fetch_and_umin_4, u32, |a: u32, b: u32| if a < b { a } else { b }); -atomic_rmw!(__sync_lock_test_and_set_1, u8, |_: u8, b: u8| b); -atomic_rmw!(__sync_lock_test_and_set_2, u16, |_: u16, b: u16| b); -atomic_rmw!(__sync_lock_test_and_set_4, u32, |_: u32, b: u32| b); +atomic_rmw!(@old __sync_lock_test_and_set_1, u8, |_: u8, b: u8| b); +atomic_rmw!(@old __sync_lock_test_and_set_2, u16, |_: u16, b: u16| b); +atomic_rmw!(@old __sync_lock_test_and_set_4, u32, |_: u32, b: u32| b); atomic_cmpxchg!(__sync_val_compare_and_swap_1, u8); atomic_cmpxchg!(__sync_val_compare_and_swap_2, u16); From bfb08120b99ab4806d99cf69b224eeeade71a820 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Fri, 29 Jul 2022 16:52:30 -0700 Subject: [PATCH 0561/1459] Remove panics from tgamma and tgammaf --- libm/src/math/tgamma.rs | 13 +++++++------ libm/src/math/tgammaf.rs | 1 + 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/libm/src/math/tgamma.rs b/libm/src/math/tgamma.rs index f8ccf669a..e64eff61f 100644 --- a/libm/src/math/tgamma.rs +++ b/libm/src/math/tgamma.rs @@ -38,7 +38,7 @@ fn sinpi(mut x: f64) -> f64 { /* reduce x into [-.25,.25] */ n = (4.0 * x) as isize; - n = (n + 1) / 2; + n = div!(n + 1, 2); x -= (n as f64) * 0.5; x *= PI; @@ -118,18 +118,19 @@ fn s(x: f64) -> f64 { /* to avoid overflow handle large x differently */ if x < 8.0 { for i in (0..=N).rev() { - num = num * x + SNUM[i]; - den = den * x + SDEN[i]; + num = num * x + i!(SNUM, i); + den = den * x + i!(SDEN, i); } } else { for i in 0..=N { - num = num / x + SNUM[i]; 
- den = den / x + SDEN[i]; + num = num / x + i!(SNUM, i); + den = den / x + i!(SDEN, i); } } return num / den; } +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tgamma(mut x: f64) -> f64 { let u: u64 = x.to_bits(); let absx: f64; @@ -157,7 +158,7 @@ pub fn tgamma(mut x: f64) -> f64 { return 0.0 / 0.0; } if x <= FACT.len() as f64 { - return FACT[(x as usize) - 1]; + return i!(FACT, (x as usize) - 1); } } diff --git a/libm/src/math/tgammaf.rs b/libm/src/math/tgammaf.rs index a8f161f0c..23e3814f9 100644 --- a/libm/src/math/tgammaf.rs +++ b/libm/src/math/tgammaf.rs @@ -1,5 +1,6 @@ use super::tgamma; +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tgammaf(x: f32) -> f32 { tgamma(x as f64) as f32 } From f7dd729d1b065c48a11a95709f98707a60219993 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Fri, 29 Jul 2022 17:34:06 -0700 Subject: [PATCH 0562/1459] Update libm --- libm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm b/libm index 1f7b8eb61..add735e30 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 1f7b8eb61cab5f62ec93d2343432bebd1ada30f2 +Subproject commit add735e30df2ba780fb76187f43163dedcd96fa7 From f09a62c496bba7e19d5fc9e1d5ad85bc509a4270 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sat, 30 Jul 2022 01:43:36 +0100 Subject: [PATCH 0563/1459] Bump to 0.2.3 --- libm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 99055ad47..a94a85a26 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0" name = "libm" readme = "README.md" repository = "https://github.com/rust-lang/libm" -version = "0.2.2" +version = "0.2.3" edition = "2018" [features] From 8672395c985dc0c49b5ec738fa258b121f74df40 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sat, 30 Jul 2022 01:45:31 +0100 Subject: [PATCH 0564/1459] Bump to 0.1.78 --- Cargo.toml | 2 +- libm | 2 +- 2 files changed, 2 insertions(+), 2 
deletions(-) diff --git a/Cargo.toml b/Cargo.toml index cd4718830..826dee795 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.77" +version = "0.1.78" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" diff --git a/libm b/libm index add735e30..d6d8a1610 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit add735e30df2ba780fb76187f43163dedcd96fa7 +Subproject commit d6d8a1610c9912c84c4c1d12d994d0204dc702ef From bb69de060c34ff855c9ff167c16e3b1c784cf376 Mon Sep 17 00:00:00 2001 From: Outurnate Date: Sun, 31 Jul 2022 14:37:57 -0400 Subject: [PATCH 0565/1459] Use smaller IPIO2 table for 16 and 32 bit architectures --- libm/src/math/rem_pio2_large.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index 65473f0ab..db97a39d4 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -27,7 +27,7 @@ const INIT_JK: [usize; 4] = [3, 4, 4, 6]; // // NB: This table must have at least (e0-3)/24 + jk terms. // For quad precision (e0 <= 16360, jk = 6), this is 686. 
-#[cfg(target_pointer_width = "32")] +#[cfg(any(target_pointer_width = "32", target_pointer_width = "16"))] const IPIO2: [i32; 66] = [ 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, From ea4566082aff39088a17b097a82d3e80bd4502a6 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 31 Jul 2022 13:57:20 -0700 Subject: [PATCH 0566/1459] Ensure more functions do not panic --- libm/src/math/acosh.rs | 1 + libm/src/math/acoshf.rs | 1 + libm/src/math/asinh.rs | 1 + libm/src/math/asinhf.rs | 1 + libm/src/math/atanh.rs | 1 + libm/src/math/atanhf.rs | 1 + libm/src/math/copysign.rs | 1 + libm/src/math/copysignf.rs | 1 + libm/src/math/erf.rs | 1 + libm/src/math/erff.rs | 1 + libm/src/math/exp10.rs | 5 +++-- libm/src/math/exp10f.rs | 5 +++-- libm/src/math/ilogb.rs | 1 + libm/src/math/ilogbf.rs | 1 + libm/src/math/lgamma.rs | 1 + libm/src/math/lgamma_r.rs | 3 ++- libm/src/math/lgammaf.rs | 1 + libm/src/math/lgammaf_r.rs | 3 ++- libm/src/math/sincos.rs | 1 + libm/src/math/sincosf.rs | 1 + 20 files changed, 26 insertions(+), 6 deletions(-) diff --git a/libm/src/math/acosh.rs b/libm/src/math/acosh.rs index ac7a5f1c6..d1f5b9fa9 100644 --- a/libm/src/math/acosh.rs +++ b/libm/src/math/acosh.rs @@ -7,6 +7,7 @@ const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa3 /// Calculates the inverse hyperbolic cosine of `x`. /// Is defined as `log(x + sqrt(x*x-1))`. /// `x` must be a number greater than or equal to 1. 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn acosh(x: f64) -> f64 { let u = x.to_bits(); let e = ((u >> 52) as usize) & 0x7ff; diff --git a/libm/src/math/acoshf.rs b/libm/src/math/acoshf.rs index 0879e1edb..ad3455fdd 100644 --- a/libm/src/math/acoshf.rs +++ b/libm/src/math/acoshf.rs @@ -7,6 +7,7 @@ const LN2: f32 = 0.693147180559945309417232121458176568; /// Calculates the inverse hyperbolic cosine of `x`. /// Is defined as `log(x + sqrt(x*x-1))`. /// `x` must be a number greater than or equal to 1. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn acoshf(x: f32) -> f32 { let u = x.to_bits(); let a = u & 0x7fffffff; diff --git a/libm/src/math/asinh.rs b/libm/src/math/asinh.rs index 14295357a..0abd80c2f 100644 --- a/libm/src/math/asinh.rs +++ b/libm/src/math/asinh.rs @@ -7,6 +7,7 @@ const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa3 /// /// Calculates the inverse hyperbolic sine of `x`. /// Is defined as `sgn(x)*log(|x|+sqrt(x*x+1))`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn asinh(mut x: f64) -> f64 { let mut u = x.to_bits(); let e = ((u >> 52) as usize) & 0x7ff; diff --git a/libm/src/math/asinhf.rs b/libm/src/math/asinhf.rs index e22a29132..09c77823e 100644 --- a/libm/src/math/asinhf.rs +++ b/libm/src/math/asinhf.rs @@ -7,6 +7,7 @@ const LN2: f32 = 0.693147180559945309417232121458176568; /// /// Calculates the inverse hyperbolic sine of `x`. /// Is defined as `sgn(x)*log(|x|+sqrt(x*x+1))`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn asinhf(mut x: f32) -> f32 { let u = x.to_bits(); let i = u & 0x7fffffff; diff --git a/libm/src/math/atanh.rs b/libm/src/math/atanh.rs index 79a989c42..b984c4ac6 100644 --- a/libm/src/math/atanh.rs +++ b/libm/src/math/atanh.rs @@ -5,6 +5,7 @@ use super::log1p; /// /// Calculates the inverse hyperbolic tangent of `x`. /// Is defined as `log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2`. 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atanh(x: f64) -> f64 { let u = x.to_bits(); let e = ((u >> 52) as usize) & 0x7ff; diff --git a/libm/src/math/atanhf.rs b/libm/src/math/atanhf.rs index 7b2f34d97..a1aa314a5 100644 --- a/libm/src/math/atanhf.rs +++ b/libm/src/math/atanhf.rs @@ -5,6 +5,7 @@ use super::log1pf; /// /// Calculates the inverse hyperbolic tangent of `x`. /// Is defined as `log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atanhf(mut x: f32) -> f32 { let mut u = x.to_bits(); let sign = (u >> 31) != 0; diff --git a/libm/src/math/copysign.rs b/libm/src/math/copysign.rs index 1527fb6ea..1f4a35a33 100644 --- a/libm/src/math/copysign.rs +++ b/libm/src/math/copysign.rs @@ -2,6 +2,7 @@ /// /// Constructs a number with the magnitude (absolute value) of its /// first argument, `x`, and the sign of its second argument, `y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn copysign(x: f64, y: f64) -> f64 { let mut ux = x.to_bits(); let uy = y.to_bits(); diff --git a/libm/src/math/copysignf.rs b/libm/src/math/copysignf.rs index 35148561a..6c346e3a5 100644 --- a/libm/src/math/copysignf.rs +++ b/libm/src/math/copysignf.rs @@ -2,6 +2,7 @@ /// /// Constructs a number with the magnitude (absolute value) of its /// first argument, `x`, and the sign of its second argument, `y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn copysignf(x: f32, y: f32) -> f32 { let mut ux = x.to_bits(); let uy = y.to_bits(); diff --git a/libm/src/math/erf.rs b/libm/src/math/erf.rs index a2c617d34..5e21ba578 100644 --- a/libm/src/math/erf.rs +++ b/libm/src/math/erf.rs @@ -219,6 +219,7 @@ fn erfc2(ix: u32, mut x: f64) -> f64 { /// Calculates an approximation to the “error function”, which estimates /// the probability that an observation will fall within x standard /// deviations of the mean (assuming a normal distribution). 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn erf(x: f64) -> f64 { let r: f64; let s: f64; diff --git a/libm/src/math/erff.rs b/libm/src/math/erff.rs index 384052293..f74d4b632 100644 --- a/libm/src/math/erff.rs +++ b/libm/src/math/erff.rs @@ -130,6 +130,7 @@ fn erfc2(mut ix: u32, mut x: f32) -> f32 { /// Calculates an approximation to the “error function”, which estimates /// the probability that an observation will fall within x standard /// deviations of the mean (assuming a normal distribution). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn erff(x: f32) -> f32 { let r: f32; let s: f32; diff --git a/libm/src/math/exp10.rs b/libm/src/math/exp10.rs index 9537f76f1..559930e10 100644 --- a/libm/src/math/exp10.rs +++ b/libm/src/math/exp10.rs @@ -6,16 +6,17 @@ const P10: &[f64] = &[ 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, ]; +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp10(x: f64) -> f64 { let (mut y, n) = modf(x); let u: u64 = n.to_bits(); /* fabs(n) < 16 without raising invalid on nan */ if (u >> 52 & 0x7ff) < 0x3ff + 4 { if y == 0.0 { - return P10[((n as isize) + 15) as usize]; + return i!(P10, ((n as isize) + 15) as usize); } y = exp2(LN10 * y); - return y * P10[((n as isize) + 15) as usize]; + return y * i!(P10, ((n as isize) + 15) as usize); } return pow(10.0, x); } diff --git a/libm/src/math/exp10f.rs b/libm/src/math/exp10f.rs index d45fff36e..1279bc6c5 100644 --- a/libm/src/math/exp10f.rs +++ b/libm/src/math/exp10f.rs @@ -6,16 +6,17 @@ const P10: &[f32] = &[ 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, ]; +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp10f(x: f32) -> f32 { let (mut y, n) = modff(x); let u = n.to_bits(); /* fabsf(n) < 8 without raising invalid on nan */ if (u >> 23 & 0xff) < 0x7f + 3 { if y == 0.0 { - return P10[((n as isize) + 7) as usize]; + return i!(P10, ((n as isize) 
+ 7) as usize); } y = exp2f(LN10_F32 * y); - return y * P10[((n as isize) + 7) as usize]; + return y * i!(P10, ((n as isize) + 7) as usize); } return exp2(LN10_F64 * (x as f64)) as f32; } diff --git a/libm/src/math/ilogb.rs b/libm/src/math/ilogb.rs index 0a380b7ef..7d74dcfb6 100644 --- a/libm/src/math/ilogb.rs +++ b/libm/src/math/ilogb.rs @@ -1,6 +1,7 @@ const FP_ILOGBNAN: i32 = -1 - 0x7fffffff; const FP_ILOGB0: i32 = FP_ILOGBNAN; +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ilogb(x: f64) -> i32 { let mut i: u64 = x.to_bits(); let e = ((i >> 52) & 0x7ff) as i32; diff --git a/libm/src/math/ilogbf.rs b/libm/src/math/ilogbf.rs index b384fa4b2..0fa58748c 100644 --- a/libm/src/math/ilogbf.rs +++ b/libm/src/math/ilogbf.rs @@ -1,6 +1,7 @@ const FP_ILOGBNAN: i32 = -1 - 0x7fffffff; const FP_ILOGB0: i32 = FP_ILOGBNAN; +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ilogbf(x: f32) -> i32 { let mut i = x.to_bits(); let e = ((i >> 23) & 0xff) as i32; diff --git a/libm/src/math/lgamma.rs b/libm/src/math/lgamma.rs index 5bc87e85e..a08bc5b64 100644 --- a/libm/src/math/lgamma.rs +++ b/libm/src/math/lgamma.rs @@ -1,5 +1,6 @@ use super::lgamma_r; +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn lgamma(x: f64) -> f64 { lgamma_r(x).0 } diff --git a/libm/src/math/lgamma_r.rs b/libm/src/math/lgamma_r.rs index 9533e882c..b26177e6e 100644 --- a/libm/src/math/lgamma_r.rs +++ b/libm/src/math/lgamma_r.rs @@ -152,7 +152,7 @@ fn sin_pi(mut x: f64) -> f64 { x = 2.0 * (x * 0.5 - floor(x * 0.5)); /* x mod 2.0 */ n = (x * 4.0) as i32; - n = (n + 1) / 2; + n = div!(n + 1, 2); x -= (n as f64) * 0.5; x *= PI; @@ -164,6 +164,7 @@ fn sin_pi(mut x: f64) -> f64 { } } +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn lgamma_r(mut x: f64) -> (f64, i32) { let u: u64 = x.to_bits(); let mut t: f64; diff --git a/libm/src/math/lgammaf.rs b/libm/src/math/lgammaf.rs index dfdc87f96..a9c2da75b 100644 --- a/libm/src/math/lgammaf.rs +++ 
b/libm/src/math/lgammaf.rs @@ -1,5 +1,6 @@ use super::lgammaf_r; +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn lgammaf(x: f32) -> f32 { lgammaf_r(x).0 } diff --git a/libm/src/math/lgammaf_r.rs b/libm/src/math/lgammaf_r.rs index c5e559f46..723c90daf 100644 --- a/libm/src/math/lgammaf_r.rs +++ b/libm/src/math/lgammaf_r.rs @@ -88,7 +88,7 @@ fn sin_pi(mut x: f32) -> f32 { x = 2.0 * (x * 0.5 - floorf(x * 0.5)); /* x mod 2.0 */ n = (x * 4.0) as isize; - n = (n + 1) / 2; + n = div!(n + 1, 2); y = (x as f64) - (n as f64) * 0.5; y *= 3.14159265358979323846; match n { @@ -99,6 +99,7 @@ fn sin_pi(mut x: f32) -> f32 { } } +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn lgammaf_r(mut x: f32) -> (f32, i32) { let u = x.to_bits(); let mut t: f32; diff --git a/libm/src/math/sincos.rs b/libm/src/math/sincos.rs index 4ab588412..ff5d87a1c 100644 --- a/libm/src/math/sincos.rs +++ b/libm/src/math/sincos.rs @@ -12,6 +12,7 @@ use super::{get_high_word, k_cos, k_sin, rem_pio2}; +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sincos(x: f64) -> (f64, f64) { let s: f64; let c: f64; diff --git a/libm/src/math/sincosf.rs b/libm/src/math/sincosf.rs index 5304e8ca0..9a4c36104 100644 --- a/libm/src/math/sincosf.rs +++ b/libm/src/math/sincosf.rs @@ -23,6 +23,7 @@ const S2PIO2: f32 = 2.0 * PI_2; /* 0x400921FB, 0x54442D18 */ const S3PIO2: f32 = 3.0 * PI_2; /* 0x4012D97C, 0x7F3321D2 */ const S4PIO2: f32 = 4.0 * PI_2; /* 0x401921FB, 0x54442D18 */ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sincosf(x: f32) -> (f32, f32) { let s: f32; let c: f32; From 41e1ac64ba3bdc8716b2b85e9b1f200cd2670d14 Mon Sep 17 00:00:00 2001 From: Nicholas Bishop Date: Sun, 31 Jul 2022 11:07:33 -0400 Subject: [PATCH 0567/1459] Enable unadjusted_on_win64 for UEFI in some cases The conversion functions from i128/u128 to f32/f64 have the `unadjusted_on_win64` attribute, but it is disabled starting with LLVM14. 
This seems to be the correct thing to do for Win64, but for some reason x86_64-unknown-uefi is different, despite generally using the same ABI as Win64. --- src/float/conv.rs | 8 ++++---- src/macros.rs | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/float/conv.rs b/src/float/conv.rs index 07b58f3d2..68ba63408 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -92,12 +92,12 @@ intrinsics! { f64::from_bits(int_to_float::u64_to_f64_bits(i)) } - #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + #[cfg_attr(any(not(target_feature = "llvm14-builtins-abi"), target_os = "uefi"), unadjusted_on_win64)] pub extern "C" fn __floatuntisf(i: u128) -> f32 { f32::from_bits(int_to_float::u128_to_f32_bits(i)) } - #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + #[cfg_attr(any(not(target_feature = "llvm14-builtins-abi"), target_os = "uefi"), unadjusted_on_win64)] pub extern "C" fn __floatuntidf(i: u128) -> f64 { f64::from_bits(int_to_float::u128_to_f64_bits(i)) } @@ -129,13 +129,13 @@ intrinsics! 
{ f64::from_bits(int_to_float::u64_to_f64_bits(i.unsigned_abs()) | sign_bit) } - #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + #[cfg_attr(any(not(target_feature = "llvm14-builtins-abi"), target_os = "uefi"), unadjusted_on_win64)] pub extern "C" fn __floattisf(i: i128) -> f32 { let sign_bit = ((i >> 127) as u32) << 31; f32::from_bits(int_to_float::u128_to_f32_bits(i.unsigned_abs()) | sign_bit) } - #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + #[cfg_attr(any(not(target_feature = "llvm14-builtins-abi"), target_os = "uefi"), unadjusted_on_win64)] pub extern "C" fn __floattidf(i: i128) -> f64 { let sign_bit = ((i >> 127) as u64) << 63; f64::from_bits(int_to_float::u128_to_f64_bits(i.unsigned_abs()) | sign_bit) diff --git a/src/macros.rs b/src/macros.rs index 6acf1be96..7d90b7aad 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -174,7 +174,7 @@ macro_rules! intrinsics { $($rest:tt)* ) => ( - #[cfg(all(windows, target_pointer_width = "64"))] + #[cfg(all(any(windows, all(target_os = "uefi", target_arch = "x86_64")), target_pointer_width = "64"))] intrinsics! { $(#[$($attr)*])* pub extern "unadjusted" fn $name( $($argname: $ty),* ) $(-> $ret)? { @@ -182,7 +182,7 @@ macro_rules! intrinsics { } } - #[cfg(not(all(windows, target_pointer_width = "64")))] + #[cfg(not(all(any(windows, all(target_os = "uefi", target_arch = "x86_64")), target_pointer_width = "64")))] intrinsics! { $(#[$($attr)*])* pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? 
{ From 93edcc4213fdf53800fc1e9f51594a658475da8a Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 10 Aug 2022 01:11:33 +0100 Subject: [PATCH 0568/1459] Fix fmaf not to depend on FE_TOWARDZERO Ported from upstream musl commit 4f3d346bffdf9ed2b1803653643dc31242490944 Fixes rust-lang/libm#263 --- libm/src/math/fenv.rs | 6 ------ libm/src/math/fmaf.rs | 35 +++++++++++++++++++++++------------ 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/libm/src/math/fenv.rs b/libm/src/math/fenv.rs index 652e60324..c91272e82 100644 --- a/libm/src/math/fenv.rs +++ b/libm/src/math/fenv.rs @@ -5,7 +5,6 @@ pub(crate) const FE_UNDERFLOW: i32 = 0; pub(crate) const FE_INEXACT: i32 = 0; pub(crate) const FE_TONEAREST: i32 = 0; -pub(crate) const FE_TOWARDZERO: i32 = 0; #[inline] pub(crate) fn feclearexcept(_mask: i32) -> i32 { @@ -26,8 +25,3 @@ pub(crate) fn fetestexcept(_mask: i32) -> i32 { pub(crate) fn fegetround() -> i32 { FE_TONEAREST } - -#[inline] -pub(crate) fn fesetround(_r: i32) -> i32 { - 0 -} diff --git a/libm/src/math/fmaf.rs b/libm/src/math/fmaf.rs index 03d371c55..2848f2aee 100644 --- a/libm/src/math/fmaf.rs +++ b/libm/src/math/fmaf.rs @@ -29,8 +29,7 @@ use core::f32; use core::ptr::read_volatile; use super::fenv::{ - feclearexcept, fegetround, feraiseexcept, fesetround, fetestexcept, FE_INEXACT, FE_TONEAREST, - FE_TOWARDZERO, FE_UNDERFLOW, + feclearexcept, fegetround, feraiseexcept, fetestexcept, FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, }; /* @@ -91,16 +90,28 @@ pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 { * If result is inexact, and exactly halfway between two float values, * we need to adjust the low-order bit in the direction of the error. 
*/ - fesetround(FE_TOWARDZERO); - // prevent `vxy + z` from being CSE'd with `xy + z` above - let vxy: f64 = unsafe { read_volatile(&xy) }; - let mut adjusted_result: f64 = vxy + z as f64; - fesetround(FE_TONEAREST); - if result == adjusted_result { - ui = adjusted_result.to_bits(); + let neg = ui >> 63 != 0; + let err = if neg == (z as f64 > xy) { + xy - result + z as f64 + } else { + z as f64 - result + xy + }; + if neg == (err < 0.0) { ui += 1; - adjusted_result = f64::from_bits(ui); + } else { + ui -= 1; + } + f64::from_bits(ui) as f32 +} + +#[cfg(test)] +mod tests { + #[test] + fn issue_263() { + let a = f32::from_bits(1266679807); + let b = f32::from_bits(1300234242); + let c = f32::from_bits(1115553792); + let expected = f32::from_bits(1501560833); + assert_eq!(super::fmaf(a, b, c), expected); } - z = adjusted_result as f32; - z } From 6353a511f33993ad71e07cb00262463a8934a7ee Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Wed, 10 Aug 2022 11:29:38 +0200 Subject: [PATCH 0569/1459] Remove c32() from x86_64 memcmp Fixes https://github.com/rust-lang/compiler-builtins/issues/487 --- src/mem/x86_64.rs | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index dd98e37c5..17b461f79 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -170,19 +170,7 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { let c4 = |a: *const u32, b, n| cmp(a, b, n, c2); let c8 = |a: *const u64, b, n| cmp(a, b, n, c4); let c16 = |a: *const u128, b, n| cmp(a, b, n, c8); - let c32 = |a: *const [u128; 2], b, n| cmp(a, b, n, c16); - // [u128; 2] internally uses raw_eq for comparisons, which may emit a call to memcmp - // above a certain size threshold. When SSE2 is enabled this threshold does not seem - // to be reached but without SSE2 a call is emitted, leading to infinite recursion. 
- // - // While replacing [u128; 2] with (u128, u128) fixes the issues it degrades performance - // severely. Likewise, removing c32() has a lesser but still significant impact. Instead the - // [u128; 2] case is only enabled when SSE2 is present. - if cfg!(target_feature = "sse2") { - c32(a.cast(), b.cast(), n) - } else { - c16(a.cast(), b.cast(), n) - } + c16(a.cast(), b.cast(), n) } /// Determine optimal parameters for a `rep` instruction. From f88cc354be6b03aa660e140fd2431f64db775303 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 10 Aug 2022 15:23:56 +0100 Subject: [PATCH 0570/1459] Bump to 0.2.5 --- libm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index a94a85a26..527c383b9 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0" name = "libm" readme = "README.md" repository = "https://github.com/rust-lang/libm" -version = "0.2.3" +version = "0.2.5" edition = "2018" [features] From 5f275268ae05434e37f7144bad137684f6d23fa1 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 10 Aug 2022 15:19:30 +0100 Subject: [PATCH 0571/1459] Update libm submodule --- libm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm b/libm index d6d8a1610..14a76eaf8 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit d6d8a1610c9912c84c4c1d12d994d0204dc702ef +Subproject commit 14a76eaf8dab437e92c5fc505c48fb4ed0eb3483 From 727535966a686dc9d47a166409e41b5f9455b4e9 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 10 Aug 2022 15:28:39 +0100 Subject: [PATCH 0572/1459] Bump to 0.1.79 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 826dee795..26258dd4f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.78" +version = "0.1.79" license = "MIT/Apache-2.0" readme = "README.md" repository 
= "https://github.com/rust-lang/compiler-builtins" From 7da562da4ee2800e8382520405517857e7a04df2 Mon Sep 17 00:00:00 2001 From: D1plo1d Date: Sat, 17 Sep 2022 11:47:21 -0400 Subject: [PATCH 0573/1459] math: Enabled floating point intrinsics for RISCV32 microcontrollers --- src/math.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/math.rs b/src/math.rs index fa9836186..3fc33b127 100644 --- a/src/math.rs +++ b/src/math.rs @@ -118,8 +118,11 @@ no_mangle! { fn truncf(x: f32) -> f32; } -// only for the thumb*-none-eabi* targets -#[cfg(all(target_arch = "arm", target_os = "none"))] +// only for the thumb*-none-eabi* targets and riscv32*-none-elf targets that lack the floating point instruction set +#[cfg(any( + all(target_arch = "arm", target_os = "none"), + all(target_arch = "riscv32", not(target_feature = "f"), target_os = "none") +))] no_mangle! { fn fmin(x: f64, y: f64) -> f64; fn fminf(x: f32, y: f32) -> f32; From 0347d995a0c76659614cd0abe70c398bb87c5fce Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Tue, 20 Sep 2022 16:20:52 +0800 Subject: [PATCH 0574/1459] Bump to 0.1.80 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 26258dd4f..f44b8e111 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.79" +version = "0.1.80" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 72c872147679096c53cbb49ca670662d05d43110 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Tue, 27 Sep 2022 13:22:45 -0600 Subject: [PATCH 0575/1459] Update macros.rs --- src/macros.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/macros.rs b/src/macros.rs index 7d90b7aad..477c25684 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -266,6 +266,7 @@ macro_rules! 
intrinsics { #[cfg(target_arch = "arm")] pub mod $alias { #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + #[cfg_attr(all(not(windows), not(target_vendor="apple")), linkage = "weak")] pub extern "aapcs" fn $alias( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } From 08e783ddb26b9b54e320623b3e37f8fc5060fd94 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 29 Sep 2022 03:16:50 +0100 Subject: [PATCH 0576/1459] Bump to 0.1.81 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f44b8e111..5c8ea2597 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.80" +version = "0.1.81" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From ad3aa7d1aa371e5694cdc2c5b23bca929f3c63de Mon Sep 17 00:00:00 2001 From: Andrew Tribick Date: Fri, 30 Sep 2022 14:11:15 +0200 Subject: [PATCH 0577/1459] Use wrapping_neg() to avoid fma errors on underflow --- libm/src/math/fma.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index 516f9ad3a..f9a86dc60 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -126,8 +126,8 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { rlo = res; rhi = rhi.wrapping_sub(zhi.wrapping_add(borrow as u64)); if (rhi >> 63) != 0 { - rlo = (-(rlo as i64)) as u64; - rhi = (-(rhi as i64)) as u64 - (rlo != 0) as u64; + rlo = (rlo as i64).wrapping_neg() as u64; + rhi = (rhi as i64).wrapping_neg() as u64 - (rlo != 0) as u64; sign = (sign == 0) as i32; } nonzero = (rhi != 0) as i32; @@ -232,4 +232,12 @@ mod tests { -3991680619069439e277 ); } + + #[test] + fn fma_underflow() { + assert_eq!( + fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), + 0.0, + ); + } } From cc509925f0bd57513e9a04699b27ee8da51383a8 Mon Sep 17 00:00:00 2001 From: Amanieu 
d'Antras Date: Mon, 10 Oct 2022 17:40:16 +0100 Subject: [PATCH 0578/1459] Fix clippy lints --- src/float/conv.rs | 6 +++--- src/float/div.rs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/float/conv.rs b/src/float/conv.rs index 68ba63408..19fdc2fdc 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -13,7 +13,7 @@ mod int_to_float { let a = (i << n) >> 8; // Significant bits, with bit 24 still in tact. let b = (i << n) << 24; // Insignificant bits, only relevant for rounding. let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. - let e = 157 - n as u32; // Exponent plus 127, minus one. + let e = 157 - n; // Exponent plus 127, minus one. (e << 23) + m // + not |, so the mantissa can overflow into the exponent. } @@ -42,8 +42,8 @@ mod int_to_float { return 0; } let n = i.leading_zeros(); - let a = ((i << n) >> 11) as u64; // Significant bits, with bit 53 still in tact. - let b = ((i << n) << 53) as u64; // Insignificant bits, only relevant for rounding. + let a = (i << n) >> 11; // Significant bits, with bit 53 still in tact. + let b = (i << n) << 53; // Insignificant bits, only relevant for rounding. let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even. let e = 1085 - n as u64; // Exponent plus 1023, minus one. (e << 52) + m // + not |, so the mantissa can overflow into the exponent. 
diff --git a/src/float/div.rs b/src/float/div.rs index 528a8368d..c2d6c07e7 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -135,11 +135,11 @@ where let mut correction: u32 = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); - reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32; + reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) >> 31) as u32; correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); - reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32; + reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) >> 31) as u32; correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); - reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32; + reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) >> 31) as u32; // Exhaustive testing shows that the error in reciprocal after three steps // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our From edc6d766e90b354503e3f25aef635980c96163c3 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sun, 9 Oct 2022 14:58:18 +0200 Subject: [PATCH 0579/1459] invoke the unreachable intrinsic, not the stable wrapper --- src/int/specialized_div_rem/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs index 6ec4675df..77034eb54 100644 --- a/src/int/specialized_div_rem/mod.rs +++ b/src/int/specialized_div_rem/mod.rs @@ -72,7 +72,10 @@ mod asymmetric; /// impossible to reach by Rust users, unless `compiler-builtins` public division functions or /// `core/std::unchecked_div/rem` are directly used without a zero check in front. fn zero_div_fn() -> ! 
{ - unsafe { core::hint::unreachable_unchecked() } + // Calling the intrinsic directly, to avoid the `assert_unsafe_precondition` that cannot be used + // here because it involves non-`inline` functions + // (https://github.com/rust-lang/compiler-builtins/issues/491). + unsafe { core::intrinsics::unreachable() } } const USE_LZ: bool = { From 5af23aa894661538b820732eef883dd4578ee649 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Mon, 10 Oct 2022 19:04:06 +0100 Subject: [PATCH 0580/1459] Bump to 0.1.82 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 5c8ea2597..fb113b922 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.81" +version = "0.1.82" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 2ee15dde381eb59afbc0cff5ee58640be98a2ed8 Mon Sep 17 00:00:00 2001 From: "Felix S. Klock II" Date: Tue, 25 Oct 2022 11:25:14 -0400 Subject: [PATCH 0581/1459] Document origins of the multiplication method being used here. --- src/riscv.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/riscv.rs b/src/riscv.rs index ee78b9dba..eff249692 100644 --- a/src/riscv.rs +++ b/src/riscv.rs @@ -1,5 +1,18 @@ intrinsics! { - // Implementation from gcc + // Ancient Egyptian/Ethiopian/Russian multiplication method + // see https://en.wikipedia.org/wiki/Ancient_Egyptian_multiplication + // + // This is a long-available stock algorithm; e.g. it is documented in + // Knuth's "The Art of Computer Programming" volume 2 (under the section + // "Evaluation of Powers") since at least the 2nd edition (1981). + // + // The main attraction of this method is that it implements (software) + // multiplication atop four simple operations: doubling, halving, checking + // if a value is even/odd, and addition. 
This is *not* considered to be the + // fastest multiplication method, but it may be amongst the simplest (and + // smallest with respect to code size). + // + // for reference, see also implementation from gcc // https://raw.githubusercontent.com/gcc-mirror/gcc/master/libgcc/config/epiphany/mulsi3.c pub extern "C" fn __mulsi3(a: u32, b: u32) -> u32 { let (mut a, mut b) = (a, b); From 530baa48c1b77b1d9fdcb4dc717fd4efb48b14e5 Mon Sep 17 00:00:00 2001 From: "Felix S. Klock II" Date: Tue, 25 Oct 2022 12:32:41 -0400 Subject: [PATCH 0582/1459] might as well add the link to the LLVM assembly code as well. --- src/riscv.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/riscv.rs b/src/riscv.rs index eff249692..ae361b33a 100644 --- a/src/riscv.rs +++ b/src/riscv.rs @@ -14,6 +14,9 @@ intrinsics! { // // for reference, see also implementation from gcc // https://raw.githubusercontent.com/gcc-mirror/gcc/master/libgcc/config/epiphany/mulsi3.c + // + // and from LLVM (in relatively readable RISC-V assembly): + // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/riscv/int_mul_impl.inc pub extern "C" fn __mulsi3(a: u32, b: u32) -> u32 { let (mut a, mut b) = (a, b); let mut r = 0; From d8dd41a66bee04a064707bfeeef9789187d7be07 Mon Sep 17 00:00:00 2001 From: Jules Bertholet Date: Mon, 7 Nov 2022 12:34:47 -0500 Subject: [PATCH 0583/1459] Add `roundeven` and `roundevenf` --- libm/build.rs | 3 +++ libm/src/math/mod.rs | 4 +++ libm/src/math/roundeven.rs | 50 +++++++++++++++++++++++++++++++++++++ libm/src/math/roundevenf.rs | 50 +++++++++++++++++++++++++++++++++++++ 4 files changed, 107 insertions(+) create mode 100644 libm/src/math/roundeven.rs create mode 100644 libm/src/math/roundevenf.rs diff --git a/libm/build.rs b/libm/build.rs index 80145a9cc..403304a13 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -39,6 +39,9 @@ mod musl_reference_tests { "jnf.rs", "j1.rs", "j1f.rs", + // musl doens't have these + "roundeven.rs", + "roundevenf.rs", ]; struct 
Function { diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 81bfc53ed..e710bd381 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -171,6 +171,8 @@ mod remainderf; mod remquo; mod remquof; mod round; +mod roundeven; +mod roundevenf; mod roundf; mod scalbn; mod scalbnf; @@ -285,6 +287,8 @@ pub use self::remainderf::remainderf; pub use self::remquo::remquo; pub use self::remquof::remquof; pub use self::round::round; +pub use self::roundeven::roundeven; +pub use self::roundevenf::roundevenf; pub use self::roundf::roundf; pub use self::scalbn::scalbn; pub use self::scalbnf::scalbnf; diff --git a/libm/src/math/roundeven.rs b/libm/src/math/roundeven.rs new file mode 100644 index 000000000..5cc30c8af --- /dev/null +++ b/libm/src/math/roundeven.rs @@ -0,0 +1,50 @@ +// Source: musl libm rint +// (equivalent to roundeven when rounding mode is default, +// which Rust assumes) + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundeven(x: f64) -> f64 { + let one_over_e = 1.0 / f64::EPSILON; + let as_u64: u64 = x.to_bits(); + let exponent: u64 = as_u64 >> 52 & 0x7ff; + let is_positive = (as_u64 >> 63) == 0; + if exponent >= 0x3ff + 52 { + x + } else { + let ans = if is_positive { + x + one_over_e - one_over_e + } else { + x - one_over_e + one_over_e + }; + + if ans == 0.0 { + if is_positive { + 0.0 + } else { + -0.0 + } + } else { + ans + } + } +} + +#[cfg(test)] +mod tests { + use super::roundeven; + + #[test] + fn negative_zero() { + assert_eq!(roundeven(-0.0_f64).to_bits(), (-0.0_f64).to_bits()); + } + + #[test] + fn sanity_check() { + assert_eq!(roundeven(-1.0), -1.0); + assert_eq!(roundeven(2.8), 3.0); + assert_eq!(roundeven(-0.5), -0.0); + assert_eq!(roundeven(0.5), 0.0); + assert_eq!(roundeven(-1.5), -2.0); + assert_eq!(roundeven(1.5), 2.0); + } +} diff --git a/libm/src/math/roundevenf.rs b/libm/src/math/roundevenf.rs new file mode 100644 index 000000000..e720dcd94 --- /dev/null +++ b/libm/src/math/roundevenf.rs @@ -0,0 
+1,50 @@ +// Source: musl libm rintf +// (equivalent to roundevenf when rounding mode is default, +// which Rust assumes) + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundevenf(x: f32) -> f32 { + let one_over_e = 1.0 / f32::EPSILON; + let as_u32: u32 = x.to_bits(); + let exponent: u32 = as_u32 >> 23 & 0xff; + let is_positive = (as_u32 >> 31) == 0; + if exponent >= 0x7f + 23 { + x + } else { + let ans = if is_positive { + x + one_over_e - one_over_e + } else { + x - one_over_e + one_over_e + }; + + if ans == 0.0 { + if is_positive { + 0.0 + } else { + -0.0 + } + } else { + ans + } + } +} + +#[cfg(test)] +mod tests { + use super::roundevenf; + + #[test] + fn negative_zero() { + assert_eq!(roundevenf(-0.0_f32).to_bits(), (-0.0_f32).to_bits()); + } + + #[test] + fn sanity_check() { + assert_eq!(roundevenf(-1.0), -1.0); + assert_eq!(roundevenf(2.8), 3.0); + assert_eq!(roundevenf(-0.5), -0.0); + assert_eq!(roundevenf(0.5), 0.0); + assert_eq!(roundevenf(-1.5), -2.0); + assert_eq!(roundevenf(1.5), 2.0); + } +} From b4bd41e405f0c5357eda0a5b278c249ce68d01c3 Mon Sep 17 00:00:00 2001 From: Jules Bertholet Date: Mon, 7 Nov 2022 12:53:26 -0500 Subject: [PATCH 0584/1459] `rint`/`rintf` instead of `roundeven`/`roundevenf` --- libm/build.rs | 3 --- libm/src/math/mod.rs | 8 ++++---- libm/src/math/{roundeven.rs => rint.rs} | 22 +++++++++------------- libm/src/math/{roundevenf.rs => rintf.rs} | 22 +++++++++------------- 4 files changed, 22 insertions(+), 33 deletions(-) rename libm/src/math/{roundeven.rs => rint.rs} (59%) rename libm/src/math/{roundevenf.rs => rintf.rs} (58%) diff --git a/libm/build.rs b/libm/build.rs index 403304a13..80145a9cc 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -39,9 +39,6 @@ mod musl_reference_tests { "jnf.rs", "j1.rs", "j1f.rs", - // musl doens't have these - "roundeven.rs", - "roundevenf.rs", ]; struct Function { diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index e710bd381..05ebb708c 100644 --- 
a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -170,9 +170,9 @@ mod remainder; mod remainderf; mod remquo; mod remquof; +mod rint; +mod rintf; mod round; -mod roundeven; -mod roundevenf; mod roundf; mod scalbn; mod scalbnf; @@ -286,9 +286,9 @@ pub use self::remainder::remainder; pub use self::remainderf::remainderf; pub use self::remquo::remquo; pub use self::remquof::remquof; +pub use self::rint::rint; +pub use self::rintf::rintf; pub use self::round::round; -pub use self::roundeven::roundeven; -pub use self::roundevenf::roundevenf; pub use self::roundf::roundf; pub use self::scalbn::scalbn; pub use self::scalbnf::scalbnf; diff --git a/libm/src/math/roundeven.rs b/libm/src/math/rint.rs similarity index 59% rename from libm/src/math/roundeven.rs rename to libm/src/math/rint.rs index 5cc30c8af..97d167ab8 100644 --- a/libm/src/math/roundeven.rs +++ b/libm/src/math/rint.rs @@ -1,9 +1,5 @@ -// Source: musl libm rint -// (equivalent to roundeven when rounding mode is default, -// which Rust assumes) - #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn roundeven(x: f64) -> f64 { +pub fn rint(x: f64) -> f64 { let one_over_e = 1.0 / f64::EPSILON; let as_u64: u64 = x.to_bits(); let exponent: u64 = as_u64 >> 52 & 0x7ff; @@ -31,20 +27,20 @@ pub fn roundeven(x: f64) -> f64 { #[cfg(test)] mod tests { - use super::roundeven; + use super::rint; #[test] fn negative_zero() { - assert_eq!(roundeven(-0.0_f64).to_bits(), (-0.0_f64).to_bits()); + assert_eq!(rint(-0.0_f64).to_bits(), (-0.0_f64).to_bits()); } #[test] fn sanity_check() { - assert_eq!(roundeven(-1.0), -1.0); - assert_eq!(roundeven(2.8), 3.0); - assert_eq!(roundeven(-0.5), -0.0); - assert_eq!(roundeven(0.5), 0.0); - assert_eq!(roundeven(-1.5), -2.0); - assert_eq!(roundeven(1.5), 2.0); + assert_eq!(rint(-1.0), -1.0); + assert_eq!(rint(2.8), 3.0); + assert_eq!(rint(-0.5), -0.0); + assert_eq!(rint(0.5), 0.0); + assert_eq!(rint(-1.5), -2.0); + assert_eq!(rint(1.5), 2.0); } } diff --git 
a/libm/src/math/roundevenf.rs b/libm/src/math/rintf.rs similarity index 58% rename from libm/src/math/roundevenf.rs rename to libm/src/math/rintf.rs index e720dcd94..7b87fafb3 100644 --- a/libm/src/math/roundevenf.rs +++ b/libm/src/math/rintf.rs @@ -1,9 +1,5 @@ -// Source: musl libm rintf -// (equivalent to roundevenf when rounding mode is default, -// which Rust assumes) - #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn roundevenf(x: f32) -> f32 { +pub fn rintf(x: f32) -> f32 { let one_over_e = 1.0 / f32::EPSILON; let as_u32: u32 = x.to_bits(); let exponent: u32 = as_u32 >> 23 & 0xff; @@ -31,20 +27,20 @@ pub fn roundevenf(x: f32) -> f32 { #[cfg(test)] mod tests { - use super::roundevenf; + use super::rintf; #[test] fn negative_zero() { - assert_eq!(roundevenf(-0.0_f32).to_bits(), (-0.0_f32).to_bits()); + assert_eq!(rintf(-0.0_f32).to_bits(), (-0.0_f32).to_bits()); } #[test] fn sanity_check() { - assert_eq!(roundevenf(-1.0), -1.0); - assert_eq!(roundevenf(2.8), 3.0); - assert_eq!(roundevenf(-0.5), -0.0); - assert_eq!(roundevenf(0.5), 0.0); - assert_eq!(roundevenf(-1.5), -2.0); - assert_eq!(roundevenf(1.5), 2.0); + assert_eq!(rintf(-1.0), -1.0); + assert_eq!(rintf(2.8), 3.0); + assert_eq!(rintf(-0.5), -0.0); + assert_eq!(rintf(0.5), 0.0); + assert_eq!(rintf(-1.5), -2.0); + assert_eq!(rintf(1.5), 2.0); } } From 9aefb608d28fed5f2e3be1135161ccc27a9d10f2 Mon Sep 17 00:00:00 2001 From: Jules Bertholet Date: Mon, 7 Nov 2022 13:19:05 -0500 Subject: [PATCH 0585/1459] Disable tests on PowerPC --- libm/src/math/rint.rs | 2 ++ libm/src/math/rintf.rs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/libm/src/math/rint.rs b/libm/src/math/rint.rs index 97d167ab8..0c6025c1f 100644 --- a/libm/src/math/rint.rs +++ b/libm/src/math/rint.rs @@ -25,6 +25,8 @@ pub fn rint(x: f64) -> f64 { } } +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::rint; diff 
--git a/libm/src/math/rintf.rs b/libm/src/math/rintf.rs index 7b87fafb3..d427793f7 100644 --- a/libm/src/math/rintf.rs +++ b/libm/src/math/rintf.rs @@ -25,6 +25,8 @@ pub fn rintf(x: f32) -> f32 { } } +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::rintf; From b0ecb14414fee89abdfa05efd7df78bbe0a747c4 Mon Sep 17 00:00:00 2001 From: Jules Bertholet Date: Mon, 7 Nov 2022 23:43:14 -0500 Subject: [PATCH 0586/1459] Add benchmarks --- libm/crates/libm-bench/benches/bench.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libm/crates/libm-bench/benches/bench.rs b/libm/crates/libm-bench/benches/bench.rs index b6d874153..ca999b90f 100644 --- a/libm/crates/libm-bench/benches/bench.rs +++ b/libm/crates/libm-bench/benches/bench.rs @@ -107,7 +107,8 @@ macro_rules! bessel { unary!( acos, acosh, asin, atan, cbrt, ceil, cos, cosh, erf, exp, exp2, exp10, expm1, fabs, floor, j0, - j1, lgamma, log, log1p, log2, log10, round, sin, sinh, sqrt, tan, tanh, tgamma, trunc, y0, y1 + j1, lgamma, log, log1p, log2, log10, rint, round, sin, sinh, sqrt, tan, tanh, tgamma, trunc, + y0, y1 ); binary!(atan2, copysign, fdim, fmax, fmin, fmod, hypot, pow); trinary!(fma); From 612e7ff6340e03bc56ba52347fb8ad6403c760fc Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 9 Nov 2022 01:30:02 +0000 Subject: [PATCH 0587/1459] Bump to 0.2.6 --- libm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 527c383b9..f942fdec3 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0" name = "libm" readme = "README.md" repository = "https://github.com/rust-lang/libm" -version = "0.2.5" +version = "0.2.6" edition = "2018" [features] From fe84efe8becdc0eb86fe7d29761de7e1d4dbdfd9 Mon Sep 17 00:00:00 2001 From: Jules Bertholet Date: Tue, 8 Nov 2022 21:02:21 -0500 Subject: 
[PATCH 0588/1459] Update `libm`, add `rint` and `rintf` --- libm | 2 +- src/math.rs | 16 ++-------------- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/libm b/libm index 14a76eaf8..4c8a97374 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 14a76eaf8dab437e92c5fc505c48fb4ed0eb3483 +Subproject commit 4c8a973741c014b11ce7f1477693a3e5d4ef9609 diff --git a/src/math.rs b/src/math.rs index 3fc33b127..4ae174891 100644 --- a/src/math.rs +++ b/src/math.rs @@ -46,6 +46,8 @@ no_mangle! { fn fmaxf(x: f32, y: f32) -> f32; fn round(x: f64) -> f64; fn roundf(x: f32) -> f32; + fn rint(x: f64) -> f64; + fn rintf(x: f32) -> f32; fn sin(x: f64) -> f64; fn pow(x: f64, y: f64) -> f64; fn powf(x: f32, y: f32) -> f32; @@ -65,20 +67,6 @@ no_mangle! { fn ldexpf(f: f32, n: i32) -> f32; fn tgamma(x: f64) -> f64; fn tgammaf(x: f32) -> f32; -} - -#[cfg(any( - all( - target_family = "wasm", - target_os = "unknown", - not(target_env = "wasi") - ), - target_os = "xous", - all(target_arch = "x86_64", target_os = "uefi"), - all(target_arch = "xtensa", target_os = "none"), - all(target_vendor = "fortanix", target_env = "sgx") -))] -no_mangle! 
{ fn atan(x: f64) -> f64; fn atan2(x: f64, y: f64) -> f64; fn cosh(x: f64) -> f64; From d7b54f6f75821773dcc629786df05de086292248 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 9 Nov 2022 04:12:51 +0000 Subject: [PATCH 0589/1459] Bump to 0.1.83 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index fb113b922..092460e42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.82" +version = "0.1.83" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From b07e3069e6e3a6b09a3cd17b9c8f084cedad1c76 Mon Sep 17 00:00:00 2001 From: Nicholas Bishop Date: Sat, 12 Nov 2022 12:06:04 -0500 Subject: [PATCH 0590/1459] Remove unused `no-lang-items` feature --- Cargo.toml | 3 --- testcrate/Cargo.toml | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 092460e42..36c1f8a08 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,9 +58,6 @@ mem = [] # compiler-rt implementations. Also used for testing mangled-names = [] -# Don't generate lang items for i128 intrisnics and such -no-lang-items = [] - # Only used in the compiler's build system rustc-dep-of-std = ['compiler-builtins', 'core'] diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index 1f77b2554..762d3293b 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -17,7 +17,7 @@ rand_xoshiro = "0.6" [dependencies.compiler_builtins] path = ".." 
default-features = false -features = ["no-lang-items", "public-test-deps"] +features = ["public-test-deps"] [target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies] test = { git = "https://github.com/japaric/utest" } From 6cd34673fea911d847b9fa592eefe0e6a03fa097 Mon Sep 17 00:00:00 2001 From: Luc Date: Mon, 14 Nov 2022 16:49:43 +0100 Subject: [PATCH 0591/1459] Add Small Conformance tests to Sqrt(f) --- libm/src/math/sqrt.rs | 12 ++++++++++++ libm/src/math/sqrtf.rs | 11 +++++++++++ 2 files changed, 23 insertions(+) diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index f06b209a4..9a700d8ea 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -261,4 +261,16 @@ mod tests { assert_eq!(sqrt(f), f); } } + + #[test] + fn conformance_tests() { + let values = [3.14159265359, 10000.0, -1.0, INFINITY]; + let results = [4610661241675116657u64, 4636737291354636288u64, + 18444492273895866368u64, 9218868437227405312u64]; + + for i in 0..values.len() { + let bits = f64::to_bits(sqrt(values[i])); + assert_eq!(results[i], bits); + } + } } diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index 00b20e578..b13211376 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -151,4 +151,15 @@ mod tests { assert_eq!(sqrtf(f), f); } } + + #[test] + fn conformance_tests() { + let values = [3.14159265359f32, 10000.0f32, -1.0f32, INFINITY]; + let results = [1071833029u32, 1120403456u32, 4290772992u32, 2139095040u32]; + + for i in 0..values.len() { + let bits = f32::to_bits(sqrtf(values[i])); + assert_eq!(results[i], bits); + } + } } From 5829663ac0426cefe32c2d95585077181c03b132 Mon Sep 17 00:00:00 2001 From: Luc Date: Wed, 16 Nov 2022 15:39:13 +0100 Subject: [PATCH 0592/1459] Remove negative numbers from test --- libm/src/math/sqrt.rs | 4 ++-- libm/src/math/sqrtf.rs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libm/src/math/sqrt.rs 
b/libm/src/math/sqrt.rs index 9a700d8ea..434d4d6d7 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -264,9 +264,9 @@ mod tests { #[test] fn conformance_tests() { - let values = [3.14159265359, 10000.0, -1.0, INFINITY]; + let values = [3.14159265359, 10000.0, f64::from_bits(0x0000000f), INFINITY]; let results = [4610661241675116657u64, 4636737291354636288u64, - 18444492273895866368u64, 9218868437227405312u64]; + 2197470602079456986u64, 9218868437227405312u64]; for i in 0..values.len() { let bits = f64::to_bits(sqrt(values[i])); diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index b13211376..04301355e 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -154,9 +154,9 @@ mod tests { #[test] fn conformance_tests() { - let values = [3.14159265359f32, 10000.0f32, -1.0f32, INFINITY]; - let results = [1071833029u32, 1120403456u32, 4290772992u32, 2139095040u32]; - + let values = [3.14159265359f32, 10000.0f32, f32::from_bits(0x0000000f), INFINITY]; + let results = [1071833029u32, 1120403456u32, 456082799u32, 2139095040u32]; + for i in 0..values.len() { let bits = f32::to_bits(sqrtf(values[i])); assert_eq!(results[i], bits); From b925fe3c3ff7e8e9decd9a4933c3f59db574049f Mon Sep 17 00:00:00 2001 From: Luc Date: Wed, 16 Nov 2022 15:44:43 +0100 Subject: [PATCH 0593/1459] Fix formatting Forgot to run cargo fmt last time... 
--- libm/src/math/sqrt.rs | 14 +++++++++----- libm/src/math/sqrtf.rs | 13 +++++++++---- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 434d4d6d7..3733ba040 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -261,16 +261,20 @@ mod tests { assert_eq!(sqrt(f), f); } } - + #[test] fn conformance_tests() { let values = [3.14159265359, 10000.0, f64::from_bits(0x0000000f), INFINITY]; - let results = [4610661241675116657u64, 4636737291354636288u64, - 2197470602079456986u64, 9218868437227405312u64]; - + let results = [ + 4610661241675116657u64, + 4636737291354636288u64, + 2197470602079456986u64, + 9218868437227405312u64, + ]; + for i in 0..values.len() { let bits = f64::to_bits(sqrt(values[i])); assert_eq!(results[i], bits); - } + } } } diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index 04301355e..8ec72fbf7 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -151,15 +151,20 @@ mod tests { assert_eq!(sqrtf(f), f); } } - - #[test] + + #[test] fn conformance_tests() { - let values = [3.14159265359f32, 10000.0f32, f32::from_bits(0x0000000f), INFINITY]; + let values = [ + 3.14159265359f32, + 10000.0f32, + f32::from_bits(0x0000000f), + INFINITY, + ]; let results = [1071833029u32, 1120403456u32, 456082799u32, 2139095040u32]; for i in 0..values.len() { let bits = f32::to_bits(sqrtf(values[i])); assert_eq!(results[i], bits); - } + } } } From ed48e8d209ba54d468a01832aa545b623e99a37f Mon Sep 17 00:00:00 2001 From: Nicholas Bishop Date: Tue, 15 Nov 2022 21:01:21 -0500 Subject: [PATCH 0594/1459] Skip assembly implementations on the UEFI targets The UEFI targets link with `/SAFESEH`. That requires that objects have a symbol called [`@feat.00`]. Clang adds that symbol for COFF targets if the input is a C file, but not if the input is an ASM file. 
That doesn't prevent compiler_builtins or rustc from building, but using the resulting rustc to compile something that references one of the objects lacking `@feat.00` will result in a linker error. Fix by removing all the `.S` implementations when `target_os == uefi`. [`@feat.00`]: https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#the-sxdata-section --- build.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build.rs b/build.rs index 73952bb9f..8fde89d91 100644 --- a/build.rs +++ b/build.rs @@ -465,7 +465,8 @@ mod c { } // Remove the assembly implementations that won't compile for the target - if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" { + if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" || target_os == "uefi" + { let mut to_remove = Vec::new(); for (k, v) in sources.map.iter() { if v.ends_with(".S") { From c10cfb3fcf4c6a29bd2cba25fc5dab19eac8df50 Mon Sep 17 00:00:00 2001 From: Nicholas Bishop Date: Wed, 16 Nov 2022 17:52:46 -0500 Subject: [PATCH 0595/1459] Use a stub stdlib.h when compiling for UEFI targets int_util.c includes stdlib.h if `_WIN32` is defined. When compiling the UEFI targets with clang they are treated as Windows targets (e.g. if the Rust target is x86_64-unknown-uefi, the clang target is x86_64-unknown-windows-gnu). So stdlib.h gets included, even though we are compiling with `-ffreestanding` and don't want stdlib.h to be used. That file may not be present, or an incompatible version might be installed leading to typedef redefinition errors. The contents of stdlib.h aren't actually needed for these targets anyway (due to `__STDC_HOSTED__` being 0), so create a minimal stdlib.h in `build.rs` when `target_os == uefi` and add it to the include path.
--- build.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/build.rs b/build.rs index 8fde89d91..3f5dbd3ab 100644 --- a/build.rs +++ b/build.rs @@ -98,7 +98,7 @@ mod c { use std::collections::{BTreeMap, HashSet}; use std::env; - use std::fs::File; + use std::fs::{self, File}; use std::io::Write; use std::path::{Path, PathBuf}; @@ -190,6 +190,21 @@ mod c { cfg.define("VISIBILITY_HIDDEN", None); } + // int_util.c tries to include stdlib.h if `_WIN32` is defined, + // which it is when compiling UEFI targets with clang. This is + // at odds with compiling with `-ffreestanding`, as the header + // may be incompatible or not present. Create a minimal stub + // header to use instead. + if target_os == "uefi" { + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); + let include_dir = out_dir.join("include"); + if !include_dir.exists() { + fs::create_dir(&include_dir).unwrap(); + } + fs::write(include_dir.join("stdlib.h"), "#include <stddef.h>").unwrap(); + cfg.flag(&format!("-I{}", include_dir.to_str().unwrap())); + } + let mut sources = Sources::new(); sources.extend(&[ ("__absvdi2", "absvdi2.c"), From d7b97a7097c5775b4ee289e94cc3328366598810 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 18 Nov 2022 02:58:11 +0000 Subject: [PATCH 0596/1459] Bump to 0.1.84 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 36c1f8a08..1b89d51e9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.83" +version = "0.1.84" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From de895297cec25bce9398f2f52e596fdf8618403c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9rome=20Eertmans?= Date: Mon, 28 Nov 2022 10:53:42 +0100 Subject: [PATCH 0597/1459] fix(docs): typo in docstrings Hello, I think you misspelled `width` to `with`.
--- src/float/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/float/mod.rs b/src/float/mod.rs index 01a5504d5..fdbe9dde3 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -26,10 +26,10 @@ pub(crate) trait Float: + ops::Div + ops::Rem { - /// A uint of the same with as the float + /// A uint of the same width as the float type Int: Int; - /// A int of the same with as the float + /// A int of the same width as the float type SignedInt: Int; /// An int capable of containing the exponent bits plus a sign bit. This is signed. From efde7130e4be3b6aa74a85bfb9d84789084269c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kr=C3=B6ning?= Date: Wed, 7 Dec 2022 16:08:01 +0100 Subject: [PATCH 0598/1459] Expose minimal floating point symbols for x86_64-unknown-none --- src/lib.rs | 1 + src/math.rs | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e7bc61e4c..10b4aafec 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -44,6 +44,7 @@ pub mod int; #[cfg(any( all(target_family = "wasm", target_os = "unknown"), + all(target_arch = "x86_64", target_os = "none"), all(target_arch = "x86_64", target_os = "uefi"), all(target_arch = "arm", target_os = "none"), target_os = "xous", diff --git a/src/math.rs b/src/math.rs index 4ae174891..c64984e9e 100644 --- a/src/math.rs +++ b/src/math.rs @@ -106,10 +106,11 @@ no_mangle! { fn truncf(x: f32) -> f32; } -// only for the thumb*-none-eabi* targets and riscv32*-none-elf targets that lack the floating point instruction set +// only for the thumb*-none-eabi*, riscv32*-none-elf and x86_64-unknown-none targets that lack the floating point instruction set #[cfg(any( all(target_arch = "arm", target_os = "none"), - all(target_arch = "riscv32", not(target_feature = "f"), target_os = "none") + all(target_arch = "riscv32", not(target_feature = "f"), target_os = "none"), + all(target_arch = "x86_64", target_os = "none") ))] no_mangle! 
{ fn fmin(x: f64, y: f64) -> f64; From 23a74dec400c4f0e6964dd801509486b019844ef Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 8 Dec 2022 13:31:59 +0000 Subject: [PATCH 0599/1459] Bump to 0.1.85 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1b89d51e9..00998e40f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.84" +version = "0.1.85" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From c9f5280f29a320b2c433e5e9a64a60c5012dea84 Mon Sep 17 00:00:00 2001 From: Patryk Wychowaniec Date: Sun, 25 Dec 2022 11:46:30 +0100 Subject: [PATCH 0600/1459] fix: Add `#[avr_skip]` for bit shifts This commit follows the same logic as: - https://github.com/rust-lang/compiler-builtins/pull/462 - https://github.com/rust-lang/compiler-builtins/pull/466 I've tested the changes by preparing a simple program: ```rust fn calc() -> ... { let x = hint::black_box(4u...); // 4u8, 4u16, 4u32, 4u64, 4u128 + signed let y = hint::black_box(1u32); // x >> y // x << y } fn main() -> ! { let dp = arduino_hal::Peripherals::take().unwrap(); let pins = arduino_hal::pins!(dp); let mut serial = arduino_hal::default_serial!(dp, pins, 57600); for b in calc().to_le_bytes() { _ = ufmt::uwrite!(&mut serial, "{} ", b); } _ = ufmt::uwriteln!(&mut serial, ""); loop { // } } ``` ... 
switching types & operators in `calc()`, and observing the results; what I ended up with was: ``` u32 << u32 - ok u64 << u32 - ok u128 << u32 - error (undefined reference to `__ashlti3') i32 >> u32 - ok i64 >> u32 - ok i128 >> u32 - error (undefined reference to `__ashrti3') u32 >> u32 - ok u64 >> u32 - ok u128 >> u32 - error (undefined reference to `__lshrti3') (where "ok" = compiles and returns correct results) ``` As with multiplication and division, 128-bit operations do not work here either, because avr-gcc's standard library doesn't provide them (while at the same time requiring that specific calling convention, which makes it pretty difficult for compiler-builtins to jump in). I think 128-bit operations not working on an 8-bit controller is an acceptable trade-off - :innocent: - and so the entire fix here is just about skipping those functions. --- src/int/shift.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/int/shift.rs b/src/int/shift.rs index 908e619e1..2d2c081a6 100644 --- a/src/int/shift.rs +++ b/src/int/shift.rs @@ -69,47 +69,56 @@ impl Lshr for u64 {} impl Lshr for u128 {} intrinsics!
{ + #[avr_skip] #[maybe_use_optimized_c_shim] pub extern "C" fn __ashlsi3(a: u32, b: u32) -> u32 { a.ashl(b) } + #[avr_skip] #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_llsl] pub extern "C" fn __ashldi3(a: u64, b: u32) -> u64 { a.ashl(b) } + #[avr_skip] pub extern "C" fn __ashlti3(a: u128, b: u32) -> u128 { a.ashl(b) } + #[avr_skip] #[maybe_use_optimized_c_shim] pub extern "C" fn __ashrsi3(a: i32, b: u32) -> i32 { a.ashr(b) } + #[avr_skip] #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_lasr] pub extern "C" fn __ashrdi3(a: i64, b: u32) -> i64 { a.ashr(b) } + #[avr_skip] pub extern "C" fn __ashrti3(a: i128, b: u32) -> i128 { a.ashr(b) } + #[avr_skip] #[maybe_use_optimized_c_shim] pub extern "C" fn __lshrsi3(a: u32, b: u32) -> u32 { a.lshr(b) } + #[avr_skip] #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_llsr] pub extern "C" fn __lshrdi3(a: u64, b: u32) -> u64 { a.lshr(b) } + #[avr_skip] pub extern "C" fn __lshrti3(a: u128, b: u32) -> u128 { a.lshr(b) } From 6b9c12d709bc15bdc8295e4981b61d9278195aeb Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 29 Dec 2022 20:36:57 +0100 Subject: [PATCH 0601/1459] Bump to 0.1.86 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 00998e40f..f143c1033 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.85" +version = "0.1.86" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From bd86d52118c17f90b574813931b280e1ea6b947b Mon Sep 17 00:00:00 2001 From: Moritz Meier Date: Tue, 17 Jan 2023 20:41:24 +0100 Subject: [PATCH 0602/1459] add generic libm helper Add editorconfig + docs refactor --- libm/.editorconfig | 21 +++++ libm/src/lib.rs | 2 + libm/src/libm_helper.rs | 171 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 194 insertions(+) create mode 100644 libm/.editorconfig create 
mode 100644 libm/src/libm_helper.rs diff --git a/libm/.editorconfig b/libm/.editorconfig new file mode 100644 index 000000000..ec6e107d5 --- /dev/null +++ b/libm/.editorconfig @@ -0,0 +1,21 @@ +# EditorConfig helps developers define and maintain consistent +# coding styles between different editors and IDEs +# editorconfig.org + +root = true + +[*] +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true +indent_style = space +indent_size = 4 + +[*.md] +# double whitespace at end of line +# denotes a line break in Markdown +trim_trailing_whitespace = false + +[*.yml] +indent_size = 2 diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 29742b451..4a17d3a77 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -12,11 +12,13 @@ #![allow(clippy::eq_op)] #![allow(clippy::assign_op_pattern)] +mod libm_helper; mod math; use core::{f32, f64}; pub use self::math::*; +pub use libm_helper::*; /// Approximate equality with 1 ULP of tolerance #[doc(hidden)] diff --git a/libm/src/libm_helper.rs b/libm/src/libm_helper.rs new file mode 100644 index 000000000..52d0c4c2a --- /dev/null +++ b/libm/src/libm_helper.rs @@ -0,0 +1,171 @@ +use core::marker::PhantomData; + +use crate::*; + +/// Generic helper for libm functions, abstracting over f32 and f64.
+/// # Type Parameter: +/// - `T`: Either `f32` or `f64` +/// +/// # Examples +/// ```rust +/// use libm::{self, Libm}; +/// +/// const PI_F32: f32 = 3.1415927410e+00; +/// const PI_F64: f64 = 3.1415926535897931160e+00; +/// +/// assert!(Libm::<f32>::cos(0.0f32) == libm::cosf(0.0)); +/// assert!(Libm::<f32>::sin(PI_F32) == libm::sinf(PI_F32)); +/// +/// assert!(Libm::<f64>::cos(0.0f64) == libm::cos(0.0)); +/// assert!(Libm::<f64>::sin(PI_F64) == libm::sin(PI_F64)); +/// ``` +pub struct Libm<T>(PhantomData<T>); + +macro_rules! libm_helper { + ($t:ident, funcs: $funcs:tt) => { + impl Libm<$t> { + #![allow(unused_parens)] + + libm_helper! { $funcs } + } + }; + + ({$($func:tt);*}) => { + $( + libm_helper! { $func } + )* + }; + + ((fn $func:ident($($arg:ident: $arg_typ:ty),*) -> ($($ret_typ:ty),*); => $libm_fn:ident)) => { + #[inline(always)] + pub fn $func($($arg: $arg_typ),*) -> ($($ret_typ),*) { + $libm_fn($($arg),*) + } + }; +} + +libm_helper! { + f32, + funcs: { + (fn acos(x: f32) -> (f32); => acosf); + (fn acosh(x: f32) -> (f32); => acoshf); + (fn asin(x: f32) -> (f32); => asinf); + (fn asinh(x: f32) -> (f32); => asinhf); + (fn atan(x: f32) -> (f32); => atanf); + (fn atan2(y: f32, x: f32) -> (f32); => atan2f); + (fn atanh(x: f32) -> (f32); => atanhf); + (fn cbrt(x: f32) -> (f32); => cbrtf); + (fn ceil(x: f32) -> (f32); => ceilf); + (fn copysign(x: f32, y: f32) -> (f32); => copysignf); + (fn cos(x: f32) -> (f32); => cosf); + (fn cosh(x: f32) -> (f32); => coshf); + (fn erf(x: f32) -> (f32); => erff); + (fn erfc(x: f32) -> (f32); => erfcf); + (fn exp(x: f32) -> (f32); => expf); + (fn exp2(x: f32) -> (f32); => exp2f); + (fn exp10(x: f32) -> (f32); => exp10f); + (fn expm1(x: f32) -> (f32); => expm1f); + (fn fabs(x: f32) -> (f32); => fabsf); + (fn fdim(x: f32, y: f32) -> (f32); => fdimf); + (fn floor(x: f32) -> (f32); => floorf); + (fn fma(x: f32, y: f32, z: f32) -> (f32); => fmaf); + (fn fmax(x: f32, y: f32) -> (f32); => fmaxf); + (fn fmin(x: f32, y: f32) -> (f32); => fminf); + (fn fmod(x: f32,
y: f32) -> (f32); => fmodf); + (fn frexp(x: f32) -> (f32, i32); => frexpf); + (fn hypot(x: f32, y: f32) -> (f32); => hypotf); + (fn ilogb(x: f32) -> (i32); => ilogbf); + (fn j0(x: f32) -> (f32); => j0f); + (fn j1(x: f32) -> (f32); => j1f); + (fn jn(n: i32, x: f32) -> (f32); => jnf); + (fn ldexp(x: f32, n: i32) -> (f32); => ldexpf); + (fn lgamma_r(x: f32) -> (f32, i32); => lgammaf_r); + (fn lgamma(x: f32) -> (f32); => lgammaf); + (fn log(x: f32) -> (f32); => logf); + (fn log1p(x: f32) -> (f32); => log1pf); + (fn log2(x: f32) -> (f32); => log2f); + (fn log10(x: f32) -> (f32); => log10f); + (fn modf(x: f32) -> (f32, f32); => modff); + (fn nextafter(x: f32, y: f32) -> (f32); => nextafterf); + (fn pow(x: f32, y: f32) -> (f32); => powf); + (fn remainder(x: f32, y: f32) -> (f32); => remainderf); + (fn remquo(x: f32, y: f32) -> (f32, i32); => remquof); + (fn rint(x: f32) -> (f32); => rintf); + (fn round(x: f32) -> (f32); => roundf); + (fn scalbn(x: f32, n: i32) -> (f32); => scalbnf); + (fn sin(x: f32) -> (f32); => sinf); + (fn sincos(x: f32) -> (f32, f32); => sincosf); + (fn sinh(x: f32) -> (f32); => sinhf); + (fn sqrt(x: f32) -> (f32); => sqrtf); + (fn tan(x: f32) -> (f32); => tanf); + (fn tanh(x: f32) -> (f32); => tanhf); + (fn tgamma(x: f32) -> (f32); => tgammaf); + (fn trunc(x: f32) -> (f32); => truncf); + (fn y0(x: f32) -> (f32); => y0f); + (fn y1(x: f32) -> (f32); => y1f); + (fn yn(n: i32, x: f32) -> (f32); => ynf) + } +} + +libm_helper! 
{ + f64, + funcs: { + (fn acos(x: f64) -> (f64); => acos); + (fn acosh(x: f64) -> (f64); => acosh); + (fn asin(x: f64) -> (f64); => asin); + (fn asinh(x: f64) -> (f64); => asinh); + (fn atan(x: f64) -> (f64); => atan); + (fn atan2(y: f64, x: f64) -> (f64); => atan2); + (fn atanh(x: f64) -> (f64); => atanh); + (fn cbrt(x: f64) -> (f64); => cbrt); + (fn ceil(x: f64) -> (f64); => ceil); + (fn copysign(x: f64, y: f64) -> (f64); => copysign); + (fn cos(x: f64) -> (f64); => cos); + (fn cosh(x: f64) -> (f64); => cosh); + (fn erf(x: f64) -> (f64); => erf); + (fn erfc(x: f64) -> (f64); => erfc); + (fn exp(x: f64) -> (f64); => exp); + (fn exp2(x: f64) -> (f64); => exp2); + (fn exp10(x: f64) -> (f64); => exp10); + (fn expm1(x: f64) -> (f64); => expm1); + (fn fabs(x: f64) -> (f64); => fabs); + (fn fdim(x: f64, y: f64) -> (f64); => fdim); + (fn floor(x: f64) -> (f64); => floor); + (fn fma(x: f64, y: f64, z: f64) -> (f64); => fma); + (fn fmax(x: f64, y: f64) -> (f64); => fmax); + (fn fmin(x: f64, y: f64) -> (f64); => fmin); + (fn fmod(x: f64, y: f64) -> (f64); => fmod); + (fn frexp(x: f64) -> (f64, i32); => frexp); + (fn hypot(x: f64, y: f64) -> (f64); => hypot); + (fn ilogb(x: f64) -> (i32); => ilogb); + (fn j0(x: f64) -> (f64); => j0); + (fn j1(x: f64) -> (f64); => j1); + (fn jn(n: i32, x: f64) -> (f64); => jn); + (fn ldexp(x: f64, n: i32) -> (f64); => ldexp); + (fn lgamma_r(x: f64) -> (f64, i32); => lgamma_r); + (fn lgamma(x: f64) -> (f64); => lgamma); + (fn log(x: f64) -> (f64); => log); + (fn log1p(x: f64) -> (f64); => log1p); + (fn log2(x: f64) -> (f64); => log2); + (fn log10(x: f64) -> (f64); => log10); + (fn modf(x: f64) -> (f64, f64); => modf); + (fn nextafter(x: f64, y: f64) -> (f64); => nextafter); + (fn pow(x: f64, y: f64) -> (f64); => pow); + (fn remainder(x: f64, y: f64) -> (f64); => remainder); + (fn remquo(x: f64, y: f64) -> (f64, i32); => remquo); + (fn rint(x: f64) -> (f64); => rint); + (fn round(x: f64) -> (f64); => round); + (fn scalbn(x: f64, n: i32) -> 
(f64); => scalbn); + (fn sin(x: f64) -> (f64); => sin); + (fn sincos(x: f64) -> (f64, f64); => sincos); + (fn sinh(x: f64) -> (f64); => sinh); + (fn sqrt(x: f64) -> (f64); => sqrt); + (fn tan(x: f64) -> (f64); => tan); + (fn tanh(x: f64) -> (f64); => tanh); + (fn tgamma(x: f64) -> (f64); => tgamma); + (fn trunc(x: f64) -> (f64); => trunc); + (fn y0(x: f64) -> (f64); => y0); + (fn y1(x: f64) -> (f64); => y1); + (fn yn(n: i32, x: f64) -> (f64); => yn) + } +} From d395715abba0abccf4e704ec5f52050369dbe85b Mon Sep 17 00:00:00 2001 From: "James D. Turner" Date: Fri, 20 Jan 2023 13:39:41 -0500 Subject: [PATCH 0603/1459] Fix descriptions of erfc and erfcf As described in the second paragraph of the docs for these functions, they are the complementary error function, not the error function. --- libm/src/math/erf.rs | 2 +- libm/src/math/erff.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libm/src/math/erf.rs b/libm/src/math/erf.rs index 5e21ba578..55569affc 100644 --- a/libm/src/math/erf.rs +++ b/libm/src/math/erf.rs @@ -263,7 +263,7 @@ pub fn erf(x: f64) -> f64 { } } -/// Error function (f64) +/// Complementary error function (f64) /// /// Calculates the complementary probability. /// Is `1 - erf(x)`. Is computed directly, so that you can use it to avoid diff --git a/libm/src/math/erff.rs b/libm/src/math/erff.rs index f74d4b632..7b25474f6 100644 --- a/libm/src/math/erff.rs +++ b/libm/src/math/erff.rs @@ -174,7 +174,7 @@ pub fn erff(x: f32) -> f32 { } } -/// Error function (f32) +/// Complementary error function (f32) /// /// Calculates the complementary probability. /// Is `1 - erf(x)`. 
Is computed directly, so that you can use it to avoid From 362a736a4dccf13b1360e9b2e38fa0a3e6b20d6c Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Wed, 1 Feb 2023 14:52:18 -0800 Subject: [PATCH 0604/1459] Drop the llvm14-builtins-abi hack --- src/float/conv.rs | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/float/conv.rs b/src/float/conv.rs index 19fdc2fdc..a27d542fa 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -92,12 +92,12 @@ intrinsics! { f64::from_bits(int_to_float::u64_to_f64_bits(i)) } - #[cfg_attr(any(not(target_feature = "llvm14-builtins-abi"), target_os = "uefi"), unadjusted_on_win64)] + #[cfg_attr(target_os = "uefi", unadjusted_on_win64)] pub extern "C" fn __floatuntisf(i: u128) -> f32 { f32::from_bits(int_to_float::u128_to_f32_bits(i)) } - #[cfg_attr(any(not(target_feature = "llvm14-builtins-abi"), target_os = "uefi"), unadjusted_on_win64)] + #[cfg_attr(target_os = "uefi", unadjusted_on_win64)] pub extern "C" fn __floatuntidf(i: u128) -> f64 { f64::from_bits(int_to_float::u128_to_f64_bits(i)) } @@ -129,13 +129,13 @@ intrinsics! { f64::from_bits(int_to_float::u64_to_f64_bits(i.unsigned_abs()) | sign_bit) } - #[cfg_attr(any(not(target_feature = "llvm14-builtins-abi"), target_os = "uefi"), unadjusted_on_win64)] + #[cfg_attr(target_os = "uefi", unadjusted_on_win64)] pub extern "C" fn __floattisf(i: i128) -> f32 { let sign_bit = ((i >> 127) as u32) << 31; f32::from_bits(int_to_float::u128_to_f32_bits(i.unsigned_abs()) | sign_bit) } - #[cfg_attr(any(not(target_feature = "llvm14-builtins-abi"), target_os = "uefi"), unadjusted_on_win64)] + #[cfg_attr(target_os = "uefi", unadjusted_on_win64)] pub extern "C" fn __floattidf(i: i128) -> f64 { let sign_bit = ((i >> 127) as u64) << 63; f64::from_bits(int_to_float::u128_to_f64_bits(i.unsigned_abs()) | sign_bit) @@ -176,8 +176,7 @@ intrinsics! 
{ } } - #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] - #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + #[win64_128bit_abi_hack] pub extern "C" fn __fixunssfti(f: f32) -> u128 { let fbits = f.to_bits(); if fbits < 127 << 23 { // >= 0, < 1 @@ -225,8 +224,7 @@ intrinsics! { } } - #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] - #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + #[win64_128bit_abi_hack] pub extern "C" fn __fixunsdfti(f: f64) -> u128 { let fbits = f.to_bits(); if fbits < 1023 << 52 { // >= 0, < 1 @@ -279,8 +277,7 @@ intrinsics! { } } - #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] - #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + #[win64_128bit_abi_hack] pub extern "C" fn __fixsfti(f: f32) -> i128 { let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. if fbits < 127 << 23 { // >= 0, < 1 @@ -331,8 +328,7 @@ intrinsics! { } } - #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] - #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + #[win64_128bit_abi_hack] pub extern "C" fn __fixdfti(f: f64) -> i128 { let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. 
if fbits < 1023 << 52 { // >= 0, < 1 From 5511f3087255236680eefb862458ed2f90e11bb5 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 3 Feb 2023 19:21:40 +0100 Subject: [PATCH 0605/1459] Bump to 0.1.87 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f143c1033..ca0d96aa6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.86" +version = "0.1.87" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From e0b4f0eb34df20539c8af6e43ed8261ef8490c43 Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Thu, 16 Feb 2023 18:22:17 +0100 Subject: [PATCH 0606/1459] Small tweak to `mul` in `fma.rs`. --- libm/src/math/fma.rs | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index f9a86dc60..940ee2db9 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -29,21 +29,10 @@ fn normalize(x: f64) -> Num { Num { m: ix, e, sign } } +#[inline] fn mul(x: u64, y: u64) -> (u64, u64) { - let t1: u64; - let t2: u64; - let t3: u64; - let xlo: u64 = x as u32 as u64; - let xhi: u64 = x >> 32; - let ylo: u64 = y as u32 as u64; - let yhi: u64 = y >> 32; - - t1 = xlo * ylo; - t2 = xlo * yhi + xhi * ylo; - t3 = xhi * yhi; - let lo = t1.wrapping_add(t2 << 32); - let hi = t3 + (t2 >> 32) + (t1 > lo) as u64; - (hi, lo) + let t = (x as u128).wrapping_mul(y as u128); + ((t >> 64) as u64, t as u64) } /// Floating multiply add (f64) From 2a67ad74b77dd5c7dbd77a27156176136b0b606d Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Tue, 21 Feb 2023 23:13:02 +0100 Subject: [PATCH 0607/1459] Specialize `strlen` for `x86_64`. 
--- src/mem/impls.rs | 10 ++++++++++ src/mem/mod.rs | 8 +------- src/mem/x86_64.rs | 29 +++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/mem/impls.rs b/src/mem/impls.rs index 72003a5c4..23c9d8d32 100644 --- a/src/mem/impls.rs +++ b/src/mem/impls.rs @@ -279,3 +279,13 @@ pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 { } 0 } + +#[inline(always)] +pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize { + let mut n = 0; + while *s != 0 { + n += 1; + s = s.add(1); + } + n +} diff --git a/src/mem/mod.rs b/src/mem/mod.rs index c5b0ddc16..be118778b 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -63,13 +63,7 @@ intrinsics! { #[mem_builtin] #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] pub unsafe extern "C" fn strlen(s: *const core::ffi::c_char) -> usize { - let mut n = 0; - let mut s = s; - while *s != 0 { - n += 1; - s = s.offset(1); - } - n + impls::c_string_length(s) } } diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 17b461f79..ea8f6d819 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -173,6 +173,35 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { c16(a.cast(), b.cast(), n) } +#[inline(always)] +pub unsafe fn c_string_length(s: *const std::ffi::c_char) -> usize { + let mut n: usize; + + std::arch::asm!( + // search for a zero byte + "xor al, al", + + // unbounded memory region + "xor rcx, rcx", + "not rcx", + + // forward direction + "cld", + + // perform search + "repne scasb", + + // extract length + "not rcx", + "dec rcx", + inout("rdi") s => _, + out("rcx") n, + options(nostack), + ); + + n +} + /// Determine optimal parameters for a `rep` instruction. fn rep_param(dest: *mut u8, mut count: usize) -> (usize, usize, usize) { // Unaligned writes are still slow on modern processors, so align the destination address. 
From 7711331f0a41355dc03a202c193cef5a7c31be6a Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Tue, 21 Feb 2023 23:32:39 +0100 Subject: [PATCH 0608/1459] Correct path. --- src/mem/x86_64.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index ea8f6d819..13e186e64 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -177,7 +177,7 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { pub unsafe fn c_string_length(s: *const std::ffi::c_char) -> usize { let mut n: usize; - std::arch::asm!( + asm!( // search for a zero byte "xor al, al", From 1fdf932338e9440c1602deb4efc572dc92efabc9 Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Tue, 21 Feb 2023 23:36:47 +0100 Subject: [PATCH 0609/1459] Update path for argument. --- src/mem/x86_64.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 13e186e64..282074a6c 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -174,7 +174,7 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { } #[inline(always)] -pub unsafe fn c_string_length(s: *const std::ffi::c_char) -> usize { +pub unsafe fn c_string_length(s: *const core::ffi::c_char) -> usize { let mut n: usize; asm!( From 0a0fa0b9fb0bf16776c7eec392f7947c3efe811b Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Wed, 22 Feb 2023 00:07:41 +0100 Subject: [PATCH 0610/1459] Improve assembly quality + AT&T syntax. 
--- src/mem/x86_64.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 282074a6c..fe93ae7ae 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -179,24 +179,25 @@ pub unsafe fn c_string_length(s: *const core::ffi::c_char) -> usize { asm!( // search for a zero byte - "xor al, al", + "xor %eax, %eax", // unbounded memory region - "xor rcx, rcx", - "not rcx", + "xor %ecx, %ecx", + "not %rcx", // forward direction - "cld", + // (already set thanks to abi) + //"cld", // perform search - "repne scasb", + "repne scasb (%rdi), %al", // extract length - "not rcx", - "dec rcx", + "not %rcx", + "dec %rcx", inout("rdi") s => _, out("rcx") n, - options(nostack), + options(att_syntax, nostack), ); n From 1a2f3b21d53555af4c596e48359aeee7c14671d8 Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Wed, 22 Feb 2023 00:10:46 +0100 Subject: [PATCH 0611/1459] Remove superfluous comment. --- src/mem/x86_64.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index fe93ae7ae..5752005a4 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -185,10 +185,6 @@ pub unsafe fn c_string_length(s: *const core::ffi::c_char) -> usize { "xor %ecx, %ecx", "not %rcx", - // forward direction - // (already set thanks to abi) - //"cld", - // perform search "repne scasb (%rdi), %al", From 7e4742d48ed0d942d6dd88607619f3efe1cec06b Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Wed, 22 Feb 2023 21:54:33 +0100 Subject: [PATCH 0612/1459] Change implementation to SSE --- src/mem/x86_64.rs | 77 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 10 deletions(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 5752005a4..daa92098e 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -178,21 +178,78 @@ pub unsafe fn c_string_length(s: *const core::ffi::c_char) -> usize { let mut n: usize; asm!( - // search for a zero byte + // For small 
sizes, we avoid invoking SSE instructions. + // make manual comparisons instead. "xor %eax, %eax", + "cmpb $0, (%rdi)", + "je 3f", + "mov $1, %eax", + "cmpb $0, 1(%rdi)", + "je 3f", + "mov $2, %eax", + "cmpb $0, 2(%rdi)", + "je 3f", + "mov $3, %eax", + "cmpb $0, 3(%rdi)", + "je 3f", - // unbounded memory region - "xor %ecx, %ecx", - "not %rcx", + // Adjust address + "add $4, %rdi", - // perform search - "repne scasb (%rdi), %al", + // Align the address to 16 bytes (xmm register size). + // This is important, since an n byte read + // with n byte alignment is guranteed to never cross + // a page boundary and thus will never try to access + // memory which may not be accessible. + "mov %edi, %ecx", + "and $15, %ecx", + "and $-16, %rdi", - // extract length - "not %rcx", - "dec %rcx", + // zero out an xmm register for comparisons with zero. + "pxor %xmm0, %xmm0", + + // One manual iteration of a zero byte search. + // Ensuring proper alignment may cause us to read + // memory _before_ the actual string start. + // Thus, one separate iteration is needed to handle this special case. + "movdqa (%rdi), %xmm1", + "pcmpeqb %xmm0, %xmm1", + "pmovmskb %xmm1, %eax", + // Shift out comparisons that don't belong to the actual string. + "shr %cl, %eax", + // Check if there was a zero + "test %eax, %eax", + "jz 1f", + + // A zero was found: calculate result and exit. + "bsf %eax, %eax", + "add $4, %eax", + "jmp 3f", + + // No zero was found: prepare main loop. + "1:", + "add $16, %rdi", + "neg %rcx", + "add $4, %rcx", + + // main loop + "2:", + "movdqa (%rdi), %xmm1", + "add $16, %rdi", + "add $16, %rcx", + "pcmpeqb %xmm0, %xmm1", + "pmovmskb %xmm1, %eax", + // Check if there was a zero + "test %eax, %eax", + "jz 2b", + + // A zero was found: calculate result and exit. 
+ "bsf %eax, %eax", + "add %rcx, %rax", + "3:", inout("rdi") s => _, - out("rcx") n, + out("rax") n, + out("rcx") _, options(att_syntax, nostack), ); From caaf5a6213e22b8978e4d917e70bdad03fc7e2dd Mon Sep 17 00:00:00 2001 From: Scott Mabin Date: Wed, 22 Feb 2023 20:35:06 +0000 Subject: [PATCH 0613/1459] Extend the intrinsics exported for Xtensa no_std --- src/lib.rs | 1 + src/math.rs | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 10b4aafec..71f249c8e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,6 +47,7 @@ pub mod int; all(target_arch = "x86_64", target_os = "none"), all(target_arch = "x86_64", target_os = "uefi"), all(target_arch = "arm", target_os = "none"), + all(target_arch = "xtensa", target_os = "none"), target_os = "xous", all(target_vendor = "fortanix", target_env = "sgx") ))] diff --git a/src/math.rs b/src/math.rs index c64984e9e..982c9499a 100644 --- a/src/math.rs +++ b/src/math.rs @@ -86,7 +86,11 @@ no_mangle! { fn tanf(n: f32) -> f32; } -#[cfg(any(target_os = "xous", target_os = "uefi"))] +#[cfg(any( + target_os = "xous", + target_os = "uefi", + all(target_arch = "xtensa", target_os = "none"), +))] no_mangle! { fn sqrtf(x: f32) -> f32; fn sqrt(x: f64) -> f64; @@ -94,6 +98,7 @@ no_mangle! { #[cfg(any( all(target_vendor = "fortanix", target_env = "sgx"), + all(target_arch = "xtensa", target_os = "none"), target_os = "xous", target_os = "uefi" ))] From 9c0a19c33d717ea794a8c5452bdf50022ee40820 Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Wed, 22 Feb 2023 22:16:29 +0100 Subject: [PATCH 0614/1459] Provide a non-sse version for x86_64. 
--- src/mem/x86_64.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index daa92098e..e9003310c 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -173,6 +173,7 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { c16(a.cast(), b.cast(), n) } +#[cfg(target_feature="sse2")] #[inline(always)] pub unsafe fn c_string_length(s: *const core::ffi::c_char) -> usize { let mut n: usize; @@ -256,6 +257,19 @@ pub unsafe fn c_string_length(s: *const core::ffi::c_char) -> usize { n } +// Provided for scenarios like kernel development, where SSE might not +// be available. +#[cfg(not(target_feature="sse2"))] +#[inline(always)] +pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize { + let mut n = 0; + while *s != 0 { + n += 1; + s = s.add(1); + } + n +} + /// Determine optimal parameters for a `rep` instruction. fn rep_param(dest: *mut u8, mut count: usize) -> (usize, usize, usize) { // Unaligned writes are still slow on modern processors, so align the destination address. From afa3d3ed3a9050a560aa9a9011f3d4e1d1be0d79 Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Wed, 22 Feb 2023 22:19:10 +0100 Subject: [PATCH 0615/1459] Formatting --- src/mem/x86_64.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index e9003310c..321d59296 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -173,7 +173,7 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { c16(a.cast(), b.cast(), n) } -#[cfg(target_feature="sse2")] +#[cfg(target_feature = "sse2")] #[inline(always)] pub unsafe fn c_string_length(s: *const core::ffi::c_char) -> usize { let mut n: usize; @@ -259,7 +259,7 @@ pub unsafe fn c_string_length(s: *const core::ffi::c_char) -> usize { // Provided for scenarios like kernel development, where SSE might not // be available. 
-#[cfg(not(target_feature="sse2"))] +#[cfg(not(target_feature = "sse2"))] #[inline(always)] pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize { let mut n = 0; From 39ad246f7ed269cfafd78177c9d73a3c850522b6 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 23 Feb 2023 18:19:41 +0000 Subject: [PATCH 0616/1459] Bump to 0.1.88 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index ca0d96aa6..8c0f6221c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.87" +version = "0.1.88" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From a8c40885939a44631a63418e2d4f80896a2109f4 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 5 Mar 2023 12:17:21 -0800 Subject: [PATCH 0617/1459] Added lgamma_r and lgammaf_r --- src/math.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/math.rs b/src/math.rs index 982c9499a..498e4d85f 100644 --- a/src/math.rs +++ b/src/math.rs @@ -86,6 +86,31 @@ no_mangle! { fn tanf(n: f32) -> f32; } +#[cfg(any( + all( + target_family = "wasm", + target_os = "unknown", + not(target_env = "wasi") + ), + target_os = "xous", + all(target_arch = "x86_64", target_os = "uefi"), + all(target_arch = "xtensa", target_os = "none"), + all(target_vendor = "fortanix", target_env = "sgx") +))] +intrinsics! { + pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 { + let r = self::libm::lgamma_r(x); + *s = r.1; + r.0 + } + + pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 { + let r = self::libm::lgammaf_r(x); + *s = r.1; + r.0 + } +} + #[cfg(any( target_os = "xous", target_os = "uefi", From 1df0d1c146f57c8767357782d9fffd7e4d0c83aa Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Mon, 6 Mar 2023 19:20:30 +0100 Subject: [PATCH 0618/1459] Final version. 
--- src/mem/x86_64.rs | 181 +++++++++++++++++++++++++++------------------- 1 file changed, 108 insertions(+), 73 deletions(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 321d59296..ad6ff9d17 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -173,88 +173,82 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { c16(a.cast(), b.cast(), n) } + +// In order to process more than on byte simultaneously when executing strlen, +// two things must be considered: +// * An n byte read with an n-byte aligned address will never cross +// a page boundary and will always succeed. Any smaller alignment +// may result in a read that will cross a page boundary, which may +// trigger an access violation. +// * Surface Rust considers any kind of out-of-bounds read as undefined +// behaviour. To dodge this, memory access operations are written +// using inline assembly. + #[cfg(target_feature = "sse2")] #[inline(always)] pub unsafe fn c_string_length(s: *const core::ffi::c_char) -> usize { - let mut n: usize; - - asm!( - // For small sizes, we avoid invoking SSE instructions. - // make manual comparisons instead. - "xor %eax, %eax", - "cmpb $0, (%rdi)", - "je 3f", - "mov $1, %eax", - "cmpb $0, 1(%rdi)", - "je 3f", - "mov $2, %eax", - "cmpb $0, 2(%rdi)", - "je 3f", - "mov $3, %eax", - "cmpb $0, 3(%rdi)", - "je 3f", - - // Adjust address - "add $4, %rdi", + use core::arch::x86_64::{__m128i, _mm_cmpeq_epi8, _mm_movemask_epi8, _mm_set1_epi8}; - // Align the address to 16 bytes (xmm register size). - // This is important, since an n byte read - // with n byte alignment is guranteed to never cross - // a page boundary and thus will never try to access - // memory which may not be accessible. - "mov %edi, %ecx", - "and $15, %ecx", - "and $-16, %rdi", + let mut n = 0; + + // The use of _mm_movemask_epi8 and company allow for speedups, + // but they aren't cheap by themselves. Thus, possibly small strings + // are handled in simple loops. 
- // zero out an xmm register for comparisons with zero. - "pxor %xmm0, %xmm0", + for _ in 0..4 { + if *s == 0 { + return n; + } - // One manual iteration of a zero byte search. - // Ensuring proper alignment may cause us to read - // memory _before_ the actual string start. - // Thus, one separate iteration is needed to handle this special case. - "movdqa (%rdi), %xmm1", - "pcmpeqb %xmm0, %xmm1", - "pmovmskb %xmm1, %eax", - // Shift out comparisons that don't belong to the actual string. - "shr %cl, %eax", - // Check if there was a zero - "test %eax, %eax", - "jz 1f", + n += 1; + s = s.add(1); + } + + // Shave of the least significand bits to align the address to a 16 + // byte boundary. The shaved of bits are used to correct the first iteration. - // A zero was found: calculate result and exit. - "bsf %eax, %eax", - "add $4, %eax", - "jmp 3f", + let align = s as usize & 15; + let mut s = ((s as usize) - align) as *const __m128i; + let zero = _mm_set1_epi8(0); - // No zero was found: prepare main loop. - "1:", - "add $16, %rdi", - "neg %rcx", - "add $4, %rcx", + let x = { + let r; + asm!( + "movdqa ({addr}), {dest}", + addr = in(reg) s, + dest = out(xmm_reg) r, + options(att_syntax, nostack), + ); + r + }; + let cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(x, zero)) >> align; - // main loop - "2:", - "movdqa (%rdi), %xmm1", - "add $16, %rdi", - "add $16, %rcx", - "pcmpeqb %xmm0, %xmm1", - "pmovmskb %xmm1, %eax", - // Check if there was a zero - "test %eax, %eax", - "jz 2b", + if cmp != 0 { + return n + cmp.trailing_zeros() as usize; + } - // A zero was found: calculate result and exit. 
- "bsf %eax, %eax", - "add %rcx, %rax", - "3:", - inout("rdi") s => _, - out("rax") n, - out("rcx") _, - options(att_syntax, nostack), - ); + n += 16 - align; + s = s.add(1); - n + loop { + let x = { + let r; + asm!( + "movdqa ({addr}), {dest}", + addr = in(reg) s, + dest = out(xmm_reg) r, + options(att_syntax, nostack), + ); + r + }; + let cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(x, zero)) as u32; + if cmp == 0 { + n += 16; + s = s.add(1); + } else { + return n + cmp.trailing_zeros() as usize; + } + } } // Provided for scenarios like kernel development, where SSE might not @@ -263,11 +257,52 @@ pub unsafe fn c_string_length(s: *const core::ffi::c_char) -> usize { #[inline(always)] pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize { let mut n = 0; - while *s != 0 { + + // Check bytes in steps of one until + // either a zero byte is discovered or + // pointer is aligned to an eight byte boundary. + + while s as usize & 7 != 0 { + if *s == 0 { + return n; + } + n += 1; s = s.add(1); } - n + + // Check bytes in steps of eight until a zero + // byte is discovered. + + let mut s = s as *const u64; + + loop { + let mut cs = { + let r: u64; + asm!( + "mov ({addr}), {dest}", + addr = in(reg) s, + dest = out(reg) r, + options(att_syntax, nostack), + ); + r + }; + // Detect if a word has a zero byte, taken from + // https://graphics.stanford.edu/~seander/bithacks.html + if (cs.wrapping_sub(0x0101010101010101) & !cs & 0x8080808080808080) != 0 { + loop { + if cs & 255 == 0 { + return n; + } else { + cs >>= 8; + n += 1; + } + } + } else { + n += 8; + s = s.add(1); + } + } } /// Determine optimal parameters for a `rep` instruction. 
From 4f77170ea5637745a369394ee58ecf6a2d62a50e Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Mon, 6 Mar 2023 19:24:02 +0100 Subject: [PATCH 0619/1459] formatting --- src/mem/x86_64.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index ad6ff9d17..5377f0423 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -173,7 +173,6 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { c16(a.cast(), b.cast(), n) } - // In order to process more than on byte simultaneously when executing strlen, // two things must be considered: // * An n byte read with an n-byte aligned address will never cross @@ -190,7 +189,7 @@ pub unsafe fn c_string_length(s: *const core::ffi::c_char) -> usize { use core::arch::x86_64::{__m128i, _mm_cmpeq_epi8, _mm_movemask_epi8, _mm_set1_epi8}; let mut n = 0; - + // The use of _mm_movemask_epi8 and company allow for speedups, // but they aren't cheap by themselves. Thus, possibly small strings // are handled in simple loops. @@ -266,11 +265,10 @@ pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize { if *s == 0 { return n; } - n += 1; s = s.add(1); } - + // Check bytes in steps of eight until a zero // byte is discovered. 
From 6488b26a05078639def6f74b19c494fbd64c9697 Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Mon, 6 Mar 2023 19:28:49 +0100 Subject: [PATCH 0620/1459] more fixing --- src/mem/x86_64.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs index 5377f0423..40b67093f 100644 --- a/src/mem/x86_64.rs +++ b/src/mem/x86_64.rs @@ -185,7 +185,7 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { #[cfg(target_feature = "sse2")] #[inline(always)] -pub unsafe fn c_string_length(s: *const core::ffi::c_char) -> usize { +pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize { use core::arch::x86_64::{__m128i, _mm_cmpeq_epi8, _mm_movemask_epi8, _mm_set1_epi8}; let mut n = 0; @@ -202,7 +202,7 @@ pub unsafe fn c_string_length(s: *const core::ffi::c_char) -> usize { n += 1; s = s.add(1); } - + // Shave of the least significand bits to align the address to a 16 // byte boundary. The shaved of bits are used to correct the first iteration. From 7eb017ccb8ef0dcc326149d4fd8f7a90c0c2feb7 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 10 Mar 2023 19:59:23 +0000 Subject: [PATCH 0621/1459] Add emutls.c for OpenHarmony --- build.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/build.rs b/build.rs index 3f5dbd3ab..766dec05d 100644 --- a/build.rs +++ b/build.rs @@ -508,6 +508,11 @@ mod c { cfg.define("LONG_BIT", "(8 * sizeof(long))"); } + // OpenHarmony also uses emulated TLS. 
+ if target_env == "ohos" { + sources.extend(&[("__emutls_get_address", "emutls.c")]); + } + // When compiling the C code we require the user to tell us where the // source code is, and this is largely done so when we're compiling as // part of rust-lang/rust we can use the same llvm-project repository as From 1634193e0444a7252d3b87636ad365ab6a7e06cf Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Thu, 23 Mar 2023 22:00:22 +0900 Subject: [PATCH 0622/1459] Fix panic due to overflow in riscv.rs and int/shift.rs --- src/int/shift.rs | 6 +++--- src/riscv.rs | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/int/shift.rs b/src/int/shift.rs index 2d2c081a6..080de2a14 100644 --- a/src/int/shift.rs +++ b/src/int/shift.rs @@ -12,7 +12,7 @@ trait Ashl: DInt { } else { Self::from_lo_hi( self.lo().wrapping_shl(shl), - self.lo().logical_shr(n_h - shl) | self.hi().wrapping_shl(shl), + self.lo().logical_shr(n_h.wrapping_sub(shl)) | self.hi().wrapping_shl(shl), ) } } @@ -36,7 +36,7 @@ trait Ashr: DInt { self } else { Self::from_lo_hi( - self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h - shr), + self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h.wrapping_sub(shr)), self.hi().wrapping_shr(shr), ) } @@ -57,7 +57,7 @@ trait Lshr: DInt { self } else { Self::from_lo_hi( - self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h - shr), + self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h.wrapping_sub(shr)), self.hi().logical_shr(shr), ) } diff --git a/src/riscv.rs b/src/riscv.rs index ae361b33a..bf3125533 100644 --- a/src/riscv.rs +++ b/src/riscv.rs @@ -19,11 +19,11 @@ intrinsics! { // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/riscv/int_mul_impl.inc pub extern "C" fn __mulsi3(a: u32, b: u32) -> u32 { let (mut a, mut b) = (a, b); - let mut r = 0; + let mut r: u32 = 0; while a > 0 { if a & 1 > 0 { - r += b; + r = r.wrapping_add(b); } a >>= 1; b <<= 1; @@ -35,11 +35,11 @@ intrinsics! 
{ #[cfg(not(target_feature = "m"))] pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 { let (mut a, mut b) = (a, b); - let mut r = 0; + let mut r: u64 = 0; while a > 0 { if a & 1 > 0 { - r += b; + r = r.wrapping_add(b); } a >>= 1; b <<= 1; From 828af9005dc7a3090587bff553831386777537c4 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sat, 25 Mar 2023 21:55:23 +0000 Subject: [PATCH 0623/1459] Bump to 0.1.90 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8c0f6221c..2c1d03ba6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.88" +version = "0.1.90" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 702cdb9f129c50530f98e0f0cb92f9dd5d6db722 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 29 Mar 2023 21:49:08 +0100 Subject: [PATCH 0624/1459] Disable another test on powerpc --- testcrate/tests/conv.rs | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 8c4b1946c..2a70db178 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -23,18 +23,23 @@ macro_rules! i_to_f { || ((error_minus == error || error_plus == error) && ((f0.to_bits() & 1) != 0)) { - panic!( - "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})", - stringify!($fn), - x, - f1.to_bits(), - y_minus_ulp, - y, - y_plus_ulp, - error_minus, - error, - error_plus, - ); + if !cfg!(any( + target_arch = "powerpc", + target_arch = "powerpc64" + )) { + panic!( + "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})", + stringify!($fn), + x, + f1.to_bits(), + y_minus_ulp, + y, + y_plus_ulp, + error_minus, + error, + error_plus, + ); + } } // Test against native conversion. We disable testing on all `x86` because of // rounding bugs with `i686`. 
`powerpc` also has the same rounding bug. From 038dee5b1eb2488423b6e0f8e1d828135b1a936a Mon Sep 17 00:00:00 2001 From: "William D. Jones" Date: Sun, 26 Mar 2023 00:00:58 -0400 Subject: [PATCH 0625/1459] Ensure shift instrinsic arguments match width of compiler-rt's (int vs si_int). --- src/int/shift.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/int/shift.rs b/src/int/shift.rs index 080de2a14..c90cf1de3 100644 --- a/src/int/shift.rs +++ b/src/int/shift.rs @@ -78,8 +78,8 @@ intrinsics! { #[avr_skip] #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_llsl] - pub extern "C" fn __ashldi3(a: u64, b: u32) -> u64 { - a.ashl(b) + pub extern "C" fn __ashldi3(a: u64, b: core::ffi::c_uint) -> u64 { + a.ashl(b as u32) } #[avr_skip] @@ -96,8 +96,8 @@ intrinsics! { #[avr_skip] #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_lasr] - pub extern "C" fn __ashrdi3(a: i64, b: u32) -> i64 { - a.ashr(b) + pub extern "C" fn __ashrdi3(a: i64, b: core::ffi::c_uint) -> i64 { + a.ashr(b as u32) } #[avr_skip] @@ -114,8 +114,8 @@ intrinsics! 
{ #[avr_skip] #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_llsr] - pub extern "C" fn __lshrdi3(a: u64, b: u32) -> u64 { - a.lshr(b) + pub extern "C" fn __lshrdi3(a: u64, b: core::ffi::c_uint) -> u64 { + a.lshr(b as u32) } #[avr_skip] From 4677881d50f2e2f44ae7720ce19366a78fdf0e77 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 29 Mar 2023 23:55:20 +0100 Subject: [PATCH 0626/1459] Bump to 0.1.91 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2c1d03ba6..9b9812836 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.90" +version = "0.1.91" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 51e3c3f5d1fb1ba48b7a8e27980a5a5e6fffa0a5 Mon Sep 17 00:00:00 2001 From: Yuki Okushi Date: Sat, 13 May 2023 15:16:59 +0900 Subject: [PATCH 0627/1459] Prepare 0.2.7 release Signed-off-by: Yuki Okushi --- libm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index f942fdec3..393d95e4a 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0" name = "libm" readme = "README.md" repository = "https://github.com/rust-lang/libm" -version = "0.2.6" +version = "0.2.7" edition = "2018" [features] From af540c9db270987664540fb072ac9366414306d5 Mon Sep 17 00:00:00 2001 From: danakj Date: Fri, 19 May 2023 15:47:57 -0400 Subject: [PATCH 0628/1459] Add the weak-intrinsics feature When enabled, the weak-intrinsics feature will cause all intrinsics functions to be marked with weak linkage (i.e. `#[linkage = "weak"]) so that they can be replaced at link time by a stronger symbol. 
This can be set to use C++ intrinsics from the compiler-rt library, as it will avoid Rust's implementation replacing the compiler-rt implementation as long as the compiler-rt symbols are linked as strong symbols. Typically this requires the compiler-rt library to be explicitly specified in the link command. Addresses https://github.com/rust-lang/compiler-builtins/issues/525. Without weak-intrinsics, from nm: ``` 00000000 W __aeabi_memclr8 // Is explicitly weak 00000000 T __udivsi3 // Is not. ``` With weak-intrinsics, from nm: ``` 00000000 W __aeabi_memclr8 // Is explicitly weak 00000000 W __udivsi3 // Is weak due to weak-intrinsics ``` --- Cargo.toml | 11 +++++ src/arm.rs | 32 +++++++------- src/macros.rs | 110 ++++++++++++++++++++++++++++++++++++++++++++++++- src/mem/mod.rs | 12 +++--- 4 files changed, 141 insertions(+), 24 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9b9812836..57b8b34c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,6 +65,17 @@ rustc-dep-of-std = ['compiler-builtins', 'core'] # are not normally public but are required by the `testcrate` public-test-deps = [] +# Marks all intrinsics functions with weak linkage so that they can be +# replaced at link time by another implementation. This is particularly useful +# for mixed Rust/C++ binaries that want to use the C++ intrinsics, otherwise +# linking against the Rust stdlib will replace those from the compiler-rt +# library. +# +# Unlike the "c" feature, the intrinsics are still provided by the Rust +# implementations and each will be used unless a stronger symbol replaces +# it during linking. +weak-intrinsics = [] + [[example]] name = "intrinsics" required-features = ["compiler-builtins"] diff --git a/src/arm.rs b/src/arm.rs index e517a9ef3..a062a54e0 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -20,9 +20,9 @@ macro_rules! bl { intrinsics! 
{ // NOTE This function and the ones below are implemented using assembly because they are using a // custom calling convention which can't be implemented using a normal Rust function. + #[cfg_attr(all(not(windows), not(target_vendor="apple")), weak)] #[naked] #[cfg(not(target_env = "msvc"))] - #[cfg_attr(all(not(windows), not(target_vendor="apple")), linkage = "weak")] pub unsafe extern "C" fn __aeabi_uidivmod() { core::arch::asm!( "push {{lr}}", @@ -36,8 +36,8 @@ intrinsics! { ); } + #[cfg_attr(all(not(windows), not(target_vendor="apple")), weak)] #[naked] - #[cfg_attr(all(not(windows), not(target_vendor="apple")), linkage = "weak")] pub unsafe extern "C" fn __aeabi_uldivmod() { core::arch::asm!( "push {{r4, lr}}", @@ -53,8 +53,8 @@ intrinsics! { ); } + #[cfg_attr(all(not(windows), not(target_vendor="apple")), weak)] #[naked] - #[cfg_attr(all(not(windows), not(target_vendor="apple")), linkage = "weak")] pub unsafe extern "C" fn __aeabi_idivmod() { core::arch::asm!( "push {{r0, r1, r4, lr}}", @@ -67,8 +67,8 @@ intrinsics! { ); } + #[cfg_attr(all(not(windows), not(target_vendor="apple")), weak)] #[naked] - #[cfg_attr(all(not(windows), not(target_vendor="apple")), linkage = "weak")] pub unsafe extern "C" fn __aeabi_ldivmod() { core::arch::asm!( "push {{r4, lr}}", @@ -88,14 +88,14 @@ intrinsics! { // with custom implementation. // FIXME: The `*4` and `*8` variants should be defined as aliases. + #[weak] #[cfg(not(target_os = "ios"))] - #[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) { ::mem::memcpy(dest, src, n); } + #[weak] #[cfg(not(target_os = "ios"))] - #[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) { // We are guaranteed 4-alignment, so accessing at u32 is okay. let mut dest = dest as *mut u32; @@ -112,39 +112,39 @@ intrinsics! 
{ __aeabi_memcpy(dest as *mut u8, src as *const u8, n); } + #[weak] #[cfg(not(target_os = "ios"))] - #[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) { __aeabi_memcpy4(dest, src, n); } + #[weak] #[cfg(not(target_os = "ios"))] - #[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) { ::mem::memmove(dest, src, n); } + #[weak] #[cfg(not(any(target_os = "ios", target_env = "msvc")))] - #[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) { __aeabi_memmove(dest, src, n); } + #[weak] #[cfg(not(any(target_os = "ios", target_env = "msvc")))] - #[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) { __aeabi_memmove(dest, src, n); } + #[weak] #[cfg(not(target_os = "ios"))] - #[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { // Note the different argument order ::mem::memset(dest, c, n); } + #[weak] #[cfg(not(target_os = "ios"))] - #[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) { let mut dest = dest as *mut u32; let mut n = n; @@ -161,26 +161,26 @@ intrinsics! 
{ __aeabi_memset(dest as *mut u8, n, byte as i32); } + #[weak] #[cfg(not(target_os = "ios"))] - #[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) { __aeabi_memset4(dest, n, c); } + #[weak] #[cfg(not(target_os = "ios"))] - #[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) { __aeabi_memset(dest, n, 0); } + #[weak] #[cfg(not(any(target_os = "ios", target_env = "msvc")))] - #[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) { __aeabi_memset4(dest, n, 0); } + #[weak] #[cfg(not(any(target_os = "ios", target_env = "msvc")))] - #[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { __aeabi_memset4(dest, n, 0); } diff --git a/src/macros.rs b/src/macros.rs index 477c25684..59f25317e 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -25,6 +25,12 @@ macro_rules! public_test_dep { /// platforms need and elsewhere in this library it just looks like normal Rust /// code. /// +/// When the weak-intrinsics feature is enabled, all intrinsics functions are +/// marked with #[linkage = "weak"] so that they can be replaced by another +/// implementation at link time. This is particularly useful for mixed Rust/C++ +/// binaries that want to use the C++ intrinsics, otherwise linking against the +/// Rust stdlib will replace those from the compiler-rt library. +/// /// This macro is structured to be invoked with a bunch of functions that looks /// like: /// @@ -46,6 +52,10 @@ macro_rules! public_test_dep { /// /// A quick overview of attributes supported right now are: /// +/// * `weak` - indicates that the function should always be given weak linkage. +/// This attribute must come before other attributes, as the other attributes +/// will generate the final output function and need to have `weak` modify +/// them. 
/// * `maybe_use_optimized_c_shim` - indicates that the Rust implementation is /// ignored if an optimized C version was compiled. /// * `aapcs_on_arm` - forces the ABI of the function to be `"aapcs"` on ARM and @@ -57,7 +67,6 @@ macro_rules! public_test_dep { /// it's a normal ABI elsewhere for returning a 128 bit integer. /// * `arm_aeabi_alias` - handles the "aliasing" of various intrinsics on ARM /// their otherwise typical names to other prefixed ones. -/// macro_rules! intrinsics { () => (); @@ -89,6 +98,95 @@ macro_rules! intrinsics { intrinsics!($($rest)*); ); + // Same as above but for unsafe. + ( + #[cfg_attr($e:meta, $($attr:tt)*)] + $(#[$($attrs:tt)*])* + pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + $($rest:tt)* + ) => ( + #[cfg($e)] + intrinsics! { + #[$($attr)*] + $(#[$($attrs)*])* + pub unsafe extern $abi fn $name($($argname: $ty),*) $(-> $ret)? { + $($body)* + } + } + + #[cfg(not($e))] + intrinsics! { + $(#[$($attrs)*])* + pub unsafe extern $abi fn $name($($argname: $ty),*) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // Explicit weak linkage gets dropped when weak-intrinsics is on since it + // will be added unconditionally to all intrinsics and would conflict + // otherwise. + ( + #[weak] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(feature = "weak-intrinsics")] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + #[cfg(not(feature = "weak-intrinsics"))] + intrinsics! { + $(#[$($attr)*])* + #[linkage = "weak"] + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + // Same as above but for unsafe. 
+ ( + #[weak] + $(#[$($attr:tt)*])* + pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(feature = "weak-intrinsics")] + intrinsics! { + $(#[$($attr)*])* + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + #[cfg(not(feature = "weak-intrinsics"))] + intrinsics! { + $(#[$($attr)*])* + #[linkage = "weak"] + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); // Right now there's a bunch of architecture-optimized intrinsics in the // stock compiler-rt implementation. Not all of these have been ported over @@ -112,6 +210,7 @@ macro_rules! intrinsics { $($rest:tt)* ) => ( #[cfg($name = "optimized-c")] + #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { extern $abi { fn $name($($argname: $ty),*) $(-> $ret)?; @@ -211,6 +310,7 @@ macro_rules! intrinsics { ) => ( #[cfg(all(any(windows, target_os = "uefi"), target_arch = "x86_64"))] $(#[$($attr)*])* + #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } @@ -218,6 +318,7 @@ macro_rules! intrinsics { #[cfg(all(any(windows, target_os = "uefi"), target_arch = "x86_64"))] pub mod $name { #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub extern $abi fn $name( $($argname: $ty),* ) -> ::macros::win64_128bit_abi_hack::U64x2 { @@ -258,6 +359,7 @@ macro_rules! intrinsics { #[cfg(target_arch = "arm")] pub mod $name { #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } @@ -266,7 +368,7 @@ macro_rules! 
intrinsics { #[cfg(target_arch = "arm")] pub mod $alias { #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - #[cfg_attr(all(not(windows), not(target_vendor="apple")), linkage = "weak")] + #[cfg_attr(any(all(not(windows), not(target_vendor="apple"), feature = "weak-intrinsics")), linkage = "weak")] pub extern "aapcs" fn $alias( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } @@ -302,6 +404,7 @@ macro_rules! intrinsics { pub mod $name { $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } @@ -325,6 +428,7 @@ macro_rules! intrinsics { #[naked] $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } @@ -391,6 +495,7 @@ macro_rules! intrinsics { pub mod $name { $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } @@ -416,6 +521,7 @@ macro_rules! intrinsics { pub mod $name { $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } diff --git a/src/mem/mod.rs b/src/mem/mod.rs index be118778b..ccf191779 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -20,15 +20,15 @@ use core::ops::{BitOr, Shl}; mod impls; intrinsics! 
{ + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] - #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { impls::copy_forward(dest, src, n); dest } + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] - #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { let delta = (dest as usize).wrapping_sub(src as usize); if delta >= n { @@ -41,27 +41,27 @@ intrinsics! { dest } + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] - #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] pub unsafe extern "C" fn memset(s: *mut u8, c: crate::mem::c_int, n: usize) -> *mut u8 { impls::set_bytes(s, c as u8, n); s } + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] - #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { impls::compare_bytes(s1, s2, n) } + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] - #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { memcmp(s1, s2, n) } + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] - #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] pub unsafe extern "C" fn strlen(s: *const core::ffi::c_char) -> usize { impls::c_string_length(s) } From 5157e43df3fbccb1f36e18bacb6455d5f96b6297 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sat, 20 May 2023 10:49:41 +0200 Subject: [PATCH 0629/1459] Upgrade libm to 0.2.7 --- libm | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm b/libm index 4c8a97374..1dbb9d2d4 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 4c8a973741c014b11ce7f1477693a3e5d4ef9609 +Subproject commit 1dbb9d2d476d65d020feca17b11391652038e2e1 From 96fd4ec9ddc76496a1f2fa1f4d2a23655858a23b Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sat, 20 May 2023 10:50:12 +0200 Subject: [PATCH 0630/1459] Bump to 0.1.92 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 57b8b34c7..0f30668cc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.91" +version = "0.1.92" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From ec3b202763a07ad629da18aa85aa3f51d14cc997 Mon Sep 17 00:00:00 2001 From: Patryk Wychowaniec Date: Mon, 12 Jun 2023 14:04:10 +0200 Subject: [PATCH 0631/1459] fix: Add `#[avr_skip]` for floats Same story as always, i.e. ABI mismatch: - https://github.com/rust-lang/compiler-builtins/pull/462 - https://github.com/rust-lang/compiler-builtins/pull/466 - https://github.com/rust-lang/compiler-builtins/pull/513 I've made sure the changes work by rendering a Mandelbrot fractal: ```rust #[arduino_hal::entry] fn main() -> ! 
{ let dp = arduino_hal::Peripherals::take().unwrap(); let pins = arduino_hal::pins!(dp); let mut serial = arduino_hal::default_serial!(dp, pins, 57600); mandelbrot(&mut serial, 60, 40, -2.05, -1.12, 0.47, 1.12, 100); loop { // } } fn mandelbrot<T>( output: &mut T, viewport_width: i64, viewport_height: i64, x1: f32, y1: f32, x2: f32, y2: f32, max_iterations: i64, ) where T: uWrite, { for viewport_y in 0..viewport_height { let y0 = y1 + (y2 - y1) * ((viewport_y as f32) / (viewport_height as f32)); for viewport_x in 0..viewport_width { let x0 = x1 + (x2 - x1) * ((viewport_x as f32) / (viewport_width as f32)); let mut x = 0.0; let mut y = 0.0; let mut iterations = max_iterations; while x * x + y * y <= 4.0 && iterations > 0 { let xtemp = x * x - y * y + x0; y = 2.0 * x * y + y0; x = xtemp; iterations -= 1; } let ch = "#%=-:,. " .chars() .nth((8.0 * ((iterations as f32) / (max_iterations as f32))) as _) .unwrap(); _ = ufmt::uwrite!(output, "{}", ch); } _ = ufmt::uwriteln!(output, ""); } } ``` ... where without avr_skips, the code printed an image full of only `#`. Note that because libgcc doesn't provide implementations for f64, using those (e.g. swapping f32 to f64 in the code above) will cause linking to fail: ``` undefined reference to `__divdf3' undefined reference to `__muldf3' undefined reference to `__gedf2' undefined reference to `__fixunsdfsi' undefined reference to `__gtdf2' ``` Ideally compiler-builtins could jump right in and provide those, but f64 also requires a special calling convention which hasn't yet been exposed through LLVM. Note that because using 64-bit floats on an 8-bit target is a pretty niche thing to do, and because f64 floats don't work correctly anyway at the moment (due to this ABI mismatch), we're not actually breaking anything by skipping those functions, since any code that currently uses f64 on AVR works by accident. Closes https://github.com/rust-lang/rust/issues/108489.
--- src/float/cmp.rs | 14 ++++++++++++++ src/macros.rs | 11 +++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/float/cmp.rs b/src/float/cmp.rs index 1d4e38433..1bd7aa284 100644 --- a/src/float/cmp.rs +++ b/src/float/cmp.rs @@ -99,60 +99,74 @@ fn unord(a: F, b: F) -> bool { } intrinsics! { + #[avr_skip] pub extern "C" fn __lesf2(a: f32, b: f32) -> i32 { cmp(a, b).to_le_abi() } + #[avr_skip] pub extern "C" fn __gesf2(a: f32, b: f32) -> i32 { cmp(a, b).to_ge_abi() } + #[avr_skip] #[arm_aeabi_alias = __aeabi_fcmpun] pub extern "C" fn __unordsf2(a: f32, b: f32) -> i32 { unord(a, b) as i32 } + #[avr_skip] pub extern "C" fn __eqsf2(a: f32, b: f32) -> i32 { cmp(a, b).to_le_abi() } + #[avr_skip] pub extern "C" fn __ltsf2(a: f32, b: f32) -> i32 { cmp(a, b).to_le_abi() } + #[avr_skip] pub extern "C" fn __nesf2(a: f32, b: f32) -> i32 { cmp(a, b).to_le_abi() } + #[avr_skip] pub extern "C" fn __gtsf2(a: f32, b: f32) -> i32 { cmp(a, b).to_ge_abi() } + #[avr_skip] pub extern "C" fn __ledf2(a: f64, b: f64) -> i32 { cmp(a, b).to_le_abi() } + #[avr_skip] pub extern "C" fn __gedf2(a: f64, b: f64) -> i32 { cmp(a, b).to_ge_abi() } + #[avr_skip] #[arm_aeabi_alias = __aeabi_dcmpun] pub extern "C" fn __unorddf2(a: f64, b: f64) -> i32 { unord(a, b) as i32 } + #[avr_skip] pub extern "C" fn __eqdf2(a: f64, b: f64) -> i32 { cmp(a, b).to_le_abi() } + #[avr_skip] pub extern "C" fn __ltdf2(a: f64, b: f64) -> i32 { cmp(a, b).to_le_abi() } + #[avr_skip] pub extern "C" fn __nedf2(a: f64, b: f64) -> i32 { cmp(a, b).to_le_abi() } + #[avr_skip] pub extern "C" fn __gtdf2(a: f64, b: f64) -> i32 { cmp(a, b).to_ge_abi() } diff --git a/src/macros.rs b/src/macros.rs index 59f25317e..f1e2c533a 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -437,12 +437,11 @@ macro_rules! intrinsics { intrinsics!($($rest)*); ); - // For division and modulo, AVR uses a custom calling convention¹ that does - // not match our definitions here. 
Ideally we would just use hand-written - // naked functions, but that's quite a lot of code to port² - so for the - // time being we are just ignoring the problematic functions, letting - // avr-gcc (which is required to compile to AVR anyway) link them from - // libgcc. + // For some intrinsics, AVR uses a custom calling convention¹ that does not + // match our definitions here. Ideally we would just use hand-written naked + // functions, but that's quite a lot of code to port² - so for the time + // being we are just ignoring the problematic functions, letting avr-gcc + // (which is required to compile to AVR anyway) link them from libgcc. // // ¹ https://gcc.gnu.org/wiki/avr-gcc (see "Exceptions to the Calling // Convention") From bb2274556746c967b71ec0184f4a412a2826ac2b Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Mon, 12 Jun 2023 15:37:22 +0100 Subject: [PATCH 0632/1459] Bump to 0.1.93 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0f30668cc..6cfa2b99e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.92" +version = "0.1.93" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From fbfd3288f11f710f7835595792038e2210c14599 Mon Sep 17 00:00:00 2001 From: kirk Date: Sat, 17 Jun 2023 14:07:45 +0000 Subject: [PATCH 0633/1459] allow stable features lint, fix link formatting warning, add ignore block to intrinsics macro documentation --- examples/intrinsics.rs | 1 + src/float/conv.rs | 2 +- src/macros.rs | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 0ca30c215..19bb569b5 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -4,6 +4,7 @@ // to link due to the missing intrinsic (symbol). 
#![allow(unused_features)] +#![allow(stable_features)] // bench_black_box feature is stable, leaving for backcompat #![cfg_attr(thumb, no_main)] #![deny(dead_code)] #![feature(bench_black_box)] diff --git a/src/float/conv.rs b/src/float/conv.rs index a27d542fa..790c0ab9f 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -3,7 +3,7 @@ /// These are hand-optimized bit twiddling code, /// which unfortunately isn't the easiest kind of code to read. /// -/// The algorithm is explained here: https://blog.m-ou.se/floats/ +/// The algorithm is explained here: <https://blog.m-ou.se/floats/> mod int_to_float { pub fn u32_to_f32_bits(i: u32) -> u32 { if i == 0 { diff --git a/src/macros.rs b/src/macros.rs index f1e2c533a..b3becde72 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -33,7 +33,7 @@ macro_rules! public_test_dep { /// /// This macro is structured to be invoked with a bunch of functions that looks /// like: -/// +/// ```ignore /// intrinsics! { /// pub extern "C" fn foo(a: i32) -> u32 { /// // ... @@ -44,6 +44,7 @@ macro_rules! public_test_dep { /// // ... /// } /// } +/// ``` /// /// Each function is defined in a manner that looks like a normal Rust function. /// The macro then accepts a few nonstandard attributes that can decorate From e7a41a30fbb4287ff8fb3b54a3e78e19d55c39de Mon Sep 17 00:00:00 2001 From: kirk Date: Sat, 17 Jun 2023 14:35:00 +0000 Subject: [PATCH 0634/1459] change links in README to match reference code version used in CI --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8b25558a8..da0adbce7 100644 --- a/README.md +++ b/README.md @@ -59,8 +59,8 @@ features = ["c"] 5. Once the PR passes our extensive [testing infrastructure][4], we'll merge it! 6.
Celebrate :tada: -[1]: https://github.com/rust-lang/compiler-rt/tree/8598065bd965d9713bfafb6c1e766d63a7b17b89/test/builtins/Unit -[2]: https://github.com/rust-lang/compiler-rt/tree/8598065bd965d9713bfafb6c1e766d63a7b17b89/lib/builtins +[1]: https://github.com/rust-lang/llvm-project/tree/9e3de9490ff580cd484fbfa2908292b4838d56e7/compiler-rt/test/builtins/Unit +[2]: https://github.com/rust-lang/llvm-project/tree/9e3de9490ff580cd484fbfa2908292b4838d56e7/compiler-rt/lib/builtins [3]: https://github.com/rust-lang/compiler-builtins/blob/0ba07e49264a54cb5bbd4856fcea083bb3fbec15/build.rs#L180-L265 [4]: https://travis-ci.org/rust-lang/compiler-builtins From ba44f2e211d2e2e3c1d67cd936b88824d12b13c1 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 25 Jun 2023 21:07:00 +0000 Subject: [PATCH 0635/1459] Port outline-atomics to rust This has a very long history, summarized in https://github.com/rust-lang/rust/issues/109064. This port is a very minimal subset of `aarch64/lse.S` from LLVM's compiler-rt. In particular, it is missing the following: 1. Any form of runtime dispatch between LL/SC and LSE. Determining which version of the intrinsics to use requires one of the following: i) `getauxval` from glibc. It's unclear whether `compiler_builtins` is allowed to depend on libc at all, and musl doesn't even support getauxval. Don't enshrine the requirement "de-facto" by making it required for outline-atomics. ii) kernel support. Linux and FreeBSD have limited support, but it requires an extremely recent kernel version and doesn't work at all under QEMU (https://github.com/rust-lang/rust/issues/109064#issuecomment-1494939904). Instead, we hard-code LL/SC intrinsics. Users who want LSE support should use the LLVM compiler-rt (if you're building from source in rust-lang/rust, make sure you have `src/llvm-project` checked out locally. the goal is to soon add a new `optimized-compiler-builtins` option so this is easier to discover). 2. 
The global `___aarch64_have_lse_atomics` CTOR, required to do runtime dispatch. Thom Chiviolani has this to say about global CTORs: > static ctors are problems because we are pretty eager about dead code elim > in general if you have a module that isnt directly reference we will probably not have its static ctors > also, while llvm has a super robust way to have a static ctor (theres s special "appending global" to use for c++), we dont use that and just have people make a #[used] static in a special section > 1. the robust way kinda requires rust knowing that the argument is a static ctor (maybe a #[rustc_static_ctor] attribute). it also would be... finnicky, since on windows we actually care beyond being a static ctor, that we run as part in a specific group of ctors, which means a very specific section (one for TLS and the other for, uh, i dont remember) > 2. we still actually have to codegen the cgu that isn't referenced. but maybe we could remember that it has that attribute and use that So while this is possible in theory, it's decidedly non-trivial, and needs invasive changes to rust itself. In any case, it doesn't matter until we decide the story around libc. 3. The 16-byte (i128) version of compare_and_swap. This wouldn't be *too* hard to add, but it would be hard to test. The way I tested the existing code was not just with unit tests but also by loading it as a path dependency and running `x test core` - the latter caught several bugs the unit tests didn't catch (because I originally wrote the tests wrong). So I am slightly nervous about adding a 16-byte version that is much more poorly tested than the other intrinsics. 
--- build.rs | 57 ++++++++++- src/aarch64.rs | 221 +++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 3 + src/macros.rs | 2 +- testcrate/tests/lse.rs | 88 ++++++++++++++++ 5 files changed, 369 insertions(+), 2 deletions(-) create mode 100644 src/aarch64.rs create mode 100644 testcrate/tests/lse.rs diff --git a/build.rs b/build.rs index 766dec05d..266cc28bb 100644 --- a/build.rs +++ b/build.rs @@ -1,4 +1,4 @@ -use std::env; +use std::{collections::HashMap, env, sync::atomic::Ordering}; fn main() { println!("cargo:rerun-if-changed=build.rs"); @@ -90,6 +90,61 @@ fn main() { { println!("cargo:rustc-cfg=kernel_user_helpers") } + + if llvm_target[0] == "aarch64" { + generate_aarch64_outlined_atomics(); + } +} + +fn aarch64_symbol(ordering: Ordering) -> &'static str { + match ordering { + Ordering::Relaxed => "relax", + Ordering::Acquire => "acq", + Ordering::Release => "rel", + Ordering::AcqRel => "acq_rel", + _ => panic!("unknown symbol for {:?}", ordering), + } +} + +/// The `concat_idents` macro is extremely annoying and doesn't allow us to define new items. +/// Define them from the build script instead. +/// Note that the majority of the code is still defined in `aarch64.rs` through inline macros. +fn generate_aarch64_outlined_atomics() { + use std::fmt::Write; + // #[macro_export] so that we can use this in tests + let gen_macro = + |name| format!("#[macro_export] macro_rules! foreach_{name} {{ ($macro:path) => {{\n"); + + // Generate different macros for add/clr/eor/set so that we can test them separately. 
+ let sym_names = ["cas", "ldadd", "ldclr", "ldeor", "ldset", "swp"]; + let mut macros = HashMap::new(); + for sym in sym_names { + macros.insert(sym, gen_macro(sym)); + } + + for ordering in [ + Ordering::Relaxed, + Ordering::Acquire, + Ordering::Release, + Ordering::AcqRel, + ] { + let sym_ordering = aarch64_symbol(ordering); + // TODO: support CAS 16 + for size in [1, 2, 4, 8 /* , 16*/] { + for (sym, macro_) in &mut macros { + let name = format!("__aarch64_{sym}{size}_{sym_ordering}"); + writeln!(macro_, "$macro!( {ordering:?}, {size}, {name} );").unwrap(); + } + } + } + + let mut buf = String::new(); + for macro_def in macros.values() { + buf += macro_def; + buf += "}; }"; + } + let dst = std::env::var("OUT_DIR").unwrap() + "/outlined_atomics.rs"; + std::fs::write(dst, buf).unwrap(); } #[cfg(feature = "c")] diff --git a/src/aarch64.rs b/src/aarch64.rs new file mode 100644 index 000000000..018880650 --- /dev/null +++ b/src/aarch64.rs @@ -0,0 +1,221 @@ +//! Aarch64 targets have two possible implementations for atomics: +//! 1. Load-Locked, Store-Conditional (LL/SC), older and slower. +//! 2. Large System Extensions (LSE), newer and faster. +//! To avoid breaking backwards compat, C toolchains introduced a concept of "outlined atomics", +//! where atomic operations call into the compiler runtime to dispatch between two depending on +//! which is supported on the current CPU. +//! See https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics for more discussion. +//! +//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection. +//! Use the `compiler-rt` intrinsics if you want LSE support. +//! +//! Ported from `aarch64/lse.S` in LLVM's compiler-rt. +//! +//! Generate functions for each of the following symbols: +//! __aarch64_swpN_ORDER +//! __aarch64_ldaddN_ORDER +//! __aarch64_ldclrN_ORDER +//! 
__aarch64_ldeorN_ORDER +//! __aarch64_ldsetN_ORDER +//! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8}, ORDER = { relax, acq, rel, acq_rel } +//! +//! TODO: M = 16 +//! +//! The original `lse.S` has some truly horrifying code that expects to be compiled multiple times with different constants. +//! We do something similar, but with macro arguments. + +/// We don't do runtime dispatch so we don't have to worry about the global ctor. +/// Apparently MacOS uses a different number of underscores in the symbol name (???) +// #[cfg(target_vendor = "apple")] +// macro_rules! have_lse { +// () => { ___aarch64_have_lse_atomics } +// } + +// #[cfg(not(target_vendor = "apple"))] +// macro_rules! have_lse { +// () => { __aarch64_have_lse_atomics } +// } + +/// Translate a byte size to a Rust type. +macro_rules! int_ty { + (1) => { i8 }; + (2) => { i16 }; + (4) => { i32 }; + (8) => { i64 }; + (16) => { i128 }; +} + +/// Given a byte size and a register number, return a register of the appropriate size. +/// +/// See . +macro_rules! reg { + (1, $num:literal) => { concat!("w", $num) }; + (2, $num:literal) => { concat!("w", $num) }; + (4, $num:literal) => { concat!("w", $num) }; + (8, $num:literal) => { concat!("x", $num) }; +} + +/// Given an atomic ordering, translate it to the acquire suffix for the lxdr aarch64 ASM instruction. +macro_rules! acquire { + (Relaxed) => { "" }; + (Acquire) => { "a" }; + (Release) => { "" }; + (AcqRel) => { "a" }; +} + +/// Given an atomic ordering, translate it to the release suffix for the stxr aarch64 ASM instruction. +macro_rules! release { + (Relaxed) => { "" }; + (Acquire) => { "" }; + (Release) => { "l" }; + (AcqRel) => { "l" }; +} + +/// Given a size in bytes, translate it to the byte suffix for an aarch64 ASM instruction. +macro_rules! size { + (1) => { "b" }; + (2) => { "h" }; + (4) => { "" }; + (8) => { "" }; + (16) => { "" }; +} + +/// Given a byte size, translate it to an Unsigned eXTend instruction +/// with the correct semantics. 
+/// +/// See +macro_rules! uxt { + (1) => { "uxtb" }; + (2) => { "uxth" }; + ($_:tt) => { "mov" }; +} + +/// Given an atomic ordering and byte size, translate it to a LoaD eXclusive Register instruction +/// with the correct semantics. +/// +/// See . +macro_rules! ldxr { + ($ordering:ident, $bytes:tt) => { concat!("ld", acquire!($ordering), "xr", size!($bytes)) } +} + +/// Given an atomic ordering and byte size, translate it to a STore eXclusive Register instruction +/// with the correct semantics. +/// +/// See . +macro_rules! stxr { + ($ordering:ident, $bytes:tt) => { concat!("st", release!($ordering), "xr", size!($bytes)) } +} + +/// See . +macro_rules! compare_and_swap { + ($ordering:ident, $bytes:tt, $name:ident) => { + intrinsics! { + #[maybe_use_optimized_c_shim] + #[naked] + pub extern "C" fn $name ( + expected: int_ty!($bytes), desired: int_ty!($bytes), ptr: *mut int_ty!($bytes) + ) -> int_ty!($bytes) { + // We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap. + unsafe { core::arch::asm! { + // UXT s(tmp0), s(0) + concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)), + "0:", + // LDXR s(0), [x2] + concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x2]"), + // cmp s(0), s(tmp0) + concat!("cmp ", reg!($bytes, 0), ", ", reg!($bytes, 16)), + "bne 1f", + // STXR w(tmp1), s(1), [x2] + concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 1), ", [x2]"), + "cbnz w17, 0b", + "1:", + "ret", + options(noreturn) + } } + } + } + } +} + + +/// See . +macro_rules! swap { + ($ordering:ident, $bytes:tt, $name:ident) => { + intrinsics! { + #[maybe_use_optimized_c_shim] + #[naked] + pub extern "C" fn $name ( + left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes) + ) -> int_ty!($bytes) { + unsafe { core::arch::asm! 
{ + // mov s(tmp0), s(0) + concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)), + "0:", + // LDXR s(0), [x1] + concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x1]"), + // STXR w(tmp1), s(tmp0), [x1] + concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"), + "cbnz w17, 0b", + "ret", + options(noreturn) + } } + } + } + } +} + +/// See (e.g.) . +macro_rules! fetch_op { + ($ordering:ident, $bytes:tt, $name:ident, $op:literal) => { + intrinsics! { + #[maybe_use_optimized_c_shim] + #[naked] + pub extern "C" fn $name ( + val: int_ty!($bytes), ptr: *mut int_ty!($bytes) + ) -> int_ty!($bytes) { + unsafe { core::arch::asm! { + // mov s(tmp0), s(0) + concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)), + "0:", + // LDXR s(0), [x1] + concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x1]"), + // OP s(tmp1), s(0), s(tmp0) + concat!($op, " ", reg!($bytes, 17), ", ", reg!($bytes, 0), ", ", reg!($bytes, 16)), + // STXR w(tmp2), s(tmp1), [x1] + concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"), + "cbnz w15, 0b", + "ret", + options(noreturn) + } } + } + } + } +} + +// We need a single macro to pass to `foreach_ldadd`. +macro_rules! add { + ($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "add" } } +} + +macro_rules! and { + ($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "bic" } } +} + +macro_rules! xor { + ($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "eor" } } +} + +macro_rules! or { + ($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "orr" } } +} + +// See `generate_aarch64_outlined_atomics` in build.rs. 
+include!(concat!(env!("OUT_DIR"), "/outlined_atomics.rs")); +foreach_cas!(compare_and_swap); +foreach_swp!(swap); +foreach_ldadd!(add); +foreach_ldclr!(and); +foreach_ldeor!(xor); +foreach_ldset!(or); + +// TODO: CAS 16 diff --git a/src/lib.rs b/src/lib.rs index 71f249c8e..90b21f1fc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -57,6 +57,9 @@ pub mod mem; #[cfg(target_arch = "arm")] pub mod arm; +#[cfg(target_arch = "aarch64")] +pub mod aarch64; + #[cfg(all( kernel_user_helpers, any(target_os = "linux", target_os = "android"), diff --git a/src/macros.rs b/src/macros.rs index b3becde72..e3a381928 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -419,7 +419,7 @@ macro_rules! intrinsics { ( #[naked] $(#[$($attr:tt)*])* - pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + pub $(unsafe)? extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } diff --git a/testcrate/tests/lse.rs b/testcrate/tests/lse.rs new file mode 100644 index 000000000..49d73177e --- /dev/null +++ b/testcrate/tests/lse.rs @@ -0,0 +1,88 @@ +#![cfg(target_arch = "aarch64")] +#![feature(decl_macro)] // so we can use pub(super) + +/// Translate a byte size to a Rust type. 
+macro int_ty { + (1) => { i8 }, + (2) => { i16 }, + (4) => { i32 }, + (8) => { i64 }, + (16) => { i128 } +} + +mod cas { + pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) { + #[test] + fn $name() { + testcrate::fuzz_2(10000, |expected: super::int_ty!($bytes), new| { + let mut target = expected.wrapping_add(10); + assert_eq!( + unsafe { compiler_builtins::aarch64::$name::$name(expected, new, &mut target) }, + expected.wrapping_add(10), + "return value should always be the previous value", + ); + assert_eq!( + target, + expected.wrapping_add(10), + "shouldn't have changed target" + ); + + target = expected; + assert_eq!( + unsafe { compiler_builtins::aarch64::$name::$name(expected, new, &mut target) }, + expected + ); + assert_eq!(target, new, "should have updated target"); + }); + } + } +} + +mod swap { + pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) { + #[test] + fn $name() { + testcrate::fuzz_2(10000, |left: super::int_ty!($bytes), mut right| { + let orig_right = right; + assert_eq!( + unsafe { compiler_builtins::aarch64::$name::$name(left, &mut right) }, + orig_right + ); + assert_eq!(left, right); + }); + } + } +} + +macro_rules! 
test_op { + ($mod:ident, $( $op:tt )* ) => { + mod $mod { + pub(super) macro test { + ($_ordering:ident, $bytes:tt, $name:ident) => { + #[test] + fn $name() { + testcrate::fuzz_2(10000, |old, val| { + let mut target = old; + let op: fn(super::int_ty!($bytes), super::int_ty!($bytes)) -> _ = $($op)*; + let expected = op(old, val); + assert_eq!(old, unsafe { compiler_builtins::aarch64::$name::$name(val, &mut target) }, "{} should return original value", stringify!($name)); + assert_eq!(expected, target, "{} should store to target", stringify!($name)); + }); + } + } + } + } + }; +} + +test_op!(add, |left, right| left.wrapping_add(right)); +test_op!(clr, |left, right| left & !right); +test_op!(xor, std::ops::BitXor::bitxor); +test_op!(or, std::ops::BitOr::bitor); + +compiler_builtins::foreach_cas!(cas::test); +compiler_builtins::foreach_swp!(swap::test); +compiler_builtins::foreach_ldadd!(add::test); +compiler_builtins::foreach_ldclr!(clr::test); +compiler_builtins::foreach_ldeor!(xor::test); +compiler_builtins::foreach_ldset!(or::test); From 31ee4544dbe47903ce771270d6e3bea8654e9e50 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Mon, 26 Jun 2023 13:54:47 +0000 Subject: [PATCH 0636/1459] address review comments and fix CI - implement CAS 16 - remove useless commented out symbol name - support `feature("no-asm")` - fix warnings when `feature("c")` is enabled - rustfmt --- build.rs | 10 ++-- src/aarch64.rs | 103 +++++++++++++++++++++++++++++++---------- src/lib.rs | 6 ++- testcrate/tests/lse.rs | 7 ++- 4 files changed, 97 insertions(+), 29 deletions(-) diff --git a/build.rs b/build.rs index 266cc28bb..4549d0b4f 100644 --- a/build.rs +++ b/build.rs @@ -122,6 +122,9 @@ fn generate_aarch64_outlined_atomics() { macros.insert(sym, gen_macro(sym)); } + // Only CAS supports 16 bytes, and it has a different implementation that uses a different macro. 
+ let mut cas16 = gen_macro("cas16"); + for ordering in [ Ordering::Relaxed, Ordering::Acquire, @@ -129,17 +132,18 @@ fn generate_aarch64_outlined_atomics() { Ordering::AcqRel, ] { let sym_ordering = aarch64_symbol(ordering); - // TODO: support CAS 16 - for size in [1, 2, 4, 8 /* , 16*/] { + for size in [1, 2, 4, 8] { for (sym, macro_) in &mut macros { let name = format!("__aarch64_{sym}{size}_{sym_ordering}"); writeln!(macro_, "$macro!( {ordering:?}, {size}, {name} );").unwrap(); } } + let name = format!("__aarch64_cas16_{sym_ordering}"); + writeln!(cas16, "$macro!( {ordering:?}, {name} );").unwrap(); } let mut buf = String::new(); - for macro_def in macros.values() { + for macro_def in macros.values().chain(std::iter::once(&cas16)) { buf += macro_def; buf += "}; }"; } diff --git a/src/aarch64.rs b/src/aarch64.rs index 018880650..1aaa1a694 100644 --- a/src/aarch64.rs +++ b/src/aarch64.rs @@ -12,31 +12,21 @@ //! Ported from `aarch64/lse.S` in LLVM's compiler-rt. //! //! Generate functions for each of the following symbols: +//! __aarch64_casM_ORDER //! __aarch64_swpN_ORDER //! __aarch64_ldaddN_ORDER //! __aarch64_ldclrN_ORDER //! __aarch64_ldeorN_ORDER //! __aarch64_ldsetN_ORDER -//! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8}, ORDER = { relax, acq, rel, acq_rel } -//! -//! TODO: M = 16 +//! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8, 16}, ORDER = { relax, acq, rel, acq_rel } //! //! The original `lse.S` has some truly horrifying code that expects to be compiled multiple times with different constants. //! We do something similar, but with macro arguments. -/// We don't do runtime dispatch so we don't have to worry about the global ctor. -/// Apparently MacOS uses a different number of underscores in the symbol name (???) -// #[cfg(target_vendor = "apple")] -// macro_rules! have_lse { -// () => { ___aarch64_have_lse_atomics } -// } - -// #[cfg(not(target_vendor = "apple"))] -// macro_rules! 
have_lse { -// () => { __aarch64_have_lse_atomics } -// } +// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor. /// Translate a byte size to a Rust type. +#[rustfmt::skip] macro_rules! int_ty { (1) => { i8 }; (2) => { i16 }; @@ -48,6 +38,7 @@ macro_rules! int_ty { /// Given a byte size and a register number, return a register of the appropriate size. /// /// See . +#[rustfmt::skip] macro_rules! reg { (1, $num:literal) => { concat!("w", $num) }; (2, $num:literal) => { concat!("w", $num) }; @@ -56,6 +47,7 @@ macro_rules! reg { } /// Given an atomic ordering, translate it to the acquire suffix for the lxdr aarch64 ASM instruction. +#[rustfmt::skip] macro_rules! acquire { (Relaxed) => { "" }; (Acquire) => { "a" }; @@ -64,6 +56,7 @@ macro_rules! acquire { } /// Given an atomic ordering, translate it to the release suffix for the stxr aarch64 ASM instruction. +#[rustfmt::skip] macro_rules! release { (Relaxed) => { "" }; (Acquire) => { "" }; @@ -72,6 +65,7 @@ macro_rules! release { } /// Given a size in bytes, translate it to the byte suffix for an aarch64 ASM instruction. +#[rustfmt::skip] macro_rules! size { (1) => { "b" }; (2) => { "h" }; @@ -84,6 +78,7 @@ macro_rules! size { /// with the correct semantics. /// /// See +#[rustfmt::skip] macro_rules! uxt { (1) => { "uxtb" }; (2) => { "uxth" }; @@ -95,7 +90,9 @@ macro_rules! uxt { /// /// See . macro_rules! ldxr { - ($ordering:ident, $bytes:tt) => { concat!("ld", acquire!($ordering), "xr", size!($bytes)) } + ($ordering:ident, $bytes:tt) => { + concat!("ld", acquire!($ordering), "xr", size!($bytes)) + }; } /// Given an atomic ordering and byte size, translate it to a STore eXclusive Register instruction @@ -103,7 +100,29 @@ macro_rules! ldxr { /// /// See . macro_rules! 
stxr { - ($ordering:ident, $bytes:tt) => { concat!("st", release!($ordering), "xr", size!($bytes)) } + ($ordering:ident, $bytes:tt) => { + concat!("st", release!($ordering), "xr", size!($bytes)) + }; +} + +/// Given an atomic ordering, translate it to a LoaD eXclusive Pair of registers instruction +/// with the correct semantics. +/// +/// See +macro_rules! ldxp { + ($ordering:ident) => { + concat!("ld", acquire!($ordering), "xp") + }; +} + +/// Given an atomic ordering, translate it to a STore eXclusive Pair of registers instruction +/// with the correct semantics. +/// +/// See . +macro_rules! stxp { + ($ordering:ident) => { + concat!("st", release!($ordering), "xp") + }; } /// See . @@ -134,9 +153,38 @@ macro_rules! compare_and_swap { } } } } - } + }; } +// i128 uses a completely different impl, so it has its own macro. +macro_rules! compare_and_swap_i128 { + ($ordering:ident, $name:ident) => { + intrinsics! { + #[maybe_use_optimized_c_shim] + #[naked] + pub extern "C" fn $name ( + expected: i128, desired: i128, ptr: *mut i128 + ) -> i128 { + unsafe { core::arch::asm! { + "mov x16, x0", + "mov x17, x1", + "0:", + // LDXP x0, x1, [x4] + concat!(ldxp!($ordering), " x0, x1, [x4]"), + "cmp x0, x16", + "ccmp x1, x17, #0, eq", + "bne 1f", + // STXP w(tmp2), x2, x3, [x4] + concat!(stxp!($ordering), " w15, x2, x3, [x4]"), + "cbnz w15, 0b", + "1:", + "ret", + options(noreturn) + } } + } + } + }; +} /// See . macro_rules! swap { @@ -161,7 +209,7 @@ macro_rules! swap { } } } } - } + }; } /// See (e.g.) . @@ -194,28 +242,35 @@ macro_rules! fetch_op { // We need a single macro to pass to `foreach_ldadd`. macro_rules! add { - ($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "add" } } + ($ordering:ident, $bytes:tt, $name:ident) => { + fetch_op! { $ordering, $bytes, $name, "add" } + }; } macro_rules! and { - ($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! 
{ $ordering, $bytes, $name, "bic" } } + ($ordering:ident, $bytes:tt, $name:ident) => { + fetch_op! { $ordering, $bytes, $name, "bic" } + }; } macro_rules! xor { - ($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "eor" } } + ($ordering:ident, $bytes:tt, $name:ident) => { + fetch_op! { $ordering, $bytes, $name, "eor" } + }; } macro_rules! or { - ($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "orr" } } + ($ordering:ident, $bytes:tt, $name:ident) => { + fetch_op! { $ordering, $bytes, $name, "orr" } + }; } // See `generate_aarch64_outlined_atomics` in build.rs. include!(concat!(env!("OUT_DIR"), "/outlined_atomics.rs")); foreach_cas!(compare_and_swap); +foreach_cas16!(compare_and_swap_i128); foreach_swp!(swap); foreach_ldadd!(add); foreach_ldclr!(and); foreach_ldeor!(xor); foreach_ldset!(or); - -// TODO: CAS 16 diff --git a/src/lib.rs b/src/lib.rs index 90b21f1fc..4b44adc26 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -57,7 +57,11 @@ pub mod mem; #[cfg(target_arch = "arm")] pub mod arm; -#[cfg(target_arch = "aarch64")] +#[cfg(all( + target_arch = "aarch64", + not(feature = "no-asm"), + not(feature = "optimized-c") +))] pub mod aarch64; #[cfg(all( diff --git a/testcrate/tests/lse.rs b/testcrate/tests/lse.rs index 49d73177e..7b54ab5d2 100644 --- a/testcrate/tests/lse.rs +++ b/testcrate/tests/lse.rs @@ -1,5 +1,5 @@ -#![cfg(target_arch = "aarch64")] #![feature(decl_macro)] // so we can use pub(super) +#![cfg(all(target_arch = "aarch64", not(feature = "no-asm")))] /// Translate a byte size to a Rust type. 
macro int_ty { @@ -38,6 +38,10 @@ mod cas { } } +macro test_cas16($_ordering:ident, $name:ident) { + cas::test!($_ordering, 16, $name); +} + mod swap { pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) { #[test] @@ -81,6 +85,7 @@ test_op!(xor, std::ops::BitXor::bitxor); test_op!(or, std::ops::BitOr::bitor); compiler_builtins::foreach_cas!(cas::test); +compiler_builtins::foreach_cas16!(test_cas16); compiler_builtins::foreach_swp!(swap::test); compiler_builtins::foreach_ldadd!(add::test); compiler_builtins::foreach_ldclr!(clr::test); From 07cf3b4f109de98910a443c3cacce779eba54cf0 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Mon, 26 Jun 2023 16:27:16 +0000 Subject: [PATCH 0637/1459] require naked functions to be unsafe again they dereference raw pointers, so the caller needs to make sure the pointer is valid. note that this requires changing `maybe_use_optimized_c_shim` to support unsafe functions. --- src/aarch64.rs | 8 ++++---- src/macros.rs | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/aarch64.rs b/src/aarch64.rs index 1aaa1a694..ddbec6d32 100644 --- a/src/aarch64.rs +++ b/src/aarch64.rs @@ -131,7 +131,7 @@ macro_rules! compare_and_swap { intrinsics! { #[maybe_use_optimized_c_shim] #[naked] - pub extern "C" fn $name ( + pub unsafe extern "C" fn $name ( expected: int_ty!($bytes), desired: int_ty!($bytes), ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { // We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap. @@ -162,7 +162,7 @@ macro_rules! compare_and_swap_i128 { intrinsics! { #[maybe_use_optimized_c_shim] #[naked] - pub extern "C" fn $name ( + pub unsafe extern "C" fn $name ( expected: i128, desired: i128, ptr: *mut i128 ) -> i128 { unsafe { core::arch::asm! { @@ -192,7 +192,7 @@ macro_rules! swap { intrinsics! 
{ #[maybe_use_optimized_c_shim] #[naked] - pub extern "C" fn $name ( + pub unsafe extern "C" fn $name ( left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { unsafe { core::arch::asm! { @@ -218,7 +218,7 @@ macro_rules! fetch_op { intrinsics! { #[maybe_use_optimized_c_shim] #[naked] - pub extern "C" fn $name ( + pub unsafe extern "C" fn $name ( val: int_ty!($bytes), ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { unsafe { core::arch::asm! { diff --git a/src/macros.rs b/src/macros.rs index e3a381928..b11114f12 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -204,7 +204,7 @@ macro_rules! intrinsics { ( #[maybe_use_optimized_c_shim] $(#[$($attr:tt)*])* - pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + pub $(unsafe $(@ $empty:tt)? )? extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } @@ -212,7 +212,7 @@ macro_rules! intrinsics { ) => ( #[cfg($name = "optimized-c")] #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] - pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + pub $(unsafe $($empty)? )? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { extern $abi { fn $name($($argname: $ty),*) $(-> $ret)?; } @@ -224,7 +224,7 @@ macro_rules! intrinsics { #[cfg(not($name = "optimized-c"))] intrinsics! { $(#[$($attr)*])* - pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + pub $(unsafe $($empty)? )? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } @@ -419,7 +419,7 @@ macro_rules! intrinsics { ( #[naked] $(#[$($attr:tt)*])* - pub $(unsafe)? extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? 
{ $($body:tt)* } From 71b5f58a53d08f1dfe62d26bd27179849fa09e7d Mon Sep 17 00:00:00 2001 From: kirk Date: Sun, 25 Jun 2023 13:06:18 +0000 Subject: [PATCH 0638/1459] port updated version of llvm float div --- src/float/div.rs | 857 ++++++++++++++++++++++++++++--------- testcrate/tests/div_rem.rs | 11 +- 2 files changed, 667 insertions(+), 201 deletions(-) diff --git a/src/float/div.rs b/src/float/div.rs index c2d6c07e7..c0aae34fb 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -12,11 +12,17 @@ where i32: CastInto, F::Int: CastInto, F::Int: HInt, + ::Int: core::ops::Mul, { + const NUMBER_OF_HALF_ITERATIONS: usize = 0; + const NUMBER_OF_FULL_ITERATIONS: usize = 3; + const USE_NATIVE_FULL_ITERATIONS: bool = true; + let one = F::Int::ONE; let zero = F::Int::ZERO; + let hw = F::BITS / 2; + let lo_mask = u32::MAX >> hw; - // let bits = F::BITS; let significand_bits = F::SIGNIFICAND_BITS; let max_exponent = F::EXPONENT_MAX; @@ -109,101 +115,341 @@ where } } - // Or in the implicit significand bit. (If we fell through from the + // Set the implicit significand bit. If we fell through from the // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything.) + // won't hurt anything. a_significand |= implicit_bit; b_significand |= implicit_bit; - let mut quotient_exponent: i32 = CastInto::::cast(a_exponent) - .wrapping_sub(CastInto::::cast(b_exponent)) - .wrapping_add(scale); - - // Align the significand of b as a Q31 fixed-point number in the range - // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax - // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This - // is accurate to about 3.5 binary digits. 
- let q31b = CastInto::::cast(b_significand << 8.cast()); - let mut reciprocal = (0x7504f333u32).wrapping_sub(q31b); - - // Now refine the reciprocal estimate using a Newton-Raphson iteration: - // - // x1 = x0 * (2 - x0 * b) - // - // This doubles the number of correct binary digits in the approximation - // with each iteration, so after three iterations, we have about 28 binary - // digits of accuracy. - - let mut correction: u32 = - negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); - reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) >> 31) as u32; - correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); - reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) >> 31) as u32; - correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); - reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) >> 31) as u32; - - // Exhaustive testing shows that the error in reciprocal after three steps - // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our - // expectations. We bump the reciprocal by a tiny value to force the error - // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to - // be specific). This also causes 1/1 to give a sensible approximation - // instead of zero (due to overflow). - reciprocal = reciprocal.wrapping_sub(2); - - // The numerical reciprocal is accurate to within 2^-28, lies in the - // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller - // than the true reciprocal of b. Multiplying a by this reciprocal thus - // gives a numerical q = a/b in Q24 with the following properties: - // - // 1. q < a/b - // 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0) - // 3. 
the error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes - // from the fact that we truncate the product, and the 2^27 term - // is the error in the reciprocal of b scaled by the maximum - // possible value of a. As a consequence of this error bound, - // either q or nextafter(q) is the correctly rounded - let mut quotient = (a_significand << 1).widen_mul(reciprocal.cast()).hi(); - - // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). - // In either case, we are going to compute a residual of the form - // - // r = a - q*b + + let written_exponent: i32 = CastInto::::cast( + a_exponent + .wrapping_sub(b_exponent) + .wrapping_add(scale.cast()), + ) + .wrapping_add(exponent_bias) as i32; + let b_uq1 = b_significand << (F::BITS - significand_bits - 1); + + // Align the significand of b as a UQ1.(n-1) fixed-point number in the range + // [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax + // polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2. + // The max error for this approximation is achieved at endpoints, so + // abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289..., + // which is about 4.5 bits. + // The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571... + + // Then, refine the reciprocal estimate using a quadratically converging + // Newton-Raphson iteration: + // x_{n+1} = x_n * (2 - x_n * b) // - // We know from the construction of q that r satisfies: + // Let b be the original divisor considered "in infinite precision" and + // obtained from IEEE754 representation of function argument (with the + // implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in + // UQ1.(W-1). // - // 0 <= r < ulp(q)*b + // Let b_hw be an infinitely precise number obtained from the highest (HW-1) + // bits of divisor significand (with the implicit bit set). Corresponds to + // half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated** + // version of b_UQ1. 
// // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we - // already have the correct result. The exact halfway case cannot occur. - // We also take this time to right shift quotient if it falls in the [1,2) - // range and adjust the exponent accordingly. - let residual = if quotient < (implicit_bit << 1) { - quotient_exponent = quotient_exponent.wrapping_sub(1); - (a_significand << (significand_bits + 1)).wrapping_sub(quotient.wrapping_mul(b_significand)) + // Let e_n := x_n - 1/b_hw + // E_n := x_n - 1/b + // abs(E_n) <= abs(e_n) + (1/b_hw - 1/b) + // = abs(e_n) + (b - b_hw) / (b*b_hw) + // <= abs(e_n) + 2 * 2^-HW + + // rep_t-sized iterations may be slower than the corresponding half-width + // variant depending on the hardware and whether single/double/quad precision + // is selected. + // NB: Using half-width iterations increases computation errors due to + // rounding, so error estimations have to be computed taking the selected + // mode into account! + + #[allow(clippy::absurd_extreme_comparisons)] + let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 { + // Starting with (n-1) half-width iterations + let b_uq1_hw: u16 = + (CastInto::::cast(b_significand) >> (significand_bits + 1 - hw)) as u16; + + // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW + // with W0 being either 16 or 32 and W0 <= HW. + // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which + // b/2 is subtracted to obtain x0) wrapped to [0, 1) range. + + // HW is at least 32. Shifting into the highest bits if needed. + let c_hw = (0x7504_u32 as u16).wrapping_shl(hw.wrapping_sub(32)); + + // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572, + // so x0 fits to UQ0.HW without wrapping. 
+ let x_uq0_hw: u16 = { + let mut x_uq0_hw: u16 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */); + // An e_0 error is comprised of errors due to + // * x0 being an inherently imprecise first approximation of 1/b_hw + // * C_hw being some (irrational) number **truncated** to W0 bits + // Please note that e_0 is calculated against the infinitely precise + // reciprocal of b_hw (that is, **truncated** version of b). + // + // e_0 <= 3/4 - 1/sqrt(2) + 2^-W0 + + // By construction, 1 <= b < 2 + // f(x) = x * (2 - b*x) = 2*x - b*x^2 + // f'(x) = 2 * (1 - b*x) + // + // On the [0, 1] interval, f(0) = 0, + // then it increases until f(1/b) = 1 / b, maximum on (0, 1), + // then it decreases to f(1) = 2 - b + // + // Let g(x) = x - f(x) = b*x^2 - x. + // On (0, 1/b), g(x) < 0 <=> f(x) > x + // On (1/b, 1], g(x) > 0 <=> f(x) < x + // + // For half-width iterations, b_hw is used instead of b. + #[allow(clippy::reversed_empty_ranges)] + for _ in 0..NUMBER_OF_HALF_ITERATIONS { + // corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp + // of corr_UQ1_hw. + // "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1). + // On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided + // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is + // expected to be strictly positive because b_UQ1_hw has its highest bit set + // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1). + let corr_uq1_hw: u16 = + 0.wrapping_sub((x_uq0_hw as u32).wrapping_mul(b_uq1_hw.cast()) >> hw) as u16; + + // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally + // obtaining an UQ1.(HW-1) number and proving its highest bit could be + // considered to be 0 to be able to represent it in UQ0.HW. + // From the above analysis of f(x), if corr_UQ1_hw would be represented + // without any intermediate loss of precision (that is, in twice_rep_t) + // x_UQ0_hw could be at most [1.]000... 
if b_hw is exactly 1.0 and strictly + // less otherwise. On the other hand, to obtain [1.]000..., one have to pass + // 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due + // to 1.0 being not representable as UQ0.HW). + // The fact corr_UQ1_hw was virtually round up (due to result of + // multiplication being **first** truncated, then negated - to improve + // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw. + x_uq0_hw = ((x_uq0_hw as u32).wrapping_mul(corr_uq1_hw as u32) >> (hw - 1)) as u16; + // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t + // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after + // any number of iterations, so just subtract 2 from the reciprocal + // approximation after last iteration. + + // In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW: + // corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1 + // = 1 - e_n * b_hw + 2*eps1 + // x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2 + // = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2 + // = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2 + // e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2 + // = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw + // \------ >0 -------/ \-- >0 ---/ + // abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U) + } + // For initial half-width iterations, U = 2^-HW + // Let abs(e_n) <= u_n * U, + // then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U) + // u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2) + + // Account for possible overflow (see above). For an overflow to occur for the + // first time, for "ideal" corr_UQ1_hw (that is, without intermediate + // truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum + // value representable in UQ0.HW or less by 1. 
This means that 1/b_hw have to + // be not below that value (see g(x) above), so it is safe to decrement just + // once after the final iteration. On the other hand, an effective value of + // divisor changes after this point (from b_hw to b), so adjust here. + x_uq0_hw.wrapping_sub(1_u16) + }; + + // Error estimations for full-precision iterations are calculated just + // as above, but with U := 2^-W and taking extra decrementing into account. + // We need at least one such iteration. + + // Simulating operations on a twice_rep_t to perform a single final full-width + // iteration. Using ad-hoc multiplication implementations to take advantage + // of particular structure of operands. + + let blo: u32 = (CastInto::::cast(b_uq1)) & lo_mask; + // x_UQ0 = x_UQ0_hw * 2^HW - 1 + // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1 + // + // <--- higher half ---><--- lower half ---> + // [x_UQ0_hw * b_UQ1_hw] + // + [ x_UQ0_hw * blo ] + // - [ b_UQ1 ] + // = [ result ][.... discarded ...] + let corr_uq1 = negate_u32( + (x_uq0_hw as u32) * (b_uq1_hw as u32) + (((x_uq0_hw as u32) * (blo)) >> hw) - 1, + ); // account for *possible* carry + let lo_corr = corr_uq1 & lo_mask; + let hi_corr = corr_uq1 >> hw; + // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1 + let mut x_uq0: ::Int = ((((x_uq0_hw as u32) * hi_corr) << 1) + .wrapping_add(((x_uq0_hw as u32) * lo_corr) >> (hw - 1)) + .wrapping_sub(2)) + .cast(); // 1 to account for the highest bit of corr_UQ1 can be 1 + // 1 to account for possible carry + // Just like the case of half-width iterations but with possibility + // of overflowing by one extra Ulp of x_UQ0. + x_uq0 -= one; + // ... 
and then traditional fixup by 2 should work + + // On error estimation: + // abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW + // + (2^-HW + 2^-W)) + // abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW + + // Then like for the half-width iterations: + // With 0 <= eps1, eps2 < 2^-W + // E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b + // abs(E_N) <= 2^-W * [ 4 * abs(E_{N-1}) + max(2 * abs(E_{N-1})^2 * 2^W + 4, 8)) ] + // abs(E_N) <= 2^-W * [ 4 * (u_{N-1} + 3.01) * 2^-HW + max(4 + 2 * (u_{N-1} + 3.01)^2, 8) ] + x_uq0 } else { - quotient >>= 1; - (a_significand << significand_bits).wrapping_sub(quotient.wrapping_mul(b_significand)) + // C is (3/4 + 1/sqrt(2)) - 1 truncated to 32 fractional bits as UQ0.n + let c: ::Int = (0x7504F333 << (F::BITS - 32)).cast(); + let x_uq0: ::Int = c.wrapping_sub(b_uq1); + // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-32 + x_uq0 + }; + + let mut x_uq0 = if USE_NATIVE_FULL_ITERATIONS { + for _ in 0..NUMBER_OF_FULL_ITERATIONS { + let corr_uq1: u32 = 0.wrapping_sub( + ((CastInto::::cast(x_uq0) as u64) * (CastInto::::cast(b_uq1) as u64)) + >> F::BITS, + ) as u32; + x_uq0 = ((((CastInto::::cast(x_uq0) as u64) * (corr_uq1 as u64)) >> (F::BITS - 1)) + as u32) + .cast(); + } + x_uq0 + } else { + // not using native full iterations + x_uq0 }; - let written_exponent = quotient_exponent.wrapping_add(exponent_bias as i32); + // Finally, account for possible overflow, as explained above. 
+ x_uq0 = x_uq0.wrapping_sub(2.cast()); + + // u_n for different precisions (with N-1 half-width iterations): + // W0 is the precision of C + // u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW + + // Estimated with bc: + // define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; } + // define half2(un) { return 2.0 * un / 2.0^hw + 2.0; } + // define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; } + // define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; } + + // | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1) + // u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797 + // u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440 + // u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317 + // u_3 | < 7.31 | | < 7.31 | < 27054456580 + // u_4 | | | | < 80.4 + // Final (U_N) | same as u_3 | < 72 | < 218 | < 13920 + + // Add 2 to U_N due to final decrement. + + let reciprocal_precision: ::Int = 10.cast(); + + // Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W + let x_uq0 = x_uq0 - reciprocal_precision; + // Now 1/b - (2*P) * 2^-W < x < 1/b + // FIXME Is x_UQ0 still >= 0.5? + + let mut quotient: ::Int = x_uq0.widen_mul(a_significand << 1).hi(); + // Now, a/b - 4*P * 2^-W < q < a/b for q= in UQ1.(SB+1+W). + + // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1), + // adjust it to be in [1.0, 2.0) as UQ1.SB. + let (mut residual, written_exponent) = if quotient < (implicit_bit << 1) { + // Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB, + // effectively doubling its value as well as its error estimation. + let residual_lo = (a_significand << (significand_bits + 1)).wrapping_sub( + (CastInto::::cast(quotient).wrapping_mul(CastInto::::cast(b_significand))) + .cast(), + ); + a_significand <<= 1; + (residual_lo, written_exponent.wrapping_sub(1)) + } else { + // Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it + // to UQ1.SB by right shifting by 1. Least significant bit is omitted. 
+ quotient >>= 1; + let residual_lo = (a_significand << significand_bits).wrapping_sub( + (CastInto::::cast(quotient).wrapping_mul(CastInto::::cast(b_significand))) + .cast(), + ); + (residual_lo, written_exponent) + }; + //drop mutability + let quotient = quotient; + + // NB: residualLo is calculated above for the normal result case. + // It is re-computed on denormal path that is expected to be not so + // performance-sensitive. + + // Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB + // Each NextAfter() increments the floating point value by at least 2^-SB + // (more, if exponent was incremented). + // Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint): + // q + // | | * | | | | | + // <---> 2^t + // | | | | | * | | + // q + // To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB. + // (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB + // (8*P) * 2^-W < 0.5 * 2^-SB + // P < 2^(W-4-SB) + // Generally, for at most R NextAfter() to be enough, + // P < (2*R - 1) * 2^(W-4-SB) + // For f32 (0+3): 10 < 32 (OK) + // For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required + // For f64: 220 < 256 (OK) + // For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required) + + // If we have overflowed the exponent, return infinity if written_exponent >= max_exponent as i32 { - // If we have overflowed the exponent, return infinity. return F::from_repr(inf_rep | quotient_sign); - } else if written_exponent < 1 { - // Flush denormals to zero. In the future, it would be nice to add - // code to round them correctly. 
- return F::from_repr(quotient_sign); - } else { - let round = ((residual << 1) > b_significand) as u32; - // Clear the implicit bits - let mut abs_result = quotient & significand_mask; - // Insert the exponent - abs_result |= written_exponent.cast() << significand_bits; - // Round - abs_result = abs_result.wrapping_add(round.cast()); - // Insert the sign and return - return F::from_repr(abs_result | quotient_sign); } + + // Now, quotient <= the correctly-rounded result + // and may need taking NextAfter() up to 3 times (see error estimates above) + // r = a - b * q + let abs_result = if written_exponent > 0 { + let mut ret = quotient & significand_mask; + ret |= ((written_exponent as u32) << significand_bits).cast(); + residual <<= 1; + ret + } else { + if (significand_bits as i32 + written_exponent) < 0 { + return F::from_repr(quotient_sign); + } + let ret = quotient.wrapping_shr(negate_u32(CastInto::::cast(written_exponent)) + 1); + residual = (CastInto::::cast( + a_significand.wrapping_shl( + significand_bits.wrapping_add(CastInto::::cast(written_exponent)), + ), + ) + .wrapping_sub( + (CastInto::::cast(ret).wrapping_mul(CastInto::::cast(b_significand))) << 1, + )) + .cast(); + ret + }; + // Round + let abs_result = { + residual += abs_result & one; // tie to even + // The above line conditionally turns the below LT comparison into LTE + + if residual > b_significand { + abs_result + one + } else { + abs_result + } + }; + F::from_repr(abs_result | quotient_sign) } fn div64(a: F, b: F) -> F @@ -218,10 +464,15 @@ where F::Int: CastInto, F::Int: HInt, { + const NUMBER_OF_HALF_ITERATIONS: usize = 3; + const NUMBER_OF_FULL_ITERATIONS: usize = 1; + const USE_NATIVE_FULL_ITERATIONS: bool = false; + let one = F::Int::ONE; let zero = F::Int::ZERO; + let hw = F::BITS / 2; + let lo_mask = u64::MAX >> hw; - // let bits = F::BITS; let significand_bits = F::SIGNIFICAND_BITS; let max_exponent = F::EXPONENT_MAX; @@ -235,12 +486,6 @@ where let inf_rep = exponent_mask; let 
quiet_bit = implicit_bit >> 1; let qnan_rep = exponent_mask | quiet_bit; - // let exponent_bits = F::EXPONENT_BITS; - - #[inline(always)] - fn negate_u32(a: u32) -> u32 { - (::wrapping_neg(a as i32)) as u32 - } #[inline(always)] fn negate_u64(a: u64) -> u64 { @@ -320,128 +565,340 @@ where } } - // Or in the implicit significand bit. (If we fell through from the + // Set the implicit significand bit. If we fell through from the // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything.) + // won't hurt anything. a_significand |= implicit_bit; b_significand |= implicit_bit; - let mut quotient_exponent: i32 = CastInto::::cast(a_exponent) - .wrapping_sub(CastInto::::cast(b_exponent)) - .wrapping_add(scale); - - // Align the significand of b as a Q31 fixed-point number in the range - // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax - // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This - // is accurate to about 3.5 binary digits. - let q31b = CastInto::::cast(b_significand >> 21.cast()); - let mut recip32 = (0x7504f333u32).wrapping_sub(q31b); - - // Now refine the reciprocal estimate using a Newton-Raphson iteration: - // - // x1 = x0 * (2 - x0 * b) - // - // This doubles the number of correct binary digits in the approximation - // with each iteration, so after three iterations, we have about 28 binary - // digits of accuracy. 
- - let mut correction32: u32 = - negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32); - recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32; - correction32 = negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32); - recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32; - correction32 = negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32); - recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32; - - // recip32 might have overflowed to exactly zero in the preceeding - // computation if the high word of b is exactly 1.0. This would sabotage - // the full-width final stage of the computation that follows, so we adjust - // recip32 downward by one bit. - recip32 = recip32.wrapping_sub(1); - - // We need to perform one more iteration to get us to 56 binary digits; - // The last iteration needs to happen with extra precision. - let q63blo = CastInto::::cast(b_significand << 11.cast()); - - let correction: u64 = negate_u64( - (recip32 as u64) - .wrapping_mul(q31b as u64) - .wrapping_add((recip32 as u64).wrapping_mul(q63blo as u64) >> 32), - ); - let c_hi = (correction >> 32) as u32; - let c_lo = correction as u32; - let mut reciprocal: u64 = (recip32 as u64) - .wrapping_mul(c_hi as u64) - .wrapping_add((recip32 as u64).wrapping_mul(c_lo as u64) >> 32); - - // We already adjusted the 32-bit estimate, now we need to adjust the final - // 64-bit reciprocal estimate downward to ensure that it is strictly smaller - // than the infinitely precise exact reciprocal. Because the computation - // of the Newton-Raphson step is truncating at every step, this adjustment - // is small; most of the work is already done. - reciprocal = reciprocal.wrapping_sub(2); - - // The numerical reciprocal is accurate to within 2^-56, lies in the - // interval [0.5, 1.0), and is strictly smaller than the true reciprocal - // of b. 
Multiplying a by this reciprocal thus gives a numerical q = a/b - // in Q53 with the following properties: - // - // 1. q < a/b - // 2. q is in the interval [0.5, 2.0) - // 3. the error in q is bounded away from 2^-53 (actually, we have a - // couple of bits to spare, but this is all we need). - - // We need a 64 x 64 multiply high to compute q, which isn't a basic - // operation in C, so we need to be a little bit fussy. - // let mut quotient: F::Int = ((((reciprocal as u64) - // .wrapping_mul(CastInto::::cast(a_significand << 1) as u64)) - // >> 32) as u32) - // .cast(); - - // We need a 64 x 64 multiply high to compute q, which isn't a basic - // operation in C, so we need to be a little bit fussy. - let mut quotient = (a_significand << 2).widen_mul(reciprocal.cast()).hi(); - - // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). - // In either case, we are going to compute a residual of the form - // - // r = a - q*b + + let written_exponent: i64 = CastInto::::cast( + a_exponent + .wrapping_sub(b_exponent) + .wrapping_add(scale.cast()), + ) + .wrapping_add(exponent_bias as u64) as i64; + let b_uq1 = b_significand << (F::BITS - significand_bits - 1); + + // Align the significand of b as a UQ1.(n-1) fixed-point number in the range + // [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax + // polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2. + // The max error for this approximation is achieved at endpoints, so + // abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289..., + // which is about 4.5 bits. + // The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571... 
+ + // Then, refine the reciprocal estimate using a quadratically converging + // Newton-Raphson iteration: + // x_{n+1} = x_n * (2 - x_n * b) // - // We know from the construction of q that r satisfies: + // Let b be the original divisor considered "in infinite precision" and + // obtained from IEEE754 representation of function argument (with the + // implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in + // UQ1.(W-1). // - // 0 <= r < ulp(q)*b + // Let b_hw be an infinitely precise number obtained from the highest (HW-1) + // bits of divisor significand (with the implicit bit set). Corresponds to + // half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated** + // version of b_UQ1. // - // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we - // already have the correct result. The exact halfway case cannot occur. - // We also take this time to right shift quotient if it falls in the [1,2) - // range and adjust the exponent accordingly. - let residual = if quotient < (implicit_bit << 1) { - quotient_exponent = quotient_exponent.wrapping_sub(1); - (a_significand << (significand_bits + 1)).wrapping_sub(quotient.wrapping_mul(b_significand)) + // Let e_n := x_n - 1/b_hw + // E_n := x_n - 1/b + // abs(E_n) <= abs(e_n) + (1/b_hw - 1/b) + // = abs(e_n) + (b - b_hw) / (b*b_hw) + // <= abs(e_n) + 2 * 2^-HW + + // rep_t-sized iterations may be slower than the corresponding half-width + // variant depending on the hardware and whether single/double/quad precision + // is selected. + // NB: Using half-width iterations increases computation errors due to + // rounding, so error estimations have to be computed taking the selected + // mode into account! 
+ + let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 { + // Starting with (n-1) half-width iterations + let b_uq1_hw: u32 = + (CastInto::::cast(b_significand) >> (significand_bits + 1 - hw)) as u32; + + // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW + // with W0 being either 16 or 32 and W0 <= HW. + // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which + // b/2 is subtracted to obtain x0) wrapped to [0, 1) range. + + // HW is at least 32. Shifting into the highest bits if needed. + let c_hw = (0x7504F333_u64 as u32).wrapping_shl(hw.wrapping_sub(32)); + + // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572, + // so x0 fits to UQ0.HW without wrapping. + let x_uq0_hw: u32 = { + let mut x_uq0_hw: u32 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */); + // dbg!(x_uq0_hw); + // An e_0 error is comprised of errors due to + // * x0 being an inherently imprecise first approximation of 1/b_hw + // * C_hw being some (irrational) number **truncated** to W0 bits + // Please note that e_0 is calculated against the infinitely precise + // reciprocal of b_hw (that is, **truncated** version of b). + // + // e_0 <= 3/4 - 1/sqrt(2) + 2^-W0 + + // By construction, 1 <= b < 2 + // f(x) = x * (2 - b*x) = 2*x - b*x^2 + // f'(x) = 2 * (1 - b*x) + // + // On the [0, 1] interval, f(0) = 0, + // then it increases until f(1/b) = 1 / b, maximum on (0, 1), + // then it decreases to f(1) = 2 - b + // + // Let g(x) = x - f(x) = b*x^2 - x. + // On (0, 1/b), g(x) < 0 <=> f(x) > x + // On (1/b, 1], g(x) > 0 <=> f(x) < x + // + // For half-width iterations, b_hw is used instead of b. + for _ in 0..NUMBER_OF_HALF_ITERATIONS { + // corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp + // of corr_UQ1_hw. + // "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1). 
+ // On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided + // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is + // expected to be strictly positive because b_UQ1_hw has its highest bit set + // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1). + let corr_uq1_hw: u32 = + 0.wrapping_sub(((x_uq0_hw as u64).wrapping_mul(b_uq1_hw as u64)) >> hw) as u32; + // dbg!(corr_uq1_hw); + + // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally + // obtaining an UQ1.(HW-1) number and proving its highest bit could be + // considered to be 0 to be able to represent it in UQ0.HW. + // From the above analysis of f(x), if corr_UQ1_hw would be represented + // without any intermediate loss of precision (that is, in twice_rep_t) + // x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly + // less otherwise. On the other hand, to obtain [1.]000..., one has to pass + // 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due + // to 1.0 being not representable as UQ0.HW). + // The fact corr_UQ1_hw was virtually rounded up (due to result of + // multiplication being **first** truncated, then negated - to improve + // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw. + x_uq0_hw = ((x_uq0_hw as u64).wrapping_mul(corr_uq1_hw as u64) >> (hw - 1)) as u32; + // dbg!(x_uq0_hw); + // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t + // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after + // any number of iterations, so just subtract 2 from the reciprocal + // approximation after last iteration. 
+ + // In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW: + // corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1 + // = 1 - e_n * b_hw + 2*eps1 + // x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2 + // = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2 + // = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2 + // e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2 + // = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw + // \------ >0 -------/ \-- >0 ---/ + // abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U) + } + // For initial half-width iterations, U = 2^-HW + // Let abs(e_n) <= u_n * U, + // then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U) + // u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2) + + // Account for possible overflow (see above). For an overflow to occur for the + // first time, for "ideal" corr_UQ1_hw (that is, without intermediate + // truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum + // value representable in UQ0.HW or less by 1. This means that 1/b_hw have to + // be not below that value (see g(x) above), so it is safe to decrement just + // once after the final iteration. On the other hand, an effective value of + // divisor changes after this point (from b_hw to b), so adjust here. + x_uq0_hw.wrapping_sub(1_u32) + }; + + // Error estimations for full-precision iterations are calculated just + // as above, but with U := 2^-W and taking extra decrementing into account. + // We need at least one such iteration. + + // Simulating operations on a twice_rep_t to perform a single final full-width + // iteration. Using ad-hoc multiplication implementations to take advantage + // of particular structure of operands. 
+ let blo: u64 = (CastInto::::cast(b_uq1)) & lo_mask; + // x_UQ0 = x_UQ0_hw * 2^HW - 1 + // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1 + // + // <--- higher half ---><--- lower half ---> + // [x_UQ0_hw * b_UQ1_hw] + // + [ x_UQ0_hw * blo ] + // - [ b_UQ1 ] + // = [ result ][.... discarded ...] + let corr_uq1 = negate_u64( + (x_uq0_hw as u64) * (b_uq1_hw as u64) + (((x_uq0_hw as u64) * (blo)) >> hw) - 1, + ); // account for *possible* carry + let lo_corr = corr_uq1 & lo_mask; + let hi_corr = corr_uq1 >> hw; + // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1 + let mut x_uq0: ::Int = ((((x_uq0_hw as u64) * hi_corr) << 1) + .wrapping_add(((x_uq0_hw as u64) * lo_corr) >> (hw - 1)) + .wrapping_sub(2)) + .cast(); // 1 to account for the highest bit of corr_UQ1 can be 1 + // 1 to account for possible carry + // Just like the case of half-width iterations but with possibility + // of overflowing by one extra Ulp of x_UQ0. + x_uq0 -= one; + // ... 
and then traditional fixup by 2 should work + + // On error estimation: + // abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW + // + (2^-HW + 2^-W)) + // abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW + + // Then like for the half-width iterations: + // With 0 <= eps1, eps2 < 2^-W + // E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b + // abs(E_N) <= 2^-W * [ 4 * abs(E_{N-1}) + max(2 * abs(E_{N-1})^2 * 2^W + 4, 8)) ] + // abs(E_N) <= 2^-W * [ 4 * (u_{N-1} + 3.01) * 2^-HW + max(4 + 2 * (u_{N-1} + 3.01)^2, 8) ] + x_uq0 } else { - quotient >>= 1; - (a_significand << significand_bits).wrapping_sub(quotient.wrapping_mul(b_significand)) + // C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n + let c: ::Int = (0x7504F333 << (F::BITS - 32)).cast(); + let x_uq0: ::Int = c.wrapping_sub(b_uq1); + // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64 + x_uq0 + }; + + let mut x_uq0 = if USE_NATIVE_FULL_ITERATIONS { + for _ in 0..NUMBER_OF_FULL_ITERATIONS { + let corr_uq1: u64 = 0.wrapping_sub( + (CastInto::::cast(x_uq0) * (CastInto::::cast(b_uq1))) >> F::BITS, + ); + x_uq0 = ((((CastInto::::cast(x_uq0) as u128) * (corr_uq1 as u128)) + >> (F::BITS - 1)) as u64) + .cast(); + } + x_uq0 + } else { + // not using native full iterations + x_uq0 }; - let written_exponent = quotient_exponent.wrapping_add(exponent_bias as i32); + // Finally, account for possible overflow, as explained above. 
+ x_uq0 = x_uq0.wrapping_sub(2.cast()); + + // u_n for different precisions (with N-1 half-width iterations): + // W0 is the precision of C + // u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW + + // Estimated with bc: + // define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; } + // define half2(un) { return 2.0 * un / 2.0^hw + 2.0; } + // define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; } + // define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; } + + // | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1) + // u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797 + // u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440 + // u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317 + // u_3 | < 7.31 | | < 7.31 | < 27054456580 + // u_4 | | | | < 80.4 + // Final (U_N) | same as u_3 | < 72 | < 218 | < 13920 + + // Add 2 to U_N due to final decrement. + + let reciprocal_precision: ::Int = 220.cast(); + + // Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W + let x_uq0 = x_uq0 - reciprocal_precision; + // Now 1/b - (2*P) * 2^-W < x < 1/b + // FIXME Is x_UQ0 still >= 0.5? + + let mut quotient: ::Int = x_uq0.widen_mul(a_significand << 1).hi(); + // Now, a/b - 4*P * 2^-W < q < a/b for q= in UQ1.(SB+1+W). + + // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1), + // adjust it to be in [1.0, 2.0) as UQ1.SB. + let (mut residual, written_exponent) = if quotient < (implicit_bit << 1) { + // Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB, + // effectively doubling its value as well as its error estimation. + let residual_lo = (a_significand << (significand_bits + 1)).wrapping_sub( + (CastInto::::cast(quotient).wrapping_mul(CastInto::::cast(b_significand))) + .cast(), + ); + a_significand <<= 1; + (residual_lo, written_exponent.wrapping_sub(1)) + } else { + // Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it + // to UQ1.SB by right shifting by 1. Least significant bit is omitted. 
+ quotient >>= 1; + let residual_lo = (a_significand << significand_bits).wrapping_sub( + (CastInto::::cast(quotient).wrapping_mul(CastInto::::cast(b_significand))) + .cast(), + ); + (residual_lo, written_exponent) + }; - if written_exponent >= max_exponent as i32 { - // If we have overflowed the exponent, return infinity. + //drop mutability + let quotient = quotient; + + // NB: residualLo is calculated above for the normal result case. + // It is re-computed on denormal path that is expected to be not so + // performance-sensitive. + + // Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB + // Each NextAfter() increments the floating point value by at least 2^-SB + // (more, if exponent was incremented). + // Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint): + // q + // | | * | | | | | + // <---> 2^t + // | | | | | * | | + // q + // To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB. + // (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB + // (8*P) * 2^-W < 0.5 * 2^-SB + // P < 2^(W-4-SB) + // Generally, for at most R NextAfter() to be enough, + // P < (2*R - 1) * 2^(W-4-SB) + // For f32 (0+3): 10 < 32 (OK) + // For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required + // For f64: 220 < 256 (OK) + // For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required) + + // If we have overflowed the exponent, return infinity + if written_exponent >= max_exponent as i64 { return F::from_repr(inf_rep | quotient_sign); - } else if written_exponent < 1 { - // Flush denormals to zero. In the future, it would be nice to add - // code to round them correctly. 
- return F::from_repr(quotient_sign); - } else { - let round = ((residual << 1) > b_significand) as u32; - // Clear the implicit bits - let mut abs_result = quotient & significand_mask; - // Insert the exponent - abs_result |= written_exponent.cast() << significand_bits; - // Round - abs_result = abs_result.wrapping_add(round.cast()); - // Insert the sign and return - return F::from_repr(abs_result | quotient_sign); } + + // Now, quotient <= the correctly-rounded result + // and may need taking NextAfter() up to 3 times (see error estimates above) + // r = a - b * q + let abs_result = if written_exponent > 0 { + let mut ret = quotient & significand_mask; + ret |= ((written_exponent as u64) << significand_bits).cast(); + residual <<= 1; + ret + } else { + if (significand_bits as i64 + written_exponent) < 0 { + return F::from_repr(quotient_sign); + } + let ret = + quotient.wrapping_shr((negate_u64(CastInto::::cast(written_exponent)) + 1) as u32); + residual = (CastInto::::cast( + a_significand.wrapping_shl( + significand_bits.wrapping_add(CastInto::::cast(written_exponent)), + ), + ) + .wrapping_sub( + (CastInto::::cast(ret).wrapping_mul(CastInto::::cast(b_significand))) << 1, + )) + .cast(); + ret + }; + // Round + let abs_result = { + residual += abs_result & one; // tie to even + // conditionally turns the below LT comparison into LTE + if residual > b_significand { + abs_result + one + } else { + abs_result + } + }; + F::from_repr(abs_result | quotient_sign) } intrinsics! { diff --git a/testcrate/tests/div_rem.rs b/testcrate/tests/div_rem.rs index c3f067640..de3bd9bee 100644 --- a/testcrate/tests/div_rem.rs +++ b/testcrate/tests/div_rem.rs @@ -109,7 +109,16 @@ macro_rules! 
float { fuzz_float_2(N, |x: $i, y: $i| { let quo0 = x / y; let quo1: $i = $fn(x, y); - // division of subnormals is not currently handled + #[cfg(not(target_arch = "arm"))] + if !Float::eq_repr(quo0, quo1) { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn), x, y, quo0, quo1 + ); + } + + // ARM SIMD instructions always flush subnormals to zero + #[cfg(target_arch = "arm")] if !(Float::is_subnormal(quo0) || Float::is_subnormal(quo1)) { if !Float::eq_repr(quo0, quo1) { panic!( From cac12e8184f94e5a3ae7cdb247c70e3578b56caa Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Tue, 27 Jun 2023 02:31:42 +0000 Subject: [PATCH 0639/1459] fix tests to work with `--feature c` --- src/aarch64.rs | 1 + src/lib.rs | 6 +----- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/aarch64.rs b/src/aarch64.rs index ddbec6d32..62144e531 100644 --- a/src/aarch64.rs +++ b/src/aarch64.rs @@ -22,6 +22,7 @@ //! //! The original `lse.S` has some truly horrifying code that expects to be compiled multiple times with different constants. //! We do something similar, but with macro arguments. +#![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule // We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor. 
diff --git a/src/lib.rs b/src/lib.rs index 4b44adc26..2e5c587fb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -57,11 +57,7 @@ pub mod mem; #[cfg(target_arch = "arm")] pub mod arm; -#[cfg(all( - target_arch = "aarch64", - not(feature = "no-asm"), - not(feature = "optimized-c") -))] +#[cfg(all(target_arch = "aarch64", not(feature = "no-asm"),))] pub mod aarch64; #[cfg(all( From 2a58f6fb95a401d6bb9cd1c69e1f96f3a4f0be96 Mon Sep 17 00:00:00 2001 From: Stephan Date: Wed, 28 Jun 2023 21:35:33 +0200 Subject: [PATCH 0640/1459] enable simple f32/f64 support needed by core library --- src/lib.rs | 1 + src/math.rs | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 71f249c8e..729b1ec54 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -48,6 +48,7 @@ pub mod int; all(target_arch = "x86_64", target_os = "uefi"), all(target_arch = "arm", target_os = "none"), all(target_arch = "xtensa", target_os = "none"), + all(target_arch = "mips", target_os = "none"), target_os = "xous", all(target_vendor = "fortanix", target_env = "sgx") ))] diff --git a/src/math.rs b/src/math.rs index 498e4d85f..b4e5fc113 100644 --- a/src/math.rs +++ b/src/math.rs @@ -136,11 +136,12 @@ no_mangle! { fn truncf(x: f32) -> f32; } -// only for the thumb*-none-eabi*, riscv32*-none-elf and x86_64-unknown-none targets that lack the floating point instruction set +// only for the thumb*-none-eabi*, riscv32*-none-elf, x86_64-unknown-none and mips*-unknown-none targets that lack the floating point instruction set #[cfg(any( all(target_arch = "arm", target_os = "none"), all(target_arch = "riscv32", not(target_feature = "f"), target_os = "none"), - all(target_arch = "x86_64", target_os = "none") + all(target_arch = "x86_64", target_os = "none"), + all(target_arch = "mips", target_os = "none"), ))] no_mangle! 
{ fn fmin(x: f64, y: f64) -> f64; From 417a45ae20e5f742b28d42b3de93d1d2838d4841 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 28 Jun 2023 23:44:06 +0100 Subject: [PATCH 0641/1459] Bump to 0.1.94 --- Cargo.toml | 2 +- libm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6cfa2b99e..c468bdf75 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.93" +version = "0.1.94" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" diff --git a/libm b/libm index 1dbb9d2d4..4c8a97374 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 1dbb9d2d476d65d020feca17b11391652038e2e1 +Subproject commit 4c8a973741c014b11ce7f1477693a3e5d4ef9609 From 7ffddb3549e42f1171457b782fdcba2d20cd8274 Mon Sep 17 00:00:00 2001 From: jyn Date: Sun, 2 Jul 2023 14:43:25 -0500 Subject: [PATCH 0642/1459] Only compile outlined-atomics intrinsics on linux --- src/{aarch64.rs => aarch64_linux.rs} | 0 src/lib.rs | 4 ++-- testcrate/tests/lse.rs | 12 ++++++++---- 3 files changed, 10 insertions(+), 6 deletions(-) rename src/{aarch64.rs => aarch64_linux.rs} (100%) diff --git a/src/aarch64.rs b/src/aarch64_linux.rs similarity index 100% rename from src/aarch64.rs rename to src/aarch64_linux.rs diff --git a/src/lib.rs b/src/lib.rs index 58603a57c..a6b61bdf5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -58,8 +58,8 @@ pub mod mem; #[cfg(target_arch = "arm")] pub mod arm; -#[cfg(all(target_arch = "aarch64", not(feature = "no-asm"),))] -pub mod aarch64; +#[cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm"),))] +pub mod aarch64_linux; #[cfg(all( kernel_user_helpers, diff --git a/testcrate/tests/lse.rs b/testcrate/tests/lse.rs index 7b54ab5d2..5589f22f1 100644 --- a/testcrate/tests/lse.rs +++ b/testcrate/tests/lse.rs @@ -17,7 +17,9 @@ mod cas { testcrate::fuzz_2(10000, |expected: 
super::int_ty!($bytes), new| { let mut target = expected.wrapping_add(10); assert_eq!( - unsafe { compiler_builtins::aarch64::$name::$name(expected, new, &mut target) }, + unsafe { + compiler_builtins::aarch64_linux::$name::$name(expected, new, &mut target) + }, expected.wrapping_add(10), "return value should always be the previous value", ); @@ -29,7 +31,9 @@ mod cas { target = expected; assert_eq!( - unsafe { compiler_builtins::aarch64::$name::$name(expected, new, &mut target) }, + unsafe { + compiler_builtins::aarch64_linux::$name::$name(expected, new, &mut target) + }, expected ); assert_eq!(target, new, "should have updated target"); @@ -49,7 +53,7 @@ mod swap { testcrate::fuzz_2(10000, |left: super::int_ty!($bytes), mut right| { let orig_right = right; assert_eq!( - unsafe { compiler_builtins::aarch64::$name::$name(left, &mut right) }, + unsafe { compiler_builtins::aarch64_linux::$name::$name(left, &mut right) }, orig_right ); assert_eq!(left, right); @@ -69,7 +73,7 @@ macro_rules! 
test_op { let mut target = old; let op: fn(super::int_ty!($bytes), super::int_ty!($bytes)) -> _ = $($op)*; let expected = op(old, val); - assert_eq!(old, unsafe { compiler_builtins::aarch64::$name::$name(val, &mut target) }, "{} should return original value", stringify!($name)); + assert_eq!(old, unsafe { compiler_builtins::aarch64_linux::$name::$name(val, &mut target) }, "{} should return original value", stringify!($name)); assert_eq!(expected, target, "{} should store to target", stringify!($name)); }); } From 82feed32e9dbe675622098accb852d1a5f660dd4 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Mon, 3 Jul 2023 10:45:20 +0200 Subject: [PATCH 0643/1459] Bump to 0.1.95 --- Cargo.toml | 2 +- libm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c468bdf75..52c1ee39a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.94" +version = "0.1.95" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" diff --git a/libm b/libm index 4c8a97374..1dbb9d2d4 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 4c8a973741c014b11ce7f1477693a3e5d4ef9609 +Subproject commit 1dbb9d2d476d65d020feca17b11391652038e2e1 From db03ae6eeca7d1aec1efc14e9810e0ffa5f48fc1 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 6 Jul 2023 20:20:19 +0200 Subject: [PATCH 0644/1459] Disable MIPS in CI See https://github.com/rust-lang/compiler-team/issues/648 --- .github/workflows/main.yml | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3d63bc070..b69c48c16 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -23,18 +23,20 @@ jobs: - target: i686-unknown-linux-gnu os: ubuntu-latest rust: nightly - - target: mips-unknown-linux-gnu - os: ubuntu-latest - rust: nightly 
- - target: mips64-unknown-linux-gnuabi64 - os: ubuntu-latest - rust: nightly - - target: mips64el-unknown-linux-gnuabi64 - os: ubuntu-latest - rust: nightly - - target: mipsel-unknown-linux-gnu - os: ubuntu-latest - rust: nightly + # MIPS targets disabled since they are dropped to tier 3. + # See https://github.com/rust-lang/compiler-team/issues/648 + #- target: mips-unknown-linux-gnu + # os: ubuntu-latest + # rust: nightly + #- target: mips64-unknown-linux-gnuabi64 + # os: ubuntu-latest + # rust: nightly + #- target: mips64el-unknown-linux-gnuabi64 + # os: ubuntu-latest + # rust: nightly + #- target: mipsel-unknown-linux-gnu + # os: ubuntu-latest + # rust: nightly - target: powerpc-unknown-linux-gnu os: ubuntu-latest rust: nightly From fd46ea5debe9a70084cf537a1e7c9f3c9adf3a29 Mon Sep 17 00:00:00 2001 From: Aaron Kutch Date: Thu, 6 Jul 2023 01:51:47 -0500 Subject: [PATCH 0645/1459] Use the target_feature "zbb" instead of "b" for RISC-V --- src/int/specialized_div_rem/mod.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs index 77034eb54..1ff1d19dc 100644 --- a/src/int/specialized_div_rem/mod.rs +++ b/src/int/specialized_div_rem/mod.rs @@ -95,8 +95,9 @@ const USE_LZ: bool = { // LZD or LZCNT on SPARC only exists for the VIS 3 extension and later. 
cfg!(target_feature = "vis3") } else if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { - // The `B` extension on RISC-V determines if a CLZ assembly instruction exists - cfg!(target_feature = "b") + // The 'Zbb' Basic Bit-Manipulation extension on RISC-V + // determines if a CLZ assembly instruction exists + cfg!(target_feature = "zbb") } else { // All other common targets Rust supports should have CLZ instructions true From bfe9796ed02602dd7026129a214babd408a661a3 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Fri, 7 Jul 2023 10:49:54 -0400 Subject: [PATCH 0646/1459] Add lgamma_r and lgammaf_r to MSVC --- src/math.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/math.rs b/src/math.rs index b4e5fc113..4e255ca1a 100644 --- a/src/math.rs +++ b/src/math.rs @@ -95,7 +95,8 @@ no_mangle! { target_os = "xous", all(target_arch = "x86_64", target_os = "uefi"), all(target_arch = "xtensa", target_os = "none"), - all(target_vendor = "fortanix", target_env = "sgx") + all(target_vendor = "fortanix", target_env = "sgx"), + target_env = "msvc" ))] intrinsics! 
{ pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 { From 1288ed4fa6c9ad43eaac9d072a69010c813a98c5 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Sat, 8 Jul 2023 14:18:27 +0900 Subject: [PATCH 0647/1459] Fix build error on aarch64_be linux --- build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.rs b/build.rs index 4549d0b4f..26db6b4be 100644 --- a/build.rs +++ b/build.rs @@ -91,7 +91,7 @@ fn main() { println!("cargo:rustc-cfg=kernel_user_helpers") } - if llvm_target[0] == "aarch64" { + if llvm_target[0].starts_with("aarch64") { generate_aarch64_outlined_atomics(); } } From 98881e92d99ab4414cd534c79c8c42edde2ab0d1 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Mon, 17 Jul 2023 13:51:40 +0100 Subject: [PATCH 0648/1459] Bump to 0.1.96 --- Cargo.toml | 2 +- libm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 52c1ee39a..6c6c8d136 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.95" +version = "0.1.96" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" diff --git a/libm b/libm index 1dbb9d2d4..4c8a97374 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 1dbb9d2d476d65d020feca17b11391652038e2e1 +Subproject commit 4c8a973741c014b11ce7f1477693a3e5d4ef9609 From f4a2143389f98f3b179edcaaec92f6bc202e1a5a Mon Sep 17 00:00:00 2001 From: Nicholas Bishop Date: Tue, 18 Jul 2023 21:59:39 -0400 Subject: [PATCH 0649/1459] Enable chkstk/alloca intrinsics on x86_64-unknown-uefi --- src/x86_64.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/x86_64.rs b/src/x86_64.rs index 393eeddd8..7ad941158 100644 --- a/src/x86_64.rs +++ b/src/x86_64.rs @@ -11,8 +11,7 @@ use core::intrinsics; intrinsics! 
{ #[naked] #[cfg(all( - windows, - target_env = "gnu", + any(all(windows, target_env = "gnu"), target_os = "uefi"), not(feature = "no-asm") ))] pub unsafe extern "C" fn ___chkstk_ms() { @@ -40,8 +39,7 @@ intrinsics! { #[naked] #[cfg(all( - windows, - target_env = "gnu", + any(all(windows, target_env = "gnu"), target_os = "uefi"), not(feature = "no-asm") ))] pub unsafe extern "C" fn __alloca() { @@ -54,8 +52,7 @@ intrinsics! { #[naked] #[cfg(all( - windows, - target_env = "gnu", + any(all(windows, target_env = "gnu"), target_os = "uefi"), not(feature = "no-asm") ))] pub unsafe extern "C" fn ___chkstk() { From 5f984924207e9120272f3b5b0969fd05d2ef8d41 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 21 Jul 2023 14:29:00 +0100 Subject: [PATCH 0650/1459] Bump to 0.1.97 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6c6c8d136..d95a69530 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.96" +version = "0.1.97" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 8f03b4d212658e7f349e21968340603558ea35dc Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 21 Jul 2023 17:45:47 +0100 Subject: [PATCH 0651/1459] Upgrade to libm 0.2.7 --- libm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm b/libm index 4c8a97374..1dbb9d2d4 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 4c8a973741c014b11ce7f1477693a3e5d4ef9609 +Subproject commit 1dbb9d2d476d65d020feca17b11391652038e2e1 From 03c759c7c0859b5c50fe4a6ce439a107805cc798 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 21 Jul 2023 17:46:23 +0100 Subject: [PATCH 0652/1459] Bump to 0.1.98 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index d95a69530..d1072fe6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ 
-1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.97" +version = "0.1.98" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 9406c0dbd699d91c1967be7b81c6a370cfcd6064 Mon Sep 17 00:00:00 2001 From: Nicholas Bishop Date: Sat, 22 Jul 2023 15:05:30 -0400 Subject: [PATCH 0653/1459] Add __chkstk for aarch64-unknown-uefi This is based on compiler-rt/lib/builtins/aarch64/chkstk.S: https://github.com/llvm/llvm-project/commit/f8e19b37995751f7913692bf9eed8ebb14a95f2d --- src/aarch64.rs | 22 ++++++++++++++++++++++ src/lib.rs | 3 +++ 2 files changed, 25 insertions(+) create mode 100644 src/aarch64.rs diff --git a/src/aarch64.rs b/src/aarch64.rs new file mode 100644 index 000000000..e5747d525 --- /dev/null +++ b/src/aarch64.rs @@ -0,0 +1,22 @@ +#![allow(unused_imports)] + +use core::intrinsics; + +intrinsics! { + #[naked] + #[cfg(all(target_os = "uefi", not(feature = "no-asm")))] + pub unsafe extern "C" fn __chkstk() { + core::arch::asm!( + ".p2align 2", + "lsl x16, x15, #4", + "mov x17, sp", + "1:", + "sub x17, x17, 4096", + "subs x16, x16, 4096", + "ldr xzr, [x17]", + "b.gt 1b", + "ret", + options(noreturn) + ); + } +} diff --git a/src/lib.rs b/src/lib.rs index a6b61bdf5..f77f08268 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -58,6 +58,9 @@ pub mod mem; #[cfg(target_arch = "arm")] pub mod arm; +#[cfg(target_arch = "aarch64")] +pub mod aarch64; + #[cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm"),))] pub mod aarch64_linux; From 1f809e007dab3604ea2a997eb5e8ed65205d48d3 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 30 Jul 2023 10:45:26 -0700 Subject: [PATCH 0654/1459] Add lgamma_r and lgammaf_r to MSVC --- src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index f77f08268..73cb3d505 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -50,7 +50,8 @@ pub mod int; all(target_arch = "xtensa", 
target_os = "none"), all(target_arch = "mips", target_os = "none"), target_os = "xous", - all(target_vendor = "fortanix", target_env = "sgx") + all(target_vendor = "fortanix", target_env = "sgx"), + target_env = "msvc" ))] pub mod math; pub mod mem; From efd227f9c2ac220a8699188e937eb8be810df317 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 30 Jul 2023 23:56:13 +0100 Subject: [PATCH 0655/1459] Bump to 0.1.99 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index d1072fe6d..dde57030f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.98" +version = "0.1.99" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From d3f31b24dc0bd4695c580846898f85651d0fcbea Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 31 Jul 2023 15:08:37 -0700 Subject: [PATCH 0656/1459] Add lgamma_r and lgammaf_r to MinGW --- src/lib.rs | 2 +- src/math.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 73cb3d505..dd81608f2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -51,7 +51,7 @@ pub mod int; all(target_arch = "mips", target_os = "none"), target_os = "xous", all(target_vendor = "fortanix", target_env = "sgx"), - target_env = "msvc" + target_os = "windows" ))] pub mod math; pub mod mem; diff --git a/src/math.rs b/src/math.rs index 4e255ca1a..e0f7d8e8b 100644 --- a/src/math.rs +++ b/src/math.rs @@ -96,7 +96,7 @@ no_mangle! { all(target_arch = "x86_64", target_os = "uefi"), all(target_arch = "xtensa", target_os = "none"), all(target_vendor = "fortanix", target_env = "sgx"), - target_env = "msvc" + target_os = "windows" ))] intrinsics! 
{ pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 { From a768d9b34f87edf2eaa86cc472fdac54a4ccde7d Mon Sep 17 00:00:00 2001 From: mulhern Date: Wed, 2 Aug 2023 14:05:07 -0400 Subject: [PATCH 0657/1459] Exclude ci directory from packaged crate I do not think there is compelling reason to release the ci support as part of a Rust source code package. In addition, the crate, as it is released now, gets flagged in some security scans due to the presence of Dockerfiles which are considered to be following some unsafe practices. Most Linux distros package using the vendored approach and provide a vendor tarfile of an application's dependencies. Scanners will tend to expect that the contents of the vendor tarfile will be source code. These Dockerfiles are already being flagged by some scanners; other contents of the ci directory may be flagged in future. --- libm/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 393d95e4a..50e6ed6e2 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -10,6 +10,7 @@ readme = "README.md" repository = "https://github.com/rust-lang/libm" version = "0.2.7" edition = "2018" +exclude = ["/ci/"] [features] default = [] From 98ae7ddc66e850bed442b27bef9bd0be46210698 Mon Sep 17 00:00:00 2001 From: mulhern Date: Thu, 3 Aug 2023 15:05:28 -0400 Subject: [PATCH 0658/1459] Exclude GitHub Workflows files from packaged crate --- libm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 50e6ed6e2..12126c6d0 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -10,7 +10,7 @@ readme = "README.md" repository = "https://github.com/rust-lang/libm" version = "0.2.7" edition = "2018" -exclude = ["/ci/"] +exclude = ["/ci/", "/.github/workflows/"] [features] default = [] From fdc1a87f471141de366045a212386af0cd83d44d Mon Sep 17 00:00:00 2001 From: klensy Date: Thu, 20 Jul 2023 17:23:56 +0300 Subject: [PATCH 0659/1459] edition 2018 --- Cargo.toml 
| 1 + src/arm.rs | 6 +++--- src/float/add.rs | 4 ++-- src/float/cmp.rs | 4 ++-- src/float/div.rs | 4 ++-- src/float/extend.rs | 4 ++-- src/float/mul.rs | 4 ++-- src/float/pow.rs | 4 ++-- src/float/sub.rs | 6 +++--- src/float/trunc.rs | 4 ++-- src/int/addsub.rs | 2 +- src/int/mul.rs | 2 +- src/int/sdiv.rs | 2 +- src/int/shift.rs | 2 +- src/int/udiv.rs | 4 ++-- src/macros.rs | 8 ++++---- src/math.rs | 1 + 17 files changed, 32 insertions(+), 30 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index dde57030f..bc8528087 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" homepage = "https://github.com/rust-lang/compiler-builtins" documentation = "https://docs.rs/compiler_builtins" +edition = "2018" description = """ Compiler intrinsics used by the Rust compiler. Also available for other targets if necessary! diff --git a/src/arm.rs b/src/arm.rs index a062a54e0..cc67642e1 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -91,7 +91,7 @@ intrinsics! { #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) { - ::mem::memcpy(dest, src, n); + crate::mem::memcpy(dest, src, n); } #[weak] @@ -121,7 +121,7 @@ intrinsics! { #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) { - ::mem::memmove(dest, src, n); + crate::mem::memmove(dest, src, n); } #[weak] @@ -140,7 +140,7 @@ intrinsics! 
{ #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { // Note the different argument order - ::mem::memset(dest, c, n); + crate::mem::memset(dest, c, n); } #[weak] diff --git a/src/float/add.rs b/src/float/add.rs index 67f6c2c14..804f4b510 100644 --- a/src/float/add.rs +++ b/src/float/add.rs @@ -1,5 +1,5 @@ -use float::Float; -use int::{CastInto, Int}; +use crate::float::Float; +use crate::int::{CastInto, Int}; /// Returns `a + b` fn add(a: F, b: F) -> F diff --git a/src/float/cmp.rs b/src/float/cmp.rs index 1bd7aa284..1c8917af8 100644 --- a/src/float/cmp.rs +++ b/src/float/cmp.rs @@ -1,7 +1,7 @@ #![allow(unreachable_code)] -use float::Float; -use int::Int; +use crate::float::Float; +use crate::int::Int; #[derive(Clone, Copy)] enum Result { diff --git a/src/float/div.rs b/src/float/div.rs index c0aae34fb..8c4cf55b8 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -2,8 +2,8 @@ // `return`s makes it clear where function exit points are #![allow(clippy::needless_return)] -use float::Float; -use int::{CastInto, DInt, HInt, Int}; +use crate::float::Float; +use crate::int::{CastInto, DInt, HInt, Int}; fn div32(a: F, b: F) -> F where diff --git a/src/float/extend.rs b/src/float/extend.rs index 39633773b..cffc57510 100644 --- a/src/float/extend.rs +++ b/src/float/extend.rs @@ -1,5 +1,5 @@ -use float::Float; -use int::{CastInto, Int}; +use crate::float::Float; +use crate::int::{CastInto, Int}; /// Generic conversion from a narrower to a wider IEEE-754 floating-point type fn extend(a: F) -> R diff --git a/src/float/mul.rs b/src/float/mul.rs index c89f22756..1b8c61203 100644 --- a/src/float/mul.rs +++ b/src/float/mul.rs @@ -1,5 +1,5 @@ -use float::Float; -use int::{CastInto, DInt, HInt, Int}; +use crate::float::Float; +use crate::int::{CastInto, DInt, HInt, Int}; fn mul(a: F, b: F) -> F where diff --git a/src/float/pow.rs b/src/float/pow.rs index a75340c30..0232ef405 100644 --- a/src/float/pow.rs +++ 
b/src/float/pow.rs @@ -1,5 +1,5 @@ -use float::Float; -use int::Int; +use crate::float::Float; +use crate::int::Int; /// Returns `a` raised to the power `b` fn pow(a: F, b: i32) -> F { diff --git a/src/float/sub.rs b/src/float/sub.rs index 8d300e9d2..0ea071b3c 100644 --- a/src/float/sub.rs +++ b/src/float/sub.rs @@ -1,6 +1,6 @@ -use float::add::__adddf3; -use float::add::__addsf3; -use float::Float; +use crate::float::add::__adddf3; +use crate::float::add::__addsf3; +use crate::float::Float; intrinsics! { #[arm_aeabi_alias = __aeabi_fsub] diff --git a/src/float/trunc.rs b/src/float/trunc.rs index d73713084..9bc4d6e57 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -1,5 +1,5 @@ -use float::Float; -use int::{CastInto, Int}; +use crate::float::Float; +use crate::int::{CastInto, Int}; fn trunc(a: F) -> R where diff --git a/src/int/addsub.rs b/src/int/addsub.rs index f4841e90f..f31eff4bd 100644 --- a/src/int/addsub.rs +++ b/src/int/addsub.rs @@ -1,4 +1,4 @@ -use int::{DInt, Int}; +use crate::int::{DInt, Int}; trait UAddSub: DInt { fn uadd(self, other: Self) -> Self { diff --git a/src/int/mul.rs b/src/int/mul.rs index 07ce061c9..2538e2f41 100644 --- a/src/int/mul.rs +++ b/src/int/mul.rs @@ -1,4 +1,4 @@ -use int::{DInt, HInt, Int}; +use crate::int::{DInt, HInt, Int}; trait Mul: DInt where diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index f1822f0f8..9d316c76e 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -1,4 +1,4 @@ -use int::udiv::*; +use crate::int::udiv::*; macro_rules! 
sdivmod { ( diff --git a/src/int/shift.rs b/src/int/shift.rs index c90cf1de3..dbd040187 100644 --- a/src/int/shift.rs +++ b/src/int/shift.rs @@ -1,4 +1,4 @@ -use int::{DInt, HInt, Int}; +use crate::int::{DInt, HInt, Int}; trait Ashl: DInt { /// Returns `a << b`, requires `b < Self::BITS` diff --git a/src/int/udiv.rs b/src/int/udiv.rs index fb09f87d8..c891eede4 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -1,8 +1,8 @@ #[cfg(not(feature = "public-test-deps"))] -pub(crate) use int::specialized_div_rem::*; +pub(crate) use crate::int::specialized_div_rem::*; #[cfg(feature = "public-test-deps")] -pub use int::specialized_div_rem::*; +pub use crate::int::specialized_div_rem::*; intrinsics! { #[maybe_use_optimized_c_shim] diff --git a/src/macros.rs b/src/macros.rs index b11114f12..d2b5734d5 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -321,10 +321,10 @@ macro_rules! intrinsics { #[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub extern $abi fn $name( $($argname: $ty),* ) - -> ::macros::win64_128bit_abi_hack::U64x2 + -> $crate::macros::win64_128bit_abi_hack::U64x2 { let e: $($ret)? = super::$name($($argname),*); - ::macros::win64_128bit_abi_hack::U64x2::from(e) + $crate::macros::win64_128bit_abi_hack::U64x2::from(e) } } @@ -540,7 +540,7 @@ pub mod win64_128bit_abi_hack { impl From for U64x2 { fn from(i: i128) -> U64x2 { - use int::DInt; + use crate::int::DInt; let j = i as u128; U64x2(j.lo(), j.hi()) } @@ -548,7 +548,7 @@ pub mod win64_128bit_abi_hack { impl From for U64x2 { fn from(i: u128) -> U64x2 { - use int::DInt; + use crate::int::DInt; U64x2(i.lo(), i.hi()) } } diff --git a/src/math.rs b/src/math.rs index 4e255ca1a..21d23ff97 100644 --- a/src/math.rs +++ b/src/math.rs @@ -2,6 +2,7 @@ #[path = "../libm/src/math/mod.rs"] mod libm; +#[allow(unused_macros)] macro_rules! no_mangle { ($(fn $fun:ident($($iid:ident : $ity:ty),+) -> $oty:ty;)+) => { intrinsics! 
{ From f9beeba4b78d5fe4711af0ae23d3aa29058ebd70 Mon Sep 17 00:00:00 2001 From: klensy Date: Mon, 7 Aug 2023 17:52:27 +0300 Subject: [PATCH 0660/1459] allow internal_features, added in https://github.com/rust-lang/rust/pull/108955 --- examples/intrinsics.rs | 1 + src/lib.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 19bb569b5..54b703dfb 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -5,6 +5,7 @@ #![allow(unused_features)] #![allow(stable_features)] // bench_black_box feature is stable, leaving for backcompat +#![allow(internal_features)] #![cfg_attr(thumb, no_main)] #![deny(dead_code)] #![feature(bench_black_box)] diff --git a/src/lib.rs b/src/lib.rs index 73cb3d505..2ef28fc63 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,6 +14,7 @@ #![no_builtins] #![no_std] #![allow(unused_features)] +#![allow(internal_features)] // We use `u128` in a whole bunch of places which we currently agree with the // compiler on ABIs and such, so we should be "good enough" for now and changes // to the `u128` ABI will be reflected here. From 1ac3230ed8036c8c48657e6b2d046b96d76513c7 Mon Sep 17 00:00:00 2001 From: klensy Date: Mon, 7 Aug 2023 21:19:25 +0300 Subject: [PATCH 0661/1459] impl_binary_long allow to pass attribute --- src/int/specialized_div_rem/binary_long.rs | 4 ++++ src/int/specialized_div_rem/mod.rs | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/int/specialized_div_rem/binary_long.rs b/src/int/specialized_div_rem/binary_long.rs index 0d7822882..2c61a45e0 100644 --- a/src/int/specialized_div_rem/binary_long.rs +++ b/src/int/specialized_div_rem/binary_long.rs @@ -13,9 +13,13 @@ macro_rules! 
impl_binary_long { $n:tt, // the number of bits in a $iX or $uX $uX:ident, // unsigned integer type for the inputs and outputs of `$fn` $iX:ident // signed integer type with same bitwidth as `$uX` + $(, $fun_attr:meta)* // attributes for the function ) => { /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a /// tuple. + $( + #[$fun_attr] + )* pub fn $fn(duo: $uX, div: $uX) -> ($uX, $uX) { let mut duo = duo; // handle edge cases before calling `$normalization_shift` diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs index 1ff1d19dc..760f5f5b7 100644 --- a/src/int/specialized_div_rem/mod.rs +++ b/src/int/specialized_div_rem/mod.rs @@ -306,5 +306,6 @@ impl_binary_long!( u32_normalization_shift, 32, u32, - i32 + i32, + allow(dead_code) ); From 3766e73cc618a35fe510704796996bf3ec144dc0 Mon Sep 17 00:00:00 2001 From: klensy Date: Mon, 7 Aug 2023 21:51:34 +0300 Subject: [PATCH 0662/1459] fix ci, disable mips, mirrors https://github.com/rust-lang/compiler-builtins/pull/537 --- libm/.github/workflows/main.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index decd71f5c..5408ac23f 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -13,9 +13,11 @@ jobs: - arm-unknown-linux-gnueabihf - armv7-unknown-linux-gnueabihf # - i686-unknown-linux-gnu - - mips-unknown-linux-gnu - - mips64-unknown-linux-gnuabi64 - - mips64el-unknown-linux-gnuabi64 + # MIPS targets disabled since they are dropped to tier 3. 
+ # See https://github.com/rust-lang/compiler-team/issues/648 + #- mips-unknown-linux-gnu + #- mips64-unknown-linux-gnuabi64 + #- mips64el-unknown-linux-gnuabi64 - powerpc-unknown-linux-gnu - powerpc64-unknown-linux-gnu - powerpc64le-unknown-linux-gnu From 0796d75e10241df62cd5ce724ca918c286d7f961 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Mon, 7 Aug 2023 23:37:59 +0100 Subject: [PATCH 0663/1459] Bump to 0.1.100 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index bc8528087..797857d76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.99" +version = "0.1.100" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 54629a1587f746485cc6ae9c513da926cc2cdc99 Mon Sep 17 00:00:00 2001 From: Sean Cross Date: Thu, 10 Aug 2023 13:50:04 +0800 Subject: [PATCH 0664/1459] build: compile memory intrinsics on xous Like SGX, Xous does not have any libc to link against. 
As a result, memory intrinsics need to be available as part of `compiler_builtins` Signed-off-by: Sean Cross --- build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/build.rs b/build.rs index 26db6b4be..10c7aaa63 100644 --- a/build.rs +++ b/build.rs @@ -30,6 +30,7 @@ fn main() { || target.contains("-none") || target.contains("nvptx") || target.contains("uefi") + || target.contains("xous") { println!("cargo:rustc-cfg=feature=\"mem\""); } From 8434a9f27f7f6db476c760372789a09402591b70 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sat, 12 Aug 2023 10:28:45 +0200 Subject: [PATCH 0665/1459] Bump to 0.1.101 --- Cargo.toml | 2 +- libm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 797857d76..239191a9b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.100" +version = "0.1.101" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" diff --git a/libm b/libm index 1dbb9d2d4..5bcbc3b63 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 1dbb9d2d476d65d020feca17b11391652038e2e1 +Subproject commit 5bcbc3b636b79da7a3061b1c257d4ae53f033018 From 9964bb433ea01633b835e4bf6bdc86b7ab87cc3d Mon Sep 17 00:00:00 2001 From: James Farrell Date: Mon, 28 Aug 2023 22:14:18 +0000 Subject: [PATCH 0666/1459] Generate outlined_atomics.rs in a deterministic order. Also add a missing newline. 
--- build.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/build.rs b/build.rs index 10c7aaa63..b60181062 100644 --- a/build.rs +++ b/build.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, env, sync::atomic::Ordering}; +use std::{collections::BTreeMap, env, sync::atomic::Ordering}; fn main() { println!("cargo:rerun-if-changed=build.rs"); @@ -118,7 +118,7 @@ fn generate_aarch64_outlined_atomics() { // Generate different macros for add/clr/eor/set so that we can test them separately. let sym_names = ["cas", "ldadd", "ldclr", "ldeor", "ldset", "swp"]; - let mut macros = HashMap::new(); + let mut macros = BTreeMap::new(); for sym in sym_names { macros.insert(sym, gen_macro(sym)); } @@ -146,7 +146,7 @@ fn generate_aarch64_outlined_atomics() { let mut buf = String::new(); for macro_def in macros.values().chain(std::iter::once(&cas16)) { buf += macro_def; - buf += "}; }"; + buf += "}; }\n"; } let dst = std::env::var("OUT_DIR").unwrap() + "/outlined_atomics.rs"; std::fs::write(dst, buf).unwrap(); From 8f6cffb58603469e0d047ec2281f5b85ce63e87f Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Thu, 5 Oct 2023 17:38:44 +0000 Subject: [PATCH 0667/1459] Fix rint and rintf on x87. 
--- libm/src/math/rint.rs | 14 ++++++++++++-- libm/src/math/rintf.rs | 14 ++++++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/libm/src/math/rint.rs b/libm/src/math/rint.rs index 0c6025c1f..8edbe3440 100644 --- a/libm/src/math/rint.rs +++ b/libm/src/math/rint.rs @@ -8,9 +8,19 @@ pub fn rint(x: f64) -> f64 { x } else { let ans = if is_positive { - x + one_over_e - one_over_e + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let x = force_eval!(x); + let xplusoneovere = x + one_over_e; + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let xplusoneovere = force_eval!(xplusoneovere); + xplusoneovere - one_over_e } else { - x - one_over_e + one_over_e + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let x = force_eval!(x); + let xminusoneovere = x - one_over_e; + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let xminusoneovere = force_eval!(xminusoneovere); + xminusoneovere + one_over_e }; if ans == 0.0 { diff --git a/libm/src/math/rintf.rs b/libm/src/math/rintf.rs index d427793f7..7a7da618a 100644 --- a/libm/src/math/rintf.rs +++ b/libm/src/math/rintf.rs @@ -8,9 +8,19 @@ pub fn rintf(x: f32) -> f32 { x } else { let ans = if is_positive { - x + one_over_e - one_over_e + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let x = force_eval!(x); + let xplusoneovere = x + one_over_e; + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let xplusoneovere = force_eval!(xplusoneovere); + xplusoneovere - one_over_e } else { - x - one_over_e + one_over_e + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let x = force_eval!(x); + let xminusoneovere = x - one_over_e; + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let xminusoneovere = force_eval!(xminusoneovere); + xminusoneovere + one_over_e }; if ans == 0.0 { From 978fa548d7495017def792b39332c298eddcbd9e Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 6 Oct 
2023 11:25:38 +0200 Subject: [PATCH 0668/1459] Release 0.2.8 --- libm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 12126c6d0..d33ca61cd 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0" name = "libm" readme = "README.md" repository = "https://github.com/rust-lang/libm" -version = "0.2.7" +version = "0.2.8" edition = "2018" exclude = ["/ci/", "/.github/workflows/"] From d1c083cda12af53870a11bc4dd222522c9e26eb7 Mon Sep 17 00:00:00 2001 From: Sebastian Imlay Date: Fri, 6 Oct 2023 17:35:20 -0400 Subject: [PATCH 0669/1459] Add tvOS to build script --- build.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build.rs b/build.rs index b60181062..048611677 100644 --- a/build.rs +++ b/build.rs @@ -320,6 +320,7 @@ mod c { // include them. if target_os != "ios" && target_os != "watchos" + && target_os != "tvos" && (target_vendor != "apple" || target_arch != "x86") { sources.extend(&[ @@ -391,6 +392,7 @@ mod c { if target_arch == "arm" && target_os != "ios" && target_os != "watchos" + && target_os != "tvos" && target_env != "msvc" { sources.extend(&[ From 9acbcc4ad14d1cc15e6350b543a4dac37a7de09e Mon Sep 17 00:00:00 2001 From: Grant Elbert Date: Mon, 23 Oct 2023 15:54:05 -0500 Subject: [PATCH 0670/1459] intrinsics macro: fix non-weak aeabi generation Signed-off-by: Grant Elbert --- src/macros.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/macros.rs b/src/macros.rs index d2b5734d5..2aa9a742c 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -369,7 +369,7 @@ macro_rules! 
intrinsics { #[cfg(target_arch = "arm")] pub mod $alias { #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - #[cfg_attr(any(all(not(windows), not(target_vendor="apple"), feature = "weak-intrinsics")), linkage = "weak")] + #[cfg_attr(any(all(not(windows), not(target_vendor="apple")), feature = "weak-intrinsics"), linkage = "weak")] pub extern "aapcs" fn $alias( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } From 588cc2aa198369364495df9c940b1544e1fc3f2e Mon Sep 17 00:00:00 2001 From: Nicholas Bishop Date: Fri, 27 Oct 2023 20:19:55 +0000 Subject: [PATCH 0671/1459] Enable src/math for all UEFI targets This fixes various math operations on aarch64-unknown-uefi and i686-unknown-uefi. --- src/lib.rs | 2 +- src/math.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d1195a4a8..e2e35fbd3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,7 +46,7 @@ pub mod int; #[cfg(any( all(target_family = "wasm", target_os = "unknown"), all(target_arch = "x86_64", target_os = "none"), - all(target_arch = "x86_64", target_os = "uefi"), + target_os = "uefi", all(target_arch = "arm", target_os = "none"), all(target_arch = "xtensa", target_os = "none"), all(target_arch = "mips", target_os = "none"), diff --git a/src/math.rs b/src/math.rs index f8f9d225b..63d858333 100644 --- a/src/math.rs +++ b/src/math.rs @@ -22,7 +22,7 @@ macro_rules! no_mangle { not(target_env = "wasi") ), target_os = "xous", - all(target_arch = "x86_64", target_os = "uefi"), + target_os = "uefi", all(target_arch = "xtensa", target_os = "none"), all(target_vendor = "fortanix", target_env = "sgx") ))] @@ -94,7 +94,7 @@ no_mangle! 
{ not(target_env = "wasi") ), target_os = "xous", - all(target_arch = "x86_64", target_os = "uefi"), + target_os = "uefi", all(target_arch = "xtensa", target_os = "none"), all(target_vendor = "fortanix", target_env = "sgx"), target_os = "windows" From ce1e4d2d2cf1b8014ef4c39028d41ecf87e98802 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Chodzikiewicz?= Date: Tue, 31 Oct 2023 20:32:53 +0100 Subject: [PATCH 0672/1459] Enable math module for all "none" os targets This was initially a bugfix that fixed gating math module for riscv32, but conclusion is it makes no sense to gate on target architecture. --- src/lib.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e2e35fbd3..3e5491878 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,11 +45,8 @@ pub mod int; #[cfg(any( all(target_family = "wasm", target_os = "unknown"), - all(target_arch = "x86_64", target_os = "none"), target_os = "uefi", - all(target_arch = "arm", target_os = "none"), - all(target_arch = "xtensa", target_os = "none"), - all(target_arch = "mips", target_os = "none"), + target_os = "none", target_os = "xous", all(target_vendor = "fortanix", target_env = "sgx"), target_os = "windows" From 1ac8e561055b0ca5638b9edcb57b05becc1407bb Mon Sep 17 00:00:00 2001 From: "Mark S. Baranowski" Date: Tue, 31 Oct 2023 15:59:56 -0600 Subject: [PATCH 0673/1459] This updates the exponent calculations done in the nextafter functions related to detecting underflow/overflow. The functions now match the behavior of the MUSL implementations these were based on. 
Fixes rust-lang/libm#286 --- libm/src/math/nextafter.rs | 2 +- libm/src/math/nextafterf.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libm/src/math/nextafter.rs b/libm/src/math/nextafter.rs index 13094a17c..057626191 100644 --- a/libm/src/math/nextafter.rs +++ b/libm/src/math/nextafter.rs @@ -23,7 +23,7 @@ pub fn nextafter(x: f64, y: f64) -> f64 { ux_i += 1; } - let e = ux_i.wrapping_shr(52 & 0x7ff); + let e = ux_i >> 52 & 0x7ff; // raise overflow if ux.f is infinite and x is finite if e == 0x7ff { force_eval!(x + x); diff --git a/libm/src/math/nextafterf.rs b/libm/src/math/nextafterf.rs index df9b10829..8ba383356 100644 --- a/libm/src/math/nextafterf.rs +++ b/libm/src/math/nextafterf.rs @@ -23,7 +23,7 @@ pub fn nextafterf(x: f32, y: f32) -> f32 { ux_i += 1; } - let e = ux_i.wrapping_shr(0x7f80_0000_u32); + let e = ux_i & 0x7f80_0000_u32; // raise overflow if ux_f is infinite and x is finite if e == 0x7f80_0000_u32 { force_eval!(x + x); From a22a8f410d83143a89ad9dae5e260ebc9c531007 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sat, 4 Nov 2023 00:28:36 +0000 Subject: [PATCH 0674/1459] Bump to 0.1.102 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 239191a9b..320a4c1fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.101" +version = "0.1.102" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 2a331a2831cd9dcb566f2b19b17df9a96baa7944 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sat, 4 Nov 2023 00:29:27 +0000 Subject: [PATCH 0675/1459] Bump to 0.1.103 There was a mistake in publishing the 0.1.102 release. 
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 320a4c1fe..5a022cc4b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.102" +version = "0.1.103" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From cfbf9c1d55d847ffb541164da30c584aa76b58c0 Mon Sep 17 00:00:00 2001 From: Patryk Wychowaniec Date: Sun, 26 Nov 2023 16:17:00 +0100 Subject: [PATCH 0676/1459] fix: Add (even more) `#[avr_skip]` for floats Tale as old as the world - there's an ABI mismatch: https://github.com/rust-lang/compiler-builtins/pull/527 Fortunately, newest GCCs (from v11, it seems) actually provide most of those intrinsics (even for f64!), so that's pretty cool. (the only intrinsics not provided by GCC are `__powisf2` & `__powidf2`, but our codegen for AVR doesn't emit those anyway.) Fixes https://github.com/rust-lang/rust/issues/118079. --- src/float/div.rs | 2 ++ src/float/extend.rs | 1 + src/float/mul.rs | 2 ++ src/float/pow.rs | 2 ++ src/float/sub.rs | 2 ++ src/float/trunc.rs | 1 + 6 files changed, 10 insertions(+) diff --git a/src/float/div.rs b/src/float/div.rs index 8c4cf55b8..d587fe4f9 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -902,11 +902,13 @@ where } intrinsics! { + #[avr_skip] #[arm_aeabi_alias = __aeabi_fdiv] pub extern "C" fn __divsf3(a: f32, b: f32) -> f32 { div32(a, b) } + #[avr_skip] #[arm_aeabi_alias = __aeabi_ddiv] pub extern "C" fn __divdf3(a: f64, b: f64) -> f64 { div64(a, b) diff --git a/src/float/extend.rs b/src/float/extend.rs index cffc57510..0e6673b9c 100644 --- a/src/float/extend.rs +++ b/src/float/extend.rs @@ -70,6 +70,7 @@ where } intrinsics! 
{ + #[avr_skip] #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_f2d] pub extern "C" fn __extendsfdf2(a: f32) -> f64 { diff --git a/src/float/mul.rs b/src/float/mul.rs index 1b8c61203..378fa9701 100644 --- a/src/float/mul.rs +++ b/src/float/mul.rs @@ -185,12 +185,14 @@ where } intrinsics! { + #[avr_skip] #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_fmul] pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 { mul(a, b) } + #[avr_skip] #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_dmul] pub extern "C" fn __muldf3(a: f64, b: f64) -> f64 { diff --git a/src/float/pow.rs b/src/float/pow.rs index 0232ef405..3103fe6f6 100644 --- a/src/float/pow.rs +++ b/src/float/pow.rs @@ -26,10 +26,12 @@ fn pow(a: F, b: i32) -> F { } intrinsics! { + #[avr_skip] pub extern "C" fn __powisf2(a: f32, b: i32) -> f32 { pow(a, b) } + #[avr_skip] pub extern "C" fn __powidf2(a: f64, b: i32) -> f64 { pow(a, b) } diff --git a/src/float/sub.rs b/src/float/sub.rs index 0ea071b3c..64653ee25 100644 --- a/src/float/sub.rs +++ b/src/float/sub.rs @@ -3,11 +3,13 @@ use crate::float::add::__addsf3; use crate::float::Float; intrinsics! { + #[avr_skip] #[arm_aeabi_alias = __aeabi_fsub] pub extern "C" fn __subsf3(a: f32, b: f32) -> f32 { __addsf3(a, f32::from_repr(b.repr() ^ f32::SIGN_MASK)) } + #[avr_skip] #[arm_aeabi_alias = __aeabi_dsub] pub extern "C" fn __subdf3(a: f64, b: f64) -> f64 { __adddf3(a, f64::from_repr(b.repr() ^ f64::SIGN_MASK)) diff --git a/src/float/trunc.rs b/src/float/trunc.rs index 9bc4d6e57..0beeb9f98 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -112,6 +112,7 @@ where } intrinsics! 
{ + #[avr_skip] #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_d2f] pub extern "C" fn __truncdfsf2(a: f64) -> f32 { From 2731a4837bfe091a0ba3e449e3ebeff73a6d2562 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Tue, 28 Nov 2023 08:02:43 +0000 Subject: [PATCH 0677/1459] Bump to 0.1.104 --- Cargo.toml | 2 +- libm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5a022cc4b..6ebabfdcb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.103" +version = "0.1.104" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" diff --git a/libm b/libm index 5bcbc3b63..721a5edc1 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 5bcbc3b636b79da7a3061b1c257d4ae53f033018 +Subproject commit 721a5edc1be6b0412e4b1704590aed76f9a55899 From 24023423be18001aff81fa8d51e731ad3a77c0a0 Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Sun, 5 Nov 2023 18:42:34 -0800 Subject: [PATCH 0678/1459] Add hexagon support Signed-off-by: Brian Cain --- src/hexagon.rs | 55 +++ src/hexagon/dfaddsub.s | 321 +++++++++++++++ src/hexagon/dfdiv.s | 372 ++++++++++++++++++ src/hexagon/dffma.s | 536 ++++++++++++++++++++++++++ src/hexagon/dfminmax.s | 51 +++ src/hexagon/dfmul.s | 309 +++++++++++++++ src/hexagon/dfsqrt.s | 277 +++++++++++++ src/hexagon/divdi3.s | 64 +++ src/hexagon/divsi3.s | 53 +++ src/hexagon/fastmath2_dlib_asm.s | 266 +++++++++++++ src/hexagon/fastmath2_ldlib_asm.s | 187 +++++++++ src/hexagon/func_macro.s | 12 + src/hexagon/memcpy_forward_vp4cp4n2.s | 91 +++++ src/hexagon/memcpy_likely_aligned.s | 42 ++ src/hexagon/moddi3.s | 63 +++ src/hexagon/modsi3.s | 44 +++ src/hexagon/sfdiv_opt.s | 42 ++ src/hexagon/sfsqrt_opt.s | 49 +++ src/hexagon/udivdi3.s | 50 +++ src/hexagon/udivmoddi4.s | 50 +++ src/hexagon/udivmodsi4.s | 39 ++ src/hexagon/udivsi3.s | 36 ++ src/hexagon/umoddi3.s | 53 +++ 
src/hexagon/umodsi3.s | 34 ++ src/lib.rs | 4 + 25 files changed, 3100 insertions(+) create mode 100644 src/hexagon.rs create mode 100644 src/hexagon/dfaddsub.s create mode 100644 src/hexagon/dfdiv.s create mode 100644 src/hexagon/dffma.s create mode 100644 src/hexagon/dfminmax.s create mode 100644 src/hexagon/dfmul.s create mode 100644 src/hexagon/dfsqrt.s create mode 100644 src/hexagon/divdi3.s create mode 100644 src/hexagon/divsi3.s create mode 100644 src/hexagon/fastmath2_dlib_asm.s create mode 100644 src/hexagon/fastmath2_ldlib_asm.s create mode 100644 src/hexagon/func_macro.s create mode 100644 src/hexagon/memcpy_forward_vp4cp4n2.s create mode 100644 src/hexagon/memcpy_likely_aligned.s create mode 100644 src/hexagon/moddi3.s create mode 100644 src/hexagon/modsi3.s create mode 100644 src/hexagon/sfdiv_opt.s create mode 100644 src/hexagon/sfsqrt_opt.s create mode 100644 src/hexagon/udivdi3.s create mode 100644 src/hexagon/udivmoddi4.s create mode 100644 src/hexagon/udivmodsi4.s create mode 100644 src/hexagon/udivsi3.s create mode 100644 src/hexagon/umoddi3.s create mode 100644 src/hexagon/umodsi3.s diff --git a/src/hexagon.rs b/src/hexagon.rs new file mode 100644 index 000000000..91cf91c31 --- /dev/null +++ b/src/hexagon.rs @@ -0,0 +1,55 @@ +#![cfg(not(feature = "no-asm"))] + +use core::arch::global_asm; + +global_asm!(include_str!("hexagon/func_macro.s"), options(raw)); + +global_asm!(include_str!("hexagon/dfaddsub.s"), options(raw)); + +global_asm!(include_str!("hexagon/dfdiv.s"), options(raw)); + +global_asm!(include_str!("hexagon/dffma.s"), options(raw)); + +global_asm!(include_str!("hexagon/dfminmax.s"), options(raw)); + +global_asm!(include_str!("hexagon/dfmul.s"), options(raw)); + +global_asm!(include_str!("hexagon/dfsqrt.s"), options(raw)); + +global_asm!(include_str!("hexagon/divdi3.s"), options(raw)); + +global_asm!(include_str!("hexagon/divsi3.s"), options(raw)); + +global_asm!(include_str!("hexagon/fastmath2_dlib_asm.s"), options(raw)); + 
+global_asm!(include_str!("hexagon/fastmath2_ldlib_asm.s"), options(raw)); + +global_asm!( + include_str!("hexagon/memcpy_forward_vp4cp4n2.s"), + options(raw) +); + +global_asm!( + include_str!("hexagon/memcpy_likely_aligned.s"), + options(raw) +); + +global_asm!(include_str!("hexagon/moddi3.s"), options(raw)); + +global_asm!(include_str!("hexagon/modsi3.s"), options(raw)); + +global_asm!(include_str!("hexagon/sfdiv_opt.s"), options(raw)); + +global_asm!(include_str!("hexagon/sfsqrt_opt.s"), options(raw)); + +global_asm!(include_str!("hexagon/udivdi3.s"), options(raw)); + +global_asm!(include_str!("hexagon/udivmoddi4.s"), options(raw)); + +global_asm!(include_str!("hexagon/udivmodsi4.s"), options(raw)); + +global_asm!(include_str!("hexagon/udivsi3.s"), options(raw)); + +global_asm!(include_str!("hexagon/umoddi3.s"), options(raw)); + +global_asm!(include_str!("hexagon/umodsi3.s"), options(raw)); diff --git a/src/hexagon/dfaddsub.s b/src/hexagon/dfaddsub.s new file mode 100644 index 000000000..1f59e460b --- /dev/null +++ b/src/hexagon/dfaddsub.s @@ -0,0 +1,321 @@ + .text + .global __hexagon_adddf3 + .global __hexagon_subdf3 + .type __hexagon_adddf3, @function + .type __hexagon_subdf3, @function + +.global __qdsp_adddf3 ; .set __qdsp_adddf3, __hexagon_adddf3 +.global __hexagon_fast_adddf3 ; .set __hexagon_fast_adddf3, __hexagon_adddf3 +.global __hexagon_fast2_adddf3 ; .set __hexagon_fast2_adddf3, __hexagon_adddf3 +.global __qdsp_subdf3 ; .set __qdsp_subdf3, __hexagon_subdf3 +.global __hexagon_fast_subdf3 ; .set __hexagon_fast_subdf3, __hexagon_subdf3 +.global __hexagon_fast2_subdf3 ; .set __hexagon_fast2_subdf3, __hexagon_subdf3 + + .p2align 5 +__hexagon_adddf3: + { + r4 = extractu(r1,#11,#20) + r5 = extractu(r3,#11,#20) + r13:12 = combine(##0x20000000,#0) + } + { + p3 = dfclass(r1:0,#2) + p3 = dfclass(r3:2,#2) + r9:8 = r13:12 + p2 = cmp.gtu(r5,r4) + } + { + if (!p3) jump .Ladd_abnormal + if (p2) r1:0 = r3:2 + if (p2) r3:2 = r1:0 + if (p2) r5:4 = combine(r4,r5) + } + 
{ + r13:12 = insert(r1:0,#52,#11 -2) + r9:8 = insert(r3:2,#52,#11 -2) + r15 = sub(r4,r5) + r7:6 = combine(#62,#1) + } + + + + + +.Ladd_continue: + { + r15 = min(r15,r7) + + r11:10 = neg(r13:12) + p2 = cmp.gt(r1,#-1) + r14 = #0 + } + { + if (!p2) r13:12 = r11:10 + r11:10 = extractu(r9:8,r15:14) + r9:8 = ASR(r9:8,r15) + + + + + r15:14 = #0 + } + { + p1 = cmp.eq(r11:10,r15:14) + if (!p1.new) r8 = or(r8,r6) + r5 = add(r4,#-1024 -60) + p3 = cmp.gt(r3,#-1) + } + { + r13:12 = add(r13:12,r9:8) + r11:10 = sub(r13:12,r9:8) + r7:6 = combine(#54,##2045) + } + { + p0 = cmp.gtu(r4,r7) + p0 = !cmp.gtu(r4,r6) + if (!p0.new) jump:nt .Ladd_ovf_unf + if (!p3) r13:12 = r11:10 + } + { + r1:0 = convert_d2df(r13:12) + p0 = cmp.eq(r13,#0) + p0 = cmp.eq(r12,#0) + if (p0.new) jump:nt .Ladd_zero + } + { + r1 += asl(r5,#20) + jumpr r31 + } + .falign +__hexagon_subdf3: + { + r3 = togglebit(r3,#31) + jump __qdsp_adddf3 + } + + + .falign +.Ladd_zero: + + + { + r28 = USR + r1:0 = #0 + r3 = #1 + } + { + r28 = extractu(r28,#2,#22) + r3 = asl(r3,#31) + } + { + p0 = cmp.eq(r28,#2) + if (p0.new) r1 = xor(r1,r3) + jumpr r31 + } + .falign +.Ladd_ovf_unf: + { + r1:0 = convert_d2df(r13:12) + p0 = cmp.eq(r13,#0) + p0 = cmp.eq(r12,#0) + if (p0.new) jump:nt .Ladd_zero + } + { + r28 = extractu(r1,#11,#20) + r1 += asl(r5,#20) + } + { + r5 = add(r5,r28) + r3:2 = combine(##0x00100000,#0) + } + { + p0 = cmp.gt(r5,##1024 +1024 -2) + if (p0.new) jump:nt .Ladd_ovf + } + { + p0 = cmp.gt(r5,#0) + if (p0.new) jumpr:t r31 + r28 = sub(#1,r5) + } + { + r3:2 = insert(r1:0,#52,#0) + r1:0 = r13:12 + } + { + r3:2 = lsr(r3:2,r28) + } + { + r1:0 = insert(r3:2,#63,#0) + jumpr r31 + } + .falign +.Ladd_ovf: + + { + r1:0 = r13:12 + r28 = USR + r13:12 = combine(##0x7fefffff,#-1) + } + { + r5 = extractu(r28,#2,#22) + r28 = or(r28,#0x28) + r9:8 = combine(##0x7ff00000,#0) + } + { + USR = r28 + r5 ^= lsr(r1,#31) + r28 = r5 + } + { + p0 = !cmp.eq(r28,#1) + p0 = !cmp.eq(r5,#2) + if (p0.new) r13:12 = r9:8 + } + { + r1:0 = 
insert(r13:12,#63,#0) + } + { + p0 = dfcmp.eq(r1:0,r1:0) + jumpr r31 + } + +.Ladd_abnormal: + { + r13:12 = extractu(r1:0,#63,#0) + r9:8 = extractu(r3:2,#63,#0) + } + { + p3 = cmp.gtu(r13:12,r9:8) + if (!p3.new) r1:0 = r3:2 + if (!p3.new) r3:2 = r1:0 + } + { + + p0 = dfclass(r1:0,#0x0f) + if (!p0.new) jump:nt .Linvalid_nan_add + if (!p3) r13:12 = r9:8 + if (!p3) r9:8 = r13:12 + } + { + + + p1 = dfclass(r1:0,#0x08) + if (p1.new) jump:nt .Linf_add + } + { + p2 = dfclass(r3:2,#0x01) + if (p2.new) jump:nt .LB_zero + r13:12 = #0 + } + + { + p0 = dfclass(r1:0,#4) + if (p0.new) jump:nt .Ladd_two_subnormal + r13:12 = combine(##0x20000000,#0) + } + { + r4 = extractu(r1,#11,#20) + r5 = #1 + + r9:8 = asl(r9:8,#11 -2) + } + + + + { + r13:12 = insert(r1:0,#52,#11 -2) + r15 = sub(r4,r5) + r7:6 = combine(#62,#1) + jump .Ladd_continue + } + +.Ladd_two_subnormal: + { + r13:12 = extractu(r1:0,#63,#0) + r9:8 = extractu(r3:2,#63,#0) + } + { + r13:12 = neg(r13:12) + r9:8 = neg(r9:8) + p0 = cmp.gt(r1,#-1) + p1 = cmp.gt(r3,#-1) + } + { + if (p0) r13:12 = r1:0 + if (p1) r9:8 = r3:2 + } + { + r13:12 = add(r13:12,r9:8) + } + { + r9:8 = neg(r13:12) + p0 = cmp.gt(r13,#-1) + r3:2 = #0 + } + { + if (!p0) r1:0 = r9:8 + if (p0) r1:0 = r13:12 + r3 = ##0x80000000 + } + { + if (!p0) r1 = or(r1,r3) + p0 = dfcmp.eq(r1:0,r3:2) + if (p0.new) jump:nt .Lzero_plus_zero + } + { + jumpr r31 + } + +.Linvalid_nan_add: + { + r28 = convert_df2sf(r1:0) + p0 = dfclass(r3:2,#0x0f) + if (p0.new) r3:2 = r1:0 + } + { + r2 = convert_df2sf(r3:2) + r1:0 = #-1 + jumpr r31 + } + .falign +.LB_zero: + { + p0 = dfcmp.eq(r13:12,r1:0) + if (!p0.new) jumpr:t r31 + } + + + + +.Lzero_plus_zero: + { + p0 = cmp.eq(r1:0,r3:2) + if (p0.new) jumpr:t r31 + } + { + r28 = USR + } + { + r28 = extractu(r28,#2,#22) + r1:0 = #0 + } + { + p0 = cmp.eq(r28,#2) + if (p0.new) r1 = ##0x80000000 + jumpr r31 + } +.Linf_add: + + { + p0 = !cmp.eq(r1,r3) + p0 = dfclass(r3:2,#8) + if (!p0.new) jumpr:t r31 + } + { + r2 = ##0x7f800001 + } + { + r1:0 = 
convert_sf2df(r2) + jumpr r31 + } +.size __hexagon_adddf3,.-__hexagon_adddf3 diff --git a/src/hexagon/dfdiv.s b/src/hexagon/dfdiv.s new file mode 100644 index 000000000..6d65dbfc4 --- /dev/null +++ b/src/hexagon/dfdiv.s @@ -0,0 +1,372 @@ + .text + .global __hexagon_divdf3 + .type __hexagon_divdf3,@function + .global __qdsp_divdf3 ; .set __qdsp_divdf3, __hexagon_divdf3 + .global __hexagon_fast_divdf3 ; .set __hexagon_fast_divdf3, __hexagon_divdf3 + .global __hexagon_fast2_divdf3 ; .set __hexagon_fast2_divdf3, __hexagon_divdf3 + .p2align 5 +__hexagon_divdf3: + { + p2 = dfclass(r1:0,#0x02) + p2 = dfclass(r3:2,#0x02) + r13:12 = combine(r3,r1) + r28 = xor(r1,r3) + } + { + if (!p2) jump .Ldiv_abnormal + r7:6 = extractu(r3:2,#23,#52 -23) + r8 = ##0x3f800001 + } + { + r9 = or(r8,r6) + r13 = extractu(r13,#11,#52 -32) + r12 = extractu(r12,#11,#52 -32) + p3 = cmp.gt(r28,#-1) + } + + +.Ldenorm_continue: + { + r11,p0 = sfrecipa(r8,r9) + r10 = and(r8,#-2) + r28 = #1 + r12 = sub(r12,r13) + } + + + { + r10 -= sfmpy(r11,r9):lib + r1 = insert(r28,#11 +1,#52 -32) + r13 = ##0x00800000 << 3 + } + { + r11 += sfmpy(r11,r10):lib + r3 = insert(r28,#11 +1,#52 -32) + r10 = and(r8,#-2) + } + { + r10 -= sfmpy(r11,r9):lib + r5 = #-0x3ff +1 + r4 = #0x3ff -1 + } + { + r11 += sfmpy(r11,r10):lib + p1 = cmp.gt(r12,r5) + p1 = !cmp.gt(r12,r4) + } + { + r13 = insert(r11,#23,#3) + r5:4 = #0 + r12 = add(r12,#-61) + } + + + + + { + r13 = add(r13,#((-3) << 3)) + } + { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASL(r7:6, # ( 14 )); r1:0 -= asl(r15:14, # 32); } + { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 1 )); r1:0 -= asl(r15:14, # 32); } + { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 16 )); r1:0 -= asl(r15:14, # 32); } + { r7:6 = mpyu(r13,r1); r1:0 = 
asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 31 )); r1:0 -= asl(r15:14, # 32); r7:6=# ( 0 ); } + + + + + + + + { + + r15:14 = sub(r1:0,r3:2) + p0 = cmp.gtu(r3:2,r1:0) + + if (!p0.new) r6 = #2 + } + { + r5:4 = add(r5:4,r7:6) + if (!p0) r1:0 = r15:14 + r15:14 = #0 + } + { + p0 = cmp.eq(r1:0,r15:14) + if (!p0.new) r4 = or(r4,r28) + } + { + r7:6 = neg(r5:4) + } + { + if (!p3) r5:4 = r7:6 + } + { + r1:0 = convert_d2df(r5:4) + if (!p1) jump .Ldiv_ovf_unf + } + { + r1 += asl(r12,#52 -32) + jumpr r31 + } + +.Ldiv_ovf_unf: + { + r1 += asl(r12,#52 -32) + r13 = extractu(r1,#11,#52 -32) + } + { + r7:6 = abs(r5:4) + r12 = add(r12,r13) + } + { + p0 = cmp.gt(r12,##0x3ff +0x3ff) + if (p0.new) jump:nt .Ldiv_ovf + } + { + p0 = cmp.gt(r12,#0) + if (p0.new) jump:nt .Lpossible_unf2 + } + { + r13 = add(clb(r7:6),#-1) + r12 = sub(#7,r12) + r10 = USR + r11 = #63 + } + { + r13 = min(r12,r11) + r11 = or(r10,#0x030) + r7:6 = asl(r7:6,r13) + r12 = #0 + } + { + r15:14 = extractu(r7:6,r13:12) + r7:6 = lsr(r7:6,r13) + r3:2 = #1 + } + { + p0 = cmp.gtu(r3:2,r15:14) + if (!p0.new) r6 = or(r2,r6) + r7 = setbit(r7,#52 -32+4) + } + { + r5:4 = neg(r7:6) + p0 = bitsclr(r6,#(1<<4)-1) + if (!p0.new) r10 = r11 + } + { + USR = r10 + if (p3) r5:4 = r7:6 + r10 = #-0x3ff -(52 +4) + } + { + r1:0 = convert_d2df(r5:4) + } + { + r1 += asl(r10,#52 -32) + jumpr r31 + } + + +.Lpossible_unf2: + + + { + r3:2 = extractu(r1:0,#63,#0) + r15:14 = combine(##0x00100000,#0) + r10 = #0x7FFF + } + { + p0 = dfcmp.eq(r15:14,r3:2) + p0 = bitsset(r7,r10) + } + + + + + + + { + if (!p0) jumpr r31 + r10 = USR + } + + { + r10 = or(r10,#0x30) + } + { + USR = r10 + } + { + p0 = dfcmp.eq(r1:0,r1:0) + jumpr r31 + } + +.Ldiv_ovf: + + + + { + r10 = USR + r3:2 = combine(##0x7fefffff,#-1) + r1 = mux(p3,#0,#-1) + } + { + r7:6 = combine(##0x7ff00000,#0) + r5 = extractu(r10,#2,#22) + r10 = or(r10,#0x28) + } + { + USR = r10 + r5 ^= lsr(r1,#31) + r4 = r5 + } + { + p0 = !cmp.eq(r4,#1) + p0 
= !cmp.eq(r5,#2) + if (p0.new) r3:2 = r7:6 + p0 = dfcmp.eq(r3:2,r3:2) + } + { + r1:0 = insert(r3:2,#63,#0) + jumpr r31 + } + + + + + + + +.Ldiv_abnormal: + { + p0 = dfclass(r1:0,#0x0F) + p0 = dfclass(r3:2,#0x0F) + p3 = cmp.gt(r28,#-1) + } + { + p1 = dfclass(r1:0,#0x08) + p1 = dfclass(r3:2,#0x08) + } + { + p2 = dfclass(r1:0,#0x01) + p2 = dfclass(r3:2,#0x01) + } + { + if (!p0) jump .Ldiv_nan + if (p1) jump .Ldiv_invalid + } + { + if (p2) jump .Ldiv_invalid + } + { + p2 = dfclass(r1:0,#(0x0F ^ 0x01)) + p2 = dfclass(r3:2,#(0x0F ^ 0x08)) + } + { + p1 = dfclass(r1:0,#(0x0F ^ 0x08)) + p1 = dfclass(r3:2,#(0x0F ^ 0x01)) + } + { + if (!p2) jump .Ldiv_zero_result + if (!p1) jump .Ldiv_inf_result + } + + + + + + { + p0 = dfclass(r1:0,#0x02) + p1 = dfclass(r3:2,#0x02) + r10 = ##0x00100000 + } + { + r13:12 = combine(r3,r1) + r1 = insert(r10,#11 +1,#52 -32) + r3 = insert(r10,#11 +1,#52 -32) + } + { + if (p0) r1 = or(r1,r10) + if (p1) r3 = or(r3,r10) + } + { + r5 = add(clb(r1:0),#-11) + r4 = add(clb(r3:2),#-11) + r10 = #1 + } + { + r12 = extractu(r12,#11,#52 -32) + r13 = extractu(r13,#11,#52 -32) + } + { + r1:0 = asl(r1:0,r5) + r3:2 = asl(r3:2,r4) + if (!p0) r12 = sub(r10,r5) + if (!p1) r13 = sub(r10,r4) + } + { + r7:6 = extractu(r3:2,#23,#52 -23) + } + { + r9 = or(r8,r6) + jump .Ldenorm_continue + } + +.Ldiv_zero_result: + { + r1 = xor(r1,r3) + r3:2 = #0 + } + { + r1:0 = insert(r3:2,#63,#0) + jumpr r31 + } +.Ldiv_inf_result: + { + p2 = dfclass(r3:2,#0x01) + p2 = dfclass(r1:0,#(0x0F ^ 0x08)) + } + { + r10 = USR + if (!p2) jump 1f + r1 = xor(r1,r3) + } + { + r10 = or(r10,#0x04) + } + { + USR = r10 + } +1: + { + r3:2 = combine(##0x7ff00000,#0) + p0 = dfcmp.uo(r3:2,r3:2) + } + { + r1:0 = insert(r3:2,#63,#0) + jumpr r31 + } +.Ldiv_nan: + { + p0 = dfclass(r1:0,#0x10) + p1 = dfclass(r3:2,#0x10) + if (!p0.new) r1:0 = r3:2 + if (!p1.new) r3:2 = r1:0 + } + { + r5 = convert_df2sf(r1:0) + r4 = convert_df2sf(r3:2) + } + { + r1:0 = #-1 + jumpr r31 + } + +.Ldiv_invalid: + { + r10 = ##0x7f800001 
+ } + { + r1:0 = convert_sf2df(r10) + jumpr r31 + } +.size __hexagon_divdf3,.-__hexagon_divdf3 diff --git a/src/hexagon/dffma.s b/src/hexagon/dffma.s new file mode 100644 index 000000000..043a1d294 --- /dev/null +++ b/src/hexagon/dffma.s @@ -0,0 +1,536 @@ + .text + .global __hexagon_fmadf4 + .type __hexagon_fmadf4,@function + .global __hexagon_fmadf5 + .type __hexagon_fmadf5,@function + .global fma + .type fma,@function + .global __qdsp_fmadf5 ; .set __qdsp_fmadf5, __hexagon_fmadf5 + .p2align 5 +__hexagon_fmadf4: +__hexagon_fmadf5: +fma: + { + p0 = dfclass(r1:0,#2) + p0 = dfclass(r3:2,#2) + r13:12 = #0 + r15:14 = #0 + } + { + r13:12 = insert(r1:0,#52,#11 -3) + r15:14 = insert(r3:2,#52,#11 -3) + r7 = ##0x10000000 + allocframe(#32) + } + { + r9:8 = mpyu(r12,r14) + if (!p0) jump .Lfma_abnormal_ab + r13 = or(r13,r7) + r15 = or(r15,r7) + } + { + p0 = dfclass(r5:4,#2) + if (!p0.new) jump:nt .Lfma_abnormal_c + r11:10 = combine(r7,#0) + r7:6 = combine(#0,r9) + } +.Lfma_abnormal_c_restart: + { + r7:6 += mpyu(r14,r13) + r11:10 = insert(r5:4,#52,#11 -3) + memd(r29+#0) = r17:16 + memd(r29+#8) = r19:18 + } + { + r7:6 += mpyu(r12,r15) + r19:18 = neg(r11:10) + p0 = cmp.gt(r5,#-1) + r28 = xor(r1,r3) + } + { + r18 = extractu(r1,#11,#20) + r19 = extractu(r3,#11,#20) + r17:16 = combine(#0,r7) + if (!p0) r11:10 = r19:18 + } + { + r17:16 += mpyu(r13,r15) + r9:8 = combine(r6,r8) + r18 = add(r18,r19) + + + + + r19 = extractu(r5,#11,#20) + } + { + r18 = add(r18,#-1023 +(4)) + p3 = !cmp.gt(r28,#-1) + r7:6 = #0 + r15:14 = #0 + } + { + r7:6 = sub(r7:6,r9:8,p3):carry + p0 = !cmp.gt(r28,#-1) + p1 = cmp.gt(r19,r18) + if (p1.new) r19:18 = combine(r18,r19) + } + { + r15:14 = sub(r15:14,r17:16,p3):carry + if (p0) r9:8 = r7:6 + + + + + r7:6 = #0 + r19 = sub(r18,r19) + } + { + if (p0) r17:16 = r15:14 + p0 = cmp.gt(r19,#63) + if (p1) r9:8 = r7:6 + if (p1) r7:6 = r9:8 + } + + + + + + + + { + if (p1) r17:16 = r11:10 + if (p1) r11:10 = r17:16 + if (p0) r19 = add(r19,#-64) + r28 = #63 + } + { + + if (p0) 
r7:6 = r11:10 + r28 = asr(r11,#31) + r13 = min(r19,r28) + r12 = #0 + } + + + + + + + { + if (p0) r11:10 = combine(r28,r28) + r5:4 = extract(r7:6,r13:12) + r7:6 = lsr(r7:6,r13) + r12 = sub(#64,r13) + } + { + r15:14 = #0 + r28 = #-2 + r7:6 |= lsl(r11:10,r12) + r11:10 = asr(r11:10,r13) + } + { + p3 = cmp.gtu(r5:4,r15:14) + if (p3.new) r6 = and(r6,r28) + + + + r15:14 = #1 + r5:4 = #0 + } + { + r9:8 = add(r7:6,r9:8,p3):carry + } + { + r17:16 = add(r11:10,r17:16,p3):carry + r28 = #62 + } + + + + + + + + { + r12 = add(clb(r17:16),#-2) + if (!cmp.eq(r12.new,r28)) jump:t 1f + } + + { + r11:10 = extractu(r9:8,#62,#2) + r9:8 = asl(r9:8,#62) + r18 = add(r18,#-62) + } + { + r17:16 = insert(r11:10,#62,#0) + } + { + r12 = add(clb(r17:16),#-2) + } + .falign +1: + { + r11:10 = asl(r17:16,r12) + r5:4 |= asl(r9:8,r12) + r13 = sub(#64,r12) + r18 = sub(r18,r12) + } + { + r11:10 |= lsr(r9:8,r13) + p2 = cmp.gtu(r15:14,r5:4) + r28 = #1023 +1023 -2 + } + { + if (!p2) r10 = or(r10,r14) + + p0 = !cmp.gt(r18,r28) + p0 = cmp.gt(r18,#1) + if (!p0.new) jump:nt .Lfma_ovf_unf + } + { + + p0 = cmp.gtu(r15:14,r11:10) + r1:0 = convert_d2df(r11:10) + r18 = add(r18,#-1023 -60) + r17:16 = memd(r29+#0) + } + { + r1 += asl(r18,#20) + r19:18 = memd(r29+#8) + if (!p0) dealloc_return + } +.Ladd_yields_zero: + + { + r28 = USR + r1:0 = #0 + } + { + r28 = extractu(r28,#2,#22) + r17:16 = memd(r29+#0) + r19:18 = memd(r29+#8) + } + { + p0 = cmp.eq(r28,#2) + if (p0.new) r1 = ##0x80000000 + dealloc_return + } +.Lfma_ovf_unf: + { + p0 = cmp.gtu(r15:14,r11:10) + if (p0.new) jump:nt .Ladd_yields_zero + } + { + r1:0 = convert_d2df(r11:10) + r18 = add(r18,#-1023 -60) + r28 = r18 + } + + + { + r1 += asl(r18,#20) + r7 = extractu(r1,#11,#20) + } + { + r6 = add(r18,r7) + r17:16 = memd(r29+#0) + r19:18 = memd(r29+#8) + r9:8 = abs(r11:10) + } + { + p0 = cmp.gt(r6,##1023 +1023) + if (p0.new) jump:nt .Lfma_ovf + } + { + p0 = cmp.gt(r6,#0) + if (p0.new) jump:nt .Lpossible_unf0 + } + { + + + + r7 = add(clb(r9:8),#-2) + r6 = 
sub(#1+5,r28) + p3 = cmp.gt(r11,#-1) + } + + + + { + r6 = add(r6,r7) + r9:8 = asl(r9:8,r7) + r1 = USR + r28 = #63 + } + { + r7 = min(r6,r28) + r6 = #0 + r0 = #0x0030 + } + { + r3:2 = extractu(r9:8,r7:6) + r9:8 = asr(r9:8,r7) + } + { + p0 = cmp.gtu(r15:14,r3:2) + if (!p0.new) r8 = or(r8,r14) + r9 = setbit(r9,#20 +3) + } + { + r11:10 = neg(r9:8) + p1 = bitsclr(r8,#(1<<3)-1) + if (!p1.new) r1 = or(r1,r0) + r3:2 = #0 + } + { + if (p3) r11:10 = r9:8 + USR = r1 + r28 = #-1023 -(52 +3) + } + { + r1:0 = convert_d2df(r11:10) + } + { + r1 += asl(r28,#20) + dealloc_return + } +.Lpossible_unf0: + { + r28 = ##0x7fefffff + r9:8 = abs(r11:10) + } + { + p0 = cmp.eq(r0,#0) + p0 = bitsclr(r1,r28) + if (!p0.new) dealloc_return:t + r28 = #0x7fff + } + { + p0 = bitsset(r9,r28) + r3 = USR + r2 = #0x0030 + } + { + if (p0) r3 = or(r3,r2) + } + { + USR = r3 + } + { + p0 = dfcmp.eq(r1:0,r1:0) + dealloc_return + } +.Lfma_ovf: + { + r28 = USR + r11:10 = combine(##0x7fefffff,#-1) + r1:0 = r11:10 + } + { + r9:8 = combine(##0x7ff00000,#0) + r3 = extractu(r28,#2,#22) + r28 = or(r28,#0x28) + } + { + USR = r28 + r3 ^= lsr(r1,#31) + r2 = r3 + } + { + p0 = !cmp.eq(r2,#1) + p0 = !cmp.eq(r3,#2) + } + { + p0 = dfcmp.eq(r9:8,r9:8) + if (p0.new) r11:10 = r9:8 + } + { + r1:0 = insert(r11:10,#63,#0) + dealloc_return + } +.Lfma_abnormal_ab: + { + r9:8 = extractu(r1:0,#63,#0) + r11:10 = extractu(r3:2,#63,#0) + deallocframe + } + { + p3 = cmp.gtu(r9:8,r11:10) + if (!p3.new) r1:0 = r3:2 + if (!p3.new) r3:2 = r1:0 + } + { + p0 = dfclass(r1:0,#0x0f) + if (!p0.new) jump:nt .Lnan + if (!p3) r9:8 = r11:10 + if (!p3) r11:10 = r9:8 + } + { + p1 = dfclass(r1:0,#0x08) + p1 = dfclass(r3:2,#0x0e) + } + { + p0 = dfclass(r1:0,#0x08) + p0 = dfclass(r3:2,#0x01) + } + { + if (p1) jump .Lab_inf + p2 = dfclass(r3:2,#0x01) + } + { + if (p0) jump .Linvalid + if (p2) jump .Lab_true_zero + r28 = ##0x7c000000 + } + + + + + + { + p0 = bitsclr(r1,r28) + if (p0.new) jump:nt .Lfma_ab_tiny + } + { + r28 = add(clb(r11:10),#-11) + } + { + 
r11:10 = asl(r11:10,r28) + } + { + r3:2 = insert(r11:10,#63,#0) + r1 -= asl(r28,#20) + } + jump fma + +.Lfma_ab_tiny: + r9:8 = combine(##0x00100000,#0) + { + r1:0 = insert(r9:8,#63,#0) + r3:2 = insert(r9:8,#63,#0) + } + jump fma + +.Lab_inf: + { + r3:2 = lsr(r3:2,#63) + p0 = dfclass(r5:4,#0x10) + } + { + r1:0 ^= asl(r3:2,#63) + if (p0) jump .Lnan + } + { + p1 = dfclass(r5:4,#0x08) + if (p1.new) jump:nt .Lfma_inf_plus_inf + } + + { + jumpr r31 + } + .falign +.Lfma_inf_plus_inf: + { + p0 = dfcmp.eq(r1:0,r5:4) + if (!p0.new) jump:nt .Linvalid + } + { + jumpr r31 + } + +.Lnan: + { + p0 = dfclass(r3:2,#0x10) + p1 = dfclass(r5:4,#0x10) + if (!p0.new) r3:2 = r1:0 + if (!p1.new) r5:4 = r1:0 + } + { + r3 = convert_df2sf(r3:2) + r2 = convert_df2sf(r5:4) + } + { + r3 = convert_df2sf(r1:0) + r1:0 = #-1 + jumpr r31 + } + +.Linvalid: + { + r28 = ##0x7f800001 + } + { + r1:0 = convert_sf2df(r28) + jumpr r31 + } + +.Lab_true_zero: + + { + p0 = dfclass(r5:4,#0x10) + if (p0.new) jump:nt .Lnan + if (p0.new) r1:0 = r5:4 + } + { + p0 = dfcmp.eq(r3:2,r5:4) + r1 = lsr(r1,#31) + } + { + r3 ^= asl(r1,#31) + if (!p0) r1:0 = r5:4 + if (!p0) jumpr r31 + } + + { + p0 = cmp.eq(r3:2,r5:4) + if (p0.new) jumpr:t r31 + r1:0 = r3:2 + } + { + r28 = USR + } + { + r28 = extractu(r28,#2,#22) + r1:0 = #0 + } + { + p0 = cmp.eq(r28,#2) + if (p0.new) r1 = ##0x80000000 + jumpr r31 + } + + + + + .falign +.Lfma_abnormal_c: + + + { + p0 = dfclass(r5:4,#0x10) + if (p0.new) jump:nt .Lnan + if (p0.new) r1:0 = r5:4 + deallocframe + } + { + p0 = dfclass(r5:4,#0x08) + if (p0.new) r1:0 = r5:4 + if (p0.new) jumpr:nt r31 + } + + + { + p0 = dfclass(r5:4,#0x01) + if (p0.new) jump:nt __hexagon_muldf3 + r28 = #1 + } + + + { + allocframe(#32) + r11:10 = #0 + r5 = insert(r28,#11,#20) + jump .Lfma_abnormal_c_restart + } +.size fma,.-fma diff --git a/src/hexagon/dfminmax.s b/src/hexagon/dfminmax.s new file mode 100644 index 000000000..3337a3223 --- /dev/null +++ b/src/hexagon/dfminmax.s @@ -0,0 +1,51 @@ + .text + .global 
__hexagon_mindf3 + .global __hexagon_maxdf3 + .global fmin + .type fmin,@function + .global fmax + .type fmax,@function + .type __hexagon_mindf3,@function + .type __hexagon_maxdf3,@function + .global __qdsp_mindf3 ; .set __qdsp_mindf3, __hexagon_mindf3 + .global __qdsp_maxdf3 ; .set __qdsp_maxdf3, __hexagon_maxdf3 + .p2align 5 +__hexagon_mindf3: +fmin: + { + p0 = dfclass(r1:0,#0x10) + p1 = dfcmp.gt(r1:0,r3:2) + r5:4 = r1:0 + } + { + if (p0) r1:0 = r3:2 + if (p1) r1:0 = r3:2 + p2 = dfcmp.eq(r1:0,r3:2) + if (!p2.new) jumpr:t r31 + } + + { + r1:0 = or(r5:4,r3:2) + jumpr r31 + } +.size __hexagon_mindf3,.-__hexagon_mindf3 + .falign +__hexagon_maxdf3: +fmax: + { + p0 = dfclass(r1:0,#0x10) + p1 = dfcmp.gt(r3:2,r1:0) + r5:4 = r1:0 + } + { + if (p0) r1:0 = r3:2 + if (p1) r1:0 = r3:2 + p2 = dfcmp.eq(r1:0,r3:2) + if (!p2.new) jumpr:t r31 + } + + { + r1:0 = and(r5:4,r3:2) + jumpr r31 + } +.size __hexagon_maxdf3,.-__hexagon_maxdf3 diff --git a/src/hexagon/dfmul.s b/src/hexagon/dfmul.s new file mode 100644 index 000000000..32fc674f9 --- /dev/null +++ b/src/hexagon/dfmul.s @@ -0,0 +1,309 @@ + .text + .global __hexagon_muldf3 + .type __hexagon_muldf3,@function + .global __qdsp_muldf3 ; .set __qdsp_muldf3, __hexagon_muldf3 + .global __hexagon_fast_muldf3 ; .set __hexagon_fast_muldf3, __hexagon_muldf3 + .global __hexagon_fast2_muldf3 ; .set __hexagon_fast2_muldf3, __hexagon_muldf3 + .p2align 5 +__hexagon_muldf3: + { + p0 = dfclass(r1:0,#2) + p0 = dfclass(r3:2,#2) + r13:12 = combine(##0x40000000,#0) + } + { + r13:12 = insert(r1:0,#52,#11 -1) + r5:4 = asl(r3:2,#11 -1) + r28 = #-1024 + r9:8 = #1 + } + { + r7:6 = mpyu(r4,r13) + r5:4 = insert(r9:8,#2,#62) + } + + + + + { + r15:14 = mpyu(r12,r4) + r7:6 += mpyu(r12,r5) + } + { + r7:6 += lsr(r15:14,#32) + r11:10 = mpyu(r13,r5) + r5:4 = combine(##1024 +1024 -4,#0) + } + { + r11:10 += lsr(r7:6,#32) + if (!p0) jump .Lmul_abnormal + p1 = cmp.eq(r14,#0) + p1 = cmp.eq(r6,#0) + } + { + if (!p1) r10 = or(r10,r8) + r6 = extractu(r1,#11,#20) + r7 = 
extractu(r3,#11,#20) + } + { + r15:14 = neg(r11:10) + r6 += add(r28,r7) + r28 = xor(r1,r3) + } + { + if (!p2.new) r11:10 = r15:14 + p2 = cmp.gt(r28,#-1) + p0 = !cmp.gt(r6,r5) + p0 = cmp.gt(r6,r4) + if (!p0.new) jump:nt .Lmul_ovf_unf + } + { + r1:0 = convert_d2df(r11:10) + r6 = add(r6,#-1024 -58) + } + { + r1 += asl(r6,#20) + jumpr r31 + } + + .falign +.Lpossible_unf1: + { + p0 = cmp.eq(r0,#0) + p0 = bitsclr(r1,r4) + if (!p0.new) jumpr:t r31 + r5 = #0x7fff + } + { + p0 = bitsset(r13,r5) + r4 = USR + r5 = #0x030 + } + { + if (p0) r4 = or(r4,r5) + } + { + USR = r4 + } + { + p0 = dfcmp.eq(r1:0,r1:0) + jumpr r31 + } + .falign +.Lmul_ovf_unf: + { + r1:0 = convert_d2df(r11:10) + r13:12 = abs(r11:10) + r7 = add(r6,#-1024 -58) + } + { + r1 += asl(r7,#20) + r7 = extractu(r1,#11,#20) + r4 = ##0x7FEFFFFF + } + { + r7 += add(r6,##-1024 -58) + + r5 = #0 + } + { + p0 = cmp.gt(r7,##1024 +1024 -2) + if (p0.new) jump:nt .Lmul_ovf + } + { + p0 = cmp.gt(r7,#0) + if (p0.new) jump:nt .Lpossible_unf1 + r5 = sub(r6,r5) + r28 = #63 + } + { + r4 = #0 + r5 = sub(#5,r5) + } + { + p3 = cmp.gt(r11,#-1) + r5 = min(r5,r28) + r11:10 = r13:12 + } + { + r28 = USR + r15:14 = extractu(r11:10,r5:4) + } + { + r11:10 = asr(r11:10,r5) + r4 = #0x0030 + r1 = insert(r9,#11,#20) + } + { + p0 = cmp.gtu(r9:8,r15:14) + if (!p0.new) r10 = or(r10,r8) + r11 = setbit(r11,#20 +3) + } + { + r15:14 = neg(r11:10) + p1 = bitsclr(r10,#0x7) + if (!p1.new) r28 = or(r4,r28) + } + { + if (!p3) r11:10 = r15:14 + USR = r28 + } + { + r1:0 = convert_d2df(r11:10) + p0 = dfcmp.eq(r1:0,r1:0) + } + { + r1 = insert(r9,#11 -1,#20 +1) + jumpr r31 + } + .falign +.Lmul_ovf: + + { + r28 = USR + r13:12 = combine(##0x7fefffff,#-1) + r1:0 = r11:10 + } + { + r14 = extractu(r28,#2,#22) + r28 = or(r28,#0x28) + r5:4 = combine(##0x7ff00000,#0) + } + { + USR = r28 + r14 ^= lsr(r1,#31) + r28 = r14 + } + { + p0 = !cmp.eq(r28,#1) + p0 = !cmp.eq(r14,#2) + if (p0.new) r13:12 = r5:4 + p0 = dfcmp.eq(r1:0,r1:0) + } + { + r1:0 = insert(r13:12,#63,#0) + 
jumpr r31 + } + +.Lmul_abnormal: + { + r13:12 = extractu(r1:0,#63,#0) + r5:4 = extractu(r3:2,#63,#0) + } + { + p3 = cmp.gtu(r13:12,r5:4) + if (!p3.new) r1:0 = r3:2 + if (!p3.new) r3:2 = r1:0 + } + { + + p0 = dfclass(r1:0,#0x0f) + if (!p0.new) jump:nt .Linvalid_nan + if (!p3) r13:12 = r5:4 + if (!p3) r5:4 = r13:12 + } + { + + p1 = dfclass(r1:0,#0x08) + p1 = dfclass(r3:2,#0x0e) + } + { + + + p0 = dfclass(r1:0,#0x08) + p0 = dfclass(r3:2,#0x01) + } + { + if (p1) jump .Ltrue_inf + p2 = dfclass(r3:2,#0x01) + } + { + if (p0) jump .Linvalid_zeroinf + if (p2) jump .Ltrue_zero + r28 = ##0x7c000000 + } + + + + + + { + p0 = bitsclr(r1,r28) + if (p0.new) jump:nt .Lmul_tiny + } + { + r28 = cl0(r5:4) + } + { + r28 = add(r28,#-11) + } + { + r5:4 = asl(r5:4,r28) + } + { + r3:2 = insert(r5:4,#63,#0) + r1 -= asl(r28,#20) + } + jump __hexagon_muldf3 +.Lmul_tiny: + { + r28 = USR + r1:0 = xor(r1:0,r3:2) + } + { + r28 = or(r28,#0x30) + r1:0 = insert(r9:8,#63,#0) + r5 = extractu(r28,#2,#22) + } + { + USR = r28 + p0 = cmp.gt(r5,#1) + if (!p0.new) r0 = #0 + r5 ^= lsr(r1,#31) + } + { + p0 = cmp.eq(r5,#3) + if (!p0.new) r0 = #0 + jumpr r31 + } +.Linvalid_zeroinf: + { + r28 = USR + } + { + r1:0 = #-1 + r28 = or(r28,#2) + } + { + USR = r28 + } + { + p0 = dfcmp.uo(r1:0,r1:0) + jumpr r31 + } +.Linvalid_nan: + { + p0 = dfclass(r3:2,#0x0f) + r28 = convert_df2sf(r1:0) + if (p0.new) r3:2 = r1:0 + } + { + r2 = convert_df2sf(r3:2) + r1:0 = #-1 + jumpr r31 + } + .falign +.Ltrue_zero: + { + r1:0 = r3:2 + r3:2 = r1:0 + } +.Ltrue_inf: + { + r3 = extract(r3,#1,#31) + } + { + r1 ^= asl(r3,#31) + jumpr r31 + } +.size __hexagon_muldf3,.-__hexagon_muldf3 diff --git a/src/hexagon/dfsqrt.s b/src/hexagon/dfsqrt.s new file mode 100644 index 000000000..14f584a11 --- /dev/null +++ b/src/hexagon/dfsqrt.s @@ -0,0 +1,277 @@ + .text + .global __hexagon_sqrtdf2 + .type __hexagon_sqrtdf2,@function + .global __hexagon_sqrt + .type __hexagon_sqrt,@function + .global __qdsp_sqrtdf2 ; .set __qdsp_sqrtdf2, __hexagon_sqrtdf2; 
.type __qdsp_sqrtdf2,@function + .global __qdsp_sqrt ; .set __qdsp_sqrt, __hexagon_sqrt; .type __qdsp_sqrt,@function + .global __hexagon_fast_sqrtdf2 ; .set __hexagon_fast_sqrtdf2, __hexagon_sqrtdf2; .type __hexagon_fast_sqrtdf2,@function + .global __hexagon_fast_sqrt ; .set __hexagon_fast_sqrt, __hexagon_sqrt; .type __hexagon_fast_sqrt,@function + .global __hexagon_fast2_sqrtdf2 ; .set __hexagon_fast2_sqrtdf2, __hexagon_sqrtdf2; .type __hexagon_fast2_sqrtdf2,@function + .global __hexagon_fast2_sqrt ; .set __hexagon_fast2_sqrt, __hexagon_sqrt; .type __hexagon_fast2_sqrt,@function + .type sqrt,@function + .p2align 5 +__hexagon_sqrtdf2: +__hexagon_sqrt: + { + r15:14 = extractu(r1:0,#23 +1,#52 -23) + r28 = extractu(r1,#11,#52 -32) + r5:4 = combine(##0x3f000004,#1) + } + { + p2 = dfclass(r1:0,#0x02) + p2 = cmp.gt(r1,#-1) + if (!p2.new) jump:nt .Lsqrt_abnormal + r9 = or(r5,r14) + } + +.Ldenormal_restart: + { + r11:10 = r1:0 + r7,p0 = sfinvsqrta(r9) + r5 = and(r5,#-16) + r3:2 = #0 + } + { + r3 += sfmpy(r7,r9):lib + r2 += sfmpy(r7,r5):lib + r6 = r5 + + + r9 = and(r28,#1) + } + { + r6 -= sfmpy(r3,r2):lib + r11 = insert(r4,#11 +1,#52 -32) + p1 = cmp.gtu(r9,#0) + } + { + r3 += sfmpy(r3,r6):lib + r2 += sfmpy(r2,r6):lib + r6 = r5 + r9 = mux(p1,#8,#9) + } + { + r6 -= sfmpy(r3,r2):lib + r11:10 = asl(r11:10,r9) + r9 = mux(p1,#3,#2) + } + { + r2 += sfmpy(r2,r6):lib + + r15:14 = asl(r11:10,r9) + } + { + r2 = and(r2,##0x007fffff) + } + { + r2 = add(r2,##0x00800000 - 3) + r9 = mux(p1,#7,#8) + } + { + r8 = asl(r2,r9) + r9 = mux(p1,#15-(1+1),#15-(1+0)) + } + { + r13:12 = mpyu(r8,r15) + } + { + r1:0 = asl(r11:10,#15) + r15:14 = mpyu(r13,r13) + p1 = cmp.eq(r0,r0) + } + { + r1:0 -= asl(r15:14,#15) + r15:14 = mpyu(r13,r12) + p2 = cmp.eq(r0,r0) + } + { + r1:0 -= lsr(r15:14,#16) + p3 = cmp.eq(r0,r0) + } + { + r1:0 = mpyu(r1,r8) + } + { + r13:12 += lsr(r1:0,r9) + r9 = add(r9,#16) + r1:0 = asl(r11:10,#31) + } + + { + r15:14 = mpyu(r13,r13) + r1:0 -= mpyu(r13,r12) + } + { + r1:0 -= 
asl(r15:14,#31) + r15:14 = mpyu(r12,r12) + } + { + r1:0 -= lsr(r15:14,#33) + } + { + r1:0 = mpyu(r1,r8) + } + { + r13:12 += lsr(r1:0,r9) + r9 = add(r9,#16) + r1:0 = asl(r11:10,#47) + } + + { + r15:14 = mpyu(r13,r13) + } + { + r1:0 -= asl(r15:14,#47) + r15:14 = mpyu(r13,r12) + } + { + r1:0 -= asl(r15:14,#16) + r15:14 = mpyu(r12,r12) + } + { + r1:0 -= lsr(r15:14,#17) + } + { + r1:0 = mpyu(r1,r8) + } + { + r13:12 += lsr(r1:0,r9) + } + { + r3:2 = mpyu(r13,r12) + r5:4 = mpyu(r12,r12) + r15:14 = #0 + r1:0 = #0 + } + { + r3:2 += lsr(r5:4,#33) + r5:4 += asl(r3:2,#33) + p1 = cmp.eq(r0,r0) + } + { + r7:6 = mpyu(r13,r13) + r1:0 = sub(r1:0,r5:4,p1):carry + r9:8 = #1 + } + { + r7:6 += lsr(r3:2,#31) + r9:8 += asl(r13:12,#1) + } + + + + + + { + r15:14 = sub(r11:10,r7:6,p1):carry + r5:4 = sub(r1:0,r9:8,p2):carry + + + + + r7:6 = #1 + r11:10 = #0 + } + { + r3:2 = sub(r15:14,r11:10,p2):carry + r7:6 = add(r13:12,r7:6) + r28 = add(r28,#-0x3ff) + } + { + + if (p2) r13:12 = r7:6 + if (p2) r1:0 = r5:4 + if (p2) r15:14 = r3:2 + } + { + r5:4 = sub(r1:0,r9:8,p3):carry + r7:6 = #1 + r28 = asr(r28,#1) + } + { + r3:2 = sub(r15:14,r11:10,p3):carry + r7:6 = add(r13:12,r7:6) + } + { + if (p3) r13:12 = r7:6 + if (p3) r1:0 = r5:4 + + + + + + r2 = #1 + } + { + p0 = cmp.eq(r1:0,r11:10) + if (!p0.new) r12 = or(r12,r2) + r3 = cl0(r13:12) + r28 = add(r28,#-63) + } + + + + { + r1:0 = convert_ud2df(r13:12) + r28 = add(r28,r3) + } + { + r1 += asl(r28,#52 -32) + jumpr r31 + } +.Lsqrt_abnormal: + { + p0 = dfclass(r1:0,#0x01) + if (p0.new) jumpr:t r31 + } + { + p0 = dfclass(r1:0,#0x10) + if (p0.new) jump:nt .Lsqrt_nan + } + { + p0 = cmp.gt(r1,#-1) + if (!p0.new) jump:nt .Lsqrt_invalid_neg + if (!p0.new) r28 = ##0x7F800001 + } + { + p0 = dfclass(r1:0,#0x08) + if (p0.new) jumpr:nt r31 + } + + + { + r1:0 = extractu(r1:0,#52,#0) + } + { + r28 = add(clb(r1:0),#-11) + } + { + r1:0 = asl(r1:0,r28) + r28 = sub(#1,r28) + } + { + r1 = insert(r28,#1,#52 -32) + } + { + r3:2 = extractu(r1:0,#23 +1,#52 -23) + r5 = 
##0x3f000004 + } + { + r9 = or(r5,r2) + r5 = and(r5,#-16) + jump .Ldenormal_restart + } +.Lsqrt_nan: + { + r28 = convert_df2sf(r1:0) + r1:0 = #-1 + jumpr r31 + } +.Lsqrt_invalid_neg: + { + r1:0 = convert_sf2df(r28) + jumpr r31 + } +.size __hexagon_sqrt,.-__hexagon_sqrt +.size __hexagon_sqrtdf2,.-__hexagon_sqrtdf2 diff --git a/src/hexagon/divdi3.s b/src/hexagon/divdi3.s new file mode 100644 index 000000000..0fee6e70f --- /dev/null +++ b/src/hexagon/divdi3.s @@ -0,0 +1,64 @@ + +FUNCTION_BEGIN __hexagon_divdi3 + { + p2 = tstbit(r1,#31) + p3 = tstbit(r3,#31) + } + { + r1:0 = abs(r1:0) + r3:2 = abs(r3:2) + } + { + r6 = cl0(r1:0) + r7 = cl0(r3:2) + r5:4 = r3:2 + r3:2 = r1:0 + } + { + p3 = xor(p2,p3) + r10 = sub(r7,r6) + r1:0 = #0 + r15:14 = #1 + } + { + r11 = add(r10,#1) + r13:12 = lsl(r5:4,r10) + r15:14 = lsl(r15:14,r10) + } + { + p0 = cmp.gtu(r5:4,r3:2) + loop0(1f,r11) + } + { + if (p0) jump .hexagon_divdi3_return + } + .falign +1: + { + p0 = cmp.gtu(r13:12,r3:2) + } + { + r7:6 = sub(r3:2, r13:12) + r9:8 = add(r1:0, r15:14) + } + { + r1:0 = vmux(p0, r1:0, r9:8) + r3:2 = vmux(p0, r3:2, r7:6) + } + { + r15:14 = lsr(r15:14, #1) + r13:12 = lsr(r13:12, #1) + }:endloop0 + +.hexagon_divdi3_return: + { + r3:2 = neg(r1:0) + } + { + r1:0 = vmux(p3,r3:2,r1:0) + jumpr r31 + } +FUNCTION_END __hexagon_divdi3 + + .globl __qdsp_divdi3 + .set __qdsp_divdi3, __hexagon_divdi3 diff --git a/src/hexagon/divsi3.s b/src/hexagon/divsi3.s new file mode 100644 index 000000000..fc957a431 --- /dev/null +++ b/src/hexagon/divsi3.s @@ -0,0 +1,53 @@ + +FUNCTION_BEGIN __hexagon_divsi3 + { + p0 = cmp.ge(r0,#0) + p1 = cmp.ge(r1,#0) + r1 = abs(r0) + r2 = abs(r1) + } + { + r3 = cl0(r1) + r4 = cl0(r2) + r5 = sub(r1,r2) + p2 = cmp.gtu(r2,r1) + } + { + r0 = #0 + p1 = xor(p0,p1) + p0 = cmp.gtu(r2,r5) + if (p2) jumpr r31 + } + + { + r0 = mux(p1,#-1,#1) + if (p0) jumpr r31 + r4 = sub(r4,r3) + r3 = #1 + } + { + r0 = #0 + r3:2 = vlslw(r3:2,r4) + loop0(1f,r4) + } + .falign +1: + { + p0 = cmp.gtu(r2,r1) + if 
(!p0.new) r1 = sub(r1,r2) + if (!p0.new) r0 = add(r0,r3) + r3:2 = vlsrw(r3:2,#1) + }:endloop0 + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r0 = add(r0,r3) + if (!p1) jumpr r31 + } + { + r0 = neg(r0) + jumpr r31 + } +FUNCTION_END __hexagon_divsi3 + + .globl __qdsp_divsi3 + .set __qdsp_divsi3, __hexagon_divsi3 diff --git a/src/hexagon/fastmath2_dlib_asm.s b/src/hexagon/fastmath2_dlib_asm.s new file mode 100644 index 000000000..15c387846 --- /dev/null +++ b/src/hexagon/fastmath2_dlib_asm.s @@ -0,0 +1,266 @@ + .text + .global fast2_dadd_asm + .type fast2_dadd_asm, @function +fast2_dadd_asm: + .falign + { + R7:6 = VABSDIFFH(R1:0, R3:2) + R9 = #62 + R4 = SXTH(R0) + R5 = SXTH(R2) + } { + R6 = SXTH(R6) + P0 = CMP.GT(R4, R5); + if ( P0.new) R8 = add(R4, #1) + if (!P0.new) R8 = add(R5, #1) + } { + if ( P0) R4 = #1 + if (!P0) R5 = #1 + R0.L = #0 + R6 = MIN(R6, R9) + } { + if (!P0) R4 = add(R6, #1) + if ( P0) R5 = add(R6, #1) + R2.L = #0 + R11:10 = #0 + } { + R1:0 = ASR(R1:0, R4) + R3:2 = ASR(R3:2, R5) + } { + R1:0 = add(R1:0, R3:2) + R10.L = #0x8001 + } { + R4 = clb(R1:0) + R9 = #58 + } { + R4 = add(R4, #-1) + p0 = cmp.gt(R4, R9) + } { + R1:0 = ASL(R1:0, R4) + R8 = SUB(R8, R4) + if(p0) jump .Ldenorma + } { + R0 = insert(R8, #16, #0) + jumpr r31 + } +.Ldenorma: + { + R1:0 = R11:10 + jumpr r31 + } + .text + .global fast2_dsub_asm + .type fast2_dsub_asm, @function +fast2_dsub_asm: + .falign + { + R7:6 = VABSDIFFH(R1:0, R3:2) + R9 = #62 + R4 = SXTH(R0) + R5 = SXTH(R2) + } { + R6 = SXTH(R6) + P0 = CMP.GT(R4, R5); + if ( P0.new) R8 = add(R4, #1) + if (!P0.new) R8 = add(R5, #1) + } { + if ( P0) R4 = #1 + if (!P0) R5 = #1 + R0.L = #0 + R6 = MIN(R6, R9) + } { + if (!P0) R4 = add(R6, #1) + if ( P0) R5 = add(R6, #1) + R2.L = #0 + R11:10 = #0 + } { + R1:0 = ASR(R1:0, R4) + R3:2 = ASR(R3:2, R5) + } { + R1:0 = sub(R1:0, R3:2) + R10.L = #0x8001 + } { + R4 = clb(R1:0) + R9 = #58 + } { + R4 = add(R4, #-1) + p0 = cmp.gt(R4, R9) + } { + R1:0 = ASL(R1:0, R4) + R8 = SUB(R8, R4) + if(p0) jump .Ldenorm 
+ } { + R0 = insert(R8, #16, #0) + jumpr r31 + } +.Ldenorm: + { + R1:0 = R11:10 + jumpr r31 + } + .text + .global fast2_dmpy_asm + .type fast2_dmpy_asm, @function +fast2_dmpy_asm: + .falign + { + R13= lsr(R2, #16) + R5 = sxth(R2) + R4 = sxth(R0) + R12= lsr(R0, #16) + } + { + R11:10 = mpy(R1, R3) + R7:6 = mpy(R1, R13) + R0.L = #0x0 + R15:14 = #0 + } + { + R11:10 = add(R11:10, R11:10) + R7:6 += mpy(R3, R12) + R2.L = #0x0 + R15.H = #0x8000 + } + { + R7:6 = asr(R7:6, #15) + R12.L = #0x8001 + p1 = cmp.eq(R1:0, R3:2) + } + { + R7:6 = add(R7:6, R11:10) + R8 = add(R4, R5) + p2 = cmp.eq(R1:0, R15:14) + } + { + R9 = clb(R7:6) + R3:2 = abs(R7:6) + R11 = #58 + } + { + p1 = and(p1, p2) + R8 = sub(R8, R9) + R9 = add(R9, #-1) + p0 = cmp.gt(R9, R11) + } + { + R8 = add(R8, #1) + R1:0 = asl(R7:6, R9) + if(p1) jump .Lsat + } + { + R0 = insert(R8,#16, #0) + if(!p0) jumpr r31 + } + { + R0 = insert(R12,#16, #0) + jumpr r31 + } +.Lsat: + { + R1:0 = #-1 + } + { + R1:0 = lsr(R1:0, #1) + } + { + R0 = insert(R8,#16, #0) + jumpr r31 + } + .text + .global fast2_qd2f_asm + .type fast2_qd2f_asm, @function +fast2_qd2f_asm: + .falign + { + R3 = abs(R1):sat + R4 = sxth(R0) + R5 = #0x40 + R6.L = #0xffc0 + } + { + R0 = extractu(R3, #8, #0) + p2 = cmp.gt(R4, #126) + p3 = cmp.ge(R4, #-126) + R6.H = #0x7fff + } + { + p1 = cmp.eq(R0,#0x40) + if(p1.new) R5 = #0 + R4 = add(R4, #126) + if(!p3) jump .Lmin + } + { + p0 = bitsset(R3, R6) + R0.L = #0x0000 + R2 = add(R3, R5) + R7 = lsr(R6, #8) + } + { + if(p0) R4 = add(R4, #1) + if(p0) R3 = #0 + R2 = lsr(R2, #7) + R0.H = #0x8000 + } + { + R0 = and(R0, R1) + R6 &= asl(R4, #23) + if(!p0) R3 = and(R2, R7) + if(p2) jump .Lmax + } + { + R0 += add(R6, R3) + jumpr r31 + } +.Lmax: + { + R0.L = #0xffff; + } + { + R0.H = #0x7f7f; + jumpr r31 + } +.Lmin: + { + R0 = #0x0 + jumpr r31 + } + .text + .global fast2_f2qd_asm + .type fast2_f2qd_asm, @function +fast2_f2qd_asm: + + + + + + + + .falign + { + R1 = asl(R0, #7) + p0 = tstbit(R0, #31) + R5:4 = #0 + R3 = add(R0,R0) + } + 
{ + R1 = setbit(R1, #30) + R0= extractu(R0,#8,#23) + R4.L = #0x8001 + p1 = cmp.eq(R3, #0) + } + { + R1= extractu(R1, #31, #0) + R0= add(R0, #-126) + R2 = #0 + if(p1) jump .Lminqd + } + { + R0 = zxth(R0) + if(p0) R1= sub(R2, R1) + jumpr r31 + } +.Lminqd: + { + R1:0 = R5:4 + jumpr r31 + } diff --git a/src/hexagon/fastmath2_ldlib_asm.s b/src/hexagon/fastmath2_ldlib_asm.s new file mode 100644 index 000000000..b72b7550a --- /dev/null +++ b/src/hexagon/fastmath2_ldlib_asm.s @@ -0,0 +1,187 @@ + .text + .global fast2_ldadd_asm + .type fast2_ldadd_asm, @function +fast2_ldadd_asm: + .falign + { + R4 = memw(r29+#8) + R5 = memw(r29+#24) + r7 = r0 + } + { + R6 = sub(R4, R5):sat + P0 = CMP.GT(R4, R5); + if ( P0.new) R8 = add(R4, #1) + if (!P0.new) R8 = add(R5, #1) + } { + R6 = abs(R6):sat + if ( P0) R4 = #1 + if (!P0) R5 = #1 + R9 = #62 + } { + R6 = MIN(R6, R9) + R1:0 = memd(r29+#0) + R3:2 = memd(r29+#16) + } { + if (!P0) R4 = add(R6, #1) + if ( P0) R5 = add(R6, #1) + } { + R1:0 = ASR(R1:0, R4) + R3:2 = ASR(R3:2, R5) + } { + R1:0 = add(R1:0, R3:2) + R3:2 = #0 + } { + R4 = clb(R1:0) + R9.L =#0x0001 + } { + R8 -= add(R4, #-1) + R4 = add(R4, #-1) + p0 = cmp.gt(R4, #58) + R9.H =#0x8000 + } { + if(!p0)memw(r7+#8) = R8 + R1:0 = ASL(R1:0, R4) + if(p0) jump .Ldenorma1 + } { + memd(r7+#0) = R1:0 + jumpr r31 + } +.Ldenorma1: + memd(r7+#0) = R3:2 + { + memw(r7+#8) = R9 + jumpr r31 + } + .text + .global fast2_ldsub_asm + .type fast2_ldsub_asm, @function +fast2_ldsub_asm: + .falign + { + R4 = memw(r29+#8) + R5 = memw(r29+#24) + r7 = r0 + } + { + R6 = sub(R4, R5):sat + P0 = CMP.GT(R4, R5); + if ( P0.new) R8 = add(R4, #1) + if (!P0.new) R8 = add(R5, #1) + } { + R6 = abs(R6):sat + if ( P0) R4 = #1 + if (!P0) R5 = #1 + R9 = #62 + } { + R6 = min(R6, R9) + R1:0 = memd(r29+#0) + R3:2 = memd(r29+#16) + } { + if (!P0) R4 = add(R6, #1) + if ( P0) R5 = add(R6, #1) + } { + R1:0 = ASR(R1:0, R4) + R3:2 = ASR(R3:2, R5) + } { + R1:0 = sub(R1:0, R3:2) + R3:2 = #0 + } { + R4 = clb(R1:0) + R9.L =#0x0001 + } { 
+ R8 -= add(R4, #-1) + R4 = add(R4, #-1) + p0 = cmp.gt(R4, #58) + R9.H =#0x8000 + } { + if(!p0)memw(r7+#8) = R8 + R1:0 = asl(R1:0, R4) + if(p0) jump .Ldenorma_s + } { + memd(r7+#0) = R1:0 + jumpr r31 + } +.Ldenorma_s: + memd(r7+#0) = R3:2 + { + memw(r7+#8) = R9 + jumpr r31 + } + .text + .global fast2_ldmpy_asm + .type fast2_ldmpy_asm, @function +fast2_ldmpy_asm: + .falign + { + R15:14 = memd(r29+#0) + R3:2 = memd(r29+#16) + R13:12 = #0 + } + { + R8= extractu(R2, #31, #1) + R9= extractu(R14, #31, #1) + R13.H = #0x8000 + } + { + R11:10 = mpy(R15, R3) + R7:6 = mpy(R15, R8) + R4 = memw(r29+#8) + R5 = memw(r29+#24) + } + { + R11:10 = add(R11:10, R11:10) + R7:6 += mpy(R3, R9) + } + { + R7:6 = asr(R7:6, #30) + R8.L = #0x0001 + p1 = cmp.eq(R15:14, R3:2) + } + { + R7:6 = add(R7:6, R11:10) + R4= add(R4, R5) + p2 = cmp.eq(R3:2, R13:12) + } + { + R9 = clb(R7:6) + R8.H = #0x8000 + p1 = and(p1, p2) + } + { + R4-= add(R9, #-1) + R9 = add(R9, #-1) + if(p1) jump .Lsat1 + } + { + R7:6 = asl(R7:6, R9) + memw(R0+#8) = R4 + p0 = cmp.gt(R9, #58) + if(p0.new) jump:NT .Ldenorm1 + } + { + memd(R0+#0) = R7:6 + jumpr r31 + } +.Lsat1: + { + R13:12 = #0 + R4+= add(R9, #1) + } + { + R13.H = #0x4000 + memw(R0+#8) = R4 + } + { + memd(R0+#0) = R13:12 + jumpr r31 + } +.Ldenorm1: + { + memw(R0+#8) = R8 + R15:14 = #0 + } + { + memd(R0+#0) = R15:14 + jumpr r31 + } diff --git a/src/hexagon/func_macro.s b/src/hexagon/func_macro.s new file mode 100644 index 000000000..9a1e11aeb --- /dev/null +++ b/src/hexagon/func_macro.s @@ -0,0 +1,12 @@ + .macro FUNCTION_BEGIN name + .text + .p2align 5 + .globl \name + .type \name, @function +\name: + .endm + + .macro FUNCTION_END name + .size \name, . 
- \name + .endm + diff --git a/src/hexagon/memcpy_forward_vp4cp4n2.s b/src/hexagon/memcpy_forward_vp4cp4n2.s new file mode 100644 index 000000000..89f69010a --- /dev/null +++ b/src/hexagon/memcpy_forward_vp4cp4n2.s @@ -0,0 +1,91 @@ + .text + + + + + + + .globl hexagon_memcpy_forward_vp4cp4n2 + .balign 32 + .type hexagon_memcpy_forward_vp4cp4n2,@function +hexagon_memcpy_forward_vp4cp4n2: + + + + + { + r3 = sub(##4096, r1) + r5 = lsr(r2, #3) + } + { + + + r3 = extractu(r3, #10, #2) + r4 = extractu(r3, #7, #5) + } + { + r3 = minu(r2, r3) + r4 = minu(r5, r4) + } + { + r4 = or(r4, ##2105344) + p0 = cmp.eq(r3, #0) + if (p0.new) jump:nt .Lskipprolog + } + l2fetch(r1, r4) + { + loop0(.Lprolog, r3) + r2 = sub(r2, r3) + } + .falign +.Lprolog: + { + r4 = memw(r1++#4) + memw(r0++#4) = r4.new + } :endloop0 +.Lskipprolog: + { + + r3 = lsr(r2, #10) + if (cmp.eq(r3.new, #0)) jump:nt .Lskipmain + } + { + loop1(.Lout, r3) + r2 = extractu(r2, #10, #0) + r3 = ##2105472 + } + + .falign +.Lout: + + l2fetch(r1, r3) + loop0(.Lpage, #512) + .falign +.Lpage: + r5:4 = memd(r1++#8) + { + memw(r0++#8) = r4 + memw(r0+#4) = r5 + } :endloop0:endloop1 +.Lskipmain: + { + r3 = ##2105344 + r4 = lsr(r2, #3) + p0 = cmp.eq(r2, #0) + if (p0.new) jumpr:nt r31 + } + { + r3 = or(r3, r4) + loop0(.Lepilog, r2) + } + l2fetch(r1, r3) + .falign +.Lepilog: + { + r4 = memw(r1++#4) + memw(r0++#4) = r4.new + } :endloop0 + + jumpr r31 + +.size hexagon_memcpy_forward_vp4cp4n2, . 
- hexagon_memcpy_forward_vp4cp4n2 diff --git a/src/hexagon/memcpy_likely_aligned.s b/src/hexagon/memcpy_likely_aligned.s new file mode 100644 index 000000000..7e9b62f6a --- /dev/null +++ b/src/hexagon/memcpy_likely_aligned.s @@ -0,0 +1,42 @@ + +FUNCTION_BEGIN __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes + { + p0 = bitsclr(r1,#7) + p0 = bitsclr(r0,#7) + if (p0.new) r5:4 = memd(r1) + r3 = #-3 + } + { + if (!p0) jump .Lmemcpy_call + if (p0) memd(r0++#8) = r5:4 + if (p0) r5:4 = memd(r1+#8) + r3 += lsr(r2,#3) + } + { + memd(r0++#8) = r5:4 + r5:4 = memd(r1+#16) + r1 = add(r1,#24) + loop0(1f,r3) + } + .falign +1: + { + memd(r0++#8) = r5:4 + r5:4 = memd(r1++#8) + }:endloop0 + { + memd(r0) = r5:4 + r0 -= add(r2,#-8) + jumpr r31 + } +FUNCTION_END __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes + +.Lmemcpy_call: + + jump memcpy@PLT + + + + + .globl __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes + .set __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes, __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes diff --git a/src/hexagon/moddi3.s b/src/hexagon/moddi3.s new file mode 100644 index 000000000..53ea6d52a --- /dev/null +++ b/src/hexagon/moddi3.s @@ -0,0 +1,63 @@ + + +FUNCTION_BEGIN __hexagon_moddi3 + { + p3 = tstbit(r1,#31) + } + { + r1:0 = abs(r1:0) + r3:2 = abs(r3:2) + } + { + r6 = cl0(r1:0) + r7 = cl0(r3:2) + r5:4 = r3:2 + r3:2 = r1:0 + } + { + r10 = sub(r7,r6) + r1:0 = #0 + r15:14 = #1 + } + { + r11 = add(r10,#1) + r13:12 = lsl(r5:4,r10) + r15:14 = lsl(r15:14,r10) + } + { + p0 = cmp.gtu(r5:4,r3:2) + loop0(1f,r11) + } + { + if (p0) jump .hexagon_moddi3_return + } + .falign +1: + { + p0 = cmp.gtu(r13:12,r3:2) + } + { + r7:6 = sub(r3:2, r13:12) + r9:8 = add(r1:0, r15:14) + } + { + r1:0 = vmux(p0, r1:0, r9:8) + r3:2 = vmux(p0, r3:2, r7:6) + } + { + r15:14 = lsr(r15:14, #1) + r13:12 = lsr(r13:12, #1) + }:endloop0 + +.hexagon_moddi3_return: + { + r1:0 = neg(r3:2) + } + { + r1:0 = vmux(p3,r1:0,r3:2) + jumpr r31 + } +FUNCTION_END __hexagon_moddi3 + + .globl 
__qdsp_moddi3 + .set __qdsp_moddi3, __hexagon_moddi3 diff --git a/src/hexagon/modsi3.s b/src/hexagon/modsi3.s new file mode 100644 index 000000000..c4ae7e59e --- /dev/null +++ b/src/hexagon/modsi3.s @@ -0,0 +1,44 @@ + + +FUNCTION_BEGIN __hexagon_modsi3 + { + p2 = cmp.ge(r0,#0) + r2 = abs(r0) + r1 = abs(r1) + } + { + r3 = cl0(r2) + r4 = cl0(r1) + p0 = cmp.gtu(r1,r2) + } + { + r3 = sub(r4,r3) + if (p0) jumpr r31 + } + { + p1 = cmp.eq(r3,#0) + loop0(1f,r3) + r0 = r2 + r2 = lsl(r1,r3) + } + .falign +1: + { + p0 = cmp.gtu(r2,r0) + if (!p0.new) r0 = sub(r0,r2) + r2 = lsr(r2,#1) + if (p1) r1 = #0 + }:endloop0 + { + p0 = cmp.gtu(r2,r0) + if (!p0.new) r0 = sub(r0,r1) + if (p2) jumpr r31 + } + { + r0 = neg(r0) + jumpr r31 + } +FUNCTION_END __hexagon_modsi3 + + .globl __qdsp_modsi3 + .set __qdsp_modsi3, __hexagon_modsi3 diff --git a/src/hexagon/sfdiv_opt.s b/src/hexagon/sfdiv_opt.s new file mode 100644 index 000000000..26c91f15c --- /dev/null +++ b/src/hexagon/sfdiv_opt.s @@ -0,0 +1,42 @@ + +FUNCTION_BEGIN __hexagon_divsf3 + { + r2,p0 = sfrecipa(r0,r1) + r4 = sffixupd(r0,r1) + r3 = ##0x3f800000 + } + { + r5 = sffixupn(r0,r1) + r3 -= sfmpy(r4,r2):lib + r6 = ##0x80000000 + r7 = r3 + } + { + r2 += sfmpy(r3,r2):lib + r3 = r7 + r6 = r5 + r0 = and(r6,r5) + } + { + r3 -= sfmpy(r4,r2):lib + r0 += sfmpy(r5,r2):lib + } + { + r2 += sfmpy(r3,r2):lib + r6 -= sfmpy(r0,r4):lib + } + { + r0 += sfmpy(r6,r2):lib + } + { + r5 -= sfmpy(r0,r4):lib + } + { + r0 += sfmpy(r5,r2,p0):scale + jumpr r31 + } +FUNCTION_END __hexagon_divsf3 + +.global __qdsp_divsf3 ; .set __qdsp_divsf3, __hexagon_divsf3 +.global __hexagon_fast_divsf3 ; .set __hexagon_fast_divsf3, __hexagon_divsf3 +.global __hexagon_fast2_divsf3 ; .set __hexagon_fast2_divsf3, __hexagon_divsf3 diff --git a/src/hexagon/sfsqrt_opt.s b/src/hexagon/sfsqrt_opt.s new file mode 100644 index 000000000..c90af1797 --- /dev/null +++ b/src/hexagon/sfsqrt_opt.s @@ -0,0 +1,49 @@ +FUNCTION_BEGIN __hexagon_sqrtf + { + r3,p0 = sfinvsqrta(r0) + r5 = 
sffixupr(r0) + r4 = ##0x3f000000 + r1:0 = combine(#0,#0) + } + { + r0 += sfmpy(r3,r5):lib + r1 += sfmpy(r3,r4):lib + r2 = r4 + r3 = r5 + } + { + r2 -= sfmpy(r0,r1):lib + p1 = sfclass(r5,#1) + + } + { + r0 += sfmpy(r0,r2):lib + r1 += sfmpy(r1,r2):lib + r2 = r4 + r3 = r5 + } + { + r2 -= sfmpy(r0,r1):lib + r3 -= sfmpy(r0,r0):lib + } + { + r0 += sfmpy(r1,r3):lib + r1 += sfmpy(r1,r2):lib + r2 = r4 + r3 = r5 + } + { + + r3 -= sfmpy(r0,r0):lib + if (p1) r0 = or(r0,r5) + } + { + r0 += sfmpy(r1,r3,p0):scale + jumpr r31 + } + +FUNCTION_END __hexagon_sqrtf + +.global __qdsp_sqrtf ; .set __qdsp_sqrtf, __hexagon_sqrtf +.global __hexagon_fast_sqrtf ; .set __hexagon_fast_sqrtf, __hexagon_sqrtf +.global __hexagon_fast2_sqrtf ; .set __hexagon_fast2_sqrtf, __hexagon_sqrtf diff --git a/src/hexagon/udivdi3.s b/src/hexagon/udivdi3.s new file mode 100644 index 000000000..f0fffc23d --- /dev/null +++ b/src/hexagon/udivdi3.s @@ -0,0 +1,50 @@ + + +FUNCTION_BEGIN __hexagon_udivdi3 + { + r6 = cl0(r1:0) + r7 = cl0(r3:2) + r5:4 = r3:2 + r3:2 = r1:0 + } + { + r10 = sub(r7,r6) + r1:0 = #0 + r15:14 = #1 + } + { + r11 = add(r10,#1) + r13:12 = lsl(r5:4,r10) + r15:14 = lsl(r15:14,r10) + } + { + p0 = cmp.gtu(r5:4,r3:2) + loop0(1f,r11) + } + { + if (p0) jumpr r31 + } + .falign +1: + { + p0 = cmp.gtu(r13:12,r3:2) + } + { + r7:6 = sub(r3:2, r13:12) + r9:8 = add(r1:0, r15:14) + } + { + r1:0 = vmux(p0, r1:0, r9:8) + r3:2 = vmux(p0, r3:2, r7:6) + } + { + r15:14 = lsr(r15:14, #1) + r13:12 = lsr(r13:12, #1) + }:endloop0 + { + jumpr r31 + } +FUNCTION_END __hexagon_udivdi3 + + .globl __qdsp_udivdi3 + .set __qdsp_udivdi3, __hexagon_udivdi3 diff --git a/src/hexagon/udivmoddi4.s b/src/hexagon/udivmoddi4.s new file mode 100644 index 000000000..cbfb3987d --- /dev/null +++ b/src/hexagon/udivmoddi4.s @@ -0,0 +1,50 @@ + + +FUNCTION_BEGIN __hexagon_udivmoddi4 + { + r6 = cl0(r1:0) + r7 = cl0(r3:2) + r5:4 = r3:2 + r3:2 = r1:0 + } + { + r10 = sub(r7,r6) + r1:0 = #0 + r15:14 = #1 + } + { + r11 = add(r10,#1) + r13:12 = 
lsl(r5:4,r10) + r15:14 = lsl(r15:14,r10) + } + { + p0 = cmp.gtu(r5:4,r3:2) + loop0(1f,r11) + } + { + if (p0) jumpr r31 + } + .falign +1: + { + p0 = cmp.gtu(r13:12,r3:2) + } + { + r7:6 = sub(r3:2, r13:12) + r9:8 = add(r1:0, r15:14) + } + { + r1:0 = vmux(p0, r1:0, r9:8) + r3:2 = vmux(p0, r3:2, r7:6) + } + { + r15:14 = lsr(r15:14, #1) + r13:12 = lsr(r13:12, #1) + }:endloop0 + { + jumpr r31 + } +FUNCTION_END __hexagon_udivmoddi4 + + .globl __qdsp_udivmoddi4 + .set __qdsp_udivmoddi4, __hexagon_udivmoddi4 diff --git a/src/hexagon/udivmodsi4.s b/src/hexagon/udivmodsi4.s new file mode 100644 index 000000000..83489c514 --- /dev/null +++ b/src/hexagon/udivmodsi4.s @@ -0,0 +1,39 @@ + + +FUNCTION_BEGIN __hexagon_udivmodsi4 + { + r2 = cl0(r0) + r3 = cl0(r1) + r5:4 = combine(#1,#0) + p0 = cmp.gtu(r1,r0) + } + { + r6 = sub(r3,r2) + r4 = r1 + r1:0 = combine(r0,r4) + if (p0) jumpr r31 + } + { + r3:2 = vlslw(r5:4,r6) + loop0(1f,r6) + p0 = cmp.eq(r6,#0) + if (p0.new) r4 = #0 + } + .falign +1: + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r1 = sub(r1,r2) + if (!p0.new) r0 = add(r0,r3) + r3:2 = vlsrw(r3:2,#1) + }:endloop0 + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r1 = sub(r1,r4) + if (!p0.new) r0 = add(r0,r3) + jumpr r31 + } +FUNCTION_END __hexagon_udivmodsi4 + + .globl __qdsp_udivmodsi4 + .set __qdsp_udivmodsi4, __hexagon_udivmodsi4 diff --git a/src/hexagon/udivsi3.s b/src/hexagon/udivsi3.s new file mode 100644 index 000000000..e0b94aa99 --- /dev/null +++ b/src/hexagon/udivsi3.s @@ -0,0 +1,36 @@ + + +FUNCTION_BEGIN __hexagon_udivsi3 + { + r2 = cl0(r0) + r3 = cl0(r1) + r5:4 = combine(#1,#0) + p0 = cmp.gtu(r1,r0) + } + { + r6 = sub(r3,r2) + r4 = r1 + r1:0 = combine(r0,r4) + if (p0) jumpr r31 + } + { + r3:2 = vlslw(r5:4,r6) + loop0(1f,r6) + } + .falign +1: + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r1 = sub(r1,r2) + if (!p0.new) r0 = add(r0,r3) + r3:2 = vlsrw(r3:2,#1) + }:endloop0 + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r0 = add(r0,r3) + jumpr r31 + } +FUNCTION_END __hexagon_udivsi3 + + .globl 
__qdsp_udivsi3 + .set __qdsp_udivsi3, __hexagon_udivsi3 diff --git a/src/hexagon/umoddi3.s b/src/hexagon/umoddi3.s new file mode 100644 index 000000000..c76011c3e --- /dev/null +++ b/src/hexagon/umoddi3.s @@ -0,0 +1,53 @@ + + +FUNCTION_BEGIN __hexagon_umoddi3 + { + r6 = cl0(r1:0) + r7 = cl0(r3:2) + r5:4 = r3:2 + r3:2 = r1:0 + } + { + r10 = sub(r7,r6) + r1:0 = #0 + r15:14 = #1 + } + { + r11 = add(r10,#1) + r13:12 = lsl(r5:4,r10) + r15:14 = lsl(r15:14,r10) + } + { + p0 = cmp.gtu(r5:4,r3:2) + loop0(1f,r11) + } + { + if (p0) jump .hexagon_umoddi3_return + } + .falign +1: + { + p0 = cmp.gtu(r13:12,r3:2) + } + { + r7:6 = sub(r3:2, r13:12) + r9:8 = add(r1:0, r15:14) + } + { + r1:0 = vmux(p0, r1:0, r9:8) + r3:2 = vmux(p0, r3:2, r7:6) + } + { + r15:14 = lsr(r15:14, #1) + r13:12 = lsr(r13:12, #1) + }:endloop0 + +.hexagon_umoddi3_return: + { + r1:0 = r3:2 + jumpr r31 + } +FUNCTION_END __hexagon_umoddi3 + + .globl __qdsp_umoddi3 + .set __qdsp_umoddi3, __hexagon_umoddi3 diff --git a/src/hexagon/umodsi3.s b/src/hexagon/umodsi3.s new file mode 100644 index 000000000..1b592a7c5 --- /dev/null +++ b/src/hexagon/umodsi3.s @@ -0,0 +1,34 @@ + + +FUNCTION_BEGIN __hexagon_umodsi3 + { + r2 = cl0(r0) + r3 = cl0(r1) + p0 = cmp.gtu(r1,r0) + } + { + r2 = sub(r3,r2) + if (p0) jumpr r31 + } + { + loop0(1f,r2) + p1 = cmp.eq(r2,#0) + r2 = lsl(r1,r2) + } + .falign +1: + { + p0 = cmp.gtu(r2,r0) + if (!p0.new) r0 = sub(r0,r2) + r2 = lsr(r2,#1) + if (p1) r1 = #0 + }:endloop0 + { + p0 = cmp.gtu(r2,r0) + if (!p0.new) r0 = sub(r0,r1) + jumpr r31 + } +FUNCTION_END __hexagon_umodsi3 + + .globl __qdsp_umodsi3 + .set __qdsp_umodsi3, __hexagon_umodsi3 diff --git a/src/lib.rs b/src/lib.rs index 3e5491878..47aef540e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ #![cfg_attr(feature = "compiler-builtins", compiler_builtins)] #![cfg_attr(not(feature = "no-asm"), feature(asm))] #![feature(abi_unadjusted)] +#![feature(asm_experimental_arch)] #![cfg_attr(not(feature = "no-asm"), feature(global_asm))] 
#![feature(cfg_target_has_atomic)] #![feature(compiler_builtins)] @@ -70,6 +71,9 @@ pub mod aarch64_linux; ))] pub mod arm_linux; +#[cfg(target_arch = "hexagon")] +pub mod hexagon; + #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] pub mod riscv; From 80ed5f8c3ddbea7986c4a8243e42b963cb954b42 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 14 Dec 2023 20:23:55 +0000 Subject: [PATCH 0679/1459] Bump to 0.1.105 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6ebabfdcb..1b8b426ce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.104" +version = "0.1.105" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From d3210c5ab4b436018395baca87f5e5d4a348c00c Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Wed, 20 Dec 2023 09:30:11 +1000 Subject: [PATCH 0680/1459] build: Allow building C compiler-rt fallbacks for RISC-V Now that https://github.com/rust-lang/rust/pull/117654 has been merged the rust-lang/rust distribution containers contain RISC-V C compilers. This means that we can now enable the "c" feature fallback. Resolves: https://github.com/rust-lang/compiler-builtins/issues/350 Signed-off-by: Alistair Francis --- build.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/build.rs b/build.rs index 048611677..d4b2a9b49 100644 --- a/build.rs +++ b/build.rs @@ -59,12 +59,7 @@ fn main() { // * wasm - clang for wasm is somewhat hard to come by and it's // unlikely that the C is really that much better than our own Rust. // * nvptx - everything is bitcode, not compatible with mixed C/Rust - // * riscv - the rust-lang/rust distribution container doesn't have a C - // compiler. 
- if !target.contains("wasm") - && !target.contains("nvptx") - && (!target.starts_with("riscv") || target.contains("xous")) - { + if !target.contains("wasm") && !target.contains("nvptx") { #[cfg(feature = "c")] c::compile(&llvm_target, &target); } @@ -519,7 +514,7 @@ mod c { } } - if target_arch == "mips" { + if target_arch == "mips" || target_arch == "riscv32" || target_arch == "riscv64" { sources.extend(&[("__bswapsi2", "bswapsi2.c")]); } From eba4bef26abbb83ddaa7a5b661253b4817e0c998 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Tue, 9 Jan 2024 00:07:27 +0900 Subject: [PATCH 0681/1459] Update actions/checkout action to v4 --- .github/workflows/main.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b69c48c16..360ed9739 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -80,7 +80,7 @@ jobs: os: windows-latest rust: nightly-x86_64-gnu steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 with: submodules: true - name: Install Rust (rustup) @@ -108,7 +108,7 @@ jobs: name: Rustfmt runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 with: submodules: true - name: Install stable `rustfmt` @@ -119,7 +119,7 @@ jobs: name: Clippy runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 with: submodules: true # Unlike rustfmt, stable clippy does not work on code with nightly features. 
From bf279fd5e77e6926b2b06e82bbfef0d5d2bd13ff Mon Sep 17 00:00:00 2001 From: trevyn <230691+trevyn@users.noreply.github.com> Date: Tue, 9 Jan 2024 01:23:59 +0400 Subject: [PATCH 0682/1459] build: Allow building C compiler-rt fallbacks for wasm --- .github/workflows/main.yml | 4 ++-- build.rs | 7 +------ ci/docker/wasm32-unknown-unknown/Dockerfile | 2 +- ci/run.sh | 1 + 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b69c48c16..8110e27f7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -90,8 +90,8 @@ jobs: - run: rustup component add llvm-tools-preview - name: Download compiler-rt reference sources run: | - curl -L -o code.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/12.0-2021-04-15.tar.gz - tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-12.0-2021-04-15/compiler-rt + curl -L -o code.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/13.0-2021-08-08.tar.gz + tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-13.0-2021-08-08/compiler-rt echo RUST_COMPILER_RT_ROOT=./compiler-rt >> $GITHUB_ENV shell: bash diff --git a/build.rs b/build.rs index 048611677..9bf6a2547 100644 --- a/build.rs +++ b/build.rs @@ -56,15 +56,10 @@ fn main() { if !cfg!(feature = "mangled-names") && cfg!(feature = "c") { // Don't use a C compiler for these targets: // - // * wasm - clang for wasm is somewhat hard to come by and it's - // unlikely that the C is really that much better than our own Rust. // * nvptx - everything is bitcode, not compatible with mixed C/Rust // * riscv - the rust-lang/rust distribution container doesn't have a C // compiler. 
- if !target.contains("wasm") - && !target.contains("nvptx") - && (!target.starts_with("riscv") || target.contains("xous")) - { + if !target.contains("nvptx") && (!target.starts_with("riscv") || target.contains("xous")) { #[cfg(feature = "c")] c::compile(&llvm_target, &target); } diff --git a/ci/docker/wasm32-unknown-unknown/Dockerfile b/ci/docker/wasm32-unknown-unknown/Dockerfile index 758d94d50..85ead29aa 100644 --- a/ci/docker/wasm32-unknown-unknown/Dockerfile +++ b/ci/docker/wasm32-unknown-unknown/Dockerfile @@ -1,6 +1,6 @@ FROM ubuntu:20.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ - gcc libc6-dev ca-certificates + gcc clang libc6-dev ca-certificates ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=true diff --git a/ci/run.sh b/ci/run.sh index 44ec30fb7..6376d2216 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -66,6 +66,7 @@ for rlib in $(echo $path); do grep -v __x86.get_pc_thunk | \ grep -v __builtin_cl | \ grep -v __builtin_ctz | \ + grep -v __builtin_sadd_overflow | \ grep 'T __' if test $? = 0; then From ab849edf5c9e66ad2c9194f83b023847bd6dbe6d Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 18 Jan 2024 01:27:08 +0000 Subject: [PATCH 0683/1459] Bump to 0.1.106 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1b8b426ce..e3540f465 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.105" +version = "0.1.106" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 1f6ca44444f9fcd9c35cf77d236c612794bb39bf Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 22 Jan 2024 12:55:25 +0100 Subject: [PATCH 0684/1459] Only add 80-bit long double source on x86 These no longer build on other architectures with LLVM 18. In previous versions they used an incorrect float layout. 
--- build.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/build.rs b/build.rs index d4b2a9b49..690a4fc9b 100644 --- a/build.rs +++ b/build.rs @@ -290,17 +290,23 @@ mod c { sources.extend(&[ ("__divdc3", "divdc3.c"), ("__divsc3", "divsc3.c"), - ("__divxc3", "divxc3.c"), ("__extendhfsf2", "extendhfsf2.c"), ("__muldc3", "muldc3.c"), ("__mulsc3", "mulsc3.c"), - ("__mulxc3", "mulxc3.c"), ("__negdf2", "negdf2.c"), ("__negsf2", "negsf2.c"), - ("__powixf2", "powixf2.c"), ("__truncdfhf2", "truncdfhf2.c"), ("__truncsfhf2", "truncsfhf2.c"), ]); + + if target_arch == "x86" || target_arch == "x86_64" { + // Only add 80-bit long double sources on x86. + sources.extend(&[ + ("__divxc3", "divxc3.c"), + ("__mulxc3", "mulxc3.c"), + ("__powixf2", "powixf2.c"), + ]); + } } // When compiling in rustbuild (the rust-lang/rust repo) this library From 6031a71c58a342e0b8b3184858ca73374bcdfdfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Wed, 24 Jan 2024 19:38:55 +0100 Subject: [PATCH 0685/1459] CI: add a success conclusion job --- .github/workflows/main.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 360ed9739..9f15cb9d8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -128,3 +128,18 @@ jobs: run: | rustup set profile minimal && rustup default "nightly-$(curl -s https://rust-lang.github.io/rustup-components-history/x86_64-unknown-linux-gnu/clippy)" && rustup component add clippy - run: cargo clippy -- -D clippy::all + + success: + needs: + - test + - rustfmt + - clippy + runs-on: ubuntu-latest + # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency + # failed" as success. So we have to do some contortions to ensure the job fails if any of its + # dependencies fails. + if: always() # make sure this is never "skipped" + steps: + # Manually check the status of all dependencies. 
`if: failure()` does not work. + - name: check if any dependency failed + run: jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}' From 76e5763996bb0b79f76fd918591ad7474b182c38 Mon Sep 17 00:00:00 2001 From: BD103 <59022059+BD103@users.noreply.github.com> Date: Wed, 24 Jan 2024 13:57:31 -0500 Subject: [PATCH 0686/1459] fix: replace travis link with github actions link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index da0adbce7..adcdcacba 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ features = ["c"] [1]: https://github.com/rust-lang/llvm-project/tree/9e3de9490ff580cd484fbfa2908292b4838d56e7/compiler-rt/test/builtins/Unit [2]: https://github.com/rust-lang/llvm-project/tree/9e3de9490ff580cd484fbfa2908292b4838d56e7/compiler-rt/lib/builtins [3]: https://github.com/rust-lang/compiler-builtins/blob/0ba07e49264a54cb5bbd4856fcea083bb3fbec15/build.rs#L180-L265 -[4]: https://travis-ci.org/rust-lang/compiler-builtins +[4]: https://github.com/rust-lang/compiler-builtins/actions ### Porting Reminders From 4e5c34b1e6a78a9a646b62d6aa13f98d42c975ea Mon Sep 17 00:00:00 2001 From: Tyler Mandry Date: Thu, 4 Jan 2024 18:43:58 -0800 Subject: [PATCH 0687/1459] Handle move of cpu_model.c This happened in https://github.com/llvm/llvm-project/pull/75635/files and shows up when building against a recent commit of LLVM. --- build.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/build.rs b/build.rs index 690a4fc9b..c133d5e7c 100644 --- a/build.rs +++ b/build.rs @@ -602,7 +602,12 @@ mod c { build_aarch64_out_of_line_atomics_libraries(&src_dir, cfg); // Some run-time CPU feature detection is necessary, as well. 
- sources.extend(&[("__aarch64_have_lse_atomics", "cpu_model.c")]); + let cpu_model_src = if src_dir.join("cpu_model.c").exists() { + "cpu_model.c" + } else { + "cpu_model/aarch64.c" + }; + sources.extend(&[("__aarch64_have_lse_atomics", cpu_model_src)]); } let mut added_sources = HashSet::new(); From 8f778e49399797a417c345c815b88e04668caa1b Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 31 Jan 2024 08:36:00 +0000 Subject: [PATCH 0688/1459] Bump to 0.1.107 (#571) --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e3540f465..b7e504ad5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.106" +version = "0.1.107" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 3426186db7b20a27176e7ce40850cf708424cc30 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 31 Jan 2024 17:37:26 +0100 Subject: [PATCH 0689/1459] Remove 80-bit builtins entirely It turns out that these also don't build on x86 + MSVC. Rather than fixing up the condition, I'm just deleting them entirely. As far as I know, Rust does not support 80-bit floats and has no plan to support them, so we shouldn't need them. --- build.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/build.rs b/build.rs index 6ad7f7153..8082131ef 100644 --- a/build.rs +++ b/build.rs @@ -296,15 +296,6 @@ mod c { ("__truncdfhf2", "truncdfhf2.c"), ("__truncsfhf2", "truncsfhf2.c"), ]); - - if target_arch == "x86" || target_arch == "x86_64" { - // Only add 80-bit long double sources on x86. 
- sources.extend(&[ - ("__divxc3", "divxc3.c"), - ("__mulxc3", "mulxc3.c"), - ("__powixf2", "powixf2.c"), - ]); - } } // When compiling in rustbuild (the rust-lang/rust repo) this library From 9e1e3905e2aa102a3c336b9b5b8601e240d680ae Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 2 Feb 2024 10:04:55 +0100 Subject: [PATCH 0690/1459] Also don't build floatdixf and floatundixf --- build.rs | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/build.rs b/build.rs index 8082131ef..6268fd2ff 100644 --- a/build.rs +++ b/build.rs @@ -347,29 +347,12 @@ mod c { ]); } - if target_env == "msvc" { - if target_arch == "x86_64" { - sources.extend(&[("__floatdixf", "x86_64/floatdixf.c")]); - } - } else { - // None of these seem to be used on x86_64 windows, and they've all - // got the wrong ABI anyway, so we want to avoid them. - if target_os != "windows" { - if target_arch == "x86_64" { - sources.extend(&[ - ("__floatdixf", "x86_64/floatdixf.c"), - ("__floatundixf", "x86_64/floatundixf.S"), - ]); - } - } - + if target_env != "msvc" { if target_arch == "x86" { sources.extend(&[ ("__ashldi3", "i386/ashldi3.S"), ("__ashrdi3", "i386/ashrdi3.S"), ("__divdi3", "i386/divdi3.S"), - ("__floatdixf", "i386/floatdixf.S"), - ("__floatundixf", "i386/floatundixf.S"), ("__lshrdi3", "i386/lshrdi3.S"), ("__moddi3", "i386/moddi3.S"), ("__muldi3", "i386/muldi3.S"), From 351d48e4b95f1665cfd3360e3ba8f3dd4d3fb3c1 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 4 Feb 2024 06:37:55 +0000 Subject: [PATCH 0691/1459] Release 0.1.108 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b7e504ad5..e94c2f762 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.107" +version = "0.1.108" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 
ef58df106b2987cadc888d1a59ddf5fdd734e731 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 21 Feb 2024 00:58:14 +0000 Subject: [PATCH 0692/1459] Allow internal_features lint when building with "unstable" --- libm/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 4a17d3a77..1f23ef8a8 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -1,7 +1,8 @@ //! libm in pure Rust #![deny(warnings)] #![no_std] -#![cfg_attr(all(feature = "unstable"), feature(core_intrinsics))] +#![cfg_attr(feature = "unstable", allow(internal_features))] +#![cfg_attr(feature = "unstable", feature(core_intrinsics))] #![allow(clippy::unreadable_literal)] #![allow(clippy::many_single_char_names)] #![allow(clippy::needless_return)] From 2d76cf6c6d869055e3209ac587d66d128f022c16 Mon Sep 17 00:00:00 2001 From: Amjad Alsharafi <26300843+Amjad50@users.noreply.github.com> Date: Sun, 11 Feb 2024 18:03:42 +0800 Subject: [PATCH 0693/1459] Remove unneeded `extern core` in `tgamma` --- libm/src/math/tgamma.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/libm/src/math/tgamma.rs b/libm/src/math/tgamma.rs index e64eff61f..3f38c0b1d 100644 --- a/libm/src/math/tgamma.rs +++ b/libm/src/math/tgamma.rs @@ -22,7 +22,6 @@ Gamma(x)*Gamma(-x) = -pi/(x sin(pi x)) most ideas and constants are from boost and python */ -extern crate core; use super::{exp, floor, k_cos, k_sin, pow}; const PI: f64 = 3.141592653589793238462643383279502884; From 63a0b2044e1cf6b21bf1c1871cb287cc2487fcca Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sun, 24 Mar 2024 20:27:38 +0000 Subject: [PATCH 0694/1459] Clean up and rework CI script --- .github/workflows/main.yml | 1 + ci/docker/thumbv6m-none-eabi/Dockerfile | 2 +- ci/docker/thumbv7em-none-eabi/Dockerfile | 2 +- ci/docker/thumbv7em-none-eabihf/Dockerfile | 2 +- ci/docker/thumbv7m-none-eabi/Dockerfile | 2 +- ci/run.sh | 58 ++++++++++++---------- 6 files changed, 37 insertions(+), 30 deletions(-) diff 
--git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9071e4ff0..f5600c1ed 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -6,6 +6,7 @@ jobs: name: Test runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: include: - target: aarch64-unknown-linux-gnu diff --git a/ci/docker/thumbv6m-none-eabi/Dockerfile b/ci/docker/thumbv6m-none-eabi/Dockerfile index 04d4f4429..dc7dd431b 100644 --- a/ci/docker/thumbv6m-none-eabi/Dockerfile +++ b/ci/docker/thumbv6m-none-eabi/Dockerfile @@ -4,4 +4,4 @@ RUN apt-get update && \ gcc libc6-dev ca-certificates \ gcc-arm-none-eabi \ libnewlib-arm-none-eabi -ENV XARGO=1 +ENV NO_STD=1 diff --git a/ci/docker/thumbv7em-none-eabi/Dockerfile b/ci/docker/thumbv7em-none-eabi/Dockerfile index 04d4f4429..dc7dd431b 100644 --- a/ci/docker/thumbv7em-none-eabi/Dockerfile +++ b/ci/docker/thumbv7em-none-eabi/Dockerfile @@ -4,4 +4,4 @@ RUN apt-get update && \ gcc libc6-dev ca-certificates \ gcc-arm-none-eabi \ libnewlib-arm-none-eabi -ENV XARGO=1 +ENV NO_STD=1 diff --git a/ci/docker/thumbv7em-none-eabihf/Dockerfile b/ci/docker/thumbv7em-none-eabihf/Dockerfile index 04d4f4429..dc7dd431b 100644 --- a/ci/docker/thumbv7em-none-eabihf/Dockerfile +++ b/ci/docker/thumbv7em-none-eabihf/Dockerfile @@ -4,4 +4,4 @@ RUN apt-get update && \ gcc libc6-dev ca-certificates \ gcc-arm-none-eabi \ libnewlib-arm-none-eabi -ENV XARGO=1 +ENV NO_STD=1 diff --git a/ci/docker/thumbv7m-none-eabi/Dockerfile b/ci/docker/thumbv7m-none-eabi/Dockerfile index 04d4f4429..dc7dd431b 100644 --- a/ci/docker/thumbv7m-none-eabi/Dockerfile +++ b/ci/docker/thumbv7m-none-eabi/Dockerfile @@ -4,4 +4,4 @@ RUN apt-get update && \ gcc libc6-dev ca-certificates \ gcc-arm-none-eabi \ libnewlib-arm-none-eabi -ENV XARGO=1 +ENV NO_STD=1 diff --git a/ci/run.sh b/ci/run.sh index 6376d2216..b059d5660 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -1,10 +1,7 @@ set -ex -cargo=cargo - # Test our implementation -if [ "$XARGO" = "1" ]; then - # FIXME: currently these 
tests don't work... +if [ "$NO_STD" = "1" ]; then echo nothing to do else run="cargo test --manifest-path testcrate/Cargo.toml --target $1" @@ -16,6 +13,15 @@ else $run --features no-asm --release fi +if [ -d /target ]; then + path=/target/${1}/debug/deps/libcompiler_builtins-*.rlib +else + path=target/${1}/debug/deps/libcompiler_builtins-*.rlib +fi + +# Remove any existing artifacts from previous tests that don't set #![compiler_builtins] +rm -f $path + cargo build --target $1 cargo build --target $1 --release cargo build --target $1 --features c @@ -36,15 +42,15 @@ case $1 in ;; esac -NM=$(find $(rustc --print sysroot) -name llvm-nm) +NM=$(find $(rustc --print sysroot) \( -name llvm-nm -o -name llvm-nm.exe \) ) if [ "$NM" = "" ]; then NM=${PREFIX}nm fi - -if [ -d /target ]; then - path=/target/${1}/debug/deps/libcompiler_builtins-*.rlib -else - path=target/${1}/debug/deps/libcompiler_builtins-*.rlib +# i686-pc-windows-gnu tools have a dependency on some DLLs, so run it with +# rustup run to ensure that those are in PATH. 
+TOOLCHAIN=$(rustup show active-toolchain | sed 's/ (default)//') +if [[ $TOOLCHAIN == *i686-pc-windows-gnu ]]; then + NM="rustup run $TOOLCHAIN $NM" fi # Look out for duplicated symbols when we include the compiler-rt (C) implementation @@ -79,29 +85,29 @@ done rm -f $path # Verify that we haven't drop any intrinsic/symbol -build_intrinsics="$cargo build --target $1 -v --example intrinsics" -RUSTFLAGS="-C debug-assertions=no" $build_intrinsics -RUSTFLAGS="-C debug-assertions=no" $build_intrinsics --release -RUSTFLAGS="-C debug-assertions=no" $build_intrinsics --features c -RUSTFLAGS="-C debug-assertions=no" $build_intrinsics --features c --release +build_intrinsics="cargo build --target $1 -v --example intrinsics" +$build_intrinsics +$build_intrinsics --release +$build_intrinsics --features c +$build_intrinsics --features c --release # Verify that there are no undefined symbols to `panic` within our # implementations -# -# TODO(#79) fix the undefined references problem for debug-assertions+lto -if [ -z "$DEBUG_LTO_BUILD_DOESNT_WORK" ]; then - RUSTFLAGS="-C debug-assertions=no" \ - CARGO_INCREMENTAL=0 \ - CARGO_PROFILE_DEV_LTO=true \ - $cargo rustc --features "$INTRINSICS_FEATURES" --target $1 --example intrinsics -fi +CARGO_PROFILE_DEV_LTO=true \ + cargo build --target $1 --example intrinsics CARGO_PROFILE_RELEASE_LTO=true \ - $cargo rustc --features "$INTRINSICS_FEATURES" --target $1 --example intrinsics --release + cargo build --target $1 --example intrinsics --release -# Ensure no references to a panicking function +# Ensure no references to any symbols from core for rlib in $(echo $path); do set +ex - $NM -u $rlib 2>&1 | grep panicking + echo "================================================================" + echo checking $rlib for references to core + echo "================================================================" + + $NM --quiet -U $rlib | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > defined_symbols.txt + $NM --quiet -u $rlib | grep 'U 
_ZN4core' | awk '{print $2}' | sort | uniq > undefined_symbols.txt + grep -v -F -x -f defined_symbols.txt undefined_symbols.txt if test $? = 0; then exit 1 From 17a9c0065ef99d9a846e7b3221fe972825c9cf74 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 27 Mar 2024 18:16:35 +0100 Subject: [PATCH 0695/1459] add c_unwind feature --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index 47aef540e..66540bb9b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,7 @@ #![feature(linkage)] #![feature(naked_functions)] #![feature(repr_simd)] +#![feature(c_unwind)] #![no_builtins] #![no_std] #![allow(unused_features)] From 8dc0e28e03bc408fb97200939e3e68c2e30d0081 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Tue, 14 Nov 2023 18:43:39 +0800 Subject: [PATCH 0696/1459] Add compiler-rt fallbacks on loongarch64-musl --- build.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/build.rs b/build.rs index 6268fd2ff..1b901b7e8 100644 --- a/build.rs +++ b/build.rs @@ -514,6 +514,24 @@ mod c { ]); } + if target_arch == "loongarch64" { + sources.extend(&[ + ("__extenddftf2", "extenddftf2.c"), + ("__netf2", "comparetf2.c"), + ("__addtf3", "addtf3.c"), + ("__multf3", "multf3.c"), + ("__subtf3", "subtf3.c"), + ("__fixtfsi", "fixtfsi.c"), + ("__floatsitf", "floatsitf.c"), + ("__fixunstfsi", "fixunstfsi.c"), + ("__floatunsitf", "floatunsitf.c"), + ("__fe_getround", "fp_mode.c"), + ("__divtf3", "divtf3.c"), + ("__trunctfdf2", "trunctfdf2.c"), + ("__trunctfsf2", "trunctfsf2.c"), + ]); + } + // Remove the assembly implementations that won't compile for the target if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" || target_os == "uefi" { From dc3b50e1e5b69c878646b2a5abc72352697eedd2 Mon Sep 17 00:00:00 2001 From: trevyn <230691+trevyn@users.noreply.github.com> Date: Thu, 8 Feb 2024 07:34:09 -0800 Subject: [PATCH 0697/1459] Remove duplicate symbol workaround --- .github/workflows/main.yml | 4 ++-- ci/run.sh | 5 ----- 2 
files changed, 2 insertions(+), 7 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f5600c1ed..97d298cf0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -91,8 +91,8 @@ jobs: - run: rustup component add llvm-tools-preview - name: Download compiler-rt reference sources run: | - curl -L -o code.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/13.0-2021-08-08.tar.gz - tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-13.0-2021-08-08/compiler-rt + curl -L -o code.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/18.0-2024-02-13.tar.gz + tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-18.0-2024-02-13/compiler-rt echo RUST_COMPILER_RT_ROOT=./compiler-rt >> $GITHUB_ENV shell: bash diff --git a/ci/run.sh b/ci/run.sh index b059d5660..09728191a 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -63,16 +63,11 @@ for rlib in $(echo $path); do stdout=$($NM -g --defined-only $rlib 2>&1) # NOTE On i586, It's normal that the get_pc_thunk symbol appears several # times so ignore it - # - # FIXME(#167) - we shouldn't ignore `__builtin_cl` style symbols here. set +e echo "$stdout" | \ sort | \ uniq -d | \ grep -v __x86.get_pc_thunk | \ - grep -v __builtin_cl | \ - grep -v __builtin_ctz | \ - grep -v __builtin_sadd_overflow | \ grep 'T __' if test $? 
= 0; then From 127bbc53b5a8986abff28b3af1524342427bf38f Mon Sep 17 00:00:00 2001 From: Daniel Paoliello Date: Fri, 23 Feb 2024 10:51:15 -0800 Subject: [PATCH 0698/1459] Add support for arm64ec --- build.rs | 2 +- src/lib.rs | 2 +- testcrate/tests/lse.rs | 5 ++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/build.rs b/build.rs index 1b901b7e8..44946c124 100644 --- a/build.rs +++ b/build.rs @@ -461,7 +461,7 @@ mod c { ]); } - if target_arch == "aarch64" && consider_float_intrinsics { + if (target_arch == "aarch64" || target_arch == "arm64ec") && consider_float_intrinsics { sources.extend(&[ ("__comparetf2", "comparetf2.c"), ("__extenddftf2", "extenddftf2.c"), diff --git a/src/lib.rs b/src/lib.rs index 66540bb9b..a414efde0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -59,7 +59,7 @@ pub mod mem; #[cfg(target_arch = "arm")] pub mod arm; -#[cfg(target_arch = "aarch64")] +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] pub mod aarch64; #[cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm"),))] diff --git a/testcrate/tests/lse.rs b/testcrate/tests/lse.rs index 5589f22f1..63c98957c 100644 --- a/testcrate/tests/lse.rs +++ b/testcrate/tests/lse.rs @@ -1,5 +1,8 @@ #![feature(decl_macro)] // so we can use pub(super) -#![cfg(all(target_arch = "aarch64", not(feature = "no-asm")))] +#![cfg(all( + any(target_arch = "aarch64", target_arch = "arm64ec"), + not(feature = "no-asm") +))] /// Translate a byte size to a Rust type. 
macro int_ty { From d32d5eeeb565c3c6f4288976e4bc07d82d9694fd Mon Sep 17 00:00:00 2001 From: Kleis Auke Wolthuizen Date: Sat, 10 Feb 2024 14:47:03 +0100 Subject: [PATCH 0699/1459] Sync x86 chkstk intrinsics with LLVM Incorporates the following commits: https://github.com/llvm/llvm-project/commit/885d7b759b5c166c07c07f4c58c6e0ba110fb0c2 https://github.com/llvm/llvm-project/commit/1f9eff100ce8faea1284d68b779d844c6e019b77 https://github.com/llvm/llvm-project/commit/7a5cba8bea8f774d48db1b0426bcc102edd2b69f --- README.md | 2 -- src/x86.rs | 47 ++--------------------------------------------- src/x86_64.rs | 44 -------------------------------------------- 3 files changed, 2 insertions(+), 91 deletions(-) diff --git a/README.md b/README.md index adcdcacba..ffef4e52c 100644 --- a/README.md +++ b/README.md @@ -163,7 +163,6 @@ features = ["c"] - [ ] i386/ashldi3.S - [ ] i386/ashrdi3.S - [x] i386/chkstk.S -- [x] i386/chkstk2.S - [ ] i386/divdi3.S - [ ] i386/lshrdi3.S - [ ] i386/moddi3.S @@ -192,7 +191,6 @@ features = ["c"] - [x] umoddi3.c - [x] umodsi3.c - [x] x86_64/chkstk.S -- [x] x86_64/chkstk2.S These builtins are needed to support 128-bit integers, which are in the process of being added to Rust. diff --git a/src/x86.rs b/src/x86.rs index fd1f32e3a..5016816eb 100644 --- a/src/x86.rs +++ b/src/x86.rs @@ -6,7 +6,6 @@ use core::intrinsics; // calling convention which can't be implemented using a normal Rust function // NOTE These functions are never mangled as they are not tested against compiler-rt -// and mangling ___chkstk would break the `jmp ___chkstk` instruction in __alloca intrinsics! { #[naked] @@ -15,50 +14,8 @@ intrinsics! 
{ target_env = "gnu", not(feature = "no-asm") ))] - pub unsafe extern "C" fn ___chkstk_ms() { - core::arch::asm!( - "push %ecx", - "push %eax", - "cmp $0x1000,%eax", - "lea 12(%esp),%ecx", - "jb 1f", - "2:", - "sub $0x1000,%ecx", - "test %ecx,(%ecx)", - "sub $0x1000,%eax", - "cmp $0x1000,%eax", - "ja 2b", - "1:", - "sub %eax,%ecx", - "test %ecx,(%ecx)", - "pop %eax", - "pop %ecx", - "ret", - options(noreturn, att_syntax) - ); - } - - // FIXME: __alloca should be an alias to __chkstk - #[naked] - #[cfg(all( - windows, - target_env = "gnu", - not(feature = "no-asm") - ))] - pub unsafe extern "C" fn __alloca() { - core::arch::asm!( - "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable" - options(noreturn, att_syntax) - ); - } - - #[naked] - #[cfg(all( - windows, - target_env = "gnu", - not(feature = "no-asm") - ))] - pub unsafe extern "C" fn ___chkstk() { + pub unsafe extern "C" fn _alloca() { + // _chkstk and _alloca are the same function core::arch::asm!( "push %ecx", "cmp $0x1000,%eax", diff --git a/src/x86_64.rs b/src/x86_64.rs index 7ad941158..8048f85c8 100644 --- a/src/x86_64.rs +++ b/src/x86_64.rs @@ -6,7 +6,6 @@ use core::intrinsics; // calling convention which can't be implemented using a normal Rust function // NOTE These functions are never mangled as they are not tested against compiler-rt -// and mangling ___chkstk would break the `jmp ___chkstk` instruction in __alloca intrinsics! { #[naked] @@ -36,49 +35,6 @@ intrinsics! 
{ options(noreturn, att_syntax) ); } - - #[naked] - #[cfg(all( - any(all(windows, target_env = "gnu"), target_os = "uefi"), - not(feature = "no-asm") - ))] - pub unsafe extern "C" fn __alloca() { - core::arch::asm!( - "mov %rcx,%rax", // x64 _alloca is a normal function with parameter in rcx - "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable" - options(noreturn, att_syntax) - ); - } - - #[naked] - #[cfg(all( - any(all(windows, target_env = "gnu"), target_os = "uefi"), - not(feature = "no-asm") - ))] - pub unsafe extern "C" fn ___chkstk() { - core::arch::asm!( - "push %rcx", - "cmp $0x1000,%rax", - "lea 16(%rsp),%rcx", // rsp before calling this routine -> rcx - "jb 1f", - "2:", - "sub $0x1000,%rcx", - "test %rcx,(%rcx)", - "sub $0x1000,%rax", - "cmp $0x1000,%rax", - "ja 2b", - "1:", - "sub %rax,%rcx", - "test %rcx,(%rcx)", - "lea 8(%rsp),%rax", // load pointer to the return address into rax - "mov %rcx,%rsp", // install the new top of stack pointer into rsp - "mov -8(%rax),%rcx", // restore rcx - "push (%rax)", // push return address onto the stack - "sub %rsp,%rax", // restore the original value in rax - "ret", - options(noreturn, att_syntax) - ); - } } // HACK(https://github.com/rust-lang/rust/issues/62785): x86_64-unknown-uefi needs special LLVM From 740aa7843bbd26e3e153a07bdc793035f96d5957 Mon Sep 17 00:00:00 2001 From: Jeff Thuong Date: Tue, 2 Apr 2024 11:35:08 +0800 Subject: [PATCH 0700/1459] Corrected English typos --- libm/src/math/ceil.rs | 2 +- libm/src/math/floor.rs | 2 +- libm/src/math/pow.rs | 2 +- libm/src/math/sqrt.rs | 2 +- libm/src/math/sqrtf.rs | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index 22d892971..cde5a19d0 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -20,7 +20,7 @@ pub fn ceil(x: f64) -> f64 { { //use an alternative implementation on x86, because the //main implementation fails with the x87 FPU used by - //debian i386, 
probablly due to excess precision issues. + //debian i386, probably due to excess precision issues. //basic implementation taken from https://github.com/rust-lang/libm/issues/219 use super::fabs; if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index d09f9a1a1..b7d1a04d2 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -20,7 +20,7 @@ pub fn floor(x: f64) -> f64 { { //use an alternative implementation on x86, because the //main implementation fails with the x87 FPU used by - //debian i386, probablly due to excess precision issues. + //debian i386, probably due to excess precision issues. //basic implementation taken from https://github.com/rust-lang/libm/issues/219 use super::fabs; if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 6a19ae601..09d12c185 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -16,7 +16,7 @@ // 1. Compute and return log2(x) in two pieces: // log2(x) = w1 + w2, // where w1 has 53-24 = 29 bit trailing zeros. -// 2. Perform y*log2(x) = n+y' by simulating muti-precision +// 2. Perform y*log2(x) = n+y' by simulating multi-precision // arithmetic, where |y'|<=0.5. // 3. Return x**y = 2**n*exp(y'*log2) // diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 3733ba040..baa0db9f8 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -96,7 +96,7 @@ pub fn sqrt(x: f64) -> f64 { { // Note: This path is unlikely since LLVM will usually have already // optimized sqrt calls into hardware instructions if sse2 is available, - // but if someone does end up here they'll apprected the speed increase. + // but if someone does end up here they'll appreciate the speed increase. 
#[cfg(target_arch = "x86")] use core::arch::x86::*; #[cfg(target_arch = "x86_64")] diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index 8ec72fbf7..12bd60028 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -31,7 +31,7 @@ pub fn sqrtf(x: f32) -> f32 { { // Note: This path is unlikely since LLVM will usually have already // optimized sqrt calls into hardware instructions if sse is available, - // but if someone does end up here they'll apprected the speed increase. + // but if someone does end up here they'll appreciate the speed increase. #[cfg(target_arch = "x86")] use core::arch::x86::*; #[cfg(target_arch = "x86_64")] From a285b0a8c31c52c964c4880fab6943deb1a5fdba Mon Sep 17 00:00:00 2001 From: Jeremy Drake Date: Sun, 7 Apr 2024 09:32:17 -0700 Subject: [PATCH 0701/1459] Add __chkstk on i686-pc-windows-gnu. libLLVMSupport.a(DynamicLibrary.cpp.obj) references ___chkstk, which is an alias of __alloca in libgcc. This crate provided __alloca, but libgcc's implementation was also pulled in by the linker due to the reference to ___chkstk, causing a multiple definition linker error. Providing that symbol here prevents that. Fixes #585 --- src/x86.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/x86.rs b/src/x86.rs index 5016816eb..c348d082d 100644 --- a/src/x86.rs +++ b/src/x86.rs @@ -8,6 +8,19 @@ use core::intrinsics; // NOTE These functions are never mangled as they are not tested against compiler-rt intrinsics! { + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn __chkstk() { + core::arch::asm!( + "jmp __alloca", // Jump to __alloca since fallthrough may be unreliable" + options(noreturn, att_syntax) + ); + } + #[naked] #[cfg(all( windows, @@ -15,7 +28,7 @@ intrinsics! 
{ not(feature = "no-asm") ))] pub unsafe extern "C" fn _alloca() { - // _chkstk and _alloca are the same function + // __chkstk and _alloca are the same function core::arch::asm!( "push %ecx", "cmp $0x1000,%eax", From b936822e4e17e63d586a291980b9d0ddf2fe1ae2 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 6 Apr 2024 21:56:02 +0200 Subject: [PATCH 0702/1459] add lib.miri.rs file for miri-test-libstd --- src/lib.miri.rs | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 src/lib.miri.rs diff --git a/src/lib.miri.rs b/src/lib.miri.rs new file mode 100644 index 000000000..17288058e --- /dev/null +++ b/src/lib.miri.rs @@ -0,0 +1,5 @@ +//! Grep bootstrap for `MIRI_REPLACE_LIBRS_IF_NOT_TEST` to learn what this is about. +#![no_std] +#![feature(rustc_private)] +extern crate compiler_builtins as real; +pub use real::*; From 018616e78be0b6e213018c16b430d14ec1083cbb Mon Sep 17 00:00:00 2001 From: Amjad Alsharafi <26300843+Amjad50@users.noreply.github.com> Date: Mon, 19 Feb 2024 16:41:17 +0800 Subject: [PATCH 0703/1459] Always have math functions but with `weak` linking attribute if we can This is a replacement for https://github.com/rust-lang/libm/pull/290 This fixes crashes during compilations for targets that don't have math symbols by default. So, we will provide them libm symbols, but mark it as `weak` (if it's supported), so that the linker will choose the system builtin functions, since those are sometimes more optimized. If the linker couldn't find those, it will go with `libm` implementation. --- src/math.rs | 84 +++++++++++------------------------------------------ 1 file changed, 17 insertions(+), 67 deletions(-) diff --git a/src/math.rs b/src/math.rs index 63d858333..b04b0b66a 100644 --- a/src/math.rs +++ b/src/math.rs @@ -7,6 +7,7 @@ macro_rules! no_mangle { ($(fn $fun:ident($($iid:ident : $ity:ty),+) -> $oty:ty;)+) => { intrinsics! 
{ $( + #[cfg_attr(all(not(windows), not(target_vendor = "apple")), weak)] pub extern "C" fn $fun($($iid: $ity),+) -> $oty { self::libm::$fun($($iid),+) } @@ -15,17 +16,6 @@ macro_rules! no_mangle { } } -#[cfg(any( - all( - target_family = "wasm", - target_os = "unknown", - not(target_env = "wasi") - ), - target_os = "xous", - target_os = "uefi", - all(target_arch = "xtensa", target_os = "none"), - all(target_vendor = "fortanix", target_env = "sgx") -))] no_mangle! { fn acos(x: f64) -> f64; fn asin(x: f64) -> f64; @@ -41,10 +31,6 @@ no_mangle! { fn log10f(x: f32) -> f32; fn log(x: f64) -> f64; fn logf(x: f32) -> f32; - fn fmin(x: f64, y: f64) -> f64; - fn fminf(x: f32, y: f32) -> f32; - fn fmax(x: f64, y: f64) -> f64; - fn fmaxf(x: f32, y: f32) -> f32; fn round(x: f64) -> f64; fn roundf(x: f32) -> f32; fn rint(x: f64) -> f64; @@ -52,8 +38,6 @@ no_mangle! { fn sin(x: f64) -> f64; fn pow(x: f64, y: f64) -> f64; fn powf(x: f32, y: f32) -> f32; - fn fmod(x: f64, y: f64) -> f64; - fn fmodf(x: f32, y: f32) -> f32; fn acosf(n: f32) -> f32; fn atan2f(a: f32, b: f32) -> f32; fn atanf(n: f32) -> f32; @@ -85,67 +69,17 @@ no_mangle! { fn cbrtf(n: f32) -> f32; fn hypotf(x: f32, y: f32) -> f32; fn tanf(n: f32) -> f32; -} - -#[cfg(any( - all( - target_family = "wasm", - target_os = "unknown", - not(target_env = "wasi") - ), - target_os = "xous", - target_os = "uefi", - all(target_arch = "xtensa", target_os = "none"), - all(target_vendor = "fortanix", target_env = "sgx"), - target_os = "windows" -))] -intrinsics! { - pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 { - let r = self::libm::lgamma_r(x); - *s = r.1; - r.0 - } - - pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 { - let r = self::libm::lgammaf_r(x); - *s = r.1; - r.0 - } -} -#[cfg(any( - target_os = "xous", - target_os = "uefi", - all(target_arch = "xtensa", target_os = "none"), -))] -no_mangle! 
{ fn sqrtf(x: f32) -> f32; fn sqrt(x: f64) -> f64; -} -#[cfg(any( - all(target_vendor = "fortanix", target_env = "sgx"), - all(target_arch = "xtensa", target_os = "none"), - target_os = "xous", - target_os = "uefi" -))] -no_mangle! { fn ceil(x: f64) -> f64; fn ceilf(x: f32) -> f32; fn floor(x: f64) -> f64; fn floorf(x: f32) -> f32; fn trunc(x: f64) -> f64; fn truncf(x: f32) -> f32; -} -// only for the thumb*-none-eabi*, riscv32*-none-elf, x86_64-unknown-none and mips*-unknown-none targets that lack the floating point instruction set -#[cfg(any( - all(target_arch = "arm", target_os = "none"), - all(target_arch = "riscv32", not(target_feature = "f"), target_os = "none"), - all(target_arch = "x86_64", target_os = "none"), - all(target_arch = "mips", target_os = "none"), -))] -no_mangle! { fn fmin(x: f64, y: f64) -> f64; fn fminf(x: f32, y: f32) -> f32; fn fmax(x: f64, y: f64) -> f64; @@ -155,3 +89,19 @@ no_mangle! { // `f32 % f32` fn fmodf(x: f32, y: f32) -> f32; } + +intrinsics! { + #[cfg_attr(all(not(windows), not(target_vendor = "apple")), weak)] + pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 { + let r = self::libm::lgamma_r(x); + *s = r.1; + r.0 + } + + #[cfg_attr(all(not(windows), not(target_vendor = "apple")), weak)] + pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 { + let r = self::libm::lgammaf_r(x); + *s = r.1; + r.0 + } +} From 4d105c9c0f4c045d16c9eb90cf29cee48b6add46 Mon Sep 17 00:00:00 2001 From: Amjad Alsharafi <26300843+Amjad50@users.noreply.github.com> Date: Sun, 31 Mar 2024 13:53:15 +0300 Subject: [PATCH 0704/1459] Do not include math intrinsics for windows and apple targets --- src/math.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/math.rs b/src/math.rs index b04b0b66a..593a1a19c 100644 --- a/src/math.rs +++ b/src/math.rs @@ -16,6 +16,7 @@ macro_rules! no_mangle { } } +#[cfg(all(not(windows), not(target_vendor = "apple")))] no_mangle! 
{ fn acos(x: f64) -> f64; fn asin(x: f64) -> f64; From 58e87536a69045f1c40ef67f4b58fe8f6ced30f2 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 10 Apr 2024 13:16:43 +0100 Subject: [PATCH 0705/1459] Release 0.1.109 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e94c2f762..267f1b950 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.108" +version = "0.1.109" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 800d97b415912b3fc6d679e69524c89d1602b262 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 13 Apr 2024 04:32:49 -0400 Subject: [PATCH 0706/1459] Change aarch64_linux module and lse tests to have the same gating Trying to run testcrate on non-linux aarch64 currently hits a compilation error. Make this test linux-only, to be consistent with the `aarch64_linux` module that it depends on. Additionally, enable the `aarch64_linux` module for `target_arch = "arm64ec"` to be the same as these tests. --- testcrate/tests/lse.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/testcrate/tests/lse.rs b/testcrate/tests/lse.rs index 63c98957c..cbecd6143 100644 --- a/testcrate/tests/lse.rs +++ b/testcrate/tests/lse.rs @@ -1,8 +1,5 @@ #![feature(decl_macro)] // so we can use pub(super) -#![cfg(all( - any(target_arch = "aarch64", target_arch = "arm64ec"), - not(feature = "no-asm") -))] +#![cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm")))] /// Translate a byte size to a Rust type. macro int_ty { From 1e1376eea69f568f4492b2330ca73747086f474a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 16 Apr 2024 03:04:37 -0400 Subject: [PATCH 0707/1459] Add CI testing for AArch64 Darwin The Apple ARM silicon has been around for a while now and hopefully will become Rust Tier 1 at some point. 
Add it to CI since it is distinct enough from aarch64-linux and x86_64-darwin that there may be differences. --- .github/workflows/main.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 97d298cf0..57497e050 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -9,6 +9,9 @@ jobs: fail-fast: false matrix: include: + - target: aarch64-apple-darwin + os: macos-14 + rust: nightly - target: aarch64-unknown-linux-gnu os: ubuntu-latest rust: nightly @@ -81,6 +84,8 @@ jobs: os: windows-latest rust: nightly-x86_64-gnu steps: + - name: Print runner information + run: uname -a - uses: actions/checkout@v4 with: submodules: true From d2c3a160d040669b73cb2c553b1a99b9373f2157 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 13 Apr 2024 00:42:36 -0400 Subject: [PATCH 0708/1459] Add a way to run tests on non-linux machines Allow using the `rust-lang/rust:nightly` docker image to run tests in cases where the host rust and cargo cannot be used, such as non-linux hosts. 
--- .gitignore | 2 + README.md | 20 +++++ build.rs | 4 +- .../aarch64-unknown-linux-gnu/Dockerfile | 3 +- .../arm-unknown-linux-gnueabi/Dockerfile | 3 +- .../arm-unknown-linux-gnueabihf/Dockerfile | 3 +- .../armv7-unknown-linux-gnueabihf/Dockerfile | 3 +- ci/docker/i586-unknown-linux-gnu/Dockerfile | 3 +- ci/docker/i686-unknown-linux-gnu/Dockerfile | 3 +- ci/docker/mips-unknown-linux-gnu/Dockerfile | 3 +- .../mips64-unknown-linux-gnuabi64/Dockerfile | 3 +- .../Dockerfile | 3 +- ci/docker/mipsel-unknown-linux-gnu/Dockerfile | 3 +- .../powerpc-unknown-linux-gnu/Dockerfile | 3 +- .../powerpc64-unknown-linux-gnu/Dockerfile | 3 +- .../powerpc64le-unknown-linux-gnu/Dockerfile | 3 +- ci/docker/thumbv6m-none-eabi/Dockerfile | 3 +- ci/docker/thumbv7em-none-eabi/Dockerfile | 3 +- ci/docker/thumbv7em-none-eabihf/Dockerfile | 3 +- ci/docker/thumbv7m-none-eabi/Dockerfile | 3 +- ci/docker/wasm32-unknown-unknown/Dockerfile | 3 +- ci/docker/x86_64-unknown-linux-gnu/Dockerfile | 3 +- ci/run-docker.sh | 73 +++++++++++---- ci/run.sh | 88 ++++++++++++------- 24 files changed, 176 insertions(+), 68 deletions(-) diff --git a/.gitignore b/.gitignore index b203ea61f..97df30ffa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.rs.bk Cargo.lock target +compiler-rt +*.tar.gz diff --git a/README.md b/README.md index ffef4e52c..5a364a23d 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,26 @@ features = ["c"] [8]: http://en.cppreference.com/w/cpp/language/implicit_conversion [9]: https://doc.rust-lang.org/std/primitive.i32.html +## Testing + +The easiest way to test locally is using Docker. This can be done by running +`./ci/run-docker.sh [target]`. If no target is specified, all targets will be +run. + +In order to run the full test suite, you will also need the C compiler runtime +to test against, located in a directory called `compiler-rt`. 
This can be +obtained with the following: + +```sh +curl -L -o rustc-llvm-18.0.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/18.0-2024-02-13.tar.gz +tar xzf rustc-llvm-18.0.tar.gz --strip-components 1 llvm-project-rustc-18.0-2024-02-13/compiler-rt +```` + +Local targets may also be tested with `./ci/run.sh [target]`. + +Note that testing may not work on all hosts, in which cases it is acceptable to +rely on CI. + ## Progress - [x] adddf3.c diff --git a/build.rs b/build.rs index 44946c124..bb2dba97a 100644 --- a/build.rs +++ b/build.rs @@ -572,7 +572,9 @@ mod c { // rust-lang/rust. let root = match env::var_os("RUST_COMPILER_RT_ROOT") { Some(s) => PathBuf::from(s), - None => panic!("RUST_COMPILER_RT_ROOT is not set"), + None => { + panic!("RUST_COMPILER_RT_ROOT is not set. You may need to download compiler-rt.") + } }; if !root.exists() { panic!("RUST_COMPILER_RT_ROOT={} does not exist", root.display()); diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index 9e2559f4a..5de76efc3 100644 --- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ diff --git a/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/ci/docker/arm-unknown-linux-gnueabi/Dockerfile index afab874bc..dc95da0f3 100644 --- a/ci/docker/arm-unknown-linux-gnueabi/Dockerfile +++ b/ci/docker/arm-unknown-linux-gnueabi/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ diff --git a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile index 3ed3602b0..55e5e3d57 100644 --- a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile +++ 
b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ diff --git a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile index 6617af155..fd2ad18d1 100644 --- a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +++ b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ diff --git a/ci/docker/i586-unknown-linux-gnu/Dockerfile b/ci/docker/i586-unknown-linux-gnu/Dockerfile index 5783e28e1..f161ec767 100644 --- a/ci/docker/i586-unknown-linux-gnu/Dockerfile +++ b/ci/docker/i586-unknown-linux-gnu/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc-multilib libc6-dev ca-certificates diff --git a/ci/docker/i686-unknown-linux-gnu/Dockerfile b/ci/docker/i686-unknown-linux-gnu/Dockerfile index 5783e28e1..f161ec767 100644 --- a/ci/docker/i686-unknown-linux-gnu/Dockerfile +++ b/ci/docker/i686-unknown-linux-gnu/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc-multilib libc6-dev ca-certificates diff --git a/ci/docker/mips-unknown-linux-gnu/Dockerfile b/ci/docker/mips-unknown-linux-gnu/Dockerfile index f47e8f522..042dd4219 100644 --- a/ci/docker/mips-unknown-linux-gnu/Dockerfile +++ b/ci/docker/mips-unknown-linux-gnu/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile index 
8fa77c7bd..45b3089c9 100644 --- a/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile +++ b/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates \ diff --git a/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile index c6611d9ac..bda6be1d6 100644 --- a/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile +++ b/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates \ diff --git a/ci/docker/mipsel-unknown-linux-gnu/Dockerfile b/ci/docker/mipsel-unknown-linux-gnu/Dockerfile index 0bc695624..702a26ec1 100644 --- a/ci/docker/mipsel-unknown-linux-gnu/Dockerfile +++ b/ci/docker/mipsel-unknown-linux-gnu/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc-unknown-linux-gnu/Dockerfile index 2d39fef61..6bae7cb3b 100644 --- a/ci/docker/powerpc-unknown-linux-gnu/Dockerfile +++ b/ci/docker/powerpc-unknown-linux-gnu/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile index 653cd3511..2c315e509 100644 --- a/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile 
b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile index 63ea9af9d..da8f9db60 100644 --- a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile +++ b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/thumbv6m-none-eabi/Dockerfile b/ci/docker/thumbv6m-none-eabi/Dockerfile index dc7dd431b..d7256a9c5 100644 --- a/ci/docker/thumbv6m-none-eabi/Dockerfile +++ b/ci/docker/thumbv6m-none-eabi/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ diff --git a/ci/docker/thumbv7em-none-eabi/Dockerfile b/ci/docker/thumbv7em-none-eabi/Dockerfile index dc7dd431b..d7256a9c5 100644 --- a/ci/docker/thumbv7em-none-eabi/Dockerfile +++ b/ci/docker/thumbv7em-none-eabi/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ diff --git a/ci/docker/thumbv7em-none-eabihf/Dockerfile b/ci/docker/thumbv7em-none-eabihf/Dockerfile index dc7dd431b..d7256a9c5 100644 --- a/ci/docker/thumbv7em-none-eabihf/Dockerfile +++ b/ci/docker/thumbv7em-none-eabihf/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ diff --git a/ci/docker/thumbv7m-none-eabi/Dockerfile b/ci/docker/thumbv7m-none-eabi/Dockerfile index dc7dd431b..d7256a9c5 100644 --- a/ci/docker/thumbv7m-none-eabi/Dockerfile +++ b/ci/docker/thumbv7m-none-eabi/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ diff --git a/ci/docker/wasm32-unknown-unknown/Dockerfile 
b/ci/docker/wasm32-unknown-unknown/Dockerfile index 85ead29aa..4d12b6ff4 100644 --- a/ci/docker/wasm32-unknown-unknown/Dockerfile +++ b/ci/docker/wasm32-unknown-unknown/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:20.04 +ARG IMAGE=ubuntu:20.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc clang libc6-dev ca-certificates diff --git a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index 98000f4eb..d495d5044 100644 --- a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +ARG IMAGE=ubuntu:18.04 +FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates diff --git a/ci/run-docker.sh b/ci/run-docker.sh index 8c4af0eff..819643039 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -1,38 +1,77 @@ +#!/bin/bash + # Small script to run tests for a target (or all targets) inside all the # respective docker images. -set -ex +set -eux run() { - local target=$1 + local target="$1" - echo $target + echo "TESTING TARGET: $target" # This directory needs to exist before calling docker, otherwise docker will create it but it # will be owned by root mkdir -p target - docker build -t $target ci/docker/$target + if [ $(uname -s) = "Linux" ] && [ -z "${DOCKER_BASE_IMAGE:-}" ]; then + # Share the host rustc and target. 
Do this only on Linux and if the image + # isn't overridden + run_args=( + --user "$(id -u):$(id -g)" + -e "CARGO_HOME=/cargo" + -v "${HOME}/.cargo:/cargo" + -v "$(pwd)/target:/builtins-target" + -v "$(rustc --print sysroot):/rust:ro" + ) + run_cmd="HOME=/tmp PATH=\$PATH:/rust/bin ci/run.sh $target" + else + # Use rustc provided by a docker image + docker volume create compiler-builtins-cache + build_args=( + "--build-arg" "IMAGE=${DOCKER_BASE_IMAGE:-rustlang/rust:nightly}" + ) + run_args=( + -v "compiler-builtins-cache:/builtins-target" + ) + run_cmd="HOME=/tmp USING_CONTAINER_RUSTC=1 ci/run.sh $target" + fi + + if [ -d compiler-rt ]; then + export RUST_COMPILER_RT_ROOT=./compiler-rt + fi + + docker build \ + -t "builtins-$target" \ + ${build_args[@]:-} \ + "ci/docker/$target" docker run \ --rm \ - --user $(id -u):$(id -g) \ - -e CARGO_HOME=/cargo \ - -e CARGO_TARGET_DIR=/target \ -e RUST_COMPILER_RT_ROOT \ - -v "${HOME}/.cargo":/cargo \ - -v `pwd`/target:/target \ - -v `pwd`:/checkout:ro \ - -v `rustc --print sysroot`:/rust:ro \ + -e "CARGO_TARGET_DIR=/builtins-target" \ + -v "$(pwd):/checkout:ro" \ -w /checkout \ + ${run_args[@]:-} \ --init \ - $target \ - sh -c "HOME=/tmp PATH=\$PATH:/rust/bin ci/run.sh $target" + "builtins-$target" \ + sh -c "$run_cmd" } -if [ -z "$1" ]; then - for d in `ls ci/docker/`; do - run $d +if [ "${1:-}" = "--help" ] || [ "$#" -gt 1 ]; then + set +x + echo "\ + usage: ./ci/run-docker.sh [target] + + you can also set DOCKER_BASE_IMAGE to use something other than the default + ubuntu:18.04 (or rustlang/rust:nightly). 
+ " + exit +fi + +if [ -z "${1:-}" ]; then + for d in ci/docker/*; do + run $(basename "$d") done else - run $1 + run "$1" fi diff --git a/ci/run.sh b/ci/run.sh index 09728191a..d2ea79731 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -1,10 +1,25 @@ -set -ex +set -eux + +target="${1:-}" + +if [ -z "${1:-}" ]; then + host_target=$(rustc -vV | awk '/^host/ { print $2 }') + echo "Defaulted to host target $host_target" + target="$host_target" +fi + +if [ "${USING_CONTAINER_RUSTC:-}" = 1 ]; then + # Install nonstandard components if we have control of the environment + rustup target list --installed | + grep -E "^$target\$" || + rustup target add "$target" +fi # Test our implementation -if [ "$NO_STD" = "1" ]; then - echo nothing to do +if [ "${NO_STD:-}" = "1" ]; then + echo "nothing to do for no_std" else - run="cargo test --manifest-path testcrate/Cargo.toml --target $1" + run="cargo test --manifest-path testcrate/Cargo.toml --target $target" $run $run --release $run --features c @@ -13,24 +28,24 @@ else $run --features no-asm --release fi -if [ -d /target ]; then - path=/target/${1}/debug/deps/libcompiler_builtins-*.rlib +if [ -d /builtins-target ]; then + path=/builtins-target/${target}/debug/deps/libcompiler_builtins-*.rlib else - path=target/${1}/debug/deps/libcompiler_builtins-*.rlib + path=target/${target}/debug/deps/libcompiler_builtins-*.rlib fi # Remove any existing artifacts from previous tests that don't set #![compiler_builtins] rm -f $path -cargo build --target $1 -cargo build --target $1 --release -cargo build --target $1 --features c -cargo build --target $1 --release --features c -cargo build --target $1 --features no-asm -cargo build --target $1 --release --features no-asm +cargo build --target "$target" +cargo build --target "$target" --release +cargo build --target "$target" --features c +cargo build --target "$target" --release --features c +cargo build --target "$target" --features no-asm +cargo build --target "$target" --release --features no-asm 
-PREFIX=$(echo $1 | sed -e 's/unknown-//')- -case $1 in +PREFIX=$(echo "$target" | sed -e 's/unknown-//')- +case "$target" in armv7-*) PREFIX=arm-linux-gnueabihf- ;; @@ -44,12 +59,12 @@ esac NM=$(find $(rustc --print sysroot) \( -name llvm-nm -o -name llvm-nm.exe \) ) if [ "$NM" = "" ]; then - NM=${PREFIX}nm + NM="${PREFIX}nm" fi # i686-pc-windows-gnu tools have a dependency on some DLLs, so run it with # rustup run to ensure that those are in PATH. -TOOLCHAIN=$(rustup show active-toolchain | sed 's/ (default)//') -if [[ $TOOLCHAIN == *i686-pc-windows-gnu ]]; then +TOOLCHAIN="$(rustup show active-toolchain | sed 's/ (default)//')" +if [[ "$TOOLCHAIN" == *i686-pc-windows-gnu ]]; then NM="rustup run $TOOLCHAIN $NM" fi @@ -57,7 +72,7 @@ fi for rlib in $(echo $path); do set +x echo "================================================================" - echo checking $rlib for duplicate symbols + echo "checking $rlib for duplicate symbols" echo "================================================================" stdout=$($NM -g --defined-only $rlib 2>&1) @@ -80,7 +95,7 @@ done rm -f $path # Verify that we haven't drop any intrinsic/symbol -build_intrinsics="cargo build --target $1 -v --example intrinsics" +build_intrinsics="cargo build --target "$target" -v --example intrinsics" $build_intrinsics $build_intrinsics --release $build_intrinsics --features c @@ -89,25 +104,36 @@ $build_intrinsics --features c --release # Verify that there are no undefined symbols to `panic` within our # implementations CARGO_PROFILE_DEV_LTO=true \ - cargo build --target $1 --example intrinsics + cargo build --target "$target" --example intrinsics CARGO_PROFILE_RELEASE_LTO=true \ - cargo build --target $1 --example intrinsics --release + cargo build --target "$target" --example intrinsics --release # Ensure no references to any symbols from core for rlib in $(echo $path); do - set +ex + set +x echo "================================================================" - echo checking $rlib for 
references to core + echo "checking $rlib for references to core" echo "================================================================" - - $NM --quiet -U $rlib | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > defined_symbols.txt - $NM --quiet -u $rlib | grep 'U _ZN4core' | awk '{print $2}' | sort | uniq > undefined_symbols.txt - grep -v -F -x -f defined_symbols.txt undefined_symbols.txt - - if test $? = 0; then + set -x + + tmpdir="${CARGO_TARGET_DIR:-target}/tmp" + test -d "$tmpdir" || mkdir "$tmpdir" + defined="$tmpdir/defined_symbols.txt" + undefined="$tmpdir/defined_symbols.txt" + + $NM --quiet -U $rlib | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > "$defined" + $NM --quiet -u $rlib | grep 'U _ZN4core' | awk '{print $2}' | sort | uniq > "$undefined" + grep_failed=0 + grep -v -F -x -f "$defined" "$undefined" && grep_failed=1 + + if [ "$target" = "powerpc64-unknown-linux-gnu" ]; then + echo "FIXME: powerpc64 fails these tests" + elif [ "$grep_failed" != 0 ]; then + echo "error: found unexpected references to core" exit 1 + else + echo "success; no references to core found" fi - set -ex done true From 656928f9604f8c9575152d110e63b76f6c8837ae Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 15 Apr 2024 14:44:39 -0400 Subject: [PATCH 0709/1459] Apply some more fixes suggested by Shellcheck --- ci/run-docker.sh | 2 +- ci/run.sh | 64 ++++++++++++++++++++++++++---------------------- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/ci/run-docker.sh b/ci/run-docker.sh index 819643039..b85f64133 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -3,7 +3,7 @@ # Small script to run tests for a target (or all targets) inside all the # respective docker images. 
-set -eux +set -euxo pipefail run() { local target="$1" diff --git a/ci/run.sh b/ci/run.sh index d2ea79731..65fffec5f 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -1,3 +1,5 @@ +#!/bin/bash + set -eux target="${1:-}" @@ -29,13 +31,13 @@ else fi if [ -d /builtins-target ]; then - path=/builtins-target/${target}/debug/deps/libcompiler_builtins-*.rlib + rlib_paths=/builtins-target/"${target}"/debug/deps/libcompiler_builtins-*.rlib else - path=target/${target}/debug/deps/libcompiler_builtins-*.rlib + rlib_paths=target/"${target}"/debug/deps/libcompiler_builtins-*.rlib fi # Remove any existing artifacts from previous tests that don't set #![compiler_builtins] -rm -f $path +rm -f $rlib_paths cargo build --target "$target" cargo build --target "$target" --release @@ -44,7 +46,7 @@ cargo build --target "$target" --release --features c cargo build --target "$target" --features no-asm cargo build --target "$target" --release --features no-asm -PREFIX=$(echo "$target" | sed -e 's/unknown-//')- +PREFIX=${target//unknown-/}- case "$target" in armv7-*) PREFIX=arm-linux-gnueabihf- @@ -57,7 +59,7 @@ case "$target" in ;; esac -NM=$(find $(rustc --print sysroot) \( -name llvm-nm -o -name llvm-nm.exe \) ) +NM=$(find "$(rustc --print sysroot)" \( -name llvm-nm -o -name llvm-nm.exe \) ) if [ "$NM" = "" ]; then NM="${PREFIX}nm" fi @@ -69,37 +71,41 @@ if [[ "$TOOLCHAIN" == *i686-pc-windows-gnu ]]; then fi # Look out for duplicated symbols when we include the compiler-rt (C) implementation -for rlib in $(echo $path); do +for rlib in $rlib_paths; do set +x echo "================================================================" echo "checking $rlib for duplicate symbols" echo "================================================================" + + duplicates_found=0 - stdout=$($NM -g --defined-only $rlib 2>&1) # NOTE On i586, It's normal that the get_pc_thunk symbol appears several # times so ignore it - set +e - echo "$stdout" | \ - sort | \ - uniq -d | \ - grep -v __x86.get_pc_thunk | \ - grep 
'T __' - - if test $? = 0; then + $NM -g --defined-only "$rlib" 2>&1 | + sort | + uniq -d | + grep -v __x86.get_pc_thunk --quiet | + grep 'T __' && duplicates_found=1 + + if [ "$duplicates_found" != 0 ]; then + echo "error: found duplicate symbols" exit 1 + else + echo "success; no duplicate symbols found" fi - - set -ex done -rm -f $path +rm -f $rlib_paths + +build_intrinsics() { + cargo build --target "$target" -v --example intrinsics "$@" +} # Verify that we haven't drop any intrinsic/symbol -build_intrinsics="cargo build --target "$target" -v --example intrinsics" -$build_intrinsics -$build_intrinsics --release -$build_intrinsics --features c -$build_intrinsics --features c --release +build_intrinsics +build_intrinsics --release +build_intrinsics --features c +build_intrinsics --features c --release # Verify that there are no undefined symbols to `panic` within our # implementations @@ -109,7 +115,7 @@ CARGO_PROFILE_RELEASE_LTO=true \ cargo build --target "$target" --example intrinsics --release # Ensure no references to any symbols from core -for rlib in $(echo $path); do +for rlib in $(echo $rlib_paths); do set +x echo "================================================================" echo "checking $rlib for references to core" @@ -121,14 +127,14 @@ for rlib in $(echo $path); do defined="$tmpdir/defined_symbols.txt" undefined="$tmpdir/defined_symbols.txt" - $NM --quiet -U $rlib | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > "$defined" - $NM --quiet -u $rlib | grep 'U _ZN4core' | awk '{print $2}' | sort | uniq > "$undefined" - grep_failed=0 - grep -v -F -x -f "$defined" "$undefined" && grep_failed=1 + $NM --quiet -U "$rlib" | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > "$defined" + $NM --quiet -u "$rlib" | grep 'U _ZN4core' | awk '{print $2}' | sort | uniq > "$undefined" + grep_has_results=0 + grep -v -F -x -f "$defined" "$undefined" && grep_has_results=1 if [ "$target" = "powerpc64-unknown-linux-gnu" ]; then echo "FIXME: powerpc64 fails 
these tests" - elif [ "$grep_failed" != 0 ]; then + elif [ "$grep_has_results" != 0 ]; then echo "error: found unexpected references to core" exit 1 else From c2ac57f1fb03c695de086397407aa395adda9108 Mon Sep 17 00:00:00 2001 From: beetrees Date: Sun, 28 Apr 2024 17:41:35 +0100 Subject: [PATCH 0710/1459] Update status of 128-bit integers in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5a364a23d..0c97690ca 100644 --- a/README.md +++ b/README.md @@ -212,7 +212,7 @@ rely on CI. - [x] umodsi3.c - [x] x86_64/chkstk.S -These builtins are needed to support 128-bit integers, which are in the process of being added to Rust. +These builtins are needed to support 128-bit integers. - [x] ashlti3.c - [x] ashrti3.c From d850a492c2d4d45ec164398fd961fa4869edf41f Mon Sep 17 00:00:00 2001 From: Amjad Alsharafi <26300843+Amjad50@users.noreply.github.com> Date: Tue, 30 Apr 2024 12:14:55 +0800 Subject: [PATCH 0711/1459] Fix `clippy::deprecated_cfg_attr` on compiler_builtins --- libm/src/math/exp2.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/src/math/exp2.rs b/libm/src/math/exp2.rs index e0e385df2..dce2ab4df 100644 --- a/libm/src/math/exp2.rs +++ b/libm/src/math/exp2.rs @@ -28,7 +28,7 @@ use super::scalbn; const TBLSIZE: usize = 256; -#[cfg_attr(rustfmt, rustfmt_skip)] +#[rustfmt::skip] static TBL: [u64; TBLSIZE * 2] = [ // exp2(z + eps) eps 0x3fe6a09e667f3d5d, 0x3d39880000000000, From 3375e3aee6e7e608c311b20ba041b004f2b73d5d Mon Sep 17 00:00:00 2001 From: Amjad Alsharafi <26300843+Amjad50@users.noreply.github.com> Date: Mon, 15 Apr 2024 19:14:41 +0800 Subject: [PATCH 0712/1459] Fix math libraries not being linked on some platforms This is a continuation/fix of 018616e. In that commit, we made it add the math functions to all platforms (except apple-targets and windows), and use `weak` linking, so that it can be used if the system doesn't have those functions. 
I didn't notice that `mod math` was also behind another set of `cfg` attributes, so that gate is removed here as well. --- src/lib.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a414efde0..da438de77 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,14 +45,6 @@ mod macros; pub mod float; pub mod int; -#[cfg(any( - all(target_family = "wasm", target_os = "unknown"), - target_os = "uefi", - target_os = "none", - target_os = "xous", - all(target_vendor = "fortanix", target_env = "sgx"), - target_os = "windows" -))] pub mod math; pub mod mem; From e0e5bb437d9d1f1030ba5ec7ed2c4b53600c9657 Mon Sep 17 00:00:00 2001 From: Amjad Alsharafi <26300843+Amjad50@users.noreply.github.com> Date: Mon, 15 Apr 2024 19:37:48 +0800 Subject: [PATCH 0713/1459] Fix clippy warnings in `math` This solution is not pretty, but it is unclear why we still get a clippy warning from one of the files in `libm` even though we use `allow(clippy::all)`. --- src/lib.rs | 3 +++ src/math.rs | 2 ++ 2 files changed, 5 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index da438de77..e7975098c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,6 +45,9 @@ mod macros; pub mod float; pub mod int; +// For some reason, we still get clippy error `clippy::deprecated_cfg_attr` even though, we have +// used `allow(clippy::all)` in the file. So, we are disabling the clippy check for this file. 
+#[cfg(not(clippy))] pub mod math; pub mod mem; diff --git a/src/math.rs b/src/math.rs index 593a1a19c..e47b834e4 100644 --- a/src/math.rs +++ b/src/math.rs @@ -1,4 +1,6 @@ #[allow(dead_code)] +#[allow(unused_imports)] +#[allow(clippy::all)] #[path = "../libm/src/math/mod.rs"] mod libm; From dd8539525bd2dece941d014fb01698fa7d7042ad Mon Sep 17 00:00:00 2001 From: Amjad Alsharafi <26300843+Amjad50@users.noreply.github.com> Date: Tue, 30 Apr 2024 18:40:45 +0800 Subject: [PATCH 0714/1459] Updated `libm` to fix `clippy` warning --- libm | 2 +- src/lib.rs | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/libm b/libm index 721a5edc1..a1e8a5bf9 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 721a5edc1be6b0412e4b1704590aed76f9a55899 +Subproject commit a1e8a5bf95e99309760b764b2a332d0039d08350 diff --git a/src/lib.rs b/src/lib.rs index e7975098c..da438de77 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,9 +45,6 @@ mod macros; pub mod float; pub mod int; -// For some reason, we still get clippy error `clippy::deprecated_cfg_attr` even though, we have -// used `allow(clippy::all)` in the file. So, we are disabling the clippy check for this file. 
-#[cfg(not(clippy))] pub mod math; pub mod mem; From ae712d4b4f0dd3103686f39e998f9e3b7c6efe10 Mon Sep 17 00:00:00 2001 From: beetrees Date: Sun, 14 Apr 2024 08:05:18 +0100 Subject: [PATCH 0715/1459] Add builtins for `f16`/`f128` float conversions --- Cargo.toml | 4 ++ README.md | 61 +++++++++++--------- build.rs | 21 +++---- ci/run.sh | 4 ++ src/float/extend.rs | 34 +++++++++++ src/float/mod.rs | 19 +++++- src/float/trunc.rs | 55 ++++++++++++++++-- src/lib.rs | 2 + testcrate/Cargo.toml | 5 +- testcrate/tests/conv.rs | 125 ++++++++++++++++++++++++++++++++++++++++ testcrate/tests/misc.rs | 68 ---------------------- 11 files changed, 283 insertions(+), 115 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 267f1b950..96e85d7b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,6 +49,10 @@ c = ["cc"] # which use inline assembly and fall back to pure Rust versions (if avalible). no-asm = [] +# Workaround for codegen backends which haven't yet implemented `f16` and +# `f128` support. Disabled any intrinsics which use those types. +no-f16-f128 = [] + # Flag this library as the unstable compiler-builtins lib compiler-builtins = [] diff --git a/README.md b/README.md index 0c97690ca..00d547f1b 100644 --- a/README.md +++ b/README.md @@ -162,7 +162,6 @@ rely on CI. - [x] divmodsi4.c - [x] divsf3.c - [x] divsi3.c -- [ ] extendhfsf2.c - [x] extendsfdf2.c - [x] fixdfdi.c - [x] fixdfsi.c @@ -201,9 +200,7 @@ rely on CI. - [x] powisf2.c - [x] subdf3.c - [x] subsf3.c -- [ ] truncdfhf2.c - [x] truncdfsf2.c -- [ ] truncsfhf2.c - [x] udivdi3.c - [x] udivmoddi4.c - [x] udivmodsi4.c @@ -233,60 +230,68 @@ These builtins are needed to support 128-bit integers. - [x] udivti3.c - [x] umodti3.c +These builtins are needed to support `f16` and `f128`, which are in the process of being added to Rust. 
+ +- [ ] addtf3.c +- [ ] comparetf2.c +- [ ] divtf3.c +- [x] extenddftf2.c +- [x] extendhfsf2.c +- [x] extendhftf2.c +- [x] extendsftf2.c +- [ ] fixtfdi.c +- [ ] fixtfsi.c +- [ ] fixtfti.c +- [ ] fixunstfdi.c +- [ ] fixunstfsi.c +- [ ] fixunstfti.c +- [ ] floatditf.c +- [ ] floatsitf.c +- [ ] floatunditf.c +- [ ] floatunsitf.c +- [ ] multf3.c +- [ ] powitf2.c +- [ ] ppc/fixtfdi.c +- [ ] ppc/fixunstfdi.c +- [ ] ppc/floatditf.c +- [ ] ppc/floatunditf.c +- [ ] subtf3.c +- [x] truncdfhf2.c +- [x] truncsfhf2.c +- [x] trunctfdf2.c +- [x] trunctfhf2.c +- [x] trunctfsf2.c + ## Unimplemented functions -These builtins involve floating-point types ("`f128`", "`f80`" and complex numbers) that are not supported by Rust. +These builtins involve floating-point types ("`f80`" and complex numbers) that are not supported by Rust. -- ~~addtf3.c~~ -- ~~comparetf2.c~~ - ~~divdc3.c~~ - ~~divsc3.c~~ - ~~divtc3.c~~ -- ~~divtf3.c~~ - ~~divxc3.c~~ -- ~~extenddftf2.c~~ -- ~~extendsftf2.c~~ -- ~~fixtfdi.c~~ -- ~~fixtfsi.c~~ -- ~~fixtfti.c~~ -- ~~fixunstfdi.c~~ -- ~~fixunstfsi.c~~ -- ~~fixunstfti.c~~ - ~~fixunsxfdi.c~~ - ~~fixunsxfsi.c~~ - ~~fixunsxfti.c~~ - ~~fixxfdi.c~~ - ~~fixxfti.c~~ -- ~~floatditf.c~~ - ~~floatdixf.c~~ -- ~~floatsitf.c~~ - ~~floattixf.c~~ -- ~~floatunditf.c~~ - ~~floatundixf.c~~ -- ~~floatunsitf.c~~ - ~~floatuntixf.c~~ - ~~i386/floatdixf.S~~ - ~~i386/floatundixf.S~~ - ~~muldc3.c~~ - ~~mulsc3.c~~ - ~~multc3.c~~ -- ~~multf3.c~~ - ~~mulxc3.c~~ -- ~~powitf2.c~~ - ~~powixf2.c~~ - ~~ppc/divtc3.c~~ -- ~~ppc/fixtfdi.c~~ -- ~~ppc/fixunstfdi.c~~ -- ~~ppc/floatditf.c~~ -- ~~ppc/floatunditf.c~~ - ~~ppc/gcc_qadd.c~~ - ~~ppc/gcc_qdiv.c~~ - ~~ppc/gcc_qmul.c~~ - ~~ppc/gcc_qsub.c~~ - ~~ppc/multc3.c~~ -- ~~subtf3.c~~ -- ~~trunctfdf2.c~~ -- ~~trunctfsf2.c~~ - ~~x86_64/floatdixf.c~~ - ~~x86_64/floatundixf.S~~ diff --git a/build.rs b/build.rs index bb2dba97a..bafbf75d0 100644 --- a/build.rs +++ b/build.rs @@ -217,6 +217,14 @@ mod c { } } + // `compiler-rt` requires `COMPILER_RT_HAS_FLOAT16` 
to be defined to make it use the + // `_Float16` type for `f16` intrinsics. This shouldn't matter as all existing `f16` + // intrinsics have been ported to Rust in `compiler-builtins` as C compilers don't + // support `_Float16` on all targets (whereas Rust does). However, define the macro + // anyway to prevent issues like rust#118813 and rust#123885 silently reoccuring if more + // `f16` intrinsics get accidentally added here in the future. + cfg.define("COMPILER_RT_HAS_FLOAT16", None); + cfg.warnings(false); if target_env == "msvc" { @@ -288,13 +296,10 @@ mod c { sources.extend(&[ ("__divdc3", "divdc3.c"), ("__divsc3", "divsc3.c"), - ("__extendhfsf2", "extendhfsf2.c"), ("__muldc3", "muldc3.c"), ("__mulsc3", "mulsc3.c"), ("__negdf2", "negdf2.c"), ("__negsf2", "negsf2.c"), - ("__truncdfhf2", "truncdfhf2.c"), - ("__truncsfhf2", "truncsfhf2.c"), ]); } @@ -464,8 +469,6 @@ mod c { if (target_arch == "aarch64" || target_arch == "arm64ec") && consider_float_intrinsics { sources.extend(&[ ("__comparetf2", "comparetf2.c"), - ("__extenddftf2", "extenddftf2.c"), - ("__extendsftf2", "extendsftf2.c"), ("__fixtfdi", "fixtfdi.c"), ("__fixtfsi", "fixtfsi.c"), ("__fixtfti", "fixtfti.c"), @@ -476,8 +479,6 @@ mod c { ("__floatsitf", "floatsitf.c"), ("__floatunditf", "floatunditf.c"), ("__floatunsitf", "floatunsitf.c"), - ("__trunctfdf2", "trunctfdf2.c"), - ("__trunctfsf2", "trunctfsf2.c"), ("__addtf3", "addtf3.c"), ("__multf3", "multf3.c"), ("__subtf3", "subtf3.c"), @@ -498,7 +499,6 @@ mod c { if target_arch == "mips64" { sources.extend(&[ - ("__extenddftf2", "extenddftf2.c"), ("__netf2", "comparetf2.c"), ("__addtf3", "addtf3.c"), ("__multf3", "multf3.c"), @@ -509,14 +509,11 @@ mod c { ("__floatunsitf", "floatunsitf.c"), ("__fe_getround", "fp_mode.c"), ("__divtf3", "divtf3.c"), - ("__trunctfdf2", "trunctfdf2.c"), - ("__trunctfsf2", "trunctfsf2.c"), ]); } if target_arch == "loongarch64" { sources.extend(&[ - ("__extenddftf2", "extenddftf2.c"), ("__netf2", "comparetf2.c"), 
("__addtf3", "addtf3.c"), ("__multf3", "multf3.c"), @@ -527,8 +524,6 @@ mod c { ("__floatunsitf", "floatunsitf.c"), ("__fe_getround", "fp_mode.c"), ("__divtf3", "divtf3.c"), - ("__trunctfdf2", "trunctfdf2.c"), - ("__trunctfsf2", "trunctfsf2.c"), ]); } diff --git a/ci/run.sh b/ci/run.sh index 65fffec5f..1298093a6 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -28,6 +28,8 @@ else $run --features c --release $run --features no-asm $run --features no-asm --release + $run --features no-f16-f128 + $run --features no-f16-f128 --release fi if [ -d /builtins-target ]; then @@ -45,6 +47,8 @@ cargo build --target "$target" --features c cargo build --target "$target" --release --features c cargo build --target "$target" --features no-asm cargo build --target "$target" --release --features no-asm +cargo build --target "$target" --features no-f16-f128 +cargo build --target "$target" --release --features no-f16-f128 PREFIX=${target//unknown-/}- case "$target" in diff --git a/src/float/extend.rs b/src/float/extend.rs index 0e6673b9c..7c2446603 100644 --- a/src/float/extend.rs +++ b/src/float/extend.rs @@ -82,3 +82,37 @@ intrinsics! { a as f64 // LLVM generate 'fcvtds' } } + +#[cfg(not(feature = "no-f16-f128"))] +intrinsics! { + #[avr_skip] + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_h2f] + pub extern "C" fn __extendhfsf2(a: f16) -> f32 { + extend(a) + } + + #[avr_skip] + #[aapcs_on_arm] + pub extern "C" fn __gnu_h2f_ieee(a: f16) -> f32 { + extend(a) + } + + #[avr_skip] + #[aapcs_on_arm] + pub extern "C" fn __extendhftf2(a: f16) -> f128 { + extend(a) + } + + #[avr_skip] + #[aapcs_on_arm] + pub extern "C" fn __extendsftf2(a: f32) -> f128 { + extend(a) + } + + #[avr_skip] + #[aapcs_on_arm] + pub extern "C" fn __extenddftf2(a: f64) -> f128 { + extend(a) + } +} diff --git a/src/float/mod.rs b/src/float/mod.rs index fdbe9dde3..a82dd7d2a 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -127,7 +127,20 @@ macro_rules! 
float_impl { self.to_bits() as Self::SignedInt } fn eq_repr(self, rhs: Self) -> bool { - if self.is_nan() && rhs.is_nan() { + #[cfg(feature = "mangled-names")] + fn is_nan(x: $ty) -> bool { + // When using mangled-names, the "real" compiler-builtins might not have the + // necessary builtin (__unordtf2) to test whether `f128` is NaN. + // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin + // x is NaN if all the bits of the exponent are set and the significand is non-0 + x.repr() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK + && x.repr() & $ty::SIGNIFICAND_MASK != 0 + } + #[cfg(not(feature = "mangled-names"))] + fn is_nan(x: $ty) -> bool { + x.is_nan() + } + if is_nan(self) && is_nan(rhs) { true } else { self.repr() == rhs.repr() @@ -171,5 +184,9 @@ macro_rules! float_impl { }; } +#[cfg(not(feature = "no-f16-f128"))] +float_impl!(f16, u16, i16, i8, 16, 10); float_impl!(f32, u32, i32, i16, 32, 23); float_impl!(f64, u64, i64, i16, 64, 52); +#[cfg(not(feature = "no-f16-f128"))] +float_impl!(f128, u128, i128, i16, 128, 112); diff --git a/src/float/trunc.rs b/src/float/trunc.rs index 0beeb9f98..6de446c10 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -52,8 +52,10 @@ where // destination format. We can convert by simply right-shifting with // rounding and adjusting the exponent. abs_result = (a_abs >> sign_bits_delta).cast(); - let tmp = src_exp_bias.wrapping_sub(dst_exp_bias) << R::SIGNIFICAND_BITS; - abs_result = abs_result.wrapping_sub(tmp.cast()); + // Cast before shifting to prevent overflow. + let bias_diff: R::Int = src_exp_bias.wrapping_sub(dst_exp_bias).cast(); + let tmp = bias_diff << R::SIGNIFICAND_BITS; + abs_result = abs_result.wrapping_sub(tmp); let round_bits = a_abs & round_mask; if round_bits > halfway { @@ -67,13 +69,17 @@ where // a is NaN. // Conjure the result by beginning with infinity, setting the qNaN // bit and inserting the (truncated) trailing NaN field. 
- abs_result = (dst_inf_exp << R::SIGNIFICAND_BITS).cast(); + // Cast before shifting to prevent overflow. + let dst_inf_exp: R::Int = dst_inf_exp.cast(); + abs_result = dst_inf_exp << R::SIGNIFICAND_BITS; abs_result |= dst_qnan; abs_result |= dst_nan_code & ((a_abs & src_nan_code) >> (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)).cast(); } else if a_abs >= overflow { // a overflows to infinity. - abs_result = (dst_inf_exp << R::SIGNIFICAND_BITS).cast(); + // Cast before shifting to prevent overflow. + let dst_inf_exp: R::Int = dst_inf_exp.cast(); + abs_result = dst_inf_exp << R::SIGNIFICAND_BITS; } else { // a underflows on conversion to the destination type or is an exact // zero. The result may be a denormal or zero. Extract the exponent @@ -124,3 +130,44 @@ intrinsics! { a as f32 } } + +#[cfg(not(feature = "no-f16-f128"))] +intrinsics! { + #[avr_skip] + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_f2h] + pub extern "C" fn __truncsfhf2(a: f32) -> f16 { + trunc(a) + } + + #[avr_skip] + #[aapcs_on_arm] + pub extern "C" fn __gnu_f2h_ieee(a: f32) -> f16 { + trunc(a) + } + + #[avr_skip] + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_d2h] + pub extern "C" fn __truncdfhf2(a: f64) -> f16 { + trunc(a) + } + + #[avr_skip] + #[aapcs_on_arm] + pub extern "C" fn __trunctfhf2(a: f128) -> f16 { + trunc(a) + } + + #[avr_skip] + #[aapcs_on_arm] + pub extern "C" fn __trunctfsf2(a: f128) -> f32 { + trunc(a) + } + + #[avr_skip] + #[aapcs_on_arm] + pub extern "C" fn __trunctfdf2(a: f128) -> f64 { + trunc(a) + } +} diff --git a/src/lib.rs b/src/lib.rs index da438de77..40564178a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,8 @@ #![feature(naked_functions)] #![feature(repr_simd)] #![feature(c_unwind)] +#![cfg_attr(not(feature = "no-f16-f128"), feature(f16))] +#![cfg_attr(not(feature = "no-f16-f128"), feature(f128))] #![no_builtins] #![no_std] #![allow(unused_features)] diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index 762d3293b..6ff3fde17 100644 --- 
a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -2,7 +2,7 @@ name = "testcrate" version = "0.1.0" authors = ["Alex Crichton "] -edition = "2018" +edition = "2021" [lib] test = false @@ -13,6 +13,8 @@ doctest = false # problems with system RNGs on the variety of platforms this crate is tested on. # `xoshiro128**` is used for its quality, size, and speed at generating `u32` shift amounts. rand_xoshiro = "0.6" +# To compare float builtins against +rustc_apfloat = "0.2.0" [dependencies.compiler_builtins] path = ".." @@ -28,5 +30,6 @@ utest-macros = { git = "https://github.com/japaric/utest" } default = ["mangled-names"] c = ["compiler_builtins/c"] no-asm = ["compiler_builtins/no-asm"] +no-f16-f128 = ["compiler_builtins/no-f16-f128"] mem = ["compiler_builtins/mem"] mangled-names = ["compiler_builtins/mangled-names"] diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 2a70db178..84828dbfa 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -1,3 +1,13 @@ +#![cfg_attr(not(feature = "no-f16-f128"), feature(f16))] +#![cfg_attr(not(feature = "no-f16-f128"), feature(f128))] +// makes configuration easier +#![allow(unused_macros)] + +use compiler_builtins::float::Float; +use rustc_apfloat::ieee::{Double, Single}; +#[cfg(not(feature = "no-f16-f128"))] +use rustc_apfloat::ieee::{Half, Quad}; +use rustc_apfloat::{Float as _, FloatConvert as _}; use testcrate::*; macro_rules! i_to_f { @@ -130,3 +140,118 @@ fn float_to_int() { ); }); } + +macro_rules! conv { + ($fX:ident, $fD:ident, $fn:ident, $apfloatX:ident, $apfloatD:ident) => { + fuzz_float(N, |x: $fX| { + let tmp0: $apfloatD = $apfloatX::from_bits(x.to_bits().into()) + .convert(&mut false) + .value; + let tmp0 = $fD::from_bits(tmp0.to_bits().try_into().unwrap()); + let tmp1: $fD = $fn(x); + if !Float::eq_repr(tmp0, tmp1) { + panic!( + "{}({x:?}): apfloat: {tmp0:?}, builtins: {tmp1:?}", + stringify!($fn) + ); + } + }); + }; +} + +macro_rules! 
extend { + ($fX:ident, $fD:ident, $fn:ident) => { + fuzz_float(N, |x: $fX| { + let tmp0 = x as $fD; + let tmp1: $fD = $fn(x); + if !Float::eq_repr(tmp0, tmp1) { + panic!( + "{}({}): std: {}, builtins: {}", + stringify!($fn), + x, + tmp0, + tmp1 + ); + } + }); + }; +} + +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[test] +fn float_extend() { + use compiler_builtins::float::extend::__extendsfdf2; + + extend!(f32, f64, __extendsfdf2); + conv!(f32, f64, __extendsfdf2, Single, Double); + #[cfg(not(feature = "no-f16-f128"))] + { + use compiler_builtins::float::extend::{ + __extenddftf2, __extendhfsf2, __extendhftf2, __extendsftf2, __gnu_h2f_ieee, + }; + // FIXME(f16_f128): Also do extend!() for `f16` and `f128` when builtins are in nightly + conv!(f16, f32, __extendhfsf2, Half, Single); + conv!(f16, f32, __gnu_h2f_ieee, Half, Single); + conv!(f16, f128, __extendhftf2, Half, Quad); + conv!(f32, f128, __extendsftf2, Single, Quad); + conv!(f64, f128, __extenddftf2, Double, Quad); + } +} + +#[cfg(target_arch = "arm")] +#[test] +fn float_extend_arm() { + use compiler_builtins::float::extend::__extendsfdf2vfp; + + extend!(f32, f64, __extendsfdf2vfp); + conv!(f32, f64, __extendsfdf2vfp, Single, Double); +} + +macro_rules! 
trunc { + ($fX:ident, $fD:ident, $fn:ident) => { + fuzz_float(N, |x: $fX| { + let tmp0 = x as $fD; + let tmp1: $fD = $fn(x); + if !Float::eq_repr(tmp0, tmp1) { + panic!( + "{}({}): std: {}, builtins: {}", + stringify!($fn), + x, + tmp0, + tmp1 + ); + } + }); + }; +} + +#[test] +fn float_trunc() { + use compiler_builtins::float::trunc::__truncdfsf2; + + trunc!(f64, f32, __truncdfsf2); + conv!(f64, f32, __truncdfsf2, Double, Single); + #[cfg(not(feature = "no-f16-f128"))] + { + use compiler_builtins::float::trunc::{ + __gnu_f2h_ieee, __truncdfhf2, __truncsfhf2, __trunctfdf2, __trunctfhf2, __trunctfsf2, + }; + // FIXME(f16_f128): Also do trunc!() for `f16` and `f128` when builtins are in nightly + conv!(f32, f16, __truncsfhf2, Single, Half); + conv!(f32, f16, __gnu_f2h_ieee, Single, Half); + conv!(f64, f16, __truncdfhf2, Double, Half); + conv!(f128, f16, __trunctfhf2, Quad, Half); + conv!(f128, f32, __trunctfsf2, Quad, Single); + conv!(f128, f64, __trunctfdf2, Quad, Double); + } +} + +#[cfg(target_arch = "arm")] +#[test] +fn float_trunc_arm() { + use compiler_builtins::float::trunc::__truncdfsf2vfp; + + trunc!(f64, f32, __truncdfsf2vfp); + conv!(f64, f32, __truncdfsf2vfp, Double, Single) +} diff --git a/testcrate/tests/misc.rs b/testcrate/tests/misc.rs index 537ba1e60..402d202a8 100644 --- a/testcrate/tests/misc.rs +++ b/testcrate/tests/misc.rs @@ -93,41 +93,6 @@ fn leading_zeros() { }) } -macro_rules! 
extend { - ($fX:ident, $fD:ident, $fn:ident) => { - fuzz_float(N, |x: $fX| { - let tmp0 = x as $fD; - let tmp1: $fD = $fn(x); - if !Float::eq_repr(tmp0, tmp1) { - panic!( - "{}({}): std: {}, builtins: {}", - stringify!($fn), - x, - tmp0, - tmp1 - ); - } - }); - }; -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[test] -fn float_extend() { - use compiler_builtins::float::extend::__extendsfdf2; - - extend!(f32, f64, __extendsfdf2); -} - -#[cfg(target_arch = "arm")] -#[test] -fn float_extend_arm() { - use compiler_builtins::float::extend::__extendsfdf2vfp; - - extend!(f32, f64, __extendsfdf2vfp); -} - // This is approximate because of issues related to // https://github.com/rust-lang/rust/issues/73920. // TODO how do we resolve this indeterminacy? @@ -179,36 +144,3 @@ fn float_pow() { f64, 1e-12, __powidf2; ); } - -macro_rules! trunc { - ($fX:ident, $fD:ident, $fn:ident) => { - fuzz_float(N, |x: $fX| { - let tmp0 = x as $fD; - let tmp1: $fD = $fn(x); - if !Float::eq_repr(tmp0, tmp1) { - panic!( - "{}({}): std: {}, builtins: {}", - stringify!($fn), - x, - tmp0, - tmp1 - ); - } - }); - }; -} - -#[test] -fn float_trunc() { - use compiler_builtins::float::trunc::__truncdfsf2; - - trunc!(f64, f32, __truncdfsf2); -} - -#[cfg(target_arch = "arm")] -#[test] -fn float_trunc_arm() { - use compiler_builtins::float::trunc::__truncdfsf2vfp; - - trunc!(f64, f32, __truncdfsf2vfp); -} From 7f9c937d136751ebf9876858da62fbc5b92e610e Mon Sep 17 00:00:00 2001 From: Amjad Alsharafi <26300843+Amjad50@users.noreply.github.com> Date: Mon, 29 Apr 2024 22:05:30 +0800 Subject: [PATCH 0716/1459] Apply `weak` attributes to all intrinsics Removed the `weak-intrinsics` feature, so that all functions will have the `weak` linkage attribute. Also this fixed the bug in https://github.com/rust-lang/rust/issues/124042. Before this commit, generated code will be ```rust pub extern "C" fn (...) -> ... { // code... 
} pub mod { #[linkage = "weak"] #[no_mangle] pub extern "C" fn (...) -> ... { super::(...) } } ``` The issue is that there are 2 `weak` linkages; the first one is not required. While refactoring the `weak` attributes, this was fixed. --- Cargo.toml | 11 ------ src/arm.rs | 18 ---------- src/macros.rs | 93 ++++++++------------------------------------------ src/math.rs | 3 -- src/mem/mod.rs | 6 ---- 5 files changed, 14 insertions(+), 117 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 96e85d7b5..c8f164c94 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,17 +70,6 @@ rustc-dep-of-std = ['compiler-builtins', 'core'] # are not normally public but are required by the `testcrate` public-test-deps = [] -# Marks all intrinsics functions with weak linkage so that they can be -# replaced at link time by another implementation. This is particularly useful -# for mixed Rust/C++ binaries that want to use the C++ intrinsics, otherwise -# linking against the Rust stdlib will replace those from the compiler-rt -# library. -# -# Unlike the "c" feature, the intrinsics are still provided by the Rust -# implementations and each will be used unless a stronger symbol replaces -# it during linking. -weak-intrinsics = [] - [[example]] name = "intrinsics" required-features = ["compiler-builtins"] diff --git a/src/arm.rs b/src/arm.rs index cc67642e1..dcae22b73 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -20,7 +20,6 @@ macro_rules! bl { intrinsics! { // NOTE This function and the ones below are implemented using assembly because they are using a // custom calling convention which can't be implemented using a normal Rust function. - #[cfg_attr(all(not(windows), not(target_vendor="apple")), weak)] #[naked] #[cfg(not(target_env = "msvc"))] pub unsafe extern "C" fn __aeabi_uidivmod() { @@ -36,7 +35,6 @@ intrinsics! { ); } - #[cfg_attr(all(not(windows), not(target_vendor="apple")), weak)] #[naked] pub unsafe extern "C" fn __aeabi_uldivmod() { core::arch::asm!( @@ -53,7 +51,6 @@ intrinsics!
{ ); } - #[cfg_attr(all(not(windows), not(target_vendor="apple")), weak)] #[naked] pub unsafe extern "C" fn __aeabi_idivmod() { core::arch::asm!( @@ -67,7 +64,6 @@ intrinsics! { ); } - #[cfg_attr(all(not(windows), not(target_vendor="apple")), weak)] #[naked] pub unsafe extern "C" fn __aeabi_ldivmod() { core::arch::asm!( @@ -84,17 +80,13 @@ intrinsics! { ); } - // The following functions use weak linkage to allow users to override - // with custom implementation. // FIXME: The `*4` and `*8` variants should be defined as aliases. - #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) { crate::mem::memcpy(dest, src, n); } - #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) { // We are guaranteed 4-alignment, so accessing at u32 is okay. @@ -112,38 +104,32 @@ intrinsics! { __aeabi_memcpy(dest as *mut u8, src as *const u8, n); } - #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) { __aeabi_memcpy4(dest, src, n); } - #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) { crate::mem::memmove(dest, src, n); } - #[weak] #[cfg(not(any(target_os = "ios", target_env = "msvc")))] pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) { __aeabi_memmove(dest, src, n); } - #[weak] #[cfg(not(any(target_os = "ios", target_env = "msvc")))] pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) { __aeabi_memmove(dest, src, n); } - #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { // Note the different argument order crate::mem::memset(dest, c, n); } - #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) { 
let mut dest = dest as *mut u32; @@ -161,25 +147,21 @@ intrinsics! { __aeabi_memset(dest as *mut u8, n, byte as i32); } - #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) { __aeabi_memset4(dest, n, c); } - #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) { __aeabi_memset(dest, n, 0); } - #[weak] #[cfg(not(any(target_os = "ios", target_env = "msvc")))] pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) { __aeabi_memset4(dest, n, 0); } - #[weak] #[cfg(not(any(target_os = "ios", target_env = "msvc")))] pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { __aeabi_memset4(dest, n, 0); diff --git a/src/macros.rs b/src/macros.rs index 2aa9a742c..32c615f01 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -25,11 +25,12 @@ macro_rules! public_test_dep { /// platforms need and elsewhere in this library it just looks like normal Rust /// code. /// -/// When the weak-intrinsics feature is enabled, all intrinsics functions are -/// marked with #[linkage = "weak"] so that they can be replaced by another -/// implementation at link time. This is particularly useful for mixed Rust/C++ -/// binaries that want to use the C++ intrinsics, otherwise linking against the -/// Rust stdlib will replace those from the compiler-rt library. +/// All intrinsics functions are marked with #[linkage = "weak"] when +/// `not(windows) and not(target_vendor = "apple")`. +/// `weak` linkage attribute is used so that these functions can be replaced +/// by another implementation at link time. This is particularly useful for mixed +/// Rust/C++ binaries that want to use the C++ intrinsics, otherwise linking against +/// the Rust stdlib will replace those from the compiler-rt library. /// /// This macro is structured to be invoked with a bunch of functions that looks /// like: @@ -53,10 +54,6 @@ macro_rules! 
public_test_dep { /// /// A quick overview of attributes supported right now are: /// -/// * `weak` - indicates that the function should always be given weak linkage. -/// This attribute must come before other attributes, as the other attributes -/// will generate the final output function and need to have `weak` modify -/// them. /// * `maybe_use_optimized_c_shim` - indicates that the Rust implementation is /// ignored if an optimized C version was compiled. /// * `aapcs_on_arm` - forces the ABI of the function to be `"aapcs"` on ARM and @@ -128,67 +125,6 @@ macro_rules! intrinsics { intrinsics!($($rest)*); ); - // Explicit weak linkage gets dropped when weak-intrinsics is on since it - // will be added unconditionally to all intrinsics and would conflict - // otherwise. - ( - #[weak] - $(#[$($attr:tt)*])* - pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { - $($body:tt)* - } - - $($rest:tt)* - ) => ( - #[cfg(feature = "weak-intrinsics")] - intrinsics! { - $(#[$($attr)*])* - pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { - $($body)* - } - } - - #[cfg(not(feature = "weak-intrinsics"))] - intrinsics! { - $(#[$($attr)*])* - #[linkage = "weak"] - pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { - $($body)* - } - } - - intrinsics!($($rest)*); - ); - // Same as above but for unsafe. - ( - #[weak] - $(#[$($attr:tt)*])* - pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { - $($body:tt)* - } - - $($rest:tt)* - ) => ( - #[cfg(feature = "weak-intrinsics")] - intrinsics! { - $(#[$($attr)*])* - pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { - $($body)* - } - } - - #[cfg(not(feature = "weak-intrinsics"))] - intrinsics! { - $(#[$($attr)*])* - #[linkage = "weak"] - pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? 
{ - $($body)* - } - } - - intrinsics!($($rest)*); - ); - // Right now there's a bunch of architecture-optimized intrinsics in the // stock compiler-rt implementation. Not all of these have been ported over // to Rust yet so when the `c` feature of this crate is enabled we fall back @@ -211,7 +147,7 @@ macro_rules! intrinsics { $($rest:tt)* ) => ( #[cfg($name = "optimized-c")] - #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] + #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] pub $(unsafe $($empty)? )? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { extern $abi { fn $name($($argname: $ty),*) $(-> $ret)?; @@ -311,7 +247,6 @@ macro_rules! intrinsics { ) => ( #[cfg(all(any(windows, target_os = "uefi"), target_arch = "x86_64"))] $(#[$($attr)*])* - #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } @@ -319,7 +254,7 @@ macro_rules! intrinsics { #[cfg(all(any(windows, target_os = "uefi"), target_arch = "x86_64"))] pub mod $name { #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] + #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] pub extern $abi fn $name( $($argname: $ty),* ) -> $crate::macros::win64_128bit_abi_hack::U64x2 { @@ -360,7 +295,7 @@ macro_rules! intrinsics { #[cfg(target_arch = "arm")] pub mod $name { #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] + #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } @@ -369,7 +304,7 @@ macro_rules! 
intrinsics { #[cfg(target_arch = "arm")] pub mod $alias { #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - #[cfg_attr(any(all(not(windows), not(target_vendor="apple")), feature = "weak-intrinsics"), linkage = "weak")] + #[cfg_attr(all(not(windows), not(target_vendor="apple")), linkage = "weak")] pub extern "aapcs" fn $alias( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } @@ -405,7 +340,7 @@ macro_rules! intrinsics { pub mod $name { $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] + #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } @@ -429,7 +364,7 @@ macro_rules! intrinsics { #[naked] $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] + #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } @@ -495,7 +430,7 @@ macro_rules! intrinsics { pub mod $name { $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] + #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } @@ -521,7 +456,7 @@ macro_rules! intrinsics { pub mod $name { $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] + #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? 
{ super::$name($($argname),*) } diff --git a/src/math.rs b/src/math.rs index e47b834e4..7d4d17876 100644 --- a/src/math.rs +++ b/src/math.rs @@ -9,7 +9,6 @@ macro_rules! no_mangle { ($(fn $fun:ident($($iid:ident : $ity:ty),+) -> $oty:ty;)+) => { intrinsics! { $( - #[cfg_attr(all(not(windows), not(target_vendor = "apple")), weak)] pub extern "C" fn $fun($($iid: $ity),+) -> $oty { self::libm::$fun($($iid),+) } @@ -94,14 +93,12 @@ no_mangle! { } intrinsics! { - #[cfg_attr(all(not(windows), not(target_vendor = "apple")), weak)] pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 { let r = self::libm::lgamma_r(x); *s = r.1; r.0 } - #[cfg_attr(all(not(windows), not(target_vendor = "apple")), weak)] pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 { let r = self::libm::lgammaf_r(x); *s = r.1; diff --git a/src/mem/mod.rs b/src/mem/mod.rs index ccf191779..d0ff50158 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -20,14 +20,12 @@ use core::ops::{BitOr, Shl}; mod impls; intrinsics! { - #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { impls::copy_forward(dest, src, n); dest } - #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { let delta = (dest as usize).wrapping_sub(src as usize); @@ -41,26 +39,22 @@ intrinsics! 
{ dest } - #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] pub unsafe extern "C" fn memset(s: *mut u8, c: crate::mem::c_int, n: usize) -> *mut u8 { impls::set_bytes(s, c as u8, n); s } - #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { impls::compare_bytes(s1, s2, n) } - #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { memcmp(s1, s2, n) } - #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] pub unsafe extern "C" fn strlen(s: *const core::ffi::c_char) -> usize { impls::c_string_length(s) From 16c9ca927cb1767c2285e89b481ff0e97026c072 Mon Sep 17 00:00:00 2001 From: Amjad Alsharafi <26300843+Amjad50@users.noreply.github.com> Date: Thu, 2 May 2024 21:50:00 +0800 Subject: [PATCH 0717/1459] Remove unneeded `weak` for `optimized-c` function `weak` is only used with `no_mangle` --- src/macros.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/macros.rs b/src/macros.rs index 32c615f01..89cd64689 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -147,7 +147,6 @@ macro_rules! intrinsics { $($rest:tt)* ) => ( #[cfg($name = "optimized-c")] - #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] pub $(unsafe $($empty)? )? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? 
{ extern $abi { fn $name($($argname: $ty),*) $(-> $ret)?; From 8a7ba9ab5fe0b1e44c30ad9771e2e7ac9dd42555 Mon Sep 17 00:00:00 2001 From: Amjad Alsharafi <26300843+Amjad50@users.noreply.github.com> Date: Thu, 2 May 2024 22:00:29 +0800 Subject: [PATCH 0718/1459] Cleanup `mangled-names` macro Don't generate the whole function if we are not going to use `no_mangle`; there is no point --- src/macros.rs | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/src/macros.rs b/src/macros.rs index 89cd64689..f762ef4da 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -250,11 +250,11 @@ macro_rules! intrinsics { $($body)* } - #[cfg(all(any(windows, target_os = "uefi"), target_arch = "x86_64"))] - pub mod $name { - #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + #[cfg(all(any(windows, target_os = "uefi"), target_arch = "x86_64", not(feature = "mangled-names")))] + mod $name { + #[no_mangle] #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] - pub extern $abi fn $name( $($argname: $ty),* ) + extern $abi fn $name( $($argname: $ty),* ) -> $crate::macros::win64_128bit_abi_hack::U64x2 { let e: $($ret)? = super::$name($($argname),*); @@ -291,20 +291,20 @@ macro_rules! intrinsics { $($body)* } - #[cfg(target_arch = "arm")] - pub mod $name { - #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + #[cfg(all(target_arch = "arm", not(feature = "mangled-names")))] + mod $name { + #[no_mangle] #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] - pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)?
{ super::$name($($argname),*) } } - #[cfg(target_arch = "arm")] - pub mod $alias { - #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + #[cfg(all(target_arch = "arm", not(feature = "mangled-names")))] + mod $alias { + #[no_mangle] #[cfg_attr(all(not(windows), not(target_vendor="apple")), linkage = "weak")] - pub extern "aapcs" fn $alias( $($argname: $ty),* ) $(-> $ret)? { + extern "aapcs" fn $alias( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } } @@ -335,12 +335,12 @@ macro_rules! intrinsics { $($body)* } - #[cfg(feature = "mem")] - pub mod $name { + #[cfg(all(feature = "mem", not(feature = "mangled-names")))] + mod $name { $(#[$($attr)*])* - #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + #[no_mangle] #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] - pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } } @@ -359,6 +359,7 @@ macro_rules! intrinsics { $($rest:tt)* ) => ( + // `#[naked]` definitions are referenced by other places, so we can't use `cfg` like the others pub mod $name { #[naked] $(#[$($attr)*])* @@ -426,11 +427,12 @@ macro_rules! intrinsics { $($body)* } - pub mod $name { + #[cfg(not(feature = "mangled-names"))] + mod $name { $(#[$($attr)*])* - #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + #[no_mangle] #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] - pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } } @@ -452,11 +454,12 @@ macro_rules! 
intrinsics { $($body)* } - pub mod $name { + #[cfg(not(feature = "mangled-names"))] + mod $name { $(#[$($attr)*])* - #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + #[no_mangle] #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] - pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + unsafe fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } } From ddf904bbca840755a1262b0708f9ee61a8593b9c Mon Sep 17 00:00:00 2001 From: Martin Nordholts Date: Sat, 4 May 2024 14:56:01 +0200 Subject: [PATCH 0719/1459] Update reference to rustc-std-workspace-core --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c8f164c94..6a3c53de8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,8 +28,8 @@ links = 'compiler-rt' test = false [dependencies] -# For more information on this dependency see rust-lang/rust's -# `src/tools/rustc-std-workspace` folder +# For more information on this dependency see +# https://github.com/rust-lang/rust/tree/master/library/rustc-std-workspace-core core = { version = "1.0.0", optional = true, package = 'rustc-std-workspace-core' } [build-dependencies] From 35731dcbe5fa4144d048c5fba499e8fb3ff550d3 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sat, 4 May 2024 15:31:14 +0100 Subject: [PATCH 0720/1459] Release version 0.1.110 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6a3c53de8..1f4d31929 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.109" +version = "0.1.110" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From c0ba865d5d79220039a53f98048cb021cc860a43 Mon Sep 17 00:00:00 2001 From: Patryk Wychowaniec Date: Sat, 4 May 2024 15:30:43 +0200 Subject: [PATCH 0721/1459] fix: Add `#[avr_skip]` for `__addsf3` & 
`__adddf3` It looks like I've forgotten about them [back in 2023](https://github.com/rust-lang/compiler-builtins/pull/527). --- src/float/add.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/float/add.rs b/src/float/add.rs index 804f4b510..97f73e2f4 100644 --- a/src/float/add.rs +++ b/src/float/add.rs @@ -189,12 +189,14 @@ where } intrinsics! { + #[avr_skip] #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_fadd] pub extern "C" fn __addsf3(a: f32, b: f32) -> f32 { add(a, b) } + #[avr_skip] #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_dadd] pub extern "C" fn __adddf3(a: f64, b: f64) -> f64 { From 536824bfbe56e9d8bcf03a704eb69e5c91a62816 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sat, 4 May 2024 19:57:27 +0100 Subject: [PATCH 0722/1459] Release version 0.1.111 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1f4d31929..0606ef1e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.110" +version = "0.1.111" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From ce62697ef451fd84f698ff80b22b448423515d64 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 May 2024 04:16:52 -0500 Subject: [PATCH 0723/1459] Deny warnings in CI The main crate already has `#![deny(warnings)]`. Set RUSTFLAGS in CI to enforce this for other crates in the workspace. 
--- libm/.github/workflows/main.yml | 4 ++++ libm/build.rs | 4 ++++ libm/ci/run-docker.sh | 1 + libm/crates/compiler-builtins-smoke-test/Cargo.toml | 3 +++ libm/crates/compiler-builtins-smoke-test/build.rs | 3 +++ libm/crates/compiler-builtins-smoke-test/src/lib.rs | 2 +- 6 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 libm/crates/compiler-builtins-smoke-test/build.rs diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 5408ac23f..2f2e46822 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -1,6 +1,10 @@ name: CI on: [push, pull_request] +env: + RUSTDOCFLAGS: -Dwarnings + RUSTFLAGS: -Dwarnings + jobs: docker: name: Docker diff --git a/libm/build.rs b/libm/build.rs index 80145a9cc..c9ae23260 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -2,10 +2,14 @@ use std::env; fn main() { println!("cargo:rerun-if-changed=build.rs"); + println!("cargo::rustc-check-cfg=cfg(assert_no_panic)"); + println!("cargo::rustc-check-cfg=cfg(feature, values(\"unstable\"))"); #[cfg(feature = "musl-reference-tests")] musl_reference_tests::generate(); + println!("cargo::rustc-check-cfg=cfg(feature, values(\"checked\"))"); + #[allow(unexpected_cfgs)] if !cfg!(feature = "checked") { let lvl = env::var("OPT_LEVEL").unwrap(); if lvl != "0" { diff --git a/libm/ci/run-docker.sh b/libm/ci/run-docker.sh index c7ad60fd4..8d323634a 100755 --- a/libm/ci/run-docker.sh +++ b/libm/ci/run-docker.sh @@ -16,6 +16,7 @@ run() { docker run \ --rm \ --user $(id -u):$(id -g) \ + -e RUSTFLAGS \ -e CARGO_HOME=/cargo \ -e CARGO_TARGET_DIR=/target \ -v "${HOME}/.cargo":/cargo \ diff --git a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml index ac192a913..695b710ff 100644 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -7,3 +7,6 @@ authors = ["Jorge Aparicio "] test = false bench = false +[features] 
+unstable = [] +checked = [] diff --git a/libm/crates/compiler-builtins-smoke-test/build.rs b/libm/crates/compiler-builtins-smoke-test/build.rs new file mode 100644 index 000000000..27d4a0e89 --- /dev/null +++ b/libm/crates/compiler-builtins-smoke-test/build.rs @@ -0,0 +1,3 @@ +fn main() { + println!("cargo::rustc-check-cfg=cfg(assert_no_panic)"); +} diff --git a/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/libm/crates/compiler-builtins-smoke-test/src/lib.rs index 7fad301b9..ab744c45b 100644 --- a/libm/crates/compiler-builtins-smoke-test/src/lib.rs +++ b/libm/crates/compiler-builtins-smoke-test/src/lib.rs @@ -6,4 +6,4 @@ #![no_std] #[path = "../../../src/math/mod.rs"] -mod libm; +pub mod libm; From 59f3c77ee6a224c7c5111835e3732738768c2a2c Mon Sep 17 00:00:00 2001 From: Nicholas Bishop Date: Wed, 8 May 2024 21:45:13 -0400 Subject: [PATCH 0724/1459] Enable chkstk/alloca intrinsics on i686-unknown-uefi --- src/x86.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/x86.rs b/src/x86.rs index c348d082d..ceec3912e 100644 --- a/src/x86.rs +++ b/src/x86.rs @@ -10,8 +10,7 @@ use core::intrinsics; intrinsics! { #[naked] #[cfg(all( - windows, - target_env = "gnu", + any(all(windows, target_env = "gnu"), target_os = "uefi"), not(feature = "no-asm") ))] pub unsafe extern "C" fn __chkstk() { @@ -23,8 +22,7 @@ intrinsics! { #[naked] #[cfg(all( - windows, - target_env = "gnu", + any(all(windows, target_env = "gnu"), target_os = "uefi"), not(feature = "no-asm") ))] pub unsafe extern "C" fn _alloca() { From a8c0ecca53a1bdc987164771746d7d3a172551e9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 May 2024 02:49:35 -0500 Subject: [PATCH 0725/1459] Deny warnings in CI There are currently a lot of warnings printed in CI, mostly dead code. Update CI to deny warnings. 
--- .github/workflows/main.yml | 4 ++++ ci/run-docker.sh | 1 + 2 files changed, 5 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 57497e050..970a32ae5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,6 +1,10 @@ name: CI on: [push, pull_request] +env: + RUSTDOCFLAGS: -Dwarnings + RUSTFLAGS: -Dwarnings + jobs: test: name: Test diff --git a/ci/run-docker.sh b/ci/run-docker.sh index b85f64133..e5ff8a46b 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -48,6 +48,7 @@ run() { docker run \ --rm \ -e RUST_COMPILER_RT_ROOT \ + -e RUSTFLAGS \ -e "CARGO_TARGET_DIR=/builtins-target" \ -v "$(pwd):/checkout:ro" \ -w /checkout \ From b270c706dc88979966f304feb5ad2f205f29f8eb Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 May 2024 03:04:47 -0500 Subject: [PATCH 0726/1459] Emit directives for cargo-check-cfg --- build.rs | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/build.rs b/build.rs index bafbf75d0..31e527a0b 100644 --- a/build.rs +++ b/build.rs @@ -2,6 +2,7 @@ use std::{collections::BTreeMap, env, sync::atomic::Ordering}; fn main() { println!("cargo:rerun-if-changed=build.rs"); + configure_check_cfg(); let target = env::var("TARGET").unwrap(); let cwd = env::current_dir().unwrap(); @@ -9,6 +10,7 @@ fn main() { println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display()); // Activate libm's unstable features to make full use of Nightly. + println!("cargo::rustc-check-cfg=cfg(feature, values(\"unstable\"))"); println!("cargo:rustc-cfg=feature=\"unstable\""); // Emscripten's runtime includes all the builtins @@ -36,6 +38,7 @@ fn main() { } // These targets have hardware unaligned access support. 
+ println!("cargo::rustc-check-cfg=cfg(feature, values(\"mem-unaligned\"))"); if target.contains("x86_64") || target.contains("i686") || target.contains("aarch64") @@ -64,6 +67,7 @@ fn main() { } // To compile intrinsics.rs for thumb targets, where there is no libc + println!("cargo::rustc-check-cfg=cfg(thumb)"); if llvm_target[0].starts_with("thumb") { println!("cargo:rustc-cfg=thumb") } @@ -71,6 +75,7 @@ fn main() { // compiler-rt `cfg`s away some intrinsics for thumbv6m and thumbv8m.base because // these targets do not have full Thumb-2 support but only original Thumb-1. // We have to cfg our code accordingly. + println!("cargo::rustc-check-cfg=cfg(thumb_1)"); if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" { println!("cargo:rustc-cfg=thumb_1") } @@ -78,6 +83,7 @@ fn main() { // Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures. This // includes the old androideabi. It is deprecated but it is available as a // rustc target (arm-linux-androideabi). + println!("cargo::rustc-check-cfg=cfg(kernel_user_helpers)"); if llvm_target[0] == "armv4t" || llvm_target[0] == "armv5te" || target == "arm-linux-androideabi" @@ -145,6 +151,72 @@ fn generate_aarch64_outlined_atomics() { std::fs::write(dst, buf).unwrap(); } +/// Emit directives for features we expect to support that aren't in `Cargo.toml`. +/// +/// These are mostly cfg elements emitted by this `build.rs`. 
+fn configure_check_cfg() { + // Functions where we can set the "optimized-c" flag + const HAS_OPTIMIZED_C: &[&str] = &[ + "__ashldi3", + "__ashlsi3", + "__ashrdi3", + "__ashrsi3", + "__clzsi2", + "__divdi3", + "__divsi3", + "__divmoddi4", + "__divmodsi4", + "__divmodsi4", + "__divmodti4", + "__lshrdi3", + "__lshrsi3", + "__moddi3", + "__modsi3", + "__muldi3", + "__udivdi3", + "__udivmoddi4", + "__udivmodsi4", + "__udivsi3", + "__umoddi3", + "__umodsi3", + ]; + + // Build a list of all aarch64 atomic operation functions + let mut aarch_atomic = Vec::new(); + for aarch_op in ["cas", "ldadd", "ldclr", "ldeor", "ldset", "swp"] { + let op_sizes = if aarch_op == "cas" { + [1, 2, 4, 8, 16].as_slice() + } else { + [1, 2, 4, 8].as_slice() + }; + + for op_size in op_sizes { + for ordering in ["relax", "acq", "rel", "acq_rel"] { + aarch_atomic.push(format!("__aarch64_{}{}_{}", aarch_op, op_size, ordering)); + } + } + } + + for fn_name in HAS_OPTIMIZED_C + .iter() + .copied() + .chain(aarch_atomic.iter().map(|s| s.as_str())) + { + println!( + "cargo::rustc-check-cfg=cfg({}, values(\"optimized-c\"))", + fn_name + ); + } + + // Rustc is unaware of sparc target features, but this does show up from + // `rustc --print target-features --target sparc64-unknown-linux-gnu`. + println!("cargo::rustc-check-cfg=cfg(target_feature, values(\"vis3\"))"); + + // FIXME: these come from libm and should be changed there + println!("cargo::rustc-check-cfg=cfg(feature, values(\"checked\"))"); + println!("cargo::rustc-check-cfg=cfg(assert_no_panic)"); +} + #[cfg(feature = "c")] mod c { extern crate cc; @@ -307,6 +379,7 @@ mod c { // also needs to satisfy intrinsics that jemalloc or C in general may // need, so include a few more that aren't typically needed by // LLVM/Rust. 
+ #[allow(unexpected_cfgs)] if cfg!(feature = "rustbuild") { sources.extend(&[("__ffsdi2", "ffsdi2.c")]); } From 34a2c1206d9c2c5f07d501d22ffe8c3b2e5b0de9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 May 2024 03:23:36 -0500 Subject: [PATCH 0727/1459] Update `cfg` to fix warnings --- src/float/mod.rs | 1 + src/int/mod.rs | 1 + testcrate/tests/cmp.rs | 1 + testcrate/tests/conv.rs | 2 +- testcrate/tests/misc.rs | 2 +- 5 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/float/mod.rs b/src/float/mod.rs index a82dd7d2a..b0fbe8aff 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -14,6 +14,7 @@ pub mod trunc; public_test_dep! { /// Trait for some basic operations on floats +#[allow(dead_code)] pub(crate) trait Float: Copy + core::fmt::Debug diff --git a/src/int/mod.rs b/src/int/mod.rs index 509f9fdae..3ef71da8d 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -13,6 +13,7 @@ pub use self::leading_zeros::__clzsi2; public_test_dep! { /// Trait for some basic operations on integers +#[allow(dead_code)] pub(crate) trait Int: Copy + core::fmt::Debug diff --git a/testcrate/tests/cmp.rs b/testcrate/tests/cmp.rs index 5c10a5601..14dd76b2d 100644 --- a/testcrate/tests/cmp.rs +++ b/testcrate/tests/cmp.rs @@ -1,5 +1,6 @@ #![allow(unused_macros)] +#[cfg(not(target_arch = "powerpc64"))] use testcrate::*; macro_rules! cmp { diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 84828dbfa..5cff01202 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -155,7 +155,7 @@ macro_rules! 
conv { stringify!($fn) ); } - }); + }) }; } diff --git a/testcrate/tests/misc.rs b/testcrate/tests/misc.rs index 402d202a8..cdc37e2a0 100644 --- a/testcrate/tests/misc.rs +++ b/testcrate/tests/misc.rs @@ -1,7 +1,6 @@ // makes configuration easier #![allow(unused_macros)] -use compiler_builtins::float::Float; use testcrate::*; /// Make sure that the the edge case tester and randomized tester don't break, and list examples of @@ -138,6 +137,7 @@ macro_rules! pow { #[test] fn float_pow() { use compiler_builtins::float::pow::{__powidf2, __powisf2}; + use compiler_builtins::float::Float; pow!( f32, 1e-4, __powisf2; From df57940c8908c48c9f1cd3ae116033f9f0511fc5 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 May 2024 04:26:49 -0500 Subject: [PATCH 0728/1459] Remove the undocumented and unused `rustbuild` feature See --- build.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/build.rs b/build.rs index 31e527a0b..47c8b4ffe 100644 --- a/build.rs +++ b/build.rs @@ -375,15 +375,6 @@ mod c { ]); } - // When compiling in rustbuild (the rust-lang/rust repo) this library - // also needs to satisfy intrinsics that jemalloc or C in general may - // need, so include a few more that aren't typically needed by - // LLVM/Rust. - #[allow(unexpected_cfgs)] - if cfg!(feature = "rustbuild") { - sources.extend(&[("__ffsdi2", "ffsdi2.c")]); - } - // On iOS and 32-bit OSX these are all just empty intrinsics, no need to // include them. 
if target_os != "ios" From b1889e3423748e4933356dc4452d698d1669f6ea Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sat, 11 May 2024 09:50:21 +0200 Subject: [PATCH 0729/1459] Release 0.1.112 --- Cargo.toml | 2 +- libm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0606ef1e7..4564ba9e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.111" +version = "0.1.112" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" diff --git a/libm b/libm index a1e8a5bf9..279e5f6ab 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit a1e8a5bf95e99309760b764b2a332d0039d08350 +Subproject commit 279e5f6abe0a2ca9066962d9ec894f0df1f417ac From baac8ef432fcd3ed21dd6a8a8a0984415a5a5227 Mon Sep 17 00:00:00 2001 From: theKidOfArcrania Date: Mon, 13 May 2024 11:17:07 -0700 Subject: [PATCH 0730/1459] Fix paths for Windows arm64 build --- build.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/build.rs b/build.rs index bafbf75d0..c6af5a9e7 100644 --- a/build.rs +++ b/build.rs @@ -1,4 +1,4 @@ -use std::{collections::BTreeMap, env, sync::atomic::Ordering}; +use std::{collections::BTreeMap, env, path::PathBuf, sync::atomic::Ordering}; fn main() { println!("cargo:rerun-if-changed=build.rs"); @@ -141,8 +141,8 @@ fn generate_aarch64_outlined_atomics() { buf += macro_def; buf += "}; }\n"; } - let dst = std::env::var("OUT_DIR").unwrap() + "/outlined_atomics.rs"; - std::fs::write(dst, buf).unwrap(); + let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap()); + std::fs::write(out_dir.join("outlined_atomics.rs"), buf).unwrap(); } #[cfg(feature = "c")] @@ -612,7 +612,7 @@ mod c { fn build_aarch64_out_of_line_atomics_libraries(builtins_dir: &Path, cfg: &mut cc::Build) { let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); - let outlined_atomics_file = 
builtins_dir.join("aarch64/lse.S"); + let outlined_atomics_file = builtins_dir.join("aarch64").join("lse.S"); println!("cargo:rerun-if-changed={}", outlined_atomics_file.display()); cfg.include(&builtins_dir); From 69aa7f45fb2747eb606fd6a39ebce1f5e8a13c08 Mon Sep 17 00:00:00 2001 From: theKidOfArcrania Date: Mon, 13 May 2024 11:29:55 -0700 Subject: [PATCH 0731/1459] Add aarch64 target --- .github/workflows/main.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 57497e050..f1c63e4f9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -83,6 +83,9 @@ jobs: - target: x86_64-pc-windows-gnu os: windows-latest rust: nightly-x86_64-gnu + - target: aarch64-pc-windows-gnu + os: windows-latest + rust: nightly steps: - name: Print runner information run: uname -a From 26ffcf4fa580ecbb4720a042b766998e58a2b659 Mon Sep 17 00:00:00 2001 From: theKidOfArcrania Date: Mon, 13 May 2024 11:31:23 -0700 Subject: [PATCH 0732/1459] Update target --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f1c63e4f9..7a799eaed 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -83,7 +83,7 @@ jobs: - target: x86_64-pc-windows-gnu os: windows-latest rust: nightly-x86_64-gnu - - target: aarch64-pc-windows-gnu + - target: aarch64-pc-windows-msvc os: windows-latest rust: nightly steps: From 38b0e43a49a3c0920cfe8d0497f47d056ab7962f Mon Sep 17 00:00:00 2001 From: theKidOfArcrania Date: Mon, 13 May 2024 11:38:43 -0700 Subject: [PATCH 0733/1459] Maybe try arm64 --- .github/workflows/main.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7a799eaed..ba5553879 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -15,6 +15,9 @@ jobs: - target: aarch64-unknown-linux-gnu os: 
ubuntu-latest rust: nightly + - target: aarch64-pc-windows-msvc + os: [windows-latest, arm64] + rust: nightly - target: arm-unknown-linux-gnueabi os: ubuntu-latest rust: nightly @@ -83,9 +86,6 @@ jobs: - target: x86_64-pc-windows-gnu os: windows-latest rust: nightly-x86_64-gnu - - target: aarch64-pc-windows-msvc - os: windows-latest - rust: nightly steps: - name: Print runner information run: uname -a From 0fc814bc46cc0a543280e79ff283e927dc6f0a55 Mon Sep 17 00:00:00 2001 From: theKidOfArcrania Date: Tue, 14 May 2024 15:47:07 -0700 Subject: [PATCH 0734/1459] Remove aarch64 CI --- .github/workflows/main.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ba5553879..57497e050 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -15,9 +15,6 @@ jobs: - target: aarch64-unknown-linux-gnu os: ubuntu-latest rust: nightly - - target: aarch64-pc-windows-msvc - os: [windows-latest, arm64] - rust: nightly - target: arm-unknown-linux-gnueabi os: ubuntu-latest rust: nightly From 9c6fcb56e8a6176fabdf556672b93053a3109882 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 10 May 2024 18:38:09 -0500 Subject: [PATCH 0735/1459] Split `Int` into `Int` and `MinInt` `MinInt` contains the basic methods that are only needed by integers involved in widening operations, i.e. big integers. `Int` retains all other operations and convenience methods. 
--- src/float/add.rs | 22 ++-- src/float/cmp.rs | 2 +- src/float/div.rs | 79 +++++++++------ src/float/extend.rs | 2 +- src/float/mod.rs | 7 +- src/float/mul.rs | 2 +- src/float/trunc.rs | 2 +- src/int/addsub.rs | 10 +- src/int/mod.rs | 232 +++++++++++++++++++++++-------------------- src/int/mul.rs | 4 +- src/int/shift.rs | 2 +- testcrate/src/lib.rs | 18 ++-- 12 files changed, 210 insertions(+), 172 deletions(-) diff --git a/src/float/add.rs b/src/float/add.rs index 97f73e2f4..909948ad2 100644 --- a/src/float/add.rs +++ b/src/float/add.rs @@ -1,5 +1,5 @@ use crate::float::Float; -use crate::int::{CastInto, Int}; +use crate::int::{CastInto, Int, MinInt}; /// Returns `a + b` fn add(a: F, b: F) -> F @@ -57,9 +57,9 @@ where } // zero + anything = anything - if a_abs == Int::ZERO { + if a_abs == MinInt::ZERO { // but we need to get the sign right for zero + zero - if b_abs == Int::ZERO { + if b_abs == MinInt::ZERO { return F::from_repr(a.repr() & b.repr()); } else { return b; @@ -67,7 +67,7 @@ where } // anything + zero = anything - if b_abs == Int::ZERO { + if b_abs == MinInt::ZERO { return a; } } @@ -113,10 +113,10 @@ where // Shift the significand of b by the difference in exponents, with a sticky // bottom bit to get rounding correct. let align = a_exponent.wrapping_sub(b_exponent).cast(); - if align != Int::ZERO { + if align != MinInt::ZERO { if align < bits { let sticky = - F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != Int::ZERO); + F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != MinInt::ZERO); b_significand = (b_significand >> align.cast()) | sticky; } else { b_significand = one; // sticky; b is known to be non-zero. @@ -125,8 +125,8 @@ where if subtraction { a_significand = a_significand.wrapping_sub(b_significand); // If a == -b, return +zero. 
- if a_significand == Int::ZERO { - return F::from_repr(Int::ZERO); + if a_significand == MinInt::ZERO { + return F::from_repr(MinInt::ZERO); } // If partial cancellation occured, we need to left-shift the result @@ -143,8 +143,8 @@ where // If the addition carried up, we need to right-shift the result and // adjust the exponent: - if a_significand & implicit_bit << 4 != Int::ZERO { - let sticky = F::Int::from_bool(a_significand & one != Int::ZERO); + if a_significand & implicit_bit << 4 != MinInt::ZERO { + let sticky = F::Int::from_bool(a_significand & one != MinInt::ZERO); a_significand = a_significand >> 1 | sticky; a_exponent += 1; } @@ -160,7 +160,7 @@ where // need to shift the significand. let shift = (1 - a_exponent).cast(); let sticky = - F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != Int::ZERO); + F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != MinInt::ZERO); a_significand = a_significand >> shift.cast() | sticky; a_exponent = 0; } diff --git a/src/float/cmp.rs b/src/float/cmp.rs index 1c8917af8..193c5df36 100644 --- a/src/float/cmp.rs +++ b/src/float/cmp.rs @@ -1,7 +1,7 @@ #![allow(unreachable_code)] use crate::float::Float; -use crate::int::Int; +use crate::int::MinInt; #[derive(Clone, Copy)] enum Result { diff --git a/src/float/div.rs b/src/float/div.rs index d587fe4f9..c0d780b66 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -3,7 +3,9 @@ #![allow(clippy::needless_return)] use crate::float::Float; -use crate::int::{CastInto, DInt, HInt, Int}; +use crate::int::{CastInto, DInt, HInt, Int, MinInt}; + +use super::HalfRep; fn div32(a: F, b: F) -> F where @@ -454,15 +456,20 @@ where fn div64(a: F, b: F) -> F where - u32: CastInto, F::Int: CastInto, - i32: CastInto, F::Int: CastInto, - u64: CastInto, + F::Int: CastInto>, + F::Int: From>, + F::Int: From, F::Int: CastInto, - i64: CastInto, F::Int: CastInto, - F::Int: HInt, + F::Int: HInt + DInt, + u16: CastInto, + i32: CastInto, + i64: CastInto, + u32: 
CastInto, + u64: CastInto, + u64: CastInto>, { const NUMBER_OF_HALF_ITERATIONS: usize = 3; const NUMBER_OF_FULL_ITERATIONS: usize = 1; @@ -471,7 +478,7 @@ where let one = F::Int::ONE; let zero = F::Int::ZERO; let hw = F::BITS / 2; - let lo_mask = u64::MAX >> hw; + let lo_mask = F::Int::MAX >> hw; let significand_bits = F::SIGNIFICAND_BITS; let max_exponent = F::EXPONENT_MAX; @@ -616,8 +623,9 @@ where let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 { // Starting with (n-1) half-width iterations - let b_uq1_hw: u32 = - (CastInto::::cast(b_significand) >> (significand_bits + 1 - hw)) as u32; + let b_uq1_hw: HalfRep = CastInto::>::cast( + CastInto::::cast(b_significand) >> (significand_bits + 1 - hw), + ); // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW // with W0 being either 16 or 32 and W0 <= HW. @@ -625,12 +633,13 @@ where // b/2 is subtracted to obtain x0) wrapped to [0, 1) range. // HW is at least 32. Shifting into the highest bits if needed. - let c_hw = (0x7504F333_u64 as u32).wrapping_shl(hw.wrapping_sub(32)); + let c_hw = (CastInto::>::cast(0x7504F333_u64)).wrapping_shl(hw.wrapping_sub(32)); // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572, // so x0 fits to UQ0.HW without wrapping. - let x_uq0_hw: u32 = { - let mut x_uq0_hw: u32 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */); + let x_uq0_hw: HalfRep = { + let mut x_uq0_hw: HalfRep = + c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */); // dbg!(x_uq0_hw); // An e_0 error is comprised of errors due to // * x0 being an inherently imprecise first approximation of 1/b_hw @@ -661,8 +670,9 @@ where // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is // expected to be strictly positive because b_UQ1_hw has its highest bit set // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1). 
- let corr_uq1_hw: u32 = - 0.wrapping_sub(((x_uq0_hw as u64).wrapping_mul(b_uq1_hw as u64)) >> hw) as u32; + let corr_uq1_hw: HalfRep = CastInto::>::cast(zero.wrapping_sub( + ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(b_uq1_hw))) >> hw, + )); // dbg!(corr_uq1_hw); // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally @@ -677,7 +687,9 @@ where // The fact corr_UQ1_hw was virtually round up (due to result of // multiplication being **first** truncated, then negated - to improve // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw. - x_uq0_hw = ((x_uq0_hw as u64).wrapping_mul(corr_uq1_hw as u64) >> (hw - 1)) as u32; + x_uq0_hw = ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(corr_uq1_hw)) + >> (hw - 1)) + .cast(); // dbg!(x_uq0_hw); // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after @@ -707,7 +719,7 @@ where // be not below that value (see g(x) above), so it is safe to decrement just // once after the final iteration. On the other hand, an effective value of // divisor changes after this point (from b_hw to b), so adjust here. - x_uq0_hw.wrapping_sub(1_u32) + x_uq0_hw.wrapping_sub(HalfRep::::ONE) }; // Error estimations for full-precision iterations are calculated just @@ -717,7 +729,7 @@ where // Simulating operations on a twice_rep_t to perform a single final full-width // iteration. Using ad-hoc multiplication implementations to take advantage // of particular structure of operands. - let blo: u64 = (CastInto::::cast(b_uq1)) & lo_mask; + let blo: F::Int = b_uq1 & lo_mask; // x_UQ0 = x_UQ0_hw * 2^HW - 1 // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1 // @@ -726,19 +738,20 @@ where // + [ x_UQ0_hw * blo ] // - [ b_UQ1 ] // = [ result ][.... discarded ...] 
- let corr_uq1 = negate_u64( - (x_uq0_hw as u64) * (b_uq1_hw as u64) + (((x_uq0_hw as u64) * (blo)) >> hw) - 1, - ); // account for *possible* carry - let lo_corr = corr_uq1 & lo_mask; - let hi_corr = corr_uq1 >> hw; + let corr_uq1: F::Int = (F::Int::from(x_uq0_hw) * F::Int::from(b_uq1_hw) + + ((F::Int::from(x_uq0_hw) * blo) >> hw)) + .wrapping_sub(one) + .wrapping_neg(); // account for *possible* carry + let lo_corr: F::Int = corr_uq1 & lo_mask; + let hi_corr: F::Int = corr_uq1 >> hw; // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1 - let mut x_uq0: ::Int = ((((x_uq0_hw as u64) * hi_corr) << 1) - .wrapping_add(((x_uq0_hw as u64) * lo_corr) >> (hw - 1)) - .wrapping_sub(2)) - .cast(); // 1 to account for the highest bit of corr_UQ1 can be 1 - // 1 to account for possible carry - // Just like the case of half-width iterations but with possibility - // of overflowing by one extra Ulp of x_UQ0. + let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1) + .wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1)) + .wrapping_sub(F::Int::from(2u8)); + // 1 to account for the highest bit of corr_UQ1 can be 1 + // 1 to account for possible carry + // Just like the case of half-width iterations but with possibility + // of overflowing by one extra Ulp of x_UQ0. x_uq0 -= one; // ... and then traditional fixup by 2 should work @@ -755,8 +768,8 @@ where x_uq0 } else { // C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n - let c: ::Int = (0x7504F333 << (F::BITS - 32)).cast(); - let x_uq0: ::Int = c.wrapping_sub(b_uq1); + let c: F::Int = (0x7504F333 << (F::BITS - 32)).cast(); + let x_uq0: F::Int = c.wrapping_sub(b_uq1); // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64 x_uq0 }; @@ -806,7 +819,7 @@ where // Now 1/b - (2*P) * 2^-W < x < 1/b // FIXME Is x_UQ0 still >= 0.5? 
- let mut quotient: ::Int = x_uq0.widen_mul(a_significand << 1).hi(); + let mut quotient: F::Int = x_uq0.widen_mul(a_significand << 1).hi(); // Now, a/b - 4*P * 2^-W < q < a/b for q= in UQ1.(SB+1+W). // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1), @@ -868,7 +881,7 @@ where // r = a - b * q let abs_result = if written_exponent > 0 { let mut ret = quotient & significand_mask; - ret |= ((written_exponent as u64) << significand_bits).cast(); + ret |= written_exponent.cast() << significand_bits; residual <<= 1; ret } else { diff --git a/src/float/extend.rs b/src/float/extend.rs index 7c2446603..5b0c0d972 100644 --- a/src/float/extend.rs +++ b/src/float/extend.rs @@ -1,5 +1,5 @@ use crate::float::Float; -use crate::int::{CastInto, Int}; +use crate::int::{CastInto, Int, MinInt}; /// Generic conversion from a narrower to a wider IEEE-754 floating-point type fn extend(a: F) -> R diff --git a/src/float/mod.rs b/src/float/mod.rs index b0fbe8aff..e62a3fe0f 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -1,6 +1,6 @@ use core::ops; -use super::int::Int; +use crate::int::{DInt, Int, MinInt}; pub mod add; pub mod cmp; @@ -12,6 +12,9 @@ pub mod pow; pub mod sub; pub mod trunc; +/// Wrapper to extract the integer type half of the float's size +pub(crate) type HalfRep = <::Int as DInt>::H; + public_test_dep! 
{ /// Trait for some basic operations on floats #[allow(dead_code)] @@ -60,7 +63,7 @@ pub(crate) trait Float: /// A mask for the significand const SIGNIFICAND_MASK: Self::Int; - // The implicit bit of the float format + /// The implicit bit of the float format const IMPLICIT_BIT: Self::Int; /// A mask for the exponent diff --git a/src/float/mul.rs b/src/float/mul.rs index 378fa9701..46c41d09f 100644 --- a/src/float/mul.rs +++ b/src/float/mul.rs @@ -1,5 +1,5 @@ use crate::float::Float; -use crate::int::{CastInto, DInt, HInt, Int}; +use crate::int::{CastInto, DInt, HInt, Int, MinInt}; fn mul(a: F, b: F) -> F where diff --git a/src/float/trunc.rs b/src/float/trunc.rs index 6de446c10..b607a6549 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -1,5 +1,5 @@ use crate::float::Float; -use crate::int::{CastInto, Int}; +use crate::int::{CastInto, Int, MinInt}; fn trunc(a: F) -> R where diff --git a/src/int/addsub.rs b/src/int/addsub.rs index f31eff4bd..e95590d84 100644 --- a/src/int/addsub.rs +++ b/src/int/addsub.rs @@ -1,6 +1,6 @@ -use crate::int::{DInt, Int}; +use crate::int::{DInt, Int, MinInt}; -trait UAddSub: DInt { +trait UAddSub: DInt + Int { fn uadd(self, other: Self) -> Self { let (lo, carry) = self.lo().overflowing_add(other.lo()); let hi = self.hi().wrapping_add(other.hi()); @@ -22,7 +22,7 @@ impl UAddSub for u128 {} trait AddSub: Int where - ::UnsignedInt: UAddSub, + ::UnsignedInt: UAddSub, { fn add(self, other: Self) -> Self { Self::from_unsigned(self.unsigned().uadd(other.unsigned())) @@ -37,7 +37,7 @@ impl AddSub for i128 {} trait Addo: AddSub where - ::UnsignedInt: UAddSub, + ::UnsignedInt: UAddSub, { fn addo(self, other: Self) -> (Self, bool) { let sum = AddSub::add(self, other); @@ -50,7 +50,7 @@ impl Addo for u128 {} trait Subo: AddSub where - ::UnsignedInt: UAddSub, + ::UnsignedInt: UAddSub, { fn subo(self, other: Self) -> (Self, bool) { let sum = AddSub::sub(self, other); diff --git a/src/int/mod.rs b/src/int/mod.rs index 3ef71da8d..dd23da3b7 
100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -9,37 +9,22 @@ pub mod sdiv; pub mod shift; pub mod udiv; -pub use self::leading_zeros::__clzsi2; +pub use leading_zeros::__clzsi2; public_test_dep! { -/// Trait for some basic operations on integers +/// Minimal integer implementations needed on all integer types, including wide integers. #[allow(dead_code)] -pub(crate) trait Int: - Copy +pub(crate) trait MinInt: Copy + core::fmt::Debug - + PartialEq - + PartialOrd - + ops::AddAssign - + ops::SubAssign - + ops::BitAndAssign - + ops::BitOrAssign - + ops::BitXorAssign - + ops::ShlAssign - + ops::ShrAssign - + ops::Add - + ops::Sub - + ops::Div - + ops::Shl - + ops::Shr + ops::BitOr - + ops::BitXor - + ops::BitAnd + ops::Not + + ops::Shl { + /// Type with the same width but other signedness - type OtherSign: Int; + type OtherSign: MinInt; /// Unsigned version of Self - type UnsignedInt: Int; + type UnsignedInt: MinInt; /// If `Self` is a signed integer const SIGNED: bool; @@ -51,13 +36,47 @@ pub(crate) trait Int: const ONE: Self; const MIN: Self; const MAX: Self; +} +} +public_test_dep! { +/// Trait for some basic operations on integers +#[allow(dead_code)] +pub(crate) trait Int: MinInt + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::SubAssign + + ops::BitAndAssign + + ops::BitOrAssign + + ops::BitXorAssign + + ops::ShlAssign + + ops::ShrAssign + + ops::Add + + ops::Sub + + ops::Mul + + ops::Div + + ops::Shr + + ops::BitXor + + ops::BitAnd +{ /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, /// 112,119,120,125,126,127]. - const FUZZ_LENGTHS: [u8; 20]; + const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(::BITS); + /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. 
- const FUZZ_NUM: usize; + const FUZZ_NUM: usize = { + let log2 = (::BITS - 1).count_ones() as usize; + if log2 == 3 { + // case for u8 + 6 + } else { + // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate + // boundaries. + 8 + (4 * (log2 - 4)) + } + }; fn unsigned(self) -> Self::UnsignedInt; fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; @@ -84,74 +103,54 @@ pub(crate) trait Int: } } +pub(crate) const fn make_fuzz_lengths(bits: u32) -> [u8; 20] { + let mut v = [0u8; 20]; + v[0] = 0; + v[1] = 1; + v[2] = 2; // important for parity and the iX::MIN case when reversed + let mut i = 3; + + // No need for any more until the byte boundary, because there should be no algorithms + // that are sensitive to anything not next to byte boundaries after 2. We also scale + // in powers of two, which is important to prevent u128 corner tests from getting too + // big. + let mut l = 8; + loop { + if l >= ((bits / 2) as u8) { + break; + } + // get both sides of the byte boundary + v[i] = l - 1; + i += 1; + v[i] = l; + i += 1; + l *= 2; + } + + if bits != 8 { + // add the lower side of the middle boundary + v[i] = ((bits / 2) - 1) as u8; + i += 1; + } + + // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS + // boundary because of algorithms that split the high part up. We reverse the scaling + // as we go to Self::BITS. + let mid = i; + let mut j = 1; + loop { + v[i] = (bits as u8) - (v[mid - j]) - 1; + if j == mid { + break; + } + i += 1; + j += 1; + } + v +} + macro_rules! 
int_impl_common { ($ty:ty) => { - const BITS: u32 = ::ZERO.count_zeros(); - const SIGNED: bool = Self::MIN != Self::ZERO; - - const ZERO: Self = 0; - const ONE: Self = 1; - const MIN: Self = ::MIN; - const MAX: Self = ::MAX; - - const FUZZ_LENGTHS: [u8; 20] = { - let bits = ::BITS; - let mut v = [0u8; 20]; - v[0] = 0; - v[1] = 1; - v[2] = 2; // important for parity and the iX::MIN case when reversed - let mut i = 3; - // No need for any more until the byte boundary, because there should be no algorithms - // that are sensitive to anything not next to byte boundaries after 2. We also scale - // in powers of two, which is important to prevent u128 corner tests from getting too - // big. - let mut l = 8; - loop { - if l >= ((bits / 2) as u8) { - break; - } - // get both sides of the byte boundary - v[i] = l - 1; - i += 1; - v[i] = l; - i += 1; - l *= 2; - } - - if bits != 8 { - // add the lower side of the middle boundary - v[i] = ((bits / 2) - 1) as u8; - i += 1; - } - - // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS - // boundary because of algorithms that split the high part up. We reverse the scaling - // as we go to Self::BITS. - let mid = i; - let mut j = 1; - loop { - v[i] = (bits as u8) - (v[mid - j]) - 1; - if j == mid { - break; - } - i += 1; - j += 1; - } - v - }; - - const FUZZ_NUM: usize = { - let log2 = (::BITS - 1).count_ones() as usize; - if log2 == 3 { - // case for u8 - 6 - } else { - // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate - // boundaries. - 8 + (4 * (log2 - 4)) - } - }; - fn from_bool(b: bool) -> Self { b as $ty } @@ -204,10 +203,20 @@ macro_rules! int_impl_common { macro_rules! 
int_impl { ($ity:ty, $uty:ty) => { - impl Int for $uty { + impl MinInt for $uty { type OtherSign = $ity; type UnsignedInt = $uty; + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $uty { fn unsigned(self) -> $uty { self } @@ -229,10 +238,20 @@ macro_rules! int_impl { int_impl_common!($uty); } - impl Int for $ity { + impl MinInt for $ity { type OtherSign = $uty; type UnsignedInt = $uty; + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $ity { fn unsigned(self) -> $uty { self as $uty } @@ -260,18 +279,22 @@ int_impl!(i128, u128); public_test_dep! { /// Trait for integers twice the bit width of another integer. This is implemented for all /// primitives except for `u8`, because there is not a smaller primitive. -pub(crate) trait DInt: Int { +pub(crate) trait DInt: MinInt { /// Integer that is half the bit width of the integer this trait is implemented for - type H: HInt + Int; + type H: HInt; /// Returns the low half of `self` fn lo(self) -> Self::H; /// Returns the high half of `self` fn hi(self) -> Self::H; /// Returns the low and high halves of `self` as a tuple - fn lo_hi(self) -> (Self::H, Self::H); + fn lo_hi(self) -> (Self::H, Self::H) { + (self.lo(), self.hi()) + } /// Constructs an integer using lower and higher half parts - fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self; + fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { + lo.zero_widen() | hi.widen_hi() + } } } @@ -280,7 +303,7 @@ public_test_dep! { /// primitives except for `u128`, because it there is not a larger primitive. 
pub(crate) trait HInt: Int { /// Integer that is double the bit width of the integer this trait is implemented for - type D: DInt + Int; + type D: DInt + MinInt; /// Widens (using default extension) the integer to have double bit width fn widen(self) -> Self::D; @@ -288,7 +311,9 @@ pub(crate) trait HInt: Int { /// around problems with associated type bounds (such as `Int`) being unstable fn zero_widen(self) -> Self::D; /// Widens the integer to have double bit width and shifts the integer into the higher bits - fn widen_hi(self) -> Self::D; + fn widen_hi(self) -> Self::D { + self.widen() << ::BITS + } /// Widening multiplication with zero widening. This cannot overflow. fn zero_widen_mul(self, rhs: Self) -> Self::D; /// Widening multiplication. This cannot overflow. @@ -306,13 +331,7 @@ macro_rules! impl_d_int { self as $X } fn hi(self) -> Self::H { - (self >> <$X as Int>::BITS) as $X - } - fn lo_hi(self) -> (Self::H, Self::H) { - (self.lo(), self.hi()) - } - fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { - lo.zero_widen() | hi.widen_hi() + (self >> <$X as MinInt>::BITS) as $X } } )* @@ -331,9 +350,6 @@ macro_rules! 
impl_h_int { fn zero_widen(self) -> Self::D { (self as $uH) as $X } - fn widen_hi(self) -> Self::D { - (self as $X) << <$H as Int>::BITS - } fn zero_widen_mul(self, rhs: Self) -> Self::D { self.zero_widen().wrapping_mul(rhs.zero_widen()) } diff --git a/src/int/mul.rs b/src/int/mul.rs index 2538e2f41..e0093a725 100644 --- a/src/int/mul.rs +++ b/src/int/mul.rs @@ -1,6 +1,6 @@ use crate::int::{DInt, HInt, Int}; -trait Mul: DInt +trait Mul: DInt + Int where Self::H: DInt, { @@ -30,7 +30,7 @@ where impl Mul for u64 {} impl Mul for i128 {} -pub(crate) trait UMulo: Int + DInt { +pub(crate) trait UMulo: DInt + Int { fn mulo(self, rhs: Self) -> (Self, bool) { match (self.hi().is_zero(), rhs.hi().is_zero()) { // overflow is guaranteed diff --git a/src/int/shift.rs b/src/int/shift.rs index dbd040187..317272988 100644 --- a/src/int/shift.rs +++ b/src/int/shift.rs @@ -1,4 +1,4 @@ -use crate::int::{DInt, HInt, Int}; +use crate::int::{DInt, HInt, Int, MinInt}; trait Ashl: DInt { /// Returns `a << b`, requires `b < Self::BITS` diff --git a/testcrate/src/lib.rs b/testcrate/src/lib.rs index 9bd155f6f..13abf459e 100644 --- a/testcrate/src/lib.rs +++ b/testcrate/src/lib.rs @@ -15,7 +15,7 @@ #![no_std] use compiler_builtins::float::Float; -use compiler_builtins::int::Int; +use compiler_builtins::int::{Int, MinInt}; use rand_xoshiro::rand_core::{RngCore, SeedableRng}; use rand_xoshiro::Xoshiro128StarStar; @@ -101,7 +101,10 @@ macro_rules! edge_cases { /// Feeds a series of fuzzing inputs to `f`. The fuzzer first uses an algorithm designed to find /// edge cases, followed by a more random fuzzer that runs `n` times. -pub fn fuzz(n: u32, mut f: F) { +pub fn fuzz(n: u32, mut f: F) +where + ::UnsignedInt: Int, +{ // edge case tester. Calls `f` 210 times for u128. 
// zero gets skipped by the loop f(I::ZERO); @@ -111,7 +114,7 @@ pub fn fuzz(n: u32, mut f: F) { // random fuzzer let mut rng = Xoshiro128StarStar::seed_from_u64(0); - let mut x: I = Int::ZERO; + let mut x: I = MinInt::ZERO; for _ in 0..n { fuzz_step(&mut rng, &mut x); f(x) @@ -119,7 +122,10 @@ pub fn fuzz(n: u32, mut f: F) { } /// The same as `fuzz`, except `f` has two inputs. -pub fn fuzz_2(n: u32, f: F) { +pub fn fuzz_2(n: u32, f: F) +where + ::UnsignedInt: Int, +{ // Check cases where the first and second inputs are zero. Both call `f` 210 times for `u128`. edge_cases!(I, case, { f(I::ZERO, case); @@ -150,10 +156,10 @@ pub fn fuzz_shift(f: F) { // Shift functions are very simple and do not need anything other than shifting a small // set of random patterns for every fuzz length. let mut rng = Xoshiro128StarStar::seed_from_u64(0); - let mut x: I = Int::ZERO; + let mut x: I = MinInt::ZERO; for i in 0..I::FUZZ_NUM { fuzz_step(&mut rng, &mut x); - f(x, Int::ZERO); + f(x, MinInt::ZERO); f(x, I::FUZZ_LENGTHS[i] as u32); } } From 2868c2643ec9d9d70b7c156e70711fe5c8d3c94c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 10 May 2024 18:50:36 -0500 Subject: [PATCH 0736/1459] Add `i256` and `u256` bigint types --- src/int/big.rs | 251 +++++++++++++++++++++++++++++++++++++++++ src/int/mod.rs | 2 + testcrate/tests/big.rs | 61 ++++++++++ 3 files changed, 314 insertions(+) create mode 100644 src/int/big.rs create mode 100644 testcrate/tests/big.rs diff --git a/src/int/big.rs b/src/int/big.rs new file mode 100644 index 000000000..019dd728b --- /dev/null +++ b/src/int/big.rs @@ -0,0 +1,251 @@ +//! Integers used for wide operations, larger than `u128`. 
+ +#![allow(unused)] + +use crate::int::{DInt, HInt, Int, MinInt}; +use core::{fmt, ops}; + +const WORD_LO_MASK: u64 = 0x00000000ffffffff; +const WORD_HI_MASK: u64 = 0xffffffff00000000; +const WORD_FULL_MASK: u64 = 0xffffffffffffffff; +const U128_LO_MASK: u128 = u64::MAX as u128; +const U128_HI_MASK: u128 = (u64::MAX as u128) << 64; + +/// A 256-bit unsigned integer represented as 4 64-bit limbs. +/// +/// Each limb is a native-endian number, but the array is little-limb-endian. +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +pub struct u256(pub [u64; 4]); + +impl u256 { + pub const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX]); + + /// Reinterpret as a signed integer + pub fn signed(self) -> i256 { + i256(self.0) + } +} + +/// A 256-bit signed integer represented as 4 64-bit limbs. +/// +/// Each limb is a native-endian number, but the array is little-limb-endian. +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +pub struct i256(pub [u64; 4]); + +impl i256 { + /// Reinterpret as an unsigned integer + pub fn unsigned(self) -> u256 { + u256(self.0) + } +} + +impl MinInt for u256 { + type OtherSign = i256; + + type UnsignedInt = u256; + + const SIGNED: bool = false; + const BITS: u32 = 256; + const ZERO: Self = Self([0u64; 4]); + const ONE: Self = Self([1, 0, 0, 0]); + const MIN: Self = Self([0u64; 4]); + const MAX: Self = Self([u64::MAX; 4]); +} + +impl MinInt for i256 { + type OtherSign = u256; + + type UnsignedInt = u256; + + const SIGNED: bool = true; + const BITS: u32 = 256; + const ZERO: Self = Self([0u64; 4]); + const ONE: Self = Self([1, 0, 0, 0]); + const MIN: Self = Self([0, 0, 0, 1 << 63]); + const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1]); +} + +macro_rules! 
impl_common { + ($ty:ty) => { + impl ops::BitOr for $ty { + type Output = Self; + + fn bitor(mut self, rhs: Self) -> Self::Output { + self.0[0] |= rhs.0[0]; + self.0[1] |= rhs.0[1]; + self.0[2] |= rhs.0[2]; + self.0[3] |= rhs.0[3]; + self + } + } + + impl ops::Not for $ty { + type Output = Self; + + fn not(self) -> Self::Output { + Self([!self.0[0], !self.0[1], !self.0[2], !self.0[3]]) + } + } + + impl ops::Shl for $ty { + type Output = Self; + + fn shl(self, rhs: u32) -> Self::Output { + todo!() + } + } + }; +} + +impl_common!(i256); +impl_common!(u256); + +macro_rules! word { + (1, $val:expr) => { + (($val >> (32 * 3)) & Self::from(WORD_LO_MASK)) as u64 + }; + (2, $val:expr) => { + (($val >> (32 * 2)) & Self::from(WORD_LO_MASK)) as u64 + }; + (3, $val:expr) => { + (($val >> (32 * 1)) & Self::from(WORD_LO_MASK)) as u64 + }; + (4, $val:expr) => { + (($val >> (32 * 0)) & Self::from(WORD_LO_MASK)) as u64 + }; +} + +impl HInt for u128 { + type D = u256; + + fn widen(self) -> Self::D { + let w0 = self & u128::from(u64::MAX); + let w1 = (self >> u64::BITS) & u128::from(u64::MAX); + u256([w0 as u64, w1 as u64, 0, 0]) + } + + fn zero_widen(self) -> Self::D { + self.widen() + } + + fn zero_widen_mul(self, rhs: Self) -> Self::D { + let product11: u64 = word!(1, self) * word!(1, rhs); + let product12: u64 = word!(1, self) * word!(2, rhs); + let product13: u64 = word!(1, self) * word!(3, rhs); + let product14: u64 = word!(1, self) * word!(4, rhs); + let product21: u64 = word!(2, self) * word!(1, rhs); + let product22: u64 = word!(2, self) * word!(2, rhs); + let product23: u64 = word!(2, self) * word!(3, rhs); + let product24: u64 = word!(2, self) * word!(4, rhs); + let product31: u64 = word!(3, self) * word!(1, rhs); + let product32: u64 = word!(3, self) * word!(2, rhs); + let product33: u64 = word!(3, self) * word!(3, rhs); + let product34: u64 = word!(3, self) * word!(4, rhs); + let product41: u64 = word!(4, self) * word!(1, rhs); + let product42: u64 = word!(4, self) * 
word!(2, rhs); + let product43: u64 = word!(4, self) * word!(3, rhs); + let product44: u64 = word!(4, self) * word!(4, rhs); + + let sum0: u128 = u128::from(product44); + let sum1: u128 = u128::from(product34) + u128::from(product43); + let sum2: u128 = u128::from(product24) + u128::from(product33) + u128::from(product42); + let sum3: u128 = u128::from(product14) + + u128::from(product23) + + u128::from(product32) + + u128::from(product41); + let sum4: u128 = u128::from(product13) + u128::from(product22) + u128::from(product31); + let sum5: u128 = u128::from(product12) + u128::from(product21); + let sum6: u128 = u128::from(product11); + + let r0: u128 = + (sum0 & u128::from(WORD_FULL_MASK)) + ((sum1 & u128::from(WORD_LO_MASK)) << 32); + let r1: u128 = (sum0 >> 64) + + ((sum1 >> 32) & u128::from(WORD_FULL_MASK)) + + (sum2 & u128::from(WORD_FULL_MASK)) + + ((sum3 << 32) & u128::from(WORD_HI_MASK)); + + let (lo, carry) = r0.overflowing_add(r1 << 64); + let hi = (r1 >> 64) + + (sum1 >> 96) + + (sum2 >> 64) + + (sum3 >> 32) + + sum4 + + (sum5 << 32) + + (sum6 << 64) + + u128::from(carry); + + u256([ + (lo & U128_LO_MASK) as u64, + ((lo >> 64) & U128_LO_MASK) as u64, + (hi & U128_LO_MASK) as u64, + ((hi >> 64) & U128_LO_MASK) as u64, + ]) + } + + fn widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen_mul(rhs) + } +} + +impl HInt for i128 { + type D = i256; + + fn widen(self) -> Self::D { + let mut ret = self.unsigned().zero_widen().signed(); + if self.is_negative() { + ret.0[2] = u64::MAX; + ret.0[3] = u64::MAX; + } + ret + } + + fn zero_widen(self) -> Self::D { + self.unsigned().zero_widen().signed() + } + + fn zero_widen_mul(self, rhs: Self) -> Self::D { + self.unsigned().zero_widen_mul(rhs.unsigned()).signed() + } + + fn widen_mul(self, rhs: Self) -> Self::D { + unimplemented!("signed i128 widening multiply is not used") + } +} + +impl DInt for u256 { + type H = u128; + + fn lo(self) -> Self::H { + let mut tmp = [0u8; 16]; + 
tmp[..8].copy_from_slice(&self.0[0].to_le_bytes()); + tmp[8..].copy_from_slice(&self.0[1].to_le_bytes()); + u128::from_le_bytes(tmp) + } + + fn hi(self) -> Self::H { + let mut tmp = [0u8; 16]; + tmp[..8].copy_from_slice(&self.0[2].to_le_bytes()); + tmp[8..].copy_from_slice(&self.0[3].to_le_bytes()); + u128::from_le_bytes(tmp) + } +} + +impl DInt for i256 { + type H = i128; + + fn lo(self) -> Self::H { + let mut tmp = [0u8; 16]; + tmp[..8].copy_from_slice(&self.0[0].to_le_bytes()); + tmp[8..].copy_from_slice(&self.0[1].to_le_bytes()); + i128::from_le_bytes(tmp) + } + + fn hi(self) -> Self::H { + let mut tmp = [0u8; 16]; + tmp[..8].copy_from_slice(&self.0[2].to_le_bytes()); + tmp[8..].copy_from_slice(&self.0[3].to_le_bytes()); + i128::from_le_bytes(tmp) + } +} diff --git a/src/int/mod.rs b/src/int/mod.rs index dd23da3b7..2b6d4b812 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -3,12 +3,14 @@ use core::ops; mod specialized_div_rem; pub mod addsub; +mod big; pub mod leading_zeros; pub mod mul; pub mod sdiv; pub mod shift; pub mod udiv; +pub use big::{i256, u256}; pub use leading_zeros::__clzsi2; public_test_dep! 
{ diff --git a/testcrate/tests/big.rs b/testcrate/tests/big.rs new file mode 100644 index 000000000..128b5ddfd --- /dev/null +++ b/testcrate/tests/big.rs @@ -0,0 +1,61 @@ +use compiler_builtins::int::{i256, u256, HInt, MinInt}; + +const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff; + +/// Print a `u256` as hex since we can't add format implementations +fn hexu(v: u256) -> String { + format!( + "0x{:016x}{:016x}{:016x}{:016x}", + v.0[3], v.0[2], v.0[1], v.0[0] + ) +} + +#[test] +fn widen_u128() { + assert_eq!(u128::MAX.widen(), u256([u64::MAX, u64::MAX, 0, 0])); + assert_eq!( + LOHI_SPLIT.widen(), + u256([u64::MAX, 0xaaaaaaaaaaaaaaaa, 0, 0]) + ); +} + +#[test] +fn widen_i128() { + assert_eq!((-1i128).widen(), u256::MAX.signed()); + assert_eq!( + (LOHI_SPLIT as i128).widen(), + i256([u64::MAX, 0xaaaaaaaaaaaaaaaa, u64::MAX, u64::MAX]) + ); + assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen()); +} + +#[test] +fn widen_mul_u128() { + let tests = [ + (u128::MAX / 2, 2_u128, u256([u64::MAX - 1, u64::MAX, 0, 0])), + (u128::MAX, 2_u128, u256([u64::MAX - 1, u64::MAX, 1, 0])), + (u128::MAX, u128::MAX, u256([1, 0, u64::MAX - 1, u64::MAX])), + (u128::MIN, u128::MIN, u256::ZERO), + (1234, 0, u256::ZERO), + (0, 1234, u256::ZERO), + ]; + + let mut errors = Vec::new(); + for (i, (a, b, exp)) in tests.iter().copied().enumerate() { + let res = a.widen_mul(b); + let res_z = a.zero_widen_mul(b); + assert_eq!(res, res_z); + if res != exp { + errors.push((i, a, b, exp, res)); + } + } + + for (i, a, b, exp, res) in &errors { + eprintln!( + "FAILURE ({i}): {a:#034x} * {b:#034x} = {} got {}", + hexu(*exp), + hexu(*res) + ); + } + assert!(errors.is_empty()); +} From 77faba1ef1a597463e8a6c748f0e0c037192c45f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 10 May 2024 19:01:49 -0500 Subject: [PATCH 0737/1459] Refactor float test macros to have a fallback Change float test macros to fall back to testing against `rustc_apfloat` when system implementations are not 
available, rather than just skipping tests. This allows for easier debugging where operations may not be supported. --- testcrate/src/lib.rs | 52 ++++++++++++++++++++++++++++++++++++++ testcrate/tests/addsub.rs | 21 +++++++-------- testcrate/tests/cmp.rs | 39 +++++++++++++++++++++++----- testcrate/tests/div_rem.rs | 35 ++++++++++++++++--------- testcrate/tests/mul.rs | 16 +++++++----- 5 files changed, 127 insertions(+), 36 deletions(-) diff --git a/testcrate/src/lib.rs b/testcrate/src/lib.rs index 13abf459e..1f3a4b826 100644 --- a/testcrate/src/lib.rs +++ b/testcrate/src/lib.rs @@ -263,3 +263,55 @@ pub fn fuzz_float_2(n: u32, f: E) { f(x, y) } } + +/// Perform an operation using builtin types if available, falling back to apfloat if not. +#[macro_export] +macro_rules! apfloat_fallback { + ( + $float_ty:ty, + // Type name in `rustc_apfloat::ieee`. Not a full path, it automatically gets the prefix. + $apfloat_ty:ident, + // Cfg expression for when builtin system operations should be used + $sys_available:meta, + // The expression to run. This expression may use `FloatTy` for its signature. + // Optionally, the final conversion back to a float can be suppressed using + // `=> no_convert` (for e.g. operations that return a bool). + $op:expr $(=> $convert:ident)?, + // Arguments that get passed to `$op` after converting to a float + $($arg:expr),+ + $(,)? + ) => {{ + #[cfg($sys_available)] + let ret = { + type FloatTy = $float_ty; + $op( $($arg),+ ) + }; + + #[cfg(not($sys_available))] + let ret = { + use rustc_apfloat::Float; + type FloatTy = rustc_apfloat::ieee::$apfloat_ty; + + let op_res = $op( $(FloatTy::from_bits($arg.to_bits().into())),+ ); + + apfloat_fallback!(@convert $float_ty, op_res $(,$convert)?) + }; + + ret + }}; + + // Operations that do not need converting back to a float + (@convert $float_ty:ty, $val:expr, no_convert) => { + $val + }; + + // Some apfloat operations return a `StatusAnd` that we need to extract the value from. This + // is the default. 
+ (@convert $float_ty:ty, $val:expr) => {{ + // ignore the status, just get the value + let unwrapped = $val.value; + + <$float_ty>::from_bits(FloatTy::to_bits(unwrapped).try_into().unwrap()) + }}; + +} diff --git a/testcrate/tests/addsub.rs b/testcrate/tests/addsub.rs index da7684ec9..85250ac3d 100644 --- a/testcrate/tests/addsub.rs +++ b/testcrate/tests/addsub.rs @@ -1,5 +1,6 @@ #![allow(unused_macros)] +use core::ops::{Add, Sub}; use testcrate::*; macro_rules! sum { @@ -71,28 +72,28 @@ fn addsub() { } macro_rules! float_sum { - ($($f:ty, $fn_add:ident, $fn_sub:ident);*;) => { + ($($f:ty, $fn_add:ident, $fn_sub:ident, $apfloat_ty:ident, $sys_available:meta);*;) => { $( fuzz_float_2(N, |x: $f, y: $f| { - let add0 = x + y; - let sub0 = x - y; + let add0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Add::add, x, y); + let sub0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Sub::sub, x, y); let add1: $f = $fn_add(x, y); let sub1: $f = $fn_sub(x, y); if !Float::eq_repr(add0, add1) { panic!( - "{}({}, {}): std: {}, builtins: {}", + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", stringify!($fn_add), x, y, add0, add1 ); } if !Float::eq_repr(sub0, sub1) { panic!( - "{}({}, {}): std: {}, builtins: {}", + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", stringify!($fn_sub), x, y, sub0, sub1 ); } }); )* - }; + } } #[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] @@ -105,8 +106,8 @@ fn float_addsub() { }; float_sum!( - f32, __addsf3, __subsf3; - f64, __adddf3, __subdf3; + f32, __addsf3, __subsf3, Single, all(); + f64, __adddf3, __subdf3, Double, all(); ); } @@ -120,7 +121,7 @@ fn float_addsub_arm() { }; float_sum!( - f32, __addsf3vfp, __subsf3vfp; - f64, __adddf3vfp, __subdf3vfp; + f32, __addsf3vfp, __subsf3vfp, Single, all(); + f64, __adddf3vfp, __subdf3vfp, Double, all(); ); } diff --git a/testcrate/tests/cmp.rs b/testcrate/tests/cmp.rs index 14dd76b2d..20915b25d 100644 --- a/testcrate/tests/cmp.rs +++ b/testcrate/tests/cmp.rs @@ -1,23 +1,48 @@ 
#![allow(unused_macros)] +#![allow(unreachable_code)] #[cfg(not(target_arch = "powerpc64"))] use testcrate::*; macro_rules! cmp { - ($x:ident, $y:ident, $($unordered_val:expr, $fn:ident);*;) => { + ( + $f:ty, $x:ident, $y:ident, $apfloat_ty:ident, $sys_available:meta, + $($unordered_val:expr, $fn:ident);*; + ) => { $( - let cmp0 = if $x.is_nan() || $y.is_nan() { + let cmp0 = if apfloat_fallback!( + $f, $apfloat_ty, $sys_available, + |x: FloatTy| x.is_nan() => no_convert, + $x + ) || apfloat_fallback!( + $f, $apfloat_ty, $sys_available, + |y: FloatTy| y.is_nan() => no_convert, + $y + ) + { $unordered_val - } else if $x < $y { + } else if apfloat_fallback!( + $f, $apfloat_ty, $sys_available, + |x, y| x < y => no_convert, + $x, $y + ) { -1 - } else if $x == $y { + } else if apfloat_fallback!( + $f, $apfloat_ty, $sys_available, + |x, y| x == y => no_convert, + $x, $y + ) { 0 } else { 1 }; + let cmp1 = $fn($x, $y); if cmp0 != cmp1 { - panic!("{}({}, {}): std: {}, builtins: {}", stringify!($fn_builtins), $x, $y, cmp0, cmp1); + panic!( + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn), $x, $y, cmp0, cmp1 + ); } )* }; @@ -34,7 +59,7 @@ fn float_comparisons() { fuzz_float_2(N, |x: f32, y: f32| { assert_eq!(__unordsf2(x, y) != 0, x.is_nan() || y.is_nan()); - cmp!(x, y, + cmp!(f32, x, y, Single, all(), 1, __ltsf2; 1, __lesf2; 1, __eqsf2; @@ -45,7 +70,7 @@ fn float_comparisons() { }); fuzz_float_2(N, |x: f64, y: f64| { assert_eq!(__unorddf2(x, y) != 0, x.is_nan() || y.is_nan()); - cmp!(x, y, + cmp!(f64, x, y, Double, all(), 1, __ltdf2; 1, __ledf2; 1, __eqdf2; diff --git a/testcrate/tests/div_rem.rs b/testcrate/tests/div_rem.rs index de3bd9bee..461e084d0 100644 --- a/testcrate/tests/div_rem.rs +++ b/testcrate/tests/div_rem.rs @@ -2,6 +2,7 @@ use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4}; use compiler_builtins::int::udiv::{__udivmoddi4, __udivmodsi4, __udivmodti4, u128_divide_sparc}; + use testcrate::*; // Division algorithms have by 
far the nastiest and largest number of edge cases, and experience shows @@ -104,16 +105,20 @@ fn divide_sparc() { } macro_rules! float { - ($($i:ty, $fn:ident);*;) => { + ($($f:ty, $fn:ident, $apfloat_ty:ident, $sys_available:meta);*;) => { $( - fuzz_float_2(N, |x: $i, y: $i| { - let quo0 = x / y; - let quo1: $i = $fn(x, y); + fuzz_float_2(N, |x: $f, y: $f| { + let quo0: $f = apfloat_fallback!($f, $apfloat_ty, $sys_available, Div::div, x, y); + let quo1: $f = $fn(x, y); #[cfg(not(target_arch = "arm"))] if !Float::eq_repr(quo0, quo1) { panic!( - "{}({}, {}): std: {}, builtins: {}", - stringify!($fn), x, y, quo0, quo1 + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn), + x, + y, + quo0, + quo1 ); } @@ -122,8 +127,12 @@ macro_rules! float { if !(Float::is_subnormal(quo0) || Float::is_subnormal(quo1)) { if !Float::eq_repr(quo0, quo1) { panic!( - "{}({}, {}): std: {}, builtins: {}", - stringify!($fn), x, y, quo0, quo1 + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn), + x, + y, + quo0, + quo1 ); } } @@ -139,10 +148,11 @@ fn float_div() { div::{__divdf3, __divsf3}, Float, }; + use core::ops::Div; float!( - f32, __divsf3; - f64, __divdf3; + f32, __divsf3, Single, all(); + f64, __divdf3, Double, all(); ); } @@ -153,9 +163,10 @@ fn float_div_arm() { div::{__divdf3vfp, __divsf3vfp}, Float, }; + use core::ops::Div; float!( - f32, __divsf3vfp; - f64, __divdf3vfp; + f32, __divsf3vfp, Single, all(); + f64, __divdf3vfp, Double, all(); ); } diff --git a/testcrate/tests/mul.rs b/testcrate/tests/mul.rs index 819f06ca9..4fca2c13f 100644 --- a/testcrate/tests/mul.rs +++ b/testcrate/tests/mul.rs @@ -82,16 +82,16 @@ fn overflowing_mul() { } macro_rules! 
float_mul { - ($($f:ty, $fn:ident);*;) => { + ($($f:ty, $fn:ident, $apfloat_ty:ident, $sys_available:meta);*;) => { $( fuzz_float_2(N, |x: $f, y: $f| { - let mul0 = x * y; + let mul0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Mul::mul, x, y); let mul1: $f = $fn(x, y); // multiplication of subnormals is not currently handled if !(Float::is_subnormal(mul0) || Float::is_subnormal(mul1)) { if !Float::eq_repr(mul0, mul1) { panic!( - "{}({}, {}): std: {}, builtins: {}", + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", stringify!($fn), x, y, mul0, mul1 ); } @@ -108,10 +108,11 @@ fn float_mul() { mul::{__muldf3, __mulsf3}, Float, }; + use core::ops::Mul; float_mul!( - f32, __mulsf3; - f64, __muldf3; + f32, __mulsf3, Single, all(); + f64, __muldf3, Double, all(); ); } @@ -122,9 +123,10 @@ fn float_mul_arm() { mul::{__muldf3vfp, __mulsf3vfp}, Float, }; + use core::ops::Mul; float_mul!( - f32, __mulsf3vfp; - f64, __muldf3vfp; + f32, __mulsf3vfp, Single, all(); + f64, __muldf3vfp, Double, all(); ); } From 255c9f3601a760757e6424028f30296e75753c20 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 10 May 2024 19:36:35 -0500 Subject: [PATCH 0738/1459] Enable no-fail-fast for more usable test output --- ci/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run.sh b/ci/run.sh index 1298093a6..847b52435 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -21,7 +21,7 @@ fi if [ "${NO_STD:-}" = "1" ]; then echo "nothing to do for no_std" else - run="cargo test --manifest-path testcrate/Cargo.toml --target $target" + run="cargo test --manifest-path testcrate/Cargo.toml --no-fail-fast --target $target" $run $run --release $run --features c From c8cc819eb37e13442ba807ed997d64a50730edf6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 10 May 2024 19:22:03 -0500 Subject: [PATCH 0739/1459] Implement `f128` addition and subtraction --- README.md | 4 ++-- build.rs | 6 ------ src/float/add.rs | 10 ++++++++++ src/float/sub.rs | 12 ++++++++++++ 
testcrate/Cargo.toml | 2 ++ testcrate/build.rs | 27 +++++++++++++++++++++++++++ testcrate/tests/addsub.rs | 17 ++++++++++++++++- 7 files changed, 69 insertions(+), 9 deletions(-) create mode 100644 testcrate/build.rs diff --git a/README.md b/README.md index 00d547f1b..c7241ec7c 100644 --- a/README.md +++ b/README.md @@ -232,7 +232,7 @@ These builtins are needed to support 128-bit integers. These builtins are needed to support `f16` and `f128`, which are in the process of being added to Rust. -- [ ] addtf3.c +- [x] addtf3.c - [ ] comparetf2.c - [ ] divtf3.c - [x] extenddftf2.c @@ -255,7 +255,7 @@ These builtins are needed to support `f16` and `f128`, which are in the process - [ ] ppc/fixunstfdi.c - [ ] ppc/floatditf.c - [ ] ppc/floatunditf.c -- [ ] subtf3.c +- [x] subtf3.c - [x] truncdfhf2.c - [x] truncsfhf2.c - [x] trunctfdf2.c diff --git a/build.rs b/build.rs index f9eba6ee2..a57052429 100644 --- a/build.rs +++ b/build.rs @@ -543,9 +543,7 @@ mod c { ("__floatsitf", "floatsitf.c"), ("__floatunditf", "floatunditf.c"), ("__floatunsitf", "floatunsitf.c"), - ("__addtf3", "addtf3.c"), ("__multf3", "multf3.c"), - ("__subtf3", "subtf3.c"), ("__divtf3", "divtf3.c"), ("__powitf2", "powitf2.c"), ("__fe_getround", "fp_mode.c"), @@ -564,9 +562,7 @@ mod c { if target_arch == "mips64" { sources.extend(&[ ("__netf2", "comparetf2.c"), - ("__addtf3", "addtf3.c"), ("__multf3", "multf3.c"), - ("__subtf3", "subtf3.c"), ("__fixtfsi", "fixtfsi.c"), ("__floatsitf", "floatsitf.c"), ("__fixunstfsi", "fixunstfsi.c"), @@ -579,9 +575,7 @@ mod c { if target_arch == "loongarch64" { sources.extend(&[ ("__netf2", "comparetf2.c"), - ("__addtf3", "addtf3.c"), ("__multf3", "multf3.c"), - ("__subtf3", "subtf3.c"), ("__fixtfsi", "fixtfsi.c"), ("__floatsitf", "floatsitf.c"), ("__fixunstfsi", "fixunstfsi.c"), diff --git a/src/float/add.rs b/src/float/add.rs index 909948ad2..fd151f77d 100644 --- a/src/float/add.rs +++ b/src/float/add.rs @@ -203,6 +203,16 @@ intrinsics! 
{ add(a, b) } + #[cfg(not(any(feature = "no-f16-f128", target_arch = "powerpc", target_arch = "powerpc64")))] + pub extern "C" fn __addtf3(a: f128, b: f128) -> f128 { + add(a, b) + } + + #[cfg(all(not(feature = "no-f16-f128"), any(target_arch = "powerpc", target_arch = "powerpc64")))] + pub extern "C" fn __addkf3(a: f128, b: f128) -> f128 { + add(a, b) + } + #[cfg(target_arch = "arm")] pub extern "C" fn __addsf3vfp(a: f32, b: f32) -> f32 { a + b diff --git a/src/float/sub.rs b/src/float/sub.rs index 64653ee25..de33259d6 100644 --- a/src/float/sub.rs +++ b/src/float/sub.rs @@ -15,6 +15,18 @@ intrinsics! { __adddf3(a, f64::from_repr(b.repr() ^ f64::SIGN_MASK)) } + #[cfg(not(any(feature = "no-f16-f128", target_arch = "powerpc", target_arch = "powerpc64")))] + pub extern "C" fn __subtf3(a: f128, b: f128) -> f128 { + use crate::float::add::__addtf3; + __addtf3(a, f128::from_repr(b.repr() ^ f128::SIGN_MASK)) + } + + #[cfg(all(not(feature = "no-f16-f128"), any(target_arch = "powerpc", target_arch = "powerpc64")))] + pub extern "C" fn __subkf3(a: f128, b: f128) -> f128 { + use crate::float::add::__addkf3; + __addkf3(a, f128::from_repr(b.repr() ^ f128::SIGN_MASK)) + } + #[cfg(target_arch = "arm")] pub extern "C" fn __subsf3vfp(a: f32, b: f32) -> f32 { a - b diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index 6ff3fde17..6f771181a 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -33,3 +33,5 @@ no-asm = ["compiler_builtins/no-asm"] no-f16-f128 = ["compiler_builtins/no-f16-f128"] mem = ["compiler_builtins/mem"] mangled-names = ["compiler_builtins/mangled-names"] +# Skip tests that rely on f128 symbols being available on the system +no-sys-f128 = [] diff --git a/testcrate/build.rs b/testcrate/build.rs new file mode 100644 index 000000000..f24dae3c6 --- /dev/null +++ b/testcrate/build.rs @@ -0,0 +1,27 @@ +use std::env; + +fn main() { + let target = env::var("TARGET").unwrap(); + + // These platforms do not have f128 symbols available in their system 
libraries, so + // skip related tests. + if target.starts_with("arm-") + || target.contains("apple-darwin") + || target.contains("windows-msvc") + // GCC and LLVM disagree on the ABI of `f16` and `f128` with MinGW. See + // . + || target.contains("windows-gnu") + // FIXME(llvm): There is an ABI incompatibility between GCC and Clang on 32-bit x86. + // See . + || target.starts_with("i686") + // 32-bit PowerPC gets code generated that Qemu cannot handle. See + // . + || target.starts_with("powerpc-") + // FIXME: We get different results from the builtin functions. See + // . + || target.starts_with("powerpc64-") + { + println!("cargo:warning=using apfloat fallback for f128"); + println!("cargo:rustc-cfg=feature=\"no-sys-f128\""); + } +} diff --git a/testcrate/tests/addsub.rs b/testcrate/tests/addsub.rs index 85250ac3d..d3e96d57d 100644 --- a/testcrate/tests/addsub.rs +++ b/testcrate/tests/addsub.rs @@ -1,6 +1,7 @@ #![allow(unused_macros)] +#![feature(f128)] +#![feature(f16)] -use core::ops::{Add, Sub}; use testcrate::*; macro_rules! 
sum { @@ -104,11 +105,24 @@ fn float_addsub() { sub::{__subdf3, __subsf3}, Float, }; + use core::ops::{Add, Sub}; float_sum!( f32, __addsf3, __subsf3, Single, all(); f64, __adddf3, __subdf3, Double, all(); ); + + #[cfg(not(feature = "no-f16-f128"))] + { + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + use compiler_builtins::float::{add::__addkf3 as __addtf3, sub::__subkf3 as __subtf3}; + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + use compiler_builtins::float::{add::__addtf3, sub::__subtf3}; + + float_sum!( + f128, __addtf3, __subtf3, Quad, not(feature = "no-sys-f128"); + ); + } } #[cfg(target_arch = "arm")] @@ -119,6 +133,7 @@ fn float_addsub_arm() { sub::{__subdf3vfp, __subsf3vfp}, Float, }; + use core::ops::{Add, Sub}; float_sum!( f32, __addsf3vfp, __subsf3vfp, Single, all(); From 58ad3176548b4bf58eeed8237ab861fff976be15 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 10 May 2024 19:46:50 -0500 Subject: [PATCH 0740/1459] Implement `f128` multiplication --- README.md | 2 +- build.rs | 5 ----- src/float/mul.rs | 11 +++++++++++ testcrate/tests/mul.rs | 17 +++++++++++++++++ 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index c7241ec7c..1986c21c6 100644 --- a/README.md +++ b/README.md @@ -249,7 +249,7 @@ These builtins are needed to support `f16` and `f128`, which are in the process - [ ] floatsitf.c - [ ] floatunditf.c - [ ] floatunsitf.c -- [ ] multf3.c +- [x] multf3.c - [ ] powitf2.c - [ ] ppc/fixtfdi.c - [ ] ppc/fixunstfdi.c diff --git a/build.rs b/build.rs index a57052429..ec830ecb3 100644 --- a/build.rs +++ b/build.rs @@ -543,7 +543,6 @@ mod c { ("__floatsitf", "floatsitf.c"), ("__floatunditf", "floatunditf.c"), ("__floatunsitf", "floatunsitf.c"), - ("__multf3", "multf3.c"), ("__divtf3", "divtf3.c"), ("__powitf2", "powitf2.c"), ("__fe_getround", "fp_mode.c"), @@ -562,26 +561,22 @@ mod c { if target_arch == "mips64" { sources.extend(&[ ("__netf2", "comparetf2.c"), - 
("__multf3", "multf3.c"), ("__fixtfsi", "fixtfsi.c"), ("__floatsitf", "floatsitf.c"), ("__fixunstfsi", "fixunstfsi.c"), ("__floatunsitf", "floatunsitf.c"), ("__fe_getround", "fp_mode.c"), - ("__divtf3", "divtf3.c"), ]); } if target_arch == "loongarch64" { sources.extend(&[ ("__netf2", "comparetf2.c"), - ("__multf3", "multf3.c"), ("__fixtfsi", "fixtfsi.c"), ("__floatsitf", "floatsitf.c"), ("__fixunstfsi", "fixunstfsi.c"), ("__floatunsitf", "floatunsitf.c"), ("__fe_getround", "fp_mode.c"), - ("__divtf3", "divtf3.c"), ]); } diff --git a/src/float/mul.rs b/src/float/mul.rs index 46c41d09f..9866b6280 100644 --- a/src/float/mul.rs +++ b/src/float/mul.rs @@ -199,6 +199,17 @@ intrinsics! { mul(a, b) } + #[cfg(not(any(feature = "no-f16-f128", target_arch = "powerpc", target_arch = "powerpc64")))] + pub extern "C" fn __multf3(a: f128, b: f128) -> f128 { + mul(a, b) + } + + + #[cfg(all(not(feature = "no-f16-f128"), any(target_arch = "powerpc", target_arch = "powerpc64")))] + pub extern "C" fn __mulkf3(a: f128, b: f128) -> f128 { + mul(a, b) + } + #[cfg(target_arch = "arm")] pub extern "C" fn __mulsf3vfp(a: f32, b: f32) -> f32 { a * b diff --git a/testcrate/tests/mul.rs b/testcrate/tests/mul.rs index 4fca2c13f..ffbe63864 100644 --- a/testcrate/tests/mul.rs +++ b/testcrate/tests/mul.rs @@ -1,4 +1,6 @@ #![allow(unused_macros)] +#![feature(f128)] +#![feature(f16)] use testcrate::*; @@ -114,6 +116,21 @@ fn float_mul() { f32, __mulsf3, Single, all(); f64, __muldf3, Double, all(); ); + + #[cfg(not(feature = "no-f16-f128"))] + { + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + use compiler_builtins::float::mul::__mulkf3 as __multf3; + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + use compiler_builtins::float::mul::__multf3; + + float_mul!( + f128, __multf3, Quad, + // FIXME(llvm): there is a bug in LLVM rt. + // See . 
+ not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux"))); + ); + } } #[cfg(target_arch = "arm")] From 9bea1962232be51f27a8323563b95def16e52c77 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 11 May 2024 01:18:38 -0500 Subject: [PATCH 0741/1459] Implement `f128` comparison --- README.md | 2 +- src/float/cmp.rs | 83 ++++++++++++++++++++++++++++++++++++++++++ testcrate/tests/cmp.rs | 40 ++++++++++++++++++++ 3 files changed, 124 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1986c21c6..37d7ab2e6 100644 --- a/README.md +++ b/README.md @@ -233,7 +233,7 @@ These builtins are needed to support 128-bit integers. These builtins are needed to support `f16` and `f128`, which are in the process of being added to Rust. - [x] addtf3.c -- [ ] comparetf2.c +- [x] comparetf2.c - [ ] divtf3.c - [x] extenddftf2.c - [x] extendhfsf2.c diff --git a/src/float/cmp.rs b/src/float/cmp.rs index 193c5df36..44ebf6262 100644 --- a/src/float/cmp.rs +++ b/src/float/cmp.rs @@ -172,6 +172,89 @@ intrinsics! { } } +#[cfg(not(any( + feature = "no-f16-f128", + target_arch = "powerpc", + target_arch = "powerpc64" +)))] +intrinsics! { + #[avr_skip] + pub extern "C" fn __letf2(a: f128, b: f128) -> i32 { + cmp(a, b).to_le_abi() + } + + #[avr_skip] + pub extern "C" fn __getf2(a: f128, b: f128) -> i32 { + cmp(a, b).to_ge_abi() + } + + #[avr_skip] + pub extern "C" fn __unordtf2(a: f128, b: f128) -> i32 { + unord(a, b) as i32 + } + + #[avr_skip] + pub extern "C" fn __eqtf2(a: f128, b: f128) -> i32 { + cmp(a, b).to_le_abi() + } + + #[avr_skip] + pub extern "C" fn __lttf2(a: f128, b: f128) -> i32 { + cmp(a, b).to_le_abi() + } + + #[avr_skip] + pub extern "C" fn __netf2(a: f128, b: f128) -> i32 { + cmp(a, b).to_le_abi() + } + + #[avr_skip] + pub extern "C" fn __gttf2(a: f128, b: f128) -> i32 { + cmp(a, b).to_ge_abi() + } +} + +#[cfg(all( + not(feature = "no-f16-f128"), + any(target_arch = "powerpc", target_arch = "powerpc64") +))] +intrinsics! 
{ + #[avr_skip] + pub extern "C" fn __lekf2(a: f128, b: f128) -> i32 { + cmp(a, b).to_le_abi() + } + + #[avr_skip] + pub extern "C" fn __gekf2(a: f128, b: f128) -> i32 { + cmp(a, b).to_ge_abi() + } + + #[avr_skip] + pub extern "C" fn __unordkf2(a: f128, b: f128) -> i32 { + unord(a, b) as i32 + } + + #[avr_skip] + pub extern "C" fn __eqkf2(a: f128, b: f128) -> i32 { + cmp(a, b).to_le_abi() + } + + #[avr_skip] + pub extern "C" fn __ltkf2(a: f128, b: f128) -> i32 { + cmp(a, b).to_le_abi() + } + + #[avr_skip] + pub extern "C" fn __nekf2(a: f128, b: f128) -> i32 { + cmp(a, b).to_le_abi() + } + + #[avr_skip] + pub extern "C" fn __gtkf2(a: f128, b: f128) -> i32 { + cmp(a, b).to_ge_abi() + } +} + #[cfg(target_arch = "arm")] intrinsics! { pub extern "aapcs" fn __aeabi_fcmple(a: f32, b: f32) -> i32 { diff --git a/testcrate/tests/cmp.rs b/testcrate/tests/cmp.rs index 20915b25d..0d15f5d42 100644 --- a/testcrate/tests/cmp.rs +++ b/testcrate/tests/cmp.rs @@ -1,5 +1,7 @@ #![allow(unused_macros)] #![allow(unreachable_code)] +#![feature(f128)] +#![feature(f16)] #[cfg(not(target_arch = "powerpc64"))] use testcrate::*; @@ -79,6 +81,44 @@ fn float_comparisons() { 1, __nedf2; ); }); + + #[cfg(not(feature = "no-f16-f128"))] + { + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + use compiler_builtins::float::cmp::{ + __eqkf2 as __eqtf2, __gekf2 as __getf2, __gtkf2 as __gttf2, __lekf2 as __letf2, + __ltkf2 as __lttf2, __nekf2 as __netf2, __unordkf2 as __unordtf2, + }; + + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + use compiler_builtins::float::cmp::{ + __eqtf2, __getf2, __gttf2, __letf2, __lttf2, __netf2, __unordtf2, + }; + + fuzz_float_2(N, |x: f128, y: f128| { + let x_is_nan = apfloat_fallback!( + f128, Quad, not(feature = "no-sys-f128"), + |x: FloatTy| x.is_nan() => no_convert, + x + ); + let y_is_nan = apfloat_fallback!( + f128, Quad, not(feature = "no-sys-f128"), + |x: FloatTy| x.is_nan() => no_convert, + y + ); + + 
assert_eq!(__unordtf2(x, y) != 0, x_is_nan || y_is_nan); + + cmp!(f128, x, y, Quad, not(feature = "no-sys-f128"), + 1, __lttf2; + 1, __letf2; + 1, __eqtf2; + -1, __getf2; + -1, __gttf2; + 1, __netf2; + ); + }); + } } macro_rules! cmp2 { From 6a847ab4f6bc1132dff8481e28c52cc0340f72e9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 12 May 2024 17:17:46 -0500 Subject: [PATCH 0742/1459] Correct f128 extend and truncate symbol names on powerpc PowerPC uses `kf` instead of `tf`: --- src/float/extend.rs | 18 ++++++++++++++++++ src/float/trunc.rs | 18 ++++++++++++++++++ testcrate/tests/conv.rs | 16 ++++++++++++++-- 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/src/float/extend.rs b/src/float/extend.rs index 5b0c0d972..12e5fc9e1 100644 --- a/src/float/extend.rs +++ b/src/float/extend.rs @@ -100,19 +100,37 @@ intrinsics! { #[avr_skip] #[aapcs_on_arm] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] pub extern "C" fn __extendhftf2(a: f16) -> f128 { extend(a) } + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + pub extern "C" fn __extendhfkf2(a: f16) -> f128 { + extend(a) + } + #[avr_skip] #[aapcs_on_arm] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] pub extern "C" fn __extendsftf2(a: f32) -> f128 { extend(a) } + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + pub extern "C" fn __extendsfkf2(a: f32) -> f128 { + extend(a) + } + #[avr_skip] #[aapcs_on_arm] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] pub extern "C" fn __extenddftf2(a: f64) -> f128 { extend(a) } + + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + pub extern "C" fn __extenddfkf2(a: f64) -> f128 { + extend(a) + } } diff --git a/src/float/trunc.rs b/src/float/trunc.rs index b607a6549..31351b5e9 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -155,19 +155,37 @@ intrinsics! 
{ #[avr_skip] #[aapcs_on_arm] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] pub extern "C" fn __trunctfhf2(a: f128) -> f16 { trunc(a) } + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + pub extern "C" fn __trunckfhf2(a: f128) -> f16 { + trunc(a) + } + #[avr_skip] #[aapcs_on_arm] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] pub extern "C" fn __trunctfsf2(a: f128) -> f32 { trunc(a) } + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + pub extern "C" fn __trunckfsf2(a: f128) -> f32 { + trunc(a) + } + #[avr_skip] #[aapcs_on_arm] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] pub extern "C" fn __trunctfdf2(a: f128) -> f64 { trunc(a) } + + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + pub extern "C" fn __trunckfdf2(a: f128) -> f64 { + trunc(a) + } } diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 5cff01202..f0ef95255 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -187,9 +187,15 @@ fn float_extend() { conv!(f32, f64, __extendsfdf2, Single, Double); #[cfg(not(feature = "no-f16-f128"))] { + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] use compiler_builtins::float::extend::{ - __extenddftf2, __extendhfsf2, __extendhftf2, __extendsftf2, __gnu_h2f_ieee, + __extenddfkf2 as __extenddftf2, __extendhfkf2 as __extendhftf2, + __extendsfkf2 as __extendsftf2, }; + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + use compiler_builtins::float::extend::{__extenddftf2, __extendhftf2, __extendsftf2}; + use compiler_builtins::float::extend::{__extendhfsf2, __gnu_h2f_ieee}; + // FIXME(f16_f128): Also do extend!() for `f16` and `f128` when builtins are in nightly conv!(f16, f32, __extendhfsf2, Half, Single); conv!(f16, f32, __gnu_h2f_ieee, Half, Single); @@ -234,9 +240,15 @@ fn float_trunc() { conv!(f64, f32, __truncdfsf2, Double, Single); #[cfg(not(feature = "no-f16-f128"))] { + 
use compiler_builtins::float::trunc::{__gnu_f2h_ieee, __truncdfhf2, __truncsfhf2}; + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] use compiler_builtins::float::trunc::{ - __gnu_f2h_ieee, __truncdfhf2, __truncsfhf2, __trunctfdf2, __trunctfhf2, __trunctfsf2, + __trunckfdf2 as __trunctfdf2, __trunckfhf2 as __trunctfhf2, + __trunckfsf2 as __trunctfsf2, }; + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + use compiler_builtins::float::trunc::{__trunctfdf2, __trunctfhf2, __trunctfsf2}; + // FIXME(f16_f128): Also do trunc!() for `f16` and `f128` when builtins are in nightly conv!(f32, f16, __truncsfhf2, Single, Half); conv!(f32, f16, __gnu_f2h_ieee, Single, Half); From 658cac7f4faee593576c8d514152bc336ae802d0 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 18 May 2024 04:07:37 -0400 Subject: [PATCH 0743/1459] Rework the test crate to separate individual tests Currently, tests of the same kind are grouped together across all types into a single function. This makes it difficult to understand exactly what failed in CI. Change test macros to create separate functions for separate types so failures are more fine grained. --- testcrate/tests/addsub.rs | 226 ++++++++++--------- testcrate/tests/cmp.rs | 279 +++++++++++++----------- testcrate/tests/conv.rs | 435 ++++++++++++++++++++++--------------- testcrate/tests/div_rem.rs | 70 +++--- testcrate/tests/misc.rs | 71 +++--- testcrate/tests/mul.rs | 238 ++++++++++---------- testcrate/tests/shift.rs | 52 +++-- 7 files changed, 756 insertions(+), 615 deletions(-) diff --git a/testcrate/tests/addsub.rs b/testcrate/tests/addsub.rs index d3e96d57d..f21f61ff6 100644 --- a/testcrate/tests/addsub.rs +++ b/testcrate/tests/addsub.rs @@ -4,139 +4,149 @@ use testcrate::*; -macro_rules! 
sum { - ($($i:ty, $fn_add:ident, $fn_sub:ident);*;) => { - $( - fuzz_2(N, |x: $i, y: $i| { - let add0 = x.wrapping_add(y); - let sub0 = x.wrapping_sub(y); - let add1: $i = $fn_add(x, y); - let sub1: $i = $fn_sub(x, y); - if add0 != add1 { - panic!( - "{}({}, {}): std: {}, builtins: {}", - stringify!($fn_add), x, y, add0, add1 - ); - } - if sub0 != sub1 { - panic!( - "{}({}, {}): std: {}, builtins: {}", - stringify!($fn_sub), x, y, sub0, sub1 - ); - } - }); - )* - }; -} +mod int_addsub { + use super::*; -macro_rules! overflowing_sum { - ($($i:ty, $fn_add:ident, $fn_sub:ident);*;) => { - $( - fuzz_2(N, |x: $i, y: $i| { - let add0 = x.overflowing_add(y); - let sub0 = x.overflowing_sub(y); - let add1: ($i, bool) = $fn_add(x, y); - let sub1: ($i, bool) = $fn_sub(x, y); - if add0.0 != add1.0 || add0.1 != add1.1 { - panic!( - "{}({}, {}): std: {:?}, builtins: {:?}", - stringify!($fn_add), x, y, add0, add1 - ); - } - if sub0.0 != sub1.0 || sub0.1 != sub1.1 { - panic!( - "{}({}, {}): std: {:?}, builtins: {:?}", - stringify!($fn_sub), x, y, sub0, sub1 - ); + macro_rules! sum { + ($($i:ty, $fn_add:ident, $fn_sub:ident);*;) => { + $( + #[test] + fn $fn_add() { + use compiler_builtins::int::addsub::{$fn_add, $fn_sub}; + + fuzz_2(N, |x: $i, y: $i| { + let add0 = x.wrapping_add(y); + let sub0 = x.wrapping_sub(y); + let add1: $i = $fn_add(x, y); + let sub1: $i = $fn_sub(x, y); + if add0 != add1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn_add), x, y, add0, add1 + ); + } + if sub0 != sub1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn_sub), x, y, sub0, sub1 + ); + } + }); } - }); - )* - }; -} + )* + }; + } + + macro_rules! 
overflowing_sum { + ($($i:ty, $fn_add:ident, $fn_sub:ident);*;) => { + $( + #[test] + fn $fn_add() { + use compiler_builtins::int::addsub::{$fn_add, $fn_sub}; -#[test] -fn addsub() { - use compiler_builtins::int::addsub::{ - __rust_i128_add, __rust_i128_addo, __rust_i128_sub, __rust_i128_subo, __rust_u128_add, - __rust_u128_addo, __rust_u128_sub, __rust_u128_subo, - }; + fuzz_2(N, |x: $i, y: $i| { + let add0 = x.overflowing_add(y); + let sub0 = x.overflowing_sub(y); + let add1: ($i, bool) = $fn_add(x, y); + let sub1: ($i, bool) = $fn_sub(x, y); + if add0.0 != add1.0 || add0.1 != add1.1 { + panic!( + "{}({}, {}): std: {:?}, builtins: {:?}", + stringify!($fn_add), x, y, add0, add1 + ); + } + if sub0.0 != sub1.0 || sub0.1 != sub1.1 { + panic!( + "{}({}, {}): std: {:?}, builtins: {:?}", + stringify!($fn_sub), x, y, sub0, sub1 + ); + } + }); + } + )* + }; + } // Integer addition and subtraction is very simple, so 100 fuzzing passes should be plenty. - sum!( + sum! { u128, __rust_u128_add, __rust_u128_sub; i128, __rust_i128_add, __rust_i128_sub; - ); - overflowing_sum!( + } + + overflowing_sum! { u128, __rust_u128_addo, __rust_u128_subo; i128, __rust_i128_addo, __rust_i128_subo; - ); + } } macro_rules! 
float_sum { ($($f:ty, $fn_add:ident, $fn_sub:ident, $apfloat_ty:ident, $sys_available:meta);*;) => { $( - fuzz_float_2(N, |x: $f, y: $f| { - let add0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Add::add, x, y); - let sub0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Sub::sub, x, y); - let add1: $f = $fn_add(x, y); - let sub1: $f = $fn_sub(x, y); - if !Float::eq_repr(add0, add1) { - panic!( - "{}({:?}, {:?}): std: {:?}, builtins: {:?}", - stringify!($fn_add), x, y, add0, add1 - ); - } - if !Float::eq_repr(sub0, sub1) { - panic!( - "{}({:?}, {:?}): std: {:?}, builtins: {:?}", - stringify!($fn_sub), x, y, sub0, sub1 - ); - } - }); + #[test] + fn $fn_add() { + use core::ops::{Add, Sub}; + use compiler_builtins::float::{{add::$fn_add, sub::$fn_sub}, Float}; + + fuzz_float_2(N, |x: $f, y: $f| { + let add0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Add::add, x, y); + let sub0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Sub::sub, x, y); + let add1: $f = $fn_add(x, y); + let sub1: $f = $fn_sub(x, y); + if !Float::eq_repr(add0, add1) { + panic!( + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn_add), x, y, add0, add1 + ); + } + if !Float::eq_repr(sub0, sub1) { + panic!( + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn_sub), x, y, sub0, sub1 + ); + } + }); + } )* } } #[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] -#[test] -fn float_addsub() { - use compiler_builtins::float::{ - add::{__adddf3, __addsf3}, - sub::{__subdf3, __subsf3}, - Float, - }; - use core::ops::{Add, Sub}; - - float_sum!( +mod float_addsub { + use super::*; + + float_sum! 
{ f32, __addsf3, __subsf3, Single, all(); f64, __adddf3, __subdf3, Double, all(); - ); - - #[cfg(not(feature = "no-f16-f128"))] - { - #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] - use compiler_builtins::float::{add::__addkf3 as __addtf3, sub::__subkf3 as __subtf3}; - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] - use compiler_builtins::float::{add::__addtf3, sub::__subtf3}; - - float_sum!( - f128, __addtf3, __subtf3, Quad, not(feature = "no-sys-f128"); - ); + } +} + +#[cfg(not(feature = "no-f16-f128"))] +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] +mod float_addsub_f128 { + use super::*; + + float_sum! { + f128, __addtf3, __subtf3, Quad, not(feature = "no-sys-f128"); + } +} + +#[cfg(not(feature = "no-f16-f128"))] +#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] +mod float_addsub_f128_ppc { + use super::*; + + float_sum! { + f128, __addkf3, __subkf3, Quad, not(feature = "no-sys-f128"); } } #[cfg(target_arch = "arm")] -#[test] -fn float_addsub_arm() { - use compiler_builtins::float::{ - add::{__adddf3vfp, __addsf3vfp}, - sub::{__subdf3vfp, __subsf3vfp}, - Float, - }; - use core::ops::{Add, Sub}; - - float_sum!( +mod float_addsub_arm { + use super::*; + + float_sum! { f32, __addsf3vfp, __subsf3vfp, Single, all(); f64, __adddf3vfp, __subdf3vfp, Double, all(); - ); + } } diff --git a/testcrate/tests/cmp.rs b/testcrate/tests/cmp.rs index 0d15f5d42..e8a0eb165 100644 --- a/testcrate/tests/cmp.rs +++ b/testcrate/tests/cmp.rs @@ -6,84 +6,96 @@ #[cfg(not(target_arch = "powerpc64"))] use testcrate::*; -macro_rules! 
cmp { - ( - $f:ty, $x:ident, $y:ident, $apfloat_ty:ident, $sys_available:meta, - $($unordered_val:expr, $fn:ident);*; - ) => { - $( - let cmp0 = if apfloat_fallback!( +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +mod float_comparisons { + use super::*; + + macro_rules! cmp { + ( + $f:ty, $x:ident, $y:ident, $apfloat_ty:ident, $sys_available:meta, + $($unordered_val:expr, $fn:ident);*; + ) => { + $( + let cmp0 = if apfloat_fallback!( + $f, $apfloat_ty, $sys_available, + |x: FloatTy| x.is_nan() => no_convert, + $x + ) || apfloat_fallback!( + $f, $apfloat_ty, $sys_available, + |y: FloatTy| y.is_nan() => no_convert, + $y + ) + { + $unordered_val + } else if apfloat_fallback!( $f, $apfloat_ty, $sys_available, - |x: FloatTy| x.is_nan() => no_convert, - $x - ) || apfloat_fallback!( + |x, y| x < y => no_convert, + $x, $y + ) { + -1 + } else if apfloat_fallback!( $f, $apfloat_ty, $sys_available, - |y: FloatTy| y.is_nan() => no_convert, - $y - ) - { - $unordered_val - } else if apfloat_fallback!( - $f, $apfloat_ty, $sys_available, - |x, y| x < y => no_convert, - $x, $y - ) { - -1 - } else if apfloat_fallback!( - $f, $apfloat_ty, $sys_available, - |x, y| x == y => no_convert, - $x, $y - ) { - 0 - } else { - 1 - }; - - let cmp1 = $fn($x, $y); - if cmp0 != cmp1 { - panic!( - "{}({:?}, {:?}): std: {:?}, builtins: {:?}", - stringify!($fn), $x, $y, cmp0, cmp1 - ); - } - )* - }; -} + |x, y| x == y => no_convert, + $x, $y + ) { + 0 + } else { + 1 + }; -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[test] -fn float_comparisons() { - use compiler_builtins::float::cmp::{ - __eqdf2, __eqsf2, __gedf2, __gesf2, __gtdf2, __gtsf2, __ledf2, __lesf2, __ltdf2, __ltsf2, - __nedf2, __nesf2, __unorddf2, __unordsf2, - }; - - fuzz_float_2(N, |x: f32, y: f32| { - assert_eq!(__unordsf2(x, y) != 0, x.is_nan() || y.is_nan()); - cmp!(f32, 
x, y, Single, all(), - 1, __ltsf2; - 1, __lesf2; - 1, __eqsf2; - -1, __gesf2; - -1, __gtsf2; - 1, __nesf2; - ); - }); - fuzz_float_2(N, |x: f64, y: f64| { - assert_eq!(__unorddf2(x, y) != 0, x.is_nan() || y.is_nan()); - cmp!(f64, x, y, Double, all(), - 1, __ltdf2; - 1, __ledf2; - 1, __eqdf2; - -1, __gedf2; - -1, __gtdf2; - 1, __nedf2; - ); - }); + let cmp1 = $fn($x, $y); + if cmp0 != cmp1 { + panic!( + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn), $x, $y, cmp0, cmp1 + ); + } + )* + }; + } + + #[test] + fn cmp_f32() { + use compiler_builtins::float::cmp::{ + __eqsf2, __gesf2, __gtsf2, __lesf2, __ltsf2, __nesf2, __unordsf2, + }; + fuzz_float_2(N, |x: f32, y: f32| { + assert_eq!(__unordsf2(x, y) != 0, x.is_nan() || y.is_nan()); + cmp!(f32, x, y, Single, all(), + 1, __ltsf2; + 1, __lesf2; + 1, __eqsf2; + -1, __gesf2; + -1, __gtsf2; + 1, __nesf2; + ); + }); + } + + #[test] + fn cmp_f64() { + use compiler_builtins::float::cmp::{ + __eqdf2, __gedf2, __gtdf2, __ledf2, __ltdf2, __nedf2, __unorddf2, + }; + + fuzz_float_2(N, |x: f64, y: f64| { + assert_eq!(__unorddf2(x, y) != 0, x.is_nan() || y.is_nan()); + cmp!(f64, x, y, Double, all(), + 1, __ltdf2; + 1, __ledf2; + 1, __eqdf2; + -1, __gedf2; + -1, __gtdf2; + 1, __nedf2; + ); + }); + } + + #[test] #[cfg(not(feature = "no-f16-f128"))] - { + fn cmp_f128() { #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] use compiler_builtins::float::cmp::{ __eqkf2 as __eqtf2, __gekf2 as __getf2, __gtkf2 as __gttf2, __lekf2 as __letf2, @@ -121,60 +133,71 @@ fn float_comparisons() { } } -macro_rules! 
cmp2 { - ($x:ident, $y:ident, $($unordered_val:expr, $fn_std:expr, $fn_builtins:ident);*;) => { - $( - let cmp0: i32 = if $x.is_nan() || $y.is_nan() { - $unordered_val - } else { - $fn_std as i32 - }; - let cmp1: i32 = $fn_builtins($x, $y); - if cmp0 != cmp1 { - panic!("{}({}, {}): std: {}, builtins: {}", stringify!($fn_builtins), $x, $y, cmp0, cmp1); - } - )* - }; -} - #[cfg(target_arch = "arm")] -#[test] -fn float_comparisons_arm() { - use compiler_builtins::float::cmp::{ - __aeabi_dcmpeq, __aeabi_dcmpge, __aeabi_dcmpgt, __aeabi_dcmple, __aeabi_dcmplt, - __aeabi_fcmpeq, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmple, __aeabi_fcmplt, __eqdf2vfp, - __eqsf2vfp, __gedf2vfp, __gesf2vfp, __gtdf2vfp, __gtsf2vfp, __ledf2vfp, __lesf2vfp, - __ltdf2vfp, __ltsf2vfp, __nedf2vfp, __nesf2vfp, - }; - - fuzz_float_2(N, |x: f32, y: f32| { - cmp2!(x, y, - 0, x < y, __aeabi_fcmplt; - 0, x <= y, __aeabi_fcmple; - 0, x == y, __aeabi_fcmpeq; - 0, x >= y, __aeabi_fcmpge; - 0, x > y, __aeabi_fcmpgt; - 0, x < y, __ltsf2vfp; - 0, x <= y, __lesf2vfp; - 0, x == y, __eqsf2vfp; - 0, x >= y, __gesf2vfp; - 0, x > y, __gtsf2vfp; - 1, x != y, __nesf2vfp; - ); - }); - fuzz_float_2(N, |x: f64, y: f64| { - cmp2!(x, y, - 0, x < y, __aeabi_dcmplt; - 0, x <= y, __aeabi_dcmple; - 0, x == y, __aeabi_dcmpeq; - 0, x >= y, __aeabi_dcmpge; - 0, x > y, __aeabi_dcmpgt; - 0, x < y, __ltdf2vfp; - 0, x <= y, __ledf2vfp; - 0, x == y, __eqdf2vfp; - 0, x >= y, __gedf2vfp; - 0, x > y, __gtdf2vfp; - 1, x != y, __nedf2vfp; - ); - }); +mod float_comparisons_arm { + use super::*; + + macro_rules! 
cmp2 { + ($x:ident, $y:ident, $($unordered_val:expr, $fn_std:expr, $fn_builtins:ident);*;) => { + $( + let cmp0: i32 = if $x.is_nan() || $y.is_nan() { + $unordered_val + } else { + $fn_std as i32 + }; + let cmp1: i32 = $fn_builtins($x, $y); + if cmp0 != cmp1 { + panic!("{}({}, {}): std: {}, builtins: {}", stringify!($fn_builtins), $x, $y, cmp0, cmp1); + } + )* + }; + } + + #[test] + fn cmp_f32() { + use compiler_builtins::float::cmp::{ + __aeabi_fcmpeq, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmple, __aeabi_fcmplt, + __eqsf2vfp, __gesf2vfp, __gtsf2vfp, __lesf2vfp, __ltsf2vfp, __nesf2vfp, + }; + + fuzz_float_2(N, |x: f32, y: f32| { + cmp2!(x, y, + 0, x < y, __aeabi_fcmplt; + 0, x <= y, __aeabi_fcmple; + 0, x == y, __aeabi_fcmpeq; + 0, x >= y, __aeabi_fcmpge; + 0, x > y, __aeabi_fcmpgt; + 0, x < y, __ltsf2vfp; + 0, x <= y, __lesf2vfp; + 0, x == y, __eqsf2vfp; + 0, x >= y, __gesf2vfp; + 0, x > y, __gtsf2vfp; + 1, x != y, __nesf2vfp; + ); + }); + } + + #[test] + fn cmp_f64() { + use compiler_builtins::float::cmp::{ + __aeabi_dcmpeq, __aeabi_dcmpge, __aeabi_dcmpgt, __aeabi_dcmple, __aeabi_dcmplt, + __eqdf2vfp, __gedf2vfp, __gtdf2vfp, __ledf2vfp, __ltdf2vfp, __nedf2vfp, + }; + + fuzz_float_2(N, |x: f64, y: f64| { + cmp2!(x, y, + 0, x < y, __aeabi_dcmplt; + 0, x <= y, __aeabi_dcmple; + 0, x == y, __aeabi_dcmpeq; + 0, x >= y, __aeabi_dcmpge; + 0, x > y, __aeabi_dcmpgt; + 0, x < y, __ltdf2vfp; + 0, x <= y, __ledf2vfp; + 0, x == y, __eqdf2vfp; + 0, x >= y, __gedf2vfp; + 0, x > y, __gtdf2vfp; + 1, x != y, __nedf2vfp; + ); + }); + } } diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index f0ef95255..7b672ac25 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -2,84 +2,83 @@ #![cfg_attr(not(feature = "no-f16-f128"), feature(f128))] // makes configuration easier #![allow(unused_macros)] +#![allow(unused_imports)] use compiler_builtins::float::Float; -use rustc_apfloat::ieee::{Double, Single}; -#[cfg(not(feature = "no-f16-f128"))] -use 
rustc_apfloat::ieee::{Half, Quad}; use rustc_apfloat::{Float as _, FloatConvert as _}; use testcrate::*; -macro_rules! i_to_f { - ($($from:ty, $into:ty, $fn:ident);*;) => { - $( - fuzz(N, |x: $from| { - let f0 = x as $into; - let f1: $into = $fn(x); - // This makes sure that the conversion produced the best rounding possible, and does - // this independent of `x as $into` rounding correctly. - // This assumes that float to integer conversion is correct. - let y_minus_ulp = <$into>::from_bits(f1.to_bits().wrapping_sub(1)) as $from; - let y = f1 as $from; - let y_plus_ulp = <$into>::from_bits(f1.to_bits().wrapping_add(1)) as $from; - let error_minus = <$from as Int>::abs_diff(y_minus_ulp, x); - let error = <$from as Int>::abs_diff(y, x); - let error_plus = <$from as Int>::abs_diff(y_plus_ulp, x); - // The first two conditions check that none of the two closest float values are - // strictly closer in representation to `x`. The second makes sure that rounding is - // towards even significand if two float values are equally close to the integer. - if error_minus < error - || error_plus < error - || ((error_minus == error || error_plus == error) - && ((f0.to_bits() & 1) != 0)) - { - if !cfg!(any( - target_arch = "powerpc", - target_arch = "powerpc64" - )) { - panic!( - "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})", - stringify!($fn), - x, - f1.to_bits(), - y_minus_ulp, - y, - y_plus_ulp, - error_minus, - error, - error_plus, - ); - } - } - // Test against native conversion. We disable testing on all `x86` because of - // rounding bugs with `i686`. `powerpc` also has the same rounding bug. 
- if f0 != f1 && !cfg!(any( - target_arch = "x86", - target_arch = "powerpc", - target_arch = "powerpc64" - )) { - panic!( - "{}({}): std: {}, builtins: {}", - stringify!($fn), - x, - f0, - f1, - ); - } - }); - )* - }; -} +mod int_to_float { + use super::*; -#[test] -fn int_to_float() { - use compiler_builtins::float::conv::{ - __floatdidf, __floatdisf, __floatsidf, __floatsisf, __floattidf, __floattisf, - __floatundidf, __floatundisf, __floatunsidf, __floatunsisf, __floatuntidf, __floatuntisf, - }; - use compiler_builtins::int::Int; + macro_rules! i_to_f { + ($($from:ty, $into:ty, $fn:ident);*;) => { + $( + #[test] + fn $fn() { + use compiler_builtins::float::conv::$fn; + use compiler_builtins::int::Int; - i_to_f!( + fuzz(N, |x: $from| { + let f0 = x as $into; + let f1: $into = $fn(x); + // This makes sure that the conversion produced the best rounding possible, and does + // this independent of `x as $into` rounding correctly. + // This assumes that float to integer conversion is correct. + let y_minus_ulp = <$into>::from_bits(f1.to_bits().wrapping_sub(1)) as $from; + let y = f1 as $from; + let y_plus_ulp = <$into>::from_bits(f1.to_bits().wrapping_add(1)) as $from; + let error_minus = <$from as Int>::abs_diff(y_minus_ulp, x); + let error = <$from as Int>::abs_diff(y, x); + let error_plus = <$from as Int>::abs_diff(y_plus_ulp, x); + // The first two conditions check that none of the two closest float values are + // strictly closer in representation to `x`. The second makes sure that rounding is + // towards even significand if two float values are equally close to the integer. 
+ if error_minus < error + || error_plus < error + || ((error_minus == error || error_plus == error) + && ((f0.to_bits() & 1) != 0)) + { + if !cfg!(any( + target_arch = "powerpc", + target_arch = "powerpc64" + )) { + panic!( + "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})", + stringify!($fn), + x, + f1.to_bits(), + y_minus_ulp, + y, + y_plus_ulp, + error_minus, + error, + error_plus, + ); + } + } + // Test against native conversion. We disable testing on all `x86` because of + // rounding bugs with `i686`. `powerpc` also has the same rounding bug. + if f0 != f1 && !cfg!(any( + target_arch = "x86", + target_arch = "powerpc", + target_arch = "powerpc64" + )) { + panic!( + "{}({}): std: {}, builtins: {}", + stringify!($fn), + x, + f0, + f1, + ); + } + }); + } + )* + }; + } + + i_to_f! { u32, f32, __floatunsisf; u32, f64, __floatunsidf; i32, f32, __floatsisf; @@ -92,53 +91,64 @@ fn int_to_float() { u128, f64, __floatuntidf; i128, f32, __floattisf; i128, f64, __floattidf; - ); -} - -macro_rules! f_to_i { - ($x:ident, $($f:ty, $fn:ident);*;) => { - $( - // it is undefined behavior in the first place to do conversions with NaNs - if !$x.is_nan() { - let conv0 = $x as $f; - let conv1: $f = $fn($x); - if conv0 != conv1 { - panic!("{}({}): std: {}, builtins: {}", stringify!($fn), $x, conv0, conv1); - } - } - )* - }; + } } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 #[cfg(not(target_arch = "powerpc64"))] -#[test] -fn float_to_int() { - use compiler_builtins::float::conv::{ - __fixdfdi, __fixdfsi, __fixdfti, __fixsfdi, __fixsfsi, __fixsfti, __fixunsdfdi, - __fixunsdfsi, __fixunsdfti, __fixunssfdi, __fixunssfsi, __fixunssfti, - }; +mod f_to_i { + use super::*; + + macro_rules! 
f_to_i { + ($x:ident, $($f:ty, $fn:ident);*;) => { + $( + // it is undefined behavior in the first place to do conversions with NaNs + if !$x.is_nan() { + let conv0 = $x as $f; + let conv1: $f = $fn($x); + if conv0 != conv1 { + panic!("{}({}): std: {}, builtins: {}", stringify!($fn), $x, conv0, conv1); + } + } + )* + }; + } - fuzz_float(N, |x: f32| { - f_to_i!(x, - u32, __fixunssfsi; - u64, __fixunssfdi; - u128, __fixunssfti; - i32, __fixsfsi; - i64, __fixsfdi; - i128, __fixsfti; - ); - }); - fuzz_float(N, |x: f64| { - f_to_i!(x, - u32, __fixunsdfsi; - u64, __fixunsdfdi; - u128, __fixunsdfti; - i32, __fixdfsi; - i64, __fixdfdi; - i128, __fixdfti; - ); - }); + #[test] + fn f32_to_int() { + use compiler_builtins::float::conv::{ + __fixsfdi, __fixsfsi, __fixsfti, __fixunssfdi, __fixunssfsi, __fixunssfti, + }; + + fuzz_float(N, |x: f32| { + f_to_i!(x, + u32, __fixunssfsi; + u64, __fixunssfdi; + u128, __fixunssfti; + i32, __fixsfsi; + i64, __fixsfdi; + i128, __fixsfti; + ); + }); + } + + #[test] + fn f64_to_int() { + use compiler_builtins::float::conv::{ + __fixdfdi, __fixdfsi, __fixdfti, __fixunsdfdi, __fixunsdfsi, __fixunsdfti, + }; + + fuzz_float(N, |x: f64| { + f_to_i!(x, + u32, __fixunsdfsi; + u64, __fixunsdfdi; + u128, __fixunsdfti; + i32, __fixdfsi; + i64, __fixdfdi; + i128, __fixdfti; + ); + }); + } } macro_rules! conv { @@ -161,40 +171,54 @@ macro_rules! conv { macro_rules! 
extend { ($fX:ident, $fD:ident, $fn:ident) => { - fuzz_float(N, |x: $fX| { - let tmp0 = x as $fD; - let tmp1: $fD = $fn(x); - if !Float::eq_repr(tmp0, tmp1) { - panic!( - "{}({}): std: {}, builtins: {}", - stringify!($fn), - x, - tmp0, - tmp1 - ); - } - }); + #[test] + fn $fn() { + use compiler_builtins::float::extend::$fn; + + fuzz_float(N, |x: $fX| { + let tmp0 = x as $fD; + let tmp1: $fD = $fn(x); + if !Float::eq_repr(tmp0, tmp1) { + panic!( + "{}({}): std: {}, builtins: {}", + stringify!($fn), + x, + tmp0, + tmp1 + ); + } + }); + } }; } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 #[cfg(not(target_arch = "powerpc64"))] -#[test] -fn float_extend() { - use compiler_builtins::float::extend::__extendsfdf2; +mod float_extend { + use super::*; extend!(f32, f64, __extendsfdf2); - conv!(f32, f64, __extendsfdf2, Single, Double); - #[cfg(not(feature = "no-f16-f128"))] - { - #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + + #[test] + fn conv() { + use compiler_builtins::float::extend::__extendsfdf2; + use rustc_apfloat::ieee::{Double, Single}; + + conv!(f32, f64, __extendsfdf2, Single, Double); + } +} + +#[cfg(not(feature = "no-f16-f128"))] +#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] +mod float_extend_f128 { + use super::*; + + #[test] + fn conv() { use compiler_builtins::float::extend::{ - __extenddfkf2 as __extenddftf2, __extendhfkf2 as __extendhftf2, - __extendsfkf2 as __extendsftf2, + __extenddftf2, __extendhfsf2, __extendhftf2, __extendsftf2, __gnu_h2f_ieee, }; - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] - use compiler_builtins::float::extend::{__extenddftf2, __extendhftf2, __extendsftf2}; - use compiler_builtins::float::extend::{__extendhfsf2, __gnu_h2f_ieee}; + use rustc_apfloat::ieee::{Double, Half, Quad, Single}; // FIXME(f16_f128): Also do extend!() for `f16` and `f128` when builtins are in nightly conv!(f16, f32, __extendhfsf2, Half, Single); @@ 
-205,49 +229,91 @@ fn float_extend() { } } +#[cfg(not(feature = "no-f16-f128"))] +#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] +mod float_extend_f128_ppc { + use super::*; + + #[test] + fn conv() { + use compiler_builtins::float::extend::{ + __extenddfkf2, __extendhfkf2, __extendhfsf2, __extendsfkf2, __gnu_h2f_ieee, + }; + use rustc_apfloat::ieee::{Double, Half, Quad, Single}; + + // FIXME(f16_f128): Also do extend!() for `f16` and `f128` when builtins are in nightly + conv!(f16, f32, __extendhfsf2, Half, Single); + conv!(f16, f32, __gnu_h2f_ieee, Half, Single); + conv!(f16, f128, __extendhfkf2, Half, Quad); + conv!(f32, f128, __extendsfkf2, Single, Quad); + conv!(f64, f128, __extenddfkf2, Double, Quad); + } +} + #[cfg(target_arch = "arm")] -#[test] -fn float_extend_arm() { - use compiler_builtins::float::extend::__extendsfdf2vfp; +mod float_extend_arm { + use super::*; extend!(f32, f64, __extendsfdf2vfp); - conv!(f32, f64, __extendsfdf2vfp, Single, Double); + + #[test] + fn conv() { + use compiler_builtins::float::extend::__extendsfdf2vfp; + use rustc_apfloat::ieee::{Double, Single}; + + conv!(f32, f64, __extendsfdf2vfp, Single, Double); + } } macro_rules! 
trunc { ($fX:ident, $fD:ident, $fn:ident) => { - fuzz_float(N, |x: $fX| { - let tmp0 = x as $fD; - let tmp1: $fD = $fn(x); - if !Float::eq_repr(tmp0, tmp1) { - panic!( - "{}({}): std: {}, builtins: {}", - stringify!($fn), - x, - tmp0, - tmp1 - ); - } - }); + #[test] + fn $fn() { + use compiler_builtins::float::trunc::$fn; + + fuzz_float(N, |x: $fX| { + let tmp0 = x as $fD; + let tmp1: $fD = $fn(x); + if !Float::eq_repr(tmp0, tmp1) { + panic!( + "{}({}): std: {}, builtins: {}", + stringify!($fn), + x, + tmp0, + tmp1 + ); + } + }); + } }; } -#[test] -fn float_trunc() { - use compiler_builtins::float::trunc::__truncdfsf2; +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +mod float_trunc { + use super::*; trunc!(f64, f32, __truncdfsf2); - conv!(f64, f32, __truncdfsf2, Double, Single); - #[cfg(not(feature = "no-f16-f128"))] - { + + #[test] + fn conv() { + use compiler_builtins::float::trunc::__truncdfsf2; + use rustc_apfloat::ieee::{Double, Single}; + + conv!(f64, f32, __truncdfsf2, Double, Single); + } +} + +#[cfg(not(feature = "no-f16-f128"))] +#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] +mod float_trunc_f128 { + use super::*; + + #[test] + fn conv() { use compiler_builtins::float::trunc::{__gnu_f2h_ieee, __truncdfhf2, __truncsfhf2}; - #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] - use compiler_builtins::float::trunc::{ - __trunckfdf2 as __trunctfdf2, __trunckfhf2 as __trunctfhf2, - __trunckfsf2 as __trunctfsf2, - }; - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] use compiler_builtins::float::trunc::{__trunctfdf2, __trunctfhf2, __trunctfsf2}; + use rustc_apfloat::ieee::{Double, Half, Quad, Single}; // FIXME(f16_f128): Also do trunc!() for `f16` and `f128` when builtins are in nightly conv!(f32, f16, __truncsfhf2, Single, Half); @@ -259,11 +325,38 @@ fn float_trunc() { } } +#[cfg(not(feature = "no-f16-f128"))] 
+#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] +mod float_trunc_f128_ppc { + use super::*; + + #[test] + fn conv() { + use compiler_builtins::float::trunc::{__gnu_f2h_ieee, __truncdfhf2, __truncsfhf2}; + use compiler_builtins::float::trunc::{__trunckfdf2, __trunckfhf2, __trunckfsf2}; + use rustc_apfloat::ieee::{Double, Half, Quad, Single}; + + // FIXME(f16_f128): Also do trunc!() for `f16` and `f128` when builtins are in nightly + conv!(f32, f16, __truncsfhf2, Single, Half); + conv!(f32, f16, __gnu_f2h_ieee, Single, Half); + conv!(f64, f16, __truncdfhf2, Double, Half); + conv!(f128, f16, __trunckfhf2, Quad, Half); + conv!(f128, f32, __trunckfsf2, Quad, Single); + conv!(f128, f64, __trunckfdf2, Quad, Double); + } +} + #[cfg(target_arch = "arm")] -#[test] -fn float_trunc_arm() { - use compiler_builtins::float::trunc::__truncdfsf2vfp; +mod float_trunc_arm { + use super::*; trunc!(f64, f32, __truncdfsf2vfp); - conv!(f64, f32, __truncdfsf2vfp, Double, Single) + + #[test] + fn conv() { + use compiler_builtins::float::trunc::__truncdfsf2vfp; + use rustc_apfloat::ieee::{Double, Single}; + + conv!(f64, f32, __truncdfsf2vfp, Double, Single) + } } diff --git a/testcrate/tests/div_rem.rs b/testcrate/tests/div_rem.rs index 461e084d0..ff78b4f54 100644 --- a/testcrate/tests/div_rem.rs +++ b/testcrate/tests/div_rem.rs @@ -107,24 +107,15 @@ fn divide_sparc() { macro_rules! 
float { ($($f:ty, $fn:ident, $apfloat_ty:ident, $sys_available:meta);*;) => { $( - fuzz_float_2(N, |x: $f, y: $f| { - let quo0: $f = apfloat_fallback!($f, $apfloat_ty, $sys_available, Div::div, x, y); - let quo1: $f = $fn(x, y); - #[cfg(not(target_arch = "arm"))] - if !Float::eq_repr(quo0, quo1) { - panic!( - "{}({:?}, {:?}): std: {:?}, builtins: {:?}", - stringify!($fn), - x, - y, - quo0, - quo1 - ); - } + #[test] + fn $fn() { + use compiler_builtins::float::{div::$fn, Float}; + use core::ops::Div; - // ARM SIMD instructions always flush subnormals to zero - #[cfg(target_arch = "arm")] - if !(Float::is_subnormal(quo0) || Float::is_subnormal(quo1)) { + fuzz_float_2(N, |x: $f, y: $f| { + let quo0: $f = apfloat_fallback!($f, $apfloat_ty, $sys_available, Div::div, x, y); + let quo1: $f = $fn(x, y); + #[cfg(not(target_arch = "arm"))] if !Float::eq_repr(quo0, quo1) { panic!( "{}({:?}, {:?}): std: {:?}, builtins: {:?}", @@ -135,38 +126,43 @@ macro_rules! float { quo1 ); } - } - }); + + // ARM SIMD instructions always flush subnormals to zero + #[cfg(target_arch = "arm")] + if !(Float::is_subnormal(quo0) || Float::is_subnormal(quo1)) { + if !Float::eq_repr(quo0, quo1) { + panic!( + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn), + x, + y, + quo0, + quo1 + ); + } + } + }); + } )* }; } #[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] -#[test] -fn float_div() { - use compiler_builtins::float::{ - div::{__divdf3, __divsf3}, - Float, - }; - use core::ops::Div; +mod float_div { + use super::*; - float!( + float! { f32, __divsf3, Single, all(); f64, __divdf3, Double, all(); - ); + } } #[cfg(target_arch = "arm")] -#[test] -fn float_div_arm() { - use compiler_builtins::float::{ - div::{__divdf3vfp, __divsf3vfp}, - Float, - }; - use core::ops::Div; +mod float_div_arm { + use super::*; - float!( + float! 
{ f32, __divsf3vfp, Single, all(); f64, __divdf3vfp, Double, all(); - ); + } } diff --git a/testcrate/tests/misc.rs b/testcrate/tests/misc.rs index cdc37e2a0..e01223c74 100644 --- a/testcrate/tests/misc.rs +++ b/testcrate/tests/misc.rs @@ -98,49 +98,52 @@ fn leading_zeros() { macro_rules! pow { ($($f:ty, $tolerance:expr, $fn:ident);*;) => { $( - fuzz_float_2(N, |x: $f, y: $f| { - if !(Float::is_subnormal(x) || Float::is_subnormal(y) || x.is_nan()) { - let n = y.to_bits() & !<$f as Float>::SIGNIFICAND_MASK; - let n = (n as <$f as Float>::SignedInt) >> <$f as Float>::SIGNIFICAND_BITS; - let n = n as i32; - let tmp0: $f = x.powi(n); - let tmp1: $f = $fn(x, n); - let (a, b) = if tmp0 < tmp1 { - (tmp0, tmp1) - } else { - (tmp1, tmp0) - }; - let good = { - if a == b { - // handles infinity equality - true - } else if a < $tolerance { - b < $tolerance + #[test] + fn $fn() { + use compiler_builtins::float::pow::$fn; + use compiler_builtins::float::Float; + fuzz_float_2(N, |x: $f, y: $f| { + if !(Float::is_subnormal(x) || Float::is_subnormal(y) || x.is_nan()) { + let n = y.to_bits() & !<$f as Float>::SIGNIFICAND_MASK; + let n = (n as <$f as Float>::SignedInt) >> <$f as Float>::SIGNIFICAND_BITS; + let n = n as i32; + let tmp0: $f = x.powi(n); + let tmp1: $f = $fn(x, n); + let (a, b) = if tmp0 < tmp1 { + (tmp0, tmp1) } else { - let quo = b / a; - (quo < (1. + $tolerance)) && (quo > (1. - $tolerance)) + (tmp1, tmp0) + }; + let good = { + if a == b { + // handles infinity equality + true + } else if a < $tolerance { + b < $tolerance + } else { + let quo = b / a; + (quo < (1. + $tolerance)) && (quo > (1. 
- $tolerance)) + } + }; + if !good { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn), x, n, tmp0, tmp1 + ); } - }; - if !good { - panic!( - "{}({}, {}): std: {}, builtins: {}", - stringify!($fn), x, n, tmp0, tmp1 - ); } - } - }); + }); + } )* }; } #[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] -#[test] -fn float_pow() { - use compiler_builtins::float::pow::{__powidf2, __powisf2}; - use compiler_builtins::float::Float; +mod float_pow { + use super::*; - pow!( + pow! { f32, 1e-4, __powisf2; f64, 1e-12, __powidf2; - ); + } } diff --git a/testcrate/tests/mul.rs b/testcrate/tests/mul.rs index ffbe63864..5daeadeb2 100644 --- a/testcrate/tests/mul.rs +++ b/testcrate/tests/mul.rs @@ -4,146 +4,164 @@ use testcrate::*; -macro_rules! mul { - ($($i:ty, $fn:ident);*;) => { - $( - fuzz_2(N, |x: $i, y: $i| { - let mul0 = x.wrapping_mul(y); - let mul1: $i = $fn(x, y); - if mul0 != mul1 { - panic!( - "{}({}, {}): std: {}, builtins: {}", - stringify!($fn), x, y, mul0, mul1 - ); - } - }); - )* - }; -} +mod int_mul { + use super::*; -#[test] -fn mul() { - use compiler_builtins::int::mul::{__muldi3, __multi3}; + macro_rules! mul { + ($($i:ty, $fn:ident);*;) => { + $( + #[test] + fn $fn() { + use compiler_builtins::int::mul::$fn; - mul!( + fuzz_2(N, |x: $i, y: $i| { + let mul0 = x.wrapping_mul(y); + let mul1: $i = $fn(x, y); + if mul0 != mul1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn), x, y, mul0, mul1 + ); + } + }); + + } + )* + }; + } + + mul! { u64, __muldi3; i128, __multi3; - ); + } } -macro_rules! 
overflowing_mul { - ($($i:ty, $fn:ident);*;) => { - $( - fuzz_2(N, |x: $i, y: $i| { - let (mul0, o0) = x.overflowing_mul(y); - let mut o1 = 0i32; - let mul1: $i = $fn(x, y, &mut o1); - let o1 = o1 != 0; - if mul0 != mul1 || o0 != o1 { - panic!( - "{}({}, {}): std: ({}, {}), builtins: ({}, {})", - stringify!($fn), x, y, mul0, o0, mul1, o1 - ); - } - }); - )* - }; -} +mod int_overflowing_mul { + use super::*; -#[test] -fn overflowing_mul() { - use compiler_builtins::int::mul::{ - __mulodi4, __mulosi4, __muloti4, __rust_i128_mulo, __rust_u128_mulo, - }; + macro_rules! overflowing_mul { + ($($i:ty, $fn:ident);*;) => { + $( + #[test] + fn $fn() { + use compiler_builtins::int::mul::$fn; + + fuzz_2(N, |x: $i, y: $i| { + let (mul0, o0) = x.overflowing_mul(y); + let mut o1 = 0i32; + let mul1: $i = $fn(x, y, &mut o1); + let o1 = o1 != 0; + if mul0 != mul1 || o0 != o1 { + panic!( + "{}({}, {}): std: ({}, {}), builtins: ({}, {})", + stringify!($fn), x, y, mul0, o0, mul1, o1 + ); + } + }); + } + )* + }; + } - overflowing_mul!( + overflowing_mul! 
{ i32, __mulosi4; i64, __mulodi4; i128, __muloti4; - ); - fuzz_2(N, |x: u128, y: u128| { - let (mul0, o0) = x.overflowing_mul(y); - let (mul1, o1) = __rust_u128_mulo(x, y); - if mul0 != mul1 || o0 != o1 { - panic!( - "__rust_u128_mulo({}, {}): std: ({}, {}), builtins: ({}, {})", - x, y, mul0, o0, mul1, o1 - ); - } - let x = x as i128; - let y = y as i128; - let (mul0, o0) = x.overflowing_mul(y); - let (mul1, o1) = __rust_i128_mulo(x, y); - if mul0 != mul1 || o0 != o1 { - panic!( - "__rust_i128_mulo({}, {}): std: ({}, {}), builtins: ({}, {})", - x, y, mul0, o0, mul1, o1 - ); - } - }); + } + + #[test] + fn overflowing_mul_u128() { + use compiler_builtins::int::mul::{__rust_i128_mulo, __rust_u128_mulo}; + + fuzz_2(N, |x: u128, y: u128| { + let (mul0, o0) = x.overflowing_mul(y); + let (mul1, o1) = __rust_u128_mulo(x, y); + if mul0 != mul1 || o0 != o1 { + panic!( + "__rust_u128_mulo({}, {}): std: ({}, {}), builtins: ({}, {})", + x, y, mul0, o0, mul1, o1 + ); + } + let x = x as i128; + let y = y as i128; + let (mul0, o0) = x.overflowing_mul(y); + let (mul1, o1) = __rust_i128_mulo(x, y); + if mul0 != mul1 || o0 != o1 { + panic!( + "__rust_i128_mulo({}, {}): std: ({}, {}), builtins: ({}, {})", + x, y, mul0, o0, mul1, o1 + ); + } + }); + } } macro_rules! 
float_mul { ($($f:ty, $fn:ident, $apfloat_ty:ident, $sys_available:meta);*;) => { $( - fuzz_float_2(N, |x: $f, y: $f| { - let mul0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Mul::mul, x, y); - let mul1: $f = $fn(x, y); - // multiplication of subnormals is not currently handled - if !(Float::is_subnormal(mul0) || Float::is_subnormal(mul1)) { - if !Float::eq_repr(mul0, mul1) { - panic!( - "{}({:?}, {:?}): std: {:?}, builtins: {:?}", - stringify!($fn), x, y, mul0, mul1 - ); + #[test] + fn $fn() { + use compiler_builtins::float::{mul::$fn, Float}; + use core::ops::Mul; + + fuzz_float_2(N, |x: $f, y: $f| { + let mul0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Mul::mul, x, y); + let mul1: $f = $fn(x, y); + // multiplication of subnormals is not currently handled + if !(Float::is_subnormal(mul0) || Float::is_subnormal(mul1)) { + if !Float::eq_repr(mul0, mul1) { + panic!( + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn), x, y, mul0, mul1 + ); + } } - } - }); + }); + } )* }; } #[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] -#[test] -fn float_mul() { - use compiler_builtins::float::{ - mul::{__muldf3, __mulsf3}, - Float, - }; - use core::ops::Mul; +mod float_mul { + use super::*; - float_mul!( + float_mul! { f32, __mulsf3, Single, all(); f64, __muldf3, Double, all(); - ); - - #[cfg(not(feature = "no-f16-f128"))] - { - #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] - use compiler_builtins::float::mul::__mulkf3 as __multf3; - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] - use compiler_builtins::float::mul::__multf3; - - float_mul!( - f128, __multf3, Quad, - // FIXME(llvm): there is a bug in LLVM rt. - // See . 
- not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux"))); - ); + } +} + +#[cfg(not(feature = "no-f16-f128"))] +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] +mod float_mul_f128 { + use super::*; + + float_mul! { + f128, __multf3, Quad, + // FIXME(llvm): there is a bug in LLVM rt. + // See . + not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux"))); + } +} + +#[cfg(not(feature = "no-f16-f128"))] +#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] +mod float_mul_f128_ppc { + use super::*; + + float_mul! { + f128, __mulkf3, Quad, not(feature = "no-sys-f128"); } } #[cfg(target_arch = "arm")] -#[test] -fn float_mul_arm() { - use compiler_builtins::float::{ - mul::{__muldf3vfp, __mulsf3vfp}, - Float, - }; - use core::ops::Mul; +mod float_mul_arm { + use super::*; - float_mul!( + float_mul! { f32, __mulsf3vfp, Single, all(); f64, __muldf3vfp, Double, all(); - ); + } } diff --git a/testcrate/tests/shift.rs b/testcrate/tests/shift.rs index 7a76b1646..23e3395ed 100644 --- a/testcrate/tests/shift.rs +++ b/testcrate/tests/shift.rs @@ -3,35 +3,33 @@ use testcrate::*; macro_rules! 
shift { ($($i:ty, $fn_std:ident, $fn_builtins:ident);*;) => { $( - fuzz_shift(|x: $i, s: u32| { - let tmp0: $i = x.$fn_std(s); - let tmp1: $i = $fn_builtins(x, s); - if tmp0 != tmp1 { - panic!( - "{}({}, {}): std: {}, builtins: {}", - stringify!($fn_builtins), x, s, tmp0, tmp1 - ); - } - }); + #[test] + fn $fn_builtins() { + use compiler_builtins::int::shift::$fn_builtins; + + fuzz_shift(|x: $i, s: u32| { + let tmp0: $i = x.$fn_std(s); + let tmp1: $i = $fn_builtins(x, s); + if tmp0 != tmp1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn_builtins), x, s, tmp0, tmp1 + ); + } + }); + } )* }; } -#[test] -fn shift() { - use compiler_builtins::int::shift::{ - __ashldi3, __ashlsi3, __ashlti3, __ashrdi3, __ashrsi3, __ashrti3, __lshrdi3, __lshrsi3, - __lshrti3, - }; - shift!( - u32, wrapping_shl, __ashlsi3; - u64, wrapping_shl, __ashldi3; - u128, wrapping_shl, __ashlti3; - i32, wrapping_shr, __ashrsi3; - i64, wrapping_shr, __ashrdi3; - i128, wrapping_shr, __ashrti3; - u32, wrapping_shr, __lshrsi3; - u64, wrapping_shr, __lshrdi3; - u128, wrapping_shr, __lshrti3; - ); +shift! 
{ + u32, wrapping_shl, __ashlsi3; + u64, wrapping_shl, __ashldi3; + u128, wrapping_shl, __ashlti3; + i32, wrapping_shr, __ashrsi3; + i64, wrapping_shr, __ashrdi3; + i128, wrapping_shr, __ashrti3; + u32, wrapping_shr, __lshrsi3; + u64, wrapping_shr, __lshrdi3; + u128, wrapping_shr, __lshrti3; } From b0f19660f015762aa1302f7a0fd31b8ee9c4a38a Mon Sep 17 00:00:00 2001 From: theKidOfArcrania Date: Wed, 15 May 2024 10:32:06 -0700 Subject: [PATCH 0744/1459] Add tests for UNC paths on windows builds --- .github/workflows/main.yml | 9 +++++++++ ci/run.sh | 12 ++++++++++++ 2 files changed, 21 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 970a32ae5..011886366 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,6 +19,11 @@ jobs: - target: aarch64-unknown-linux-gnu os: ubuntu-latest rust: nightly + - target: aarch64-pc-windows-msvc + os: windows-latest + rust: nightly + test_unc: 1 + no_std: 1 - target: arm-unknown-linux-gnueabi os: ubuntu-latest rust: nightly @@ -78,6 +83,7 @@ jobs: - target: i686-pc-windows-msvc os: windows-latest rust: nightly + test_unc: 1 - target: x86_64-pc-windows-msvc os: windows-latest rust: nightly @@ -109,6 +115,9 @@ jobs: - run: ./ci/run.sh ${{ matrix.target }} if: matrix.os != 'ubuntu-latest' shell: bash + env: + NO_STD: ${{ matrix.no_std }} + TEST_UNC: ${{ matrix.test_unc }} # Otherwise we use our docker containers to run builds - run: cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} diff --git a/ci/run.sh b/ci/run.sh index 847b52435..f05744e87 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -32,6 +32,18 @@ else $run --features no-f16-f128 --release fi +if [ "${TEST_UNC:-}" = "1" ]; then + run="cargo build --manifest-path testcrate/Cargo.toml --target $target --target-dir \"\\\\?\\$(pwd)\"" + $run + $run --release + $run --features c + $run --features c --release + $run --features no-asm + $run --features no-asm --release + $run --features no-f16-f128 + $run 
--features no-f16-f128 --release +fi + if [ -d /builtins-target ]; then rlib_paths=/builtins-target/"${target}"/debug/deps/libcompiler_builtins-*.rlib else From 853263a9582fb989c02a5d96092bcf91c90734af Mon Sep 17 00:00:00 2001 From: theKidOfArcrania Date: Wed, 15 May 2024 10:40:48 -0700 Subject: [PATCH 0745/1459] Fix CI --- .github/workflows/main.yml | 1 + ci/run.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 011886366..6518a4dd6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -87,6 +87,7 @@ jobs: - target: x86_64-pc-windows-msvc os: windows-latest rust: nightly + test_unc: 1 - target: i686-pc-windows-gnu os: windows-latest rust: nightly-i686-gnu diff --git a/ci/run.sh b/ci/run.sh index f05744e87..b2728eacf 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -33,7 +33,7 @@ else fi if [ "${TEST_UNC:-}" = "1" ]; then - run="cargo build --manifest-path testcrate/Cargo.toml --target $target --target-dir \"\\\\?\\$(pwd)\"" + run="cargo build --manifest-path testcrate/Cargo.toml --target $target --target-dir \\\\?\\$TEMP\\test_unc" $run $run --release $run --features c From 04c486666b623985c7097f6022708ae42c970caf Mon Sep 17 00:00:00 2001 From: theKidOfArcrania Date: Wed, 15 May 2024 10:44:58 -0700 Subject: [PATCH 0746/1459] Use cmd.exe --- .github/workflows/main.yml | 2 ++ ci/run.sh | 20 +++++++++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6518a4dd6..531a4c308 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -91,9 +91,11 @@ jobs: - target: i686-pc-windows-gnu os: windows-latest rust: nightly-i686-gnu + test_unc: 1 - target: x86_64-pc-windows-gnu os: windows-latest rust: nightly-x86_64-gnu + test_unc: 1 steps: - name: Print runner information run: uname -a diff --git a/ci/run.sh b/ci/run.sh index b2728eacf..2620dfff8 100755 --- a/ci/run.sh +++ 
b/ci/run.sh @@ -33,15 +33,17 @@ else fi if [ "${TEST_UNC:-}" = "1" ]; then - run="cargo build --manifest-path testcrate/Cargo.toml --target $target --target-dir \\\\?\\$TEMP\\test_unc" - $run - $run --release - $run --features c - $run --features c --release - $run --features no-asm - $run --features no-asm --release - $run --features no-f16-f128 - $run --features no-f16-f128 --release + function run() { + cmd.exe /c cargo build --manifest-path testcrate/Cargo.toml --target $target --target-dir "\\\\?\\%TEMP%\\test_unc" "$@" + } + run + run --release + run --features c + run --features c --release + run --features no-asm + run --features no-asm --release + run --features no-f16-f128 + run --features no-f16-f128 --release fi if [ -d /builtins-target ]; then From 73175bd0b379b5a9853100b0546a68507b840845 Mon Sep 17 00:00:00 2001 From: theKidOfArcrania Date: Wed, 15 May 2024 10:47:07 -0700 Subject: [PATCH 0747/1459] Fix backslash --- ci/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run.sh b/ci/run.sh index 2620dfff8..d4f0b80fb 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -34,7 +34,7 @@ fi if [ "${TEST_UNC:-}" = "1" ]; then function run() { - cmd.exe /c cargo build --manifest-path testcrate/Cargo.toml --target $target --target-dir "\\\\?\\%TEMP%\\test_unc" "$@" + cmd.exe /c cargo build --manifest-path "testcrate\\Cargo.toml" --target $target --target-dir "\\\\?\\%TEMP%\\test_unc" "$@" } run run --release From b8b09d5b1293313a0a2747e4e47889add9891712 Mon Sep 17 00:00:00 2001 From: theKidOfArcrania Date: Wed, 15 May 2024 10:54:40 -0700 Subject: [PATCH 0748/1459] Instead have cmd.exe dump out path --- ci/run.sh | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/ci/run.sh b/ci/run.sh index d4f0b80fb..b4bf18b10 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -33,17 +33,16 @@ else fi if [ "${TEST_UNC:-}" = "1" ]; then - function run() { - cmd.exe /c cargo build --manifest-path "testcrate\\Cargo.toml" --target $target 
--target-dir "\\\\?\\%TEMP%\\test_unc" "$@" - } - run - run --release - run --features c - run --features c --release - run --features no-asm - run --features no-asm --release - run --features no-f16-f128 - run --features no-f16-f128 --release + path=$(cmd.exe "/C echo \\\\?\\%cd%\\testcrate\\target_unc") + run="cargo test --manifest-path testcrate/Cargo.toml --target $target --target-dir $path" + $run + $run --release + $run --features c + $run --features c --release + $run --features no-asm + $run --features no-asm --release + $run --features no-f16-f128 + $run --features no-f16-f128 --release fi if [ -d /builtins-target ]; then From ac30f02ab7d36f270b09244457b2c31449368aef Mon Sep 17 00:00:00 2001 From: theKidOfArcrania Date: Wed, 15 May 2024 11:05:20 -0700 Subject: [PATCH 0749/1459] Properly escape /C and fix naming --- .github/workflows/main.yml | 12 ++++++------ ci/run.sh | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 531a4c308..b27267ba6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -22,7 +22,7 @@ jobs: - target: aarch64-pc-windows-msvc os: windows-latest rust: nightly - test_unc: 1 + test_verbatim: 1 no_std: 1 - target: arm-unknown-linux-gnueabi os: ubuntu-latest @@ -83,19 +83,19 @@ jobs: - target: i686-pc-windows-msvc os: windows-latest rust: nightly - test_unc: 1 + test_verbatim: 1 - target: x86_64-pc-windows-msvc os: windows-latest rust: nightly - test_unc: 1 + test_verbatim: 1 - target: i686-pc-windows-gnu os: windows-latest rust: nightly-i686-gnu - test_unc: 1 + test_verbatim: 1 - target: x86_64-pc-windows-gnu os: windows-latest rust: nightly-x86_64-gnu - test_unc: 1 + test_verbatim: 1 steps: - name: Print runner information run: uname -a @@ -120,7 +120,7 @@ jobs: shell: bash env: NO_STD: ${{ matrix.no_std }} - TEST_UNC: ${{ matrix.test_unc }} + TEST_VERBATIM: ${{ matrix.test_verbatim }} # Otherwise we use our docker containers to run 
builds - run: cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} diff --git a/ci/run.sh b/ci/run.sh index b4bf18b10..cc07090d4 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -32,9 +32,9 @@ else $run --features no-f16-f128 --release fi -if [ "${TEST_UNC:-}" = "1" ]; then - path=$(cmd.exe "/C echo \\\\?\\%cd%\\testcrate\\target_unc") - run="cargo test --manifest-path testcrate/Cargo.toml --target $target --target-dir $path" +if [ "${TEST_VERBATIM:-}" = "1" ]; then + verb_path=$(cmd.exe //C echo \\\\?\\%cd%\\testcrate\\target2) + run="cargo test --manifest-path testcrate/Cargo.toml --target $target --target-dir $verb_path" $run $run --release $run --features c From e6ff35e46dc7bec485fa7f44e46c27a4980ed8fc Mon Sep 17 00:00:00 2001 From: theKidOfArcrania Date: Wed, 15 May 2024 11:07:06 -0700 Subject: [PATCH 0750/1459] verbatim tests only need to build --- ci/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run.sh b/ci/run.sh index cc07090d4..3c33bbf83 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -34,7 +34,7 @@ fi if [ "${TEST_VERBATIM:-}" = "1" ]; then verb_path=$(cmd.exe //C echo \\\\?\\%cd%\\testcrate\\target2) - run="cargo test --manifest-path testcrate/Cargo.toml --target $target --target-dir $verb_path" + run="cargo build --manifest-path testcrate/Cargo.toml --target $target --target-dir $verb_path" $run $run --release $run --features c From 8fda43490571f1482c31a42b808ddeb89dd3d177 Mon Sep 17 00:00:00 2001 From: Henry Wang Date: Fri, 17 May 2024 08:40:39 -0700 Subject: [PATCH 0751/1459] Only run `--features c` for verbatim test --- ci/run.sh | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/ci/run.sh b/ci/run.sh index 3c33bbf83..2512dc633 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -34,15 +34,7 @@ fi if [ "${TEST_VERBATIM:-}" = "1" ]; then verb_path=$(cmd.exe //C echo \\\\?\\%cd%\\testcrate\\target2) - run="cargo build --manifest-path testcrate/Cargo.toml --target $target --target-dir $verb_path" - $run - 
$run --release - $run --features c - $run --features c --release - $run --features no-asm - $run --features no-asm --release - $run --features no-f16-f128 - $run --features no-f16-f128 --release + cargo build --manifest-path testcrate/Cargo.toml --target $target --target-dir $verb_path --features c fi if [ -d /builtins-target ]; then From 12795d27d16a7a52f3a62a0ef5cd71c66d07e100 Mon Sep 17 00:00:00 2001 From: Henry Wang Date: Fri, 17 May 2024 08:41:59 -0700 Subject: [PATCH 0752/1459] Don't run verbatim test on windows-gnu --- .github/workflows/main.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b27267ba6..72d441c4e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -91,11 +91,9 @@ jobs: - target: i686-pc-windows-gnu os: windows-latest rust: nightly-i686-gnu - test_verbatim: 1 - target: x86_64-pc-windows-gnu os: windows-latest rust: nightly-x86_64-gnu - test_verbatim: 1 steps: - name: Print runner information run: uname -a From f4c8ec4cab0e3ed22ae1c9d9ab3a568b6e983ec1 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 17 May 2024 22:02:37 -0500 Subject: [PATCH 0753/1459] Add `CastFrom` as a convenience form of `CastInto` --- src/int/mod.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/int/mod.rs b/src/int/mod.rs index 2b6d4b812..0c67a4c16 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -380,6 +380,16 @@ public_test_dep! { pub(crate) trait CastInto: Copy { fn cast(self) -> T; } + +pub(crate) trait CastFrom:Copy { + fn cast_from(value: T) -> Self; +} +} + +impl + Copy> CastFrom for T { + fn cast_from(value: U) -> Self { + value.cast() + } } macro_rules! 
cast_into { From 5c78c6e85b881196de50af8acf9cb5dd17514f93 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 17 May 2024 22:04:03 -0500 Subject: [PATCH 0754/1459] Make float to integer conversions generic Deduplicate code used for float to integer conversions in order to make adding `f128` conversion functions easier. --- src/float/conv.rs | 250 +++++++++++++++++++--------------------------- src/float/mod.rs | 8 +- src/int/mod.rs | 5 + 3 files changed, 109 insertions(+), 154 deletions(-) diff --git a/src/float/conv.rs b/src/float/conv.rs index 790c0ab9f..931840f15 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -1,3 +1,9 @@ +use core::ops::Neg; + +use crate::int::{CastFrom, CastInto, Int, MinInt}; + +use super::Float; + /// Conversions from integers to floats. /// /// These are hand-optimized bit twiddling code, @@ -142,102 +148,118 @@ intrinsics! { } } +/// Generic float to unsigned int conversions. +fn float_to_unsigned_int(f: F) -> U +where + F: Float, + U: Int, + F::Int: CastInto, + F::Int: CastFrom, + F::Int: CastInto, + u32: CastFrom, +{ + float_to_int_inner::(f.repr(), |i: U| i, || U::MAX) +} + +/// Generic float to signed int conversions. +fn float_to_signed_int(f: F) -> I +where + F: Float, + I: Int + Neg, + I::UnsignedInt: Int, + F::Int: CastInto, + F::Int: CastFrom, + u32: CastFrom, +{ + float_to_int_inner::( + f.repr() & !F::SIGN_MASK, + |i: I| if f.is_sign_negative() { -i } else { i }, + || if f.is_sign_negative() { I::MIN } else { I::MAX }, + ) +} + +/// Float to int conversions, generic for both signed and unsigned. +/// +/// Parameters: +/// - `fbits`: `abg(f)` bitcasted to an integer. +/// - `map_inbounds`: apply this transformation to integers that are within range (add the sign +/// back). +/// - `out_of_bounds`: return value when out of range for `I`. 
+fn float_to_int_inner( + fbits: F::Int, + map_inbounds: FnFoo, + out_of_bounds: FnOob, +) -> I +where + F: Float, + I: Int, + FnFoo: FnOnce(I) -> I, + FnOob: FnOnce() -> I, + I::UnsignedInt: Int, + F::Int: CastInto, + F::Int: CastFrom, + u32: CastFrom, +{ + let int_max_exp = F::EXPONENT_BIAS + I::MAX.ilog2() + 1; + let foobar = F::EXPONENT_BIAS + I::UnsignedInt::BITS - 1; + + if fbits < F::ONE.repr() { + // < 0 gets rounded to 0 + I::ZERO + } else if fbits < F::Int::cast_from(int_max_exp) << F::SIGNIFICAND_BITS { + // >= 1, < integer max + let m_base = if I::UnsignedInt::BITS >= F::Int::BITS { + I::UnsignedInt::cast_from(fbits) << (I::BITS - F::SIGNIFICAND_BITS - 1) + } else { + I::UnsignedInt::cast_from(fbits >> (F::SIGNIFICAND_BITS - I::BITS + 1)) + }; + + // Set the implicit 1-bit. + let m: I::UnsignedInt = I::UnsignedInt::ONE << (I::BITS - 1) | m_base; + + // Shift based on the exponent and bias. + let s: u32 = (foobar) - u32::cast_from(fbits >> F::SIGNIFICAND_BITS); + + let unsigned = m >> s; + map_inbounds(I::from_unsigned(unsigned)) + } else if fbits <= F::EXPONENT_MASK { + // >= max (incl. inf) + out_of_bounds() + } else { + I::ZERO + } +} + // Conversions from floats to unsigned integers. intrinsics! { #[arm_aeabi_alias = __aeabi_f2uiz] pub extern "C" fn __fixunssfsi(f: f32) -> u32 { - let fbits = f.to_bits(); - if fbits < 127 << 23 { // >= 0, < 1 - 0 - } else if fbits < 159 << 23 { // >= 1, < max - let m = 1 << 31 | fbits << 8; // Mantissa and the implicit 1-bit. - let s = 158 - (fbits >> 23); // Shift based on the exponent and bias. - m >> s - } else if fbits <= 255 << 23 { // >= max (incl. inf) - u32::MAX - } else { // Negative or NaN - 0 - } + float_to_unsigned_int(f) } #[arm_aeabi_alias = __aeabi_f2ulz] pub extern "C" fn __fixunssfdi(f: f32) -> u64 { - let fbits = f.to_bits(); - if fbits < 127 << 23 { // >= 0, < 1 - 0 - } else if fbits < 191 << 23 { // >= 1, < max - let m = 1 << 63 | (fbits as u64) << 40; // Mantissa and the implicit 1-bit. 
- let s = 190 - (fbits >> 23); // Shift based on the exponent and bias. - m >> s - } else if fbits <= 255 << 23 { // >= max (incl. inf) - u64::MAX - } else { // Negative or NaN - 0 - } + float_to_unsigned_int(f) } #[win64_128bit_abi_hack] pub extern "C" fn __fixunssfti(f: f32) -> u128 { - let fbits = f.to_bits(); - if fbits < 127 << 23 { // >= 0, < 1 - 0 - } else if fbits < 255 << 23 { // >= 1, < inf - let m = 1 << 127 | (fbits as u128) << 104; // Mantissa and the implicit 1-bit. - let s = 254 - (fbits >> 23); // Shift based on the exponent and bias. - m >> s - } else if fbits == 255 << 23 { // == inf - u128::MAX - } else { // Negative or NaN - 0 - } + float_to_unsigned_int(f) } #[arm_aeabi_alias = __aeabi_d2uiz] pub extern "C" fn __fixunsdfsi(f: f64) -> u32 { - let fbits = f.to_bits(); - if fbits < 1023 << 52 { // >= 0, < 1 - 0 - } else if fbits < 1055 << 52 { // >= 1, < max - let m = 1 << 31 | (fbits >> 21) as u32; // Mantissa and the implicit 1-bit. - let s = 1054 - (fbits >> 52); // Shift based on the exponent and bias. - m >> s - } else if fbits <= 2047 << 52 { // >= max (incl. inf) - u32::MAX - } else { // Negative or NaN - 0 - } + float_to_unsigned_int(f) } #[arm_aeabi_alias = __aeabi_d2ulz] pub extern "C" fn __fixunsdfdi(f: f64) -> u64 { - let fbits = f.to_bits(); - if fbits < 1023 << 52 { // >= 0, < 1 - 0 - } else if fbits < 1087 << 52 { // >= 1, < max - let m = 1 << 63 | fbits << 11; // Mantissa and the implicit 1-bit. - let s = 1086 - (fbits >> 52); // Shift based on the exponent and bias. - m >> s - } else if fbits <= 2047 << 52 { // >= max (incl. inf) - u64::MAX - } else { // Negative or NaN - 0 - } + float_to_unsigned_int(f) } #[win64_128bit_abi_hack] pub extern "C" fn __fixunsdfti(f: f64) -> u128 { - let fbits = f.to_bits(); - if fbits < 1023 << 52 { // >= 0, < 1 - 0 - } else if fbits < 1151 << 52 { // >= 1, < max - let m = 1 << 127 | (fbits as u128) << 75; // Mantissa and the implicit 1-bit. 
- let s = 1150 - (fbits >> 52); // Shift based on the exponent and bias. - m >> s - } else if fbits <= 2047 << 52 { // >= max (incl. inf) - u128::MAX - } else { // Negative or NaN - 0 - } + float_to_unsigned_int(f) } } @@ -245,103 +267,31 @@ intrinsics! { intrinsics! { #[arm_aeabi_alias = __aeabi_f2iz] pub extern "C" fn __fixsfsi(f: f32) -> i32 { - let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. - if fbits < 127 << 23 { // >= 0, < 1 - 0 - } else if fbits < 158 << 23 { // >= 1, < max - let m = 1 << 31 | fbits << 8; // Mantissa and the implicit 1-bit. - let s = 158 - (fbits >> 23); // Shift based on the exponent and bias. - let u = (m >> s) as i32; // Unsigned result. - if f.is_sign_negative() { -u } else { u } - } else if fbits <= 255 << 23 { // >= max (incl. inf) - if f.is_sign_negative() { i32::MIN } else { i32::MAX } - } else { // NaN - 0 - } + float_to_signed_int(f) } #[arm_aeabi_alias = __aeabi_f2lz] pub extern "C" fn __fixsfdi(f: f32) -> i64 { - let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. - if fbits < 127 << 23 { // >= 0, < 1 - 0 - } else if fbits < 190 << 23 { // >= 1, < max - let m = 1 << 63 | (fbits as u64) << 40; // Mantissa and the implicit 1-bit. - let s = 190 - (fbits >> 23); // Shift based on the exponent and bias. - let u = (m >> s) as i64; // Unsigned result. - if f.is_sign_negative() { -u } else { u } - } else if fbits <= 255 << 23 { // >= max (incl. inf) - if f.is_sign_negative() { i64::MIN } else { i64::MAX } - } else { // NaN - 0 - } + float_to_signed_int(f) } #[win64_128bit_abi_hack] pub extern "C" fn __fixsfti(f: f32) -> i128 { - let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. - if fbits < 127 << 23 { // >= 0, < 1 - 0 - } else if fbits < 254 << 23 { // >= 1, < max - let m = 1 << 127 | (fbits as u128) << 104; // Mantissa and the implicit 1-bit. - let s = 254 - (fbits >> 23); // Shift based on the exponent and bias. - let u = (m >> s) as i128; // Unsigned result. 
- if f.is_sign_negative() { -u } else { u } - } else if fbits <= 255 << 23 { // >= max (incl. inf) - if f.is_sign_negative() { i128::MIN } else { i128::MAX } - } else { // NaN - 0 - } + float_to_signed_int(f) } #[arm_aeabi_alias = __aeabi_d2iz] pub extern "C" fn __fixdfsi(f: f64) -> i32 { - let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. - if fbits < 1023 << 52 { // >= 0, < 1 - 0 - } else if fbits < 1054 << 52 { // >= 1, < max - let m = 1 << 31 | (fbits >> 21) as u32; // Mantissa and the implicit 1-bit. - let s = 1054 - (fbits >> 52); // Shift based on the exponent and bias. - let u = (m >> s) as i32; // Unsigned result. - if f.is_sign_negative() { -u } else { u } - } else if fbits <= 2047 << 52 { // >= max (incl. inf) - if f.is_sign_negative() { i32::MIN } else { i32::MAX } - } else { // NaN - 0 - } + float_to_signed_int(f) } #[arm_aeabi_alias = __aeabi_d2lz] pub extern "C" fn __fixdfdi(f: f64) -> i64 { - let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. - if fbits < 1023 << 52 { // >= 0, < 1 - 0 - } else if fbits < 1086 << 52 { // >= 1, < max - let m = 1 << 63 | fbits << 11; // Mantissa and the implicit 1-bit. - let s = 1086 - (fbits >> 52); // Shift based on the exponent and bias. - let u = (m >> s) as i64; // Unsigned result. - if f.is_sign_negative() { -u } else { u } - } else if fbits <= 2047 << 52 { // >= max (incl. inf) - if f.is_sign_negative() { i64::MIN } else { i64::MAX } - } else { // NaN - 0 - } + float_to_signed_int(f) } #[win64_128bit_abi_hack] pub extern "C" fn __fixdfti(f: f64) -> i128 { - let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. - if fbits < 1023 << 52 { // >= 0, < 1 - 0 - } else if fbits < 1150 << 52 { // >= 1, < max - let m = 1 << 127 | (fbits as u128) << 75; // Mantissa and the implicit 1-bit. - let s = 1150 - (fbits >> 52); // Shift based on the exponent and bias. - let u = (m >> s) as i128; // Unsigned result. - if f.is_sign_negative() { -u } else { u } - } else if fbits <= 2047 << 52 { // >= max (incl. 
inf) - if f.is_sign_negative() { i128::MIN } else { i128::MAX } - } else { // NaN - 0 - } + float_to_signed_int(f) } } diff --git a/src/float/mod.rs b/src/float/mod.rs index e62a3fe0f..5fef1df32 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -80,8 +80,8 @@ pub(crate) trait Float: /// compared. fn eq_repr(self, rhs: Self) -> bool; - /// Returns the sign bit - fn sign(self) -> bool; + /// Returns true if the sign is negative + fn is_sign_negative(self) -> bool; /// Returns the exponent with bias fn exp(self) -> Self::ExpInt; @@ -150,8 +150,8 @@ macro_rules! float_impl { self.repr() == rhs.repr() } } - fn sign(self) -> bool { - self.signed_repr() < Self::SignedInt::ZERO + fn is_sign_negative(self) -> bool { + self.is_sign_negative() } fn exp(self) -> Self::ExpInt { ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt diff --git a/src/int/mod.rs b/src/int/mod.rs index 0c67a4c16..45d383880 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -102,6 +102,7 @@ pub(crate) trait Int: MinInt fn rotate_left(self, other: u32) -> Self; fn overflowing_add(self, other: Self) -> (Self, bool); fn leading_zeros(self) -> u32; + fn ilog2(self) -> u32; } } @@ -200,6 +201,10 @@ macro_rules! int_impl_common { fn leading_zeros(self) -> u32 { ::leading_zeros(self) } + + fn ilog2(self) -> u32 { + ::ilog2(self) + } }; } From 1613291e2e023860993019fafe693be81273bc6f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 17 May 2024 22:04:49 -0500 Subject: [PATCH 0755/1459] Allow a specific fallback function in `apfloat_fallback` `as` casts are only allowed for primitives, doing the same operations with `rustc_apfloat` requires using functions. Add a way to specify these separately. 
--- testcrate/src/lib.rs | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/testcrate/src/lib.rs b/testcrate/src/lib.rs index 1f3a4b826..5ee96ad27 100644 --- a/testcrate/src/lib.rs +++ b/testcrate/src/lib.rs @@ -276,7 +276,7 @@ macro_rules! apfloat_fallback { // The expression to run. This expression may use `FloatTy` for its signature. // Optionally, the final conversion back to a float can be suppressed using // `=> no_convert` (for e.g. operations that return a bool). - $op:expr $(=> $convert:ident)?, + $op:expr $(=> $convert:ident)? $(; $apfloat_op:expr)?, // Arguments that get passed to `$op` after converting to a float $($arg:expr),+ $(,)? @@ -292,26 +292,40 @@ macro_rules! apfloat_fallback { use rustc_apfloat::Float; type FloatTy = rustc_apfloat::ieee::$apfloat_ty; - let op_res = $op( $(FloatTy::from_bits($arg.to_bits().into())),+ ); - - apfloat_fallback!(@convert $float_ty, op_res $(,$convert)?) + apfloat_fallback!(@inner + fty: $float_ty, + // Apply a conversion to `FloatTy` to each arg, then pass all args to `$op` + op_res: $op( $(FloatTy::from_bits($arg.to_bits().into())),+ ), + $(apfloat_op: $apfloat_op, )? + $(conv_opts: $convert,)? + args: $($arg),+ + ) }; ret }}; // Operations that do not need converting back to a float - (@convert $float_ty:ty, $val:expr, no_convert) => { + (@inner fty: $float_ty:ty, op_res: $val:expr, conv_opts: no_convert, args: $($_arg:expr),+) => { $val }; // Some apfloat operations return a `StatusAnd` that we need to extract the value from. This // is the default. - (@convert $float_ty:ty, $val:expr) => {{ + (@inner fty: $float_ty:ty, op_res: $val:expr, args: $($_arg:expr),+) => {{ // ignore the status, just get the value let unwrapped = $val.value; <$float_ty>::from_bits(FloatTy::to_bits(unwrapped).try_into().unwrap()) }}; + // This is the case where we can't use the same expression for the default builtin and + // nonstandard apfloat fallback, e.g.
`as` casts in std are normal functions in apfloat, so + // two separate expressions must be specified. + (@inner + fty: $float_ty:ty, op_res: $_val:expr, + apfloat_op: $apfloat_op:expr, args: $($arg:expr),+ + ) => {{ + $apfloat_op($($arg),+) + }}; } From 8aa675ddfd2cd9dfe62dcc5fbd57dfb66b103f70 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 20 May 2024 20:00:02 -0400 Subject: [PATCH 0756/1459] Add an apfloat fallback for float to integer tests --- testcrate/tests/conv.rs | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 7b672ac25..1e26950fe 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -100,14 +100,39 @@ mod f_to_i { use super::*; macro_rules! f_to_i { - ($x:ident, $($f:ty, $fn:ident);*;) => { + ($x:ident, $f_ty:ty, $apfloat_ty:ident, $sys_available:meta, $($i_ty:ty, $fn:ident);*;) => { $( // it is undefined behavior in the first place to do conversions with NaNs - if !$x.is_nan() { - let conv0 = $x as $f; - let conv1: $f = $fn($x); + if !apfloat_fallback!( + $f_ty, $apfloat_ty, $sys_available, |x: FloatTy| x.is_nan() => no_convert, $x + ) { + let conv0 = apfloat_fallback!( + $f_ty, $apfloat_ty, $sys_available, + // Use an `as` cast when the builtin is available on the system. + |x| x as $i_ty; + // When the builtin is not available, we need to use a different conversion + // method (since apfloat doesn't support `as` casting). 
+ |x: $f_ty| { + use compiler_builtins::int::MinInt; + + let apf = FloatTy::from_bits(x.to_bits().into()); + let bits: usize = <$i_ty>::BITS.try_into().unwrap(); + + let err_fn = || panic!( + "Unable to convert value {x:?} to type {}:", stringify!($i_ty) + ); + + if <$i_ty>::SIGNED { + <$i_ty>::try_from(apf.to_i128(bits).value).ok().unwrap_or_else(err_fn) + } else { + <$i_ty>::try_from(apf.to_u128(bits).value).ok().unwrap_or_else(err_fn) + } + }, + $x + ); + let conv1: $i_ty = $fn($x); if conv0 != conv1 { - panic!("{}({}): std: {}, builtins: {}", stringify!($fn), $x, conv0, conv1); + panic!("{}({:?}): std: {:?}, builtins: {:?}", stringify!($fn), $x, conv0, conv1); } } )* From 4edcd5a99454892f50cbaf8306522de6b76b5b94 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 20 May 2024 20:00:11 -0400 Subject: [PATCH 0757/1459] Add `f128` float to integer conversion functions Add the following: - `__fixtfsi` - `__fixtfdi` - `__fixtfti` - `__fixunstfsi` - `__fixunstfdi` - `__fixunstfti` --- README.md | 12 ++++++------ build.rs | 10 ---------- src/float/conv.rs | 30 ++++++++++++++++++++++++++++++ testcrate/Cargo.toml | 4 +++- testcrate/build.rs | 31 ++++++++++++++++++++++++++++--- testcrate/tests/conv.rs | 27 +++++++++++++++++++++++++-- 6 files changed, 92 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 37d7ab2e6..8233c669c 100644 --- a/README.md +++ b/README.md @@ -239,12 +239,12 @@ These builtins are needed to support `f16` and `f128`, which are in the process - [x] extendhfsf2.c - [x] extendhftf2.c - [x] extendsftf2.c -- [ ] fixtfdi.c -- [ ] fixtfsi.c -- [ ] fixtfti.c -- [ ] fixunstfdi.c -- [ ] fixunstfsi.c -- [ ] fixunstfti.c +- [x] fixtfdi.c +- [x] fixtfsi.c +- [x] fixtfti.c +- [x] fixunstfdi.c +- [x] fixunstfsi.c +- [x] fixunstfti.c - [ ] floatditf.c - [ ] floatsitf.c - [ ] floatunditf.c diff --git a/build.rs b/build.rs index ec830ecb3..50415910d 100644 --- a/build.rs +++ b/build.rs @@ -533,12 +533,6 @@ mod c { if (target_arch == "aarch64" || 
target_arch == "arm64ec") && consider_float_intrinsics { sources.extend(&[ ("__comparetf2", "comparetf2.c"), - ("__fixtfdi", "fixtfdi.c"), - ("__fixtfsi", "fixtfsi.c"), - ("__fixtfti", "fixtfti.c"), - ("__fixunstfdi", "fixunstfdi.c"), - ("__fixunstfsi", "fixunstfsi.c"), - ("__fixunstfti", "fixunstfti.c"), ("__floatditf", "floatditf.c"), ("__floatsitf", "floatsitf.c"), ("__floatunditf", "floatunditf.c"), @@ -561,9 +555,7 @@ mod c { if target_arch == "mips64" { sources.extend(&[ ("__netf2", "comparetf2.c"), - ("__fixtfsi", "fixtfsi.c"), ("__floatsitf", "floatsitf.c"), - ("__fixunstfsi", "fixunstfsi.c"), ("__floatunsitf", "floatunsitf.c"), ("__fe_getround", "fp_mode.c"), ]); @@ -572,9 +564,7 @@ mod c { if target_arch == "loongarch64" { sources.extend(&[ ("__netf2", "comparetf2.c"), - ("__fixtfsi", "fixtfsi.c"), ("__floatsitf", "floatsitf.c"), - ("__fixunstfsi", "fixunstfsi.c"), ("__floatunsitf", "floatunsitf.c"), ("__fe_getround", "fp_mode.c"), ]); diff --git a/src/float/conv.rs b/src/float/conv.rs index 931840f15..a37206cdc 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -261,6 +261,21 @@ intrinsics! { pub extern "C" fn __fixunsdfti(f: f64) -> u128 { float_to_unsigned_int(f) } + + #[cfg(not(feature = "no-f16-f128"))] + pub extern "C" fn __fixunstfsi(f: f128) -> u32 { + float_to_unsigned_int(f) + } + + #[cfg(not(feature = "no-f16-f128"))] + pub extern "C" fn __fixunstfdi(f: f128) -> u64 { + float_to_unsigned_int(f) + } + + #[cfg(not(feature = "no-f16-f128"))] + pub extern "C" fn __fixunstfti(f: f128) -> u128 { + float_to_unsigned_int(f) + } } // Conversions from floats to signed integers. @@ -294,4 +309,19 @@ intrinsics! 
{ pub extern "C" fn __fixdfti(f: f64) -> i128 { float_to_signed_int(f) } + + #[cfg(not(feature = "no-f16-f128"))] + pub extern "C" fn __fixtfsi(f: f128) -> i32 { + float_to_signed_int(f) + } + + #[cfg(not(feature = "no-f16-f128"))] + pub extern "C" fn __fixtfdi(f: f128) -> i64 { + float_to_signed_int(f) + } + + #[cfg(not(feature = "no-f16-f128"))] + pub extern "C" fn __fixtfti(f: f128) -> i128 { + float_to_signed_int(f) + } } diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index 6f771181a..1de0c3976 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -34,4 +34,6 @@ no-f16-f128 = ["compiler_builtins/no-f16-f128"] mem = ["compiler_builtins/mem"] mangled-names = ["compiler_builtins/mangled-names"] # Skip tests that rely on f128 symbols being available on the system -no-sys-f128 = [] +no-sys-f128 = ["no-sys-f128-int-convert"] +# Some platforms have most f128 functions, but not the integer conversions +no-sys-f128-int-convert = [] diff --git a/testcrate/build.rs b/testcrate/build.rs index f24dae3c6..1dad6c5e6 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -1,7 +1,15 @@ -use std::env; +use std::{collections::HashSet, env}; + +/// Features to enable +#[derive(Debug, PartialEq, Eq, Hash)] +enum Feature { + NoSysF128, + NoSysF128IntConvert, +} fn main() { let target = env::var("TARGET").unwrap(); + let mut features = HashSet::new(); // These platforms do not have f128 symbols available in their system libraries, so // skip related tests. @@ -21,7 +29,24 @@ fn main() { // .
|| target.starts_with("powerpc64-") { - println!("cargo:warning=using apfloat fallback for f128"); - println!("cargo:rustc-cfg=feature=\"no-sys-f128\""); + features.insert(Feature::NoSysF128); + features.insert(Feature::NoSysF128IntConvert); + } + + if target.starts_with("i586") || target.starts_with("i686") { + // 32-bit x86 seems to not have `__fixunstfti`, but does have everything else + features.insert(Feature::NoSysF128IntConvert); + } + + for feature in features { + let (name, warning) = match feature { + Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"), + Feature::NoSysF128IntConvert => ( + "no-sys-f128-int-convert", + "using apfloat fallback for f128 to int conversions", + ), + }; + println!("cargo:warning={warning}"); + println!("cargo:rustc-cfg=feature=\"{name}\""); } } diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 1e26950fe..f73b809d0 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -146,7 +146,7 @@ mod f_to_i { }; fuzz_float(N, |x: f32| { - f_to_i!(x, + f_to_i!(x, f32, Single, all(), u32, __fixunssfsi; u64, __fixunssfdi; u128, __fixunssfti; @@ -164,7 +164,7 @@ mod f_to_i { }; fuzz_float(N, |x: f64| { - f_to_i!(x, + f_to_i!(x, f64, Double, all(), u32, __fixunsdfsi; u64, __fixunsdfdi; u128, __fixunsdfti; @@ -174,6 +174,29 @@ mod f_to_i { ); }); } + + #[test] + #[cfg(not(feature = "no-f16-f128"))] + fn f128_to_int() { + use compiler_builtins::float::conv::{ + __fixtfdi, __fixtfsi, __fixtfti, __fixunstfdi, __fixunstfsi, __fixunstfti, + }; + + fuzz_float(N, |x: f128| { + f_to_i!( + x, + f128, + Quad, + not(feature = "no-sys-f128-int-convert"), + u32, __fixunstfsi; + u64, __fixunstfdi; + u128, __fixunstfti; + i32, __fixtfsi; + i64, __fixtfdi; + i128, __fixtfti; + ); + }); + } } macro_rules! 
conv { From aa2ed5fa755d604b079e15ff7b63e472a60f1bee Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 21 May 2024 03:41:06 -0400 Subject: [PATCH 0758/1459] Add `ppc_alias` to the `intrinsics!` macro PowerPC platforms use `kf` rather than `tf` for `f128`. Add a way to alias this in the macro to make the code cleaner. This also fixes the names of `fixunstf*` and `fixtf*` on Power PC (`fixunskf*` and `fixkf*` are correct). --- src/float/add.rs | 8 ++---- src/float/cmp.rs | 54 ++++++----------------------------------- src/float/conv.rs | 6 +++++ src/float/extend.rs | 21 +++------------- src/float/mul.rs | 9 ++----- src/float/sub.rs | 13 +++++----- src/float/trunc.rs | 21 +++------------- src/macros.rs | 33 +++++++++++++++++++++++++ testcrate/tests/conv.rs | 7 ++++++ 9 files changed, 70 insertions(+), 102 deletions(-) diff --git a/src/float/add.rs b/src/float/add.rs index fd151f77d..7e8529f3e 100644 --- a/src/float/add.rs +++ b/src/float/add.rs @@ -203,16 +203,12 @@ intrinsics! { add(a, b) } - #[cfg(not(any(feature = "no-f16-f128", target_arch = "powerpc", target_arch = "powerpc64")))] + #[ppc_alias = __addkf3] + #[cfg(not(feature = "no-f16-f128"))] pub extern "C" fn __addtf3(a: f128, b: f128) -> f128 { add(a, b) } - #[cfg(all(not(feature = "no-f16-f128"), any(target_arch = "powerpc", target_arch = "powerpc64")))] - pub extern "C" fn __addkf3(a: f128, b: f128) -> f128 { - add(a, b) - } - #[cfg(target_arch = "arm")] pub extern "C" fn __addsf3vfp(a: f32, b: f32) -> f32 { a + b diff --git a/src/float/cmp.rs b/src/float/cmp.rs index 44ebf6262..5c431304c 100644 --- a/src/float/cmp.rs +++ b/src/float/cmp.rs @@ -172,89 +172,51 @@ intrinsics! { } } -#[cfg(not(any( - feature = "no-f16-f128", - target_arch = "powerpc", - target_arch = "powerpc64" -)))] +#[cfg(not(feature = "no-f16-f128",))] intrinsics! 
{ #[avr_skip] + #[ppc_alias = __lekf2] pub extern "C" fn __letf2(a: f128, b: f128) -> i32 { cmp(a, b).to_le_abi() } #[avr_skip] + #[ppc_alias = __gekf2] pub extern "C" fn __getf2(a: f128, b: f128) -> i32 { cmp(a, b).to_ge_abi() } #[avr_skip] + #[ppc_alias = __unordkf2] pub extern "C" fn __unordtf2(a: f128, b: f128) -> i32 { unord(a, b) as i32 } #[avr_skip] + #[ppc_alias = __eqkf2] pub extern "C" fn __eqtf2(a: f128, b: f128) -> i32 { cmp(a, b).to_le_abi() } #[avr_skip] + #[ppc_alias = __ltkf2] pub extern "C" fn __lttf2(a: f128, b: f128) -> i32 { cmp(a, b).to_le_abi() } #[avr_skip] + #[ppc_alias = __nekf2] pub extern "C" fn __netf2(a: f128, b: f128) -> i32 { cmp(a, b).to_le_abi() } #[avr_skip] + #[ppc_alias = __gtkf2] pub extern "C" fn __gttf2(a: f128, b: f128) -> i32 { cmp(a, b).to_ge_abi() } } -#[cfg(all( - not(feature = "no-f16-f128"), - any(target_arch = "powerpc", target_arch = "powerpc64") -))] -intrinsics! { - #[avr_skip] - pub extern "C" fn __lekf2(a: f128, b: f128) -> i32 { - cmp(a, b).to_le_abi() - } - - #[avr_skip] - pub extern "C" fn __gekf2(a: f128, b: f128) -> i32 { - cmp(a, b).to_ge_abi() - } - - #[avr_skip] - pub extern "C" fn __unordkf2(a: f128, b: f128) -> i32 { - unord(a, b) as i32 - } - - #[avr_skip] - pub extern "C" fn __eqkf2(a: f128, b: f128) -> i32 { - cmp(a, b).to_le_abi() - } - - #[avr_skip] - pub extern "C" fn __ltkf2(a: f128, b: f128) -> i32 { - cmp(a, b).to_le_abi() - } - - #[avr_skip] - pub extern "C" fn __nekf2(a: f128, b: f128) -> i32 { - cmp(a, b).to_le_abi() - } - - #[avr_skip] - pub extern "C" fn __gtkf2(a: f128, b: f128) -> i32 { - cmp(a, b).to_ge_abi() - } -} - #[cfg(target_arch = "arm")] intrinsics! { pub extern "aapcs" fn __aeabi_fcmple(a: f32, b: f32) -> i32 { diff --git a/src/float/conv.rs b/src/float/conv.rs index a37206cdc..52119f3e8 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -262,16 +262,19 @@ intrinsics! 
{ float_to_unsigned_int(f) } + #[ppc_alias = __fixunskfsi] #[cfg(not(feature = "no-f16-f128"))] pub extern "C" fn __fixunstfsi(f: f128) -> u32 { float_to_unsigned_int(f) } + #[ppc_alias = __fixunskfdi] #[cfg(not(feature = "no-f16-f128"))] pub extern "C" fn __fixunstfdi(f: f128) -> u64 { float_to_unsigned_int(f) } + #[ppc_alias = __fixunskfti] #[cfg(not(feature = "no-f16-f128"))] pub extern "C" fn __fixunstfti(f: f128) -> u128 { float_to_unsigned_int(f) @@ -310,16 +313,19 @@ intrinsics! { float_to_signed_int(f) } + #[ppc_alias = __fixkfsi] #[cfg(not(feature = "no-f16-f128"))] pub extern "C" fn __fixtfsi(f: f128) -> i32 { float_to_signed_int(f) } + #[ppc_alias = __fixkfdi] #[cfg(not(feature = "no-f16-f128"))] pub extern "C" fn __fixtfdi(f: f128) -> i64 { float_to_signed_int(f) } + #[ppc_alias = __fixkfti] #[cfg(not(feature = "no-f16-f128"))] pub extern "C" fn __fixtfti(f: f128) -> i128 { float_to_signed_int(f) diff --git a/src/float/extend.rs b/src/float/extend.rs index 12e5fc9e1..556048991 100644 --- a/src/float/extend.rs +++ b/src/float/extend.rs @@ -100,37 +100,22 @@ intrinsics! 
{ #[avr_skip] #[aapcs_on_arm] - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[ppc_alias = __extendhfkf2] pub extern "C" fn __extendhftf2(a: f16) -> f128 { extend(a) } - #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] - pub extern "C" fn __extendhfkf2(a: f16) -> f128 { - extend(a) - } - #[avr_skip] #[aapcs_on_arm] - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[ppc_alias = __extendsfkf2] pub extern "C" fn __extendsftf2(a: f32) -> f128 { extend(a) } - #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] - pub extern "C" fn __extendsfkf2(a: f32) -> f128 { - extend(a) - } - #[avr_skip] #[aapcs_on_arm] - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[ppc_alias = __extenddfkf2] pub extern "C" fn __extenddftf2(a: f64) -> f128 { extend(a) } - - #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] - pub extern "C" fn __extenddfkf2(a: f64) -> f128 { - extend(a) - } } diff --git a/src/float/mul.rs b/src/float/mul.rs index 9866b6280..007cc09a4 100644 --- a/src/float/mul.rs +++ b/src/float/mul.rs @@ -199,17 +199,12 @@ intrinsics! { mul(a, b) } - #[cfg(not(any(feature = "no-f16-f128", target_arch = "powerpc", target_arch = "powerpc64")))] + #[ppc_alias = __mulkf3] + #[cfg(not(feature = "no-f16-f128"))] pub extern "C" fn __multf3(a: f128, b: f128) -> f128 { mul(a, b) } - - #[cfg(all(not(feature = "no-f16-f128"), any(target_arch = "powerpc", target_arch = "powerpc64")))] - pub extern "C" fn __mulkf3(a: f128, b: f128) -> f128 { - mul(a, b) - } - #[cfg(target_arch = "arm")] pub extern "C" fn __mulsf3vfp(a: f32, b: f32) -> f32 { a * b diff --git a/src/float/sub.rs b/src/float/sub.rs index de33259d6..1492679f6 100644 --- a/src/float/sub.rs +++ b/src/float/sub.rs @@ -15,16 +15,15 @@ intrinsics! 
{ __adddf3(a, f64::from_repr(b.repr() ^ f64::SIGN_MASK)) } - #[cfg(not(any(feature = "no-f16-f128", target_arch = "powerpc", target_arch = "powerpc64")))] + #[ppc_alias = __subkf3] + #[cfg(not(feature = "no-f16-f128"))] pub extern "C" fn __subtf3(a: f128, b: f128) -> f128 { + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + use crate::float::add::__addkf3 as __addtf3; + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] use crate::float::add::__addtf3; - __addtf3(a, f128::from_repr(b.repr() ^ f128::SIGN_MASK)) - } - #[cfg(all(not(feature = "no-f16-f128"), any(target_arch = "powerpc", target_arch = "powerpc64")))] - pub extern "C" fn __subkf3(a: f128, b: f128) -> f128 { - use crate::float::add::__addkf3; - __addkf3(a, f128::from_repr(b.repr() ^ f128::SIGN_MASK)) + __addtf3(a, f128::from_repr(b.repr() ^ f128::SIGN_MASK)) } #[cfg(target_arch = "arm")] diff --git a/src/float/trunc.rs b/src/float/trunc.rs index 31351b5e9..9aea6f91e 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -155,37 +155,22 @@ intrinsics! 
{ #[avr_skip] #[aapcs_on_arm] - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[ppc_alias = __trunckfhf2] pub extern "C" fn __trunctfhf2(a: f128) -> f16 { trunc(a) } - #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] - pub extern "C" fn __trunckfhf2(a: f128) -> f16 { - trunc(a) - } - #[avr_skip] #[aapcs_on_arm] - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[ppc_alias = __trunckfsf2] pub extern "C" fn __trunctfsf2(a: f128) -> f32 { trunc(a) } - #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] - pub extern "C" fn __trunckfsf2(a: f128) -> f32 { - trunc(a) - } - #[avr_skip] #[aapcs_on_arm] - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[ppc_alias = __trunckfdf2] pub extern "C" fn __trunctfdf2(a: f128) -> f64 { trunc(a) } - - #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] - pub extern "C" fn __trunckfdf2(a: f128) -> f64 { - trunc(a) - } } diff --git a/src/macros.rs b/src/macros.rs index f762ef4da..f537c1a96 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -65,6 +65,9 @@ macro_rules! public_test_dep { /// it's a normal ABI elsewhere for returning a 128 bit integer. /// * `arm_aeabi_alias` - handles the "aliasing" of various intrinsics on ARM /// their otherwise typical names to other prefixed ones. +/// * `ppc_alias` - changes the name of the symbol on PowerPC platforms without +/// changing any other behavior. This is mostly for `f128`, which is `tf` on +/// most platforms but `kf` on PowerPC. macro_rules! intrinsics { () => (); @@ -320,6 +323,36 @@ macro_rules! intrinsics { intrinsics!($($rest)*); ); + // PowerPC usually uses `kf` rather than `tf` for `f128`. This is just an easy + // way to add an alias on those targets. + ( + #[ppc_alias = $alias:ident] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? 
{ + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $alias( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + // C mem* functions are only generated when the "mem" feature is enabled. ( #[mem_builtin] diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index f73b809d0..1425b49ce 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -178,6 +178,13 @@ mod f_to_i { #[test] #[cfg(not(feature = "no-f16-f128"))] fn f128_to_int() { + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + use compiler_builtins::float::conv::{ + __fixkfdi as __fixtfdi, __fixkfsi as __fixtfsi, __fixkfti as __fixtfti, + __fixunskfdi as __fixunstfdi, __fixunskfsi as __fixunstfsi, + __fixunskfti as __fixunstfti, + }; + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] use compiler_builtins::float::conv::{ __fixtfdi, __fixtfsi, __fixtfti, __fixunstfdi, __fixunstfsi, __fixunstfti, }; From 39f45e912afda497ae70211789e28d7b560a3af3 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 21 May 2024 01:45:17 -0400 Subject: [PATCH 0759/1459] Organize functions in intrinsics example --- examples/intrinsics.rs | 179 ++++++++++++++++++++++------------------- 1 file changed, 96 insertions(+), 83 deletions(-) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 54b703dfb..201c9a7e0 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -8,10 +8,12 @@ #![allow(internal_features)] #![cfg_attr(thumb, no_main)] #![deny(dead_code)] +#![feature(allocator_api)] #![feature(bench_black_box)] +#![feature(f128)] +#![feature(f16)] #![feature(lang_items)] #![feature(start)] 
-#![feature(allocator_api)] #![no_std] extern crate panic_handler; @@ -26,126 +28,132 @@ extern "C" {} // have an additional comment: the function name is the ARM name for the intrinsic and the comment // in the non-ARM name for the intrinsic. mod intrinsics { - // truncdfsf2 - pub fn aeabi_d2f(x: f64) -> f32 { - x as f32 + /* f32 operations */ + + // extendsfdf2 + pub fn aeabi_f2d(x: f32) -> f64 { + x as f64 } - // fixdfsi - pub fn aeabi_d2i(x: f64) -> i32 { + // fixsfsi + pub fn aeabi_f2iz(x: f32) -> i32 { x as i32 } - // fixdfdi - pub fn aeabi_d2l(x: f64) -> i64 { + // fixsfdi + pub fn aeabi_f2lz(x: f32) -> i64 { x as i64 } - // fixunsdfsi - pub fn aeabi_d2uiz(x: f64) -> u32 { + // fixunssfsi + pub fn aeabi_f2uiz(x: f32) -> u32 { x as u32 } - // fixunsdfdi - pub fn aeabi_d2ulz(x: f64) -> u64 { + // fixunssfdi + pub fn aeabi_f2ulz(x: f32) -> u64 { x as u64 } - // adddf3 - pub fn aeabi_dadd(a: f64, b: f64) -> f64 { + // addsf3 + pub fn aeabi_fadd(a: f32, b: f32) -> f32 { a + b } - // eqdf2 - pub fn aeabi_dcmpeq(a: f64, b: f64) -> bool { + // eqsf2 + pub fn aeabi_fcmpeq(a: f32, b: f32) -> bool { a == b } - // gtdf2 - pub fn aeabi_dcmpgt(a: f64, b: f64) -> bool { + // gtsf2 + pub fn aeabi_fcmpgt(a: f32, b: f32) -> bool { a > b } - // ltdf2 - pub fn aeabi_dcmplt(a: f64, b: f64) -> bool { + // ltsf2 + pub fn aeabi_fcmplt(a: f32, b: f32) -> bool { a < b } - // divdf3 - pub fn aeabi_ddiv(a: f64, b: f64) -> f64 { + // divsf3 + pub fn aeabi_fdiv(a: f32, b: f32) -> f32 { a / b } - // muldf3 - pub fn aeabi_dmul(a: f64, b: f64) -> f64 { + // mulsf3 + pub fn aeabi_fmul(a: f32, b: f32) -> f32 { a * b } - // subdf3 - pub fn aeabi_dsub(a: f64, b: f64) -> f64 { + // subsf3 + pub fn aeabi_fsub(a: f32, b: f32) -> f32 { a - b } - // extendsfdf2 - pub fn aeabi_f2d(x: f32) -> f64 { - x as f64 + /* f64 operations */ + + // truncdfsf2 + pub fn aeabi_d2f(x: f64) -> f32 { + x as f32 } - // fixsfsi - pub fn aeabi_f2iz(x: f32) -> i32 { + // fixdfsi + pub fn aeabi_d2i(x: f64) -> i32 { x as 
i32 } - // fixsfdi - pub fn aeabi_f2lz(x: f32) -> i64 { + // fixdfdi + pub fn aeabi_d2l(x: f64) -> i64 { x as i64 } - // fixunssfsi - pub fn aeabi_f2uiz(x: f32) -> u32 { + // fixunsdfsi + pub fn aeabi_d2uiz(x: f64) -> u32 { x as u32 } - // fixunssfdi - pub fn aeabi_f2ulz(x: f32) -> u64 { + // fixunsdfdi + pub fn aeabi_d2ulz(x: f64) -> u64 { x as u64 } - // addsf3 - pub fn aeabi_fadd(a: f32, b: f32) -> f32 { + // adddf3 + pub fn aeabi_dadd(a: f64, b: f64) -> f64 { a + b } - // eqsf2 - pub fn aeabi_fcmpeq(a: f32, b: f32) -> bool { + // eqdf2 + pub fn aeabi_dcmpeq(a: f64, b: f64) -> bool { a == b } - // gtsf2 - pub fn aeabi_fcmpgt(a: f32, b: f32) -> bool { + // gtdf2 + pub fn aeabi_dcmpgt(a: f64, b: f64) -> bool { a > b } - // ltsf2 - pub fn aeabi_fcmplt(a: f32, b: f32) -> bool { + // ltdf2 + pub fn aeabi_dcmplt(a: f64, b: f64) -> bool { a < b } - // divsf3 - pub fn aeabi_fdiv(a: f32, b: f32) -> f32 { + // divdf3 + pub fn aeabi_ddiv(a: f64, b: f64) -> f64 { a / b } - // mulsf3 - pub fn aeabi_fmul(a: f32, b: f32) -> f32 { + // muldf3 + pub fn aeabi_dmul(a: f64, b: f64) -> f64 { a * b } - // subsf3 - pub fn aeabi_fsub(a: f32, b: f32) -> f32 { + // subdf3 + pub fn aeabi_dsub(a: f64, b: f64) -> f64 { a - b } + /* i32 operations */ + // floatsidf pub fn aeabi_i2d(x: i32) -> f64 { x as f64 @@ -164,14 +172,20 @@ mod intrinsics { a % b } + /* i64 operations */ + + // floatdisf + pub fn aeabi_l2f(x: i64) -> f32 { + x as f32 + } + // floatdidf pub fn aeabi_l2d(x: i64) -> f64 { x as f64 } - // floatdisf - pub fn aeabi_l2f(x: i64) -> f32 { - x as f32 + pub fn mulodi4(a: i64, b: i64) -> i64 { + a * b } // divdi3 @@ -179,11 +193,31 @@ mod intrinsics { a / b } + pub fn moddi3(a: i64, b: i64) -> i64 { + a % b + } + // muldi3 pub fn aeabi_lmul(a: i64, b: i64) -> i64 { a.wrapping_mul(b) } + /* i128 operations */ + + pub fn lshrti3(a: i128, b: usize) -> i128 { + a >> b + } + + pub fn divti3(a: i128, b: i128) -> i128 { + a / b + } + + pub fn modti3(a: i128, b: i128) -> i128 { + a % b + } 
+ + /* u32 operations */ + // floatunsidf pub fn aeabi_ui2d(x: u32) -> f64 { x as f64 @@ -202,26 +236,20 @@ mod intrinsics { a % b } - // floatundidf - pub fn aeabi_ul2d(x: u64) -> f64 { - x as f64 - } + /* u64 operations */ // floatundisf pub fn aeabi_ul2f(x: u64) -> f32 { x as f32 } - // udivdi3 - pub fn aeabi_uldivmod(a: u64, b: u64) -> u64 { - a * b - } - - pub fn moddi3(a: i64, b: i64) -> i64 { - a % b + // floatundidf + pub fn aeabi_ul2d(x: u64) -> f64 { + x as f64 } - pub fn mulodi4(a: i64, b: i64) -> i64 { + // udivdi3 + pub fn aeabi_uldivmod(a: u64, b: u64) -> u64 { a * b } @@ -229,6 +257,8 @@ mod intrinsics { a % b } + /* u128 operations */ + pub fn muloti4(a: u128, b: u128) -> Option { a.checked_mul(b) } @@ -245,10 +275,6 @@ mod intrinsics { a << b } - pub fn lshrti3(a: i128, b: usize) -> i128 { - a >> b - } - pub fn udivti3(a: u128, b: u128) -> u128 { a / b } @@ -256,18 +282,6 @@ mod intrinsics { pub fn umodti3(a: u128, b: u128) -> u128 { a % b } - - pub fn divti3(a: i128, b: i128) -> i128 { - a / b - } - - pub fn modti3(a: i128, b: i128) -> i128 { - a % b - } - - pub fn udivsi3(a: u32, b: u32) -> u32 { - a / b - } } fn run() { @@ -325,7 +339,6 @@ fn run() { bb(umodti3(bb(2), bb(2))); bb(divti3(bb(2), bb(2))); bb(modti3(bb(2), bb(2))); - bb(udivsi3(bb(2), bb(2))); something_with_a_dtor(&|| assert_eq!(bb(1), 1)); From 80d022e6f3c6c8b3ddb7f66a32177bd76916d7f3 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 21 May 2024 01:55:46 -0400 Subject: [PATCH 0760/1459] Add f16 and f128 intrinsics to the example test --- examples/intrinsics.rs | 134 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 128 insertions(+), 6 deletions(-) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 201c9a7e0..e4fcb3e35 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -28,13 +28,36 @@ extern "C" {} // have an additional comment: the function name is the ARM name for the intrinsic and the comment // in the non-ARM name for the 
intrinsic. mod intrinsics { + /* f16 operations */ + + pub fn extendhfsf(x: f16) -> f32 { + x as f32 + } + + pub fn extendhfdf(x: f16) -> f64 { + x as f64 + } + + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + pub fn extendhftf(x: f16) -> f128 { + x as f128 + } + /* f32 operations */ + pub fn truncsfhf(x: f32) -> f16 { + x as f16 + } + // extendsfdf2 pub fn aeabi_f2d(x: f32) -> f64 { x as f64 } + pub fn extendsftf(x: f32) -> f128 { + x as f128 + } + // fixsfsi pub fn aeabi_f2iz(x: f32) -> i32 { x as i32 @@ -152,6 +175,75 @@ mod intrinsics { a - b } + /* f128 operations */ + + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + pub fn trunctfhf(x: f128) -> f16 { + x as f16 + } + + pub fn trunctfsf(x: f128) -> f32 { + x as f32 + } + + pub fn trunctfdf(x: f128) -> f64 { + x as f64 + } + + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + pub fn fixtfsi(x: f128) -> i32 { + x as i32 + } + + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + pub fn fixtfdi(x: f128) -> i64 { + x as i64 + } + + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + pub fn fixtfti(x: f128) -> i128 { + x as i128 + } + + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + pub fn fixunstfsi(x: f128) -> u32 { + x as u32 + } + + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + pub fn fixunstfdi(x: f128) -> u64 { + x as u64 + } + + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + pub fn fixunstfti(x: f128) -> u128 { + x as u128 + } + + pub fn addtf(a: f128, b: f128) -> f128 { + a + b + } + + pub fn eqtf(a: f128, b: f128) -> bool { + a == b + } + + pub fn gttf(a: f128, b: f128) -> bool { + a > b + } + + pub fn lttf(a: f128, b: f128) -> bool { + a < b + } + + pub fn multf(a: f128, b: f128) -> f128 { + a * b + } + + pub fn subtf(a: f128, b: f128) -> f128 { + a - b + } + /* i32 operations */ // floatsidf @@ -288,6 +380,9 @@ fn run() { use 
core::hint::black_box as bb; use intrinsics::*; + // FIXME(f16_f128): some PPC f128 <-> int conversion functions have the wrong names + + bb(addtf(bb(2.), bb(2.))); bb(aeabi_d2f(bb(2.))); bb(aeabi_d2i(bb(2.))); bb(aeabi_d2l(bb(2.))); @@ -327,18 +422,45 @@ fn run() { bb(aeabi_ul2d(bb(2))); bb(aeabi_ul2f(bb(2))); bb(aeabi_uldivmod(bb(2), bb(3))); + bb(ashlti3(bb(2), bb(2))); + bb(ashrti3(bb(2), bb(2))); + bb(divti3(bb(2), bb(2))); + bb(eqtf(bb(2.), bb(2.))); + bb(extendhfdf(bb(2.))); + bb(extendhfsf(bb(2.))); + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + bb(extendhftf(bb(2.))); + bb(extendsftf(bb(2.))); + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + bb(fixtfdi(bb(2.))); + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + bb(fixtfsi(bb(2.))); + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + bb(fixtfti(bb(2.))); + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + bb(fixunstfdi(bb(2.))); + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + bb(fixunstfsi(bb(2.))); + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + bb(fixunstfti(bb(2.))); + bb(gttf(bb(2.), bb(2.))); + bb(lshrti3(bb(2), bb(2))); + bb(lttf(bb(2.), bb(2.))); bb(moddi3(bb(2), bb(3))); + bb(modti3(bb(2), bb(2))); bb(mulodi4(bb(2), bb(3))); - bb(umoddi3(bb(2), bb(3))); bb(muloti4(bb(2), bb(2))); + bb(multf(bb(2.), bb(2.))); bb(multi3(bb(2), bb(2))); - bb(ashlti3(bb(2), bb(2))); - bb(ashrti3(bb(2), bb(2))); - bb(lshrti3(bb(2), bb(2))); + bb(subtf(bb(2.), bb(2.))); + bb(truncsfhf(bb(2.))); + bb(trunctfdf(bb(2.))); + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + bb(trunctfhf(bb(2.))); + bb(trunctfsf(bb(2.))); bb(udivti3(bb(2), bb(2))); + bb(umoddi3(bb(2), bb(3))); bb(umodti3(bb(2), bb(2))); - bb(divti3(bb(2), bb(2))); - bb(modti3(bb(2), bb(2))); something_with_a_dtor(&|| assert_eq!(bb(1), 1)); From 
32c99b0368e18d171ee9f57f5aef33e53ddf4ba0 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 21 May 2024 01:59:00 -0400 Subject: [PATCH 0761/1459] Update outdated contribution guidelines --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 8233c669c..9a847da3f 100644 --- a/README.md +++ b/README.md @@ -52,17 +52,17 @@ features = ["c"] 2. Fork this repository. 3. Port the intrinsic(s) and their corresponding [unit tests][1] from their [C implementation][2] to Rust. -4. Implement a [test generator][3] to compare the behavior of the ported intrinsic(s) - with their implementation on the testing host. Note that randomized compiler-builtin tests - should be run using `cargo test --features gen-tests`. -4. Send a Pull Request (PR). -5. Once the PR passes our extensive [testing infrastructure][4], we'll merge it! -6. Celebrate :tada: +4. Add a test to compare the behavior of the ported intrinsic(s) with their + implementation on the testing host. +5. Add the intrinsic to `examples/intrinsics.rs` to verify it can be linked on + all targets. +6. Send a Pull Request (PR). +7. Once the PR passes our extensive [testing infrastructure][3], we'll merge it! +8. 
Celebrate :tada: [1]: https://github.com/rust-lang/llvm-project/tree/9e3de9490ff580cd484fbfa2908292b4838d56e7/compiler-rt/test/builtins/Unit [2]: https://github.com/rust-lang/llvm-project/tree/9e3de9490ff580cd484fbfa2908292b4838d56e7/compiler-rt/lib/builtins -[3]: https://github.com/rust-lang/compiler-builtins/blob/0ba07e49264a54cb5bbd4856fcea083bb3fbec15/build.rs#L180-L265 -[4]: https://github.com/rust-lang/compiler-builtins/actions +[3]: https://github.com/rust-lang/compiler-builtins/actions ### Porting Reminders From 4f4443fbedee4c98432da164bd2867a9f75ed786 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 21 May 2024 02:02:21 -0400 Subject: [PATCH 0762/1459] Add some missing functions to examples/intrinsics --- examples/intrinsics.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index e4fcb3e35..8bb707673 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -68,6 +68,10 @@ mod intrinsics { x as i64 } + pub fn fixsfti(x: f32) -> i128 { + x as i128 + } + // fixunssfsi pub fn aeabi_f2uiz(x: f32) -> u32 { x as u32 @@ -78,6 +82,10 @@ mod intrinsics { x as u64 } + pub fn fixunssfti(x: f32) -> u128 { + x as u128 + } + // addsf3 pub fn aeabi_fadd(a: f32, b: f32) -> f32 { a + b @@ -130,6 +138,10 @@ mod intrinsics { x as i64 } + pub fn fixdfti(x: f64) -> i128 { + x as i128 + } + // fixunsdfsi pub fn aeabi_d2uiz(x: f64) -> u32 { x as u32 @@ -140,6 +152,10 @@ mod intrinsics { x as u64 } + pub fn fixunsdfti(x: f64) -> u128 { + x as u128 + } + // adddf3 pub fn aeabi_dadd(a: f64, b: f64) -> f64 { a + b @@ -431,12 +447,16 @@ fn run() { #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] bb(extendhftf(bb(2.))); bb(extendsftf(bb(2.))); + bb(fixdfti(bb(2.))); + bb(fixsfti(bb(2.))); #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] bb(fixtfdi(bb(2.))); #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] bb(fixtfsi(bb(2.))); 
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] bb(fixtfti(bb(2.))); + bb(fixunsdfti(bb(2.))); + bb(fixunssfti(bb(2.))); #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] bb(fixunstfdi(bb(2.))); #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] From f35530f8f6e3debeed8383943de1b506501302ec Mon Sep 17 00:00:00 2001 From: Quentin Perez Date: Wed, 22 May 2024 09:49:08 +0200 Subject: [PATCH 0763/1459] Add Apple visionOS support --- build.rs | 13 ++----------- src/arm.rs | 30 +++++++++++++++--------------- 2 files changed, 17 insertions(+), 26 deletions(-) diff --git a/build.rs b/build.rs index 50415910d..0ecd39911 100644 --- a/build.rs +++ b/build.rs @@ -377,11 +377,7 @@ mod c { // On iOS and 32-bit OSX these are all just empty intrinsics, no need to // include them. - if target_os != "ios" - && target_os != "watchos" - && target_os != "tvos" - && (target_vendor != "apple" || target_arch != "x86") - { + if target_vendor != "apple" || target_arch != "x86" { sources.extend(&[ ("__absvti2", "absvti2.c"), ("__addvti3", "addvti3.c"), @@ -431,12 +427,7 @@ mod c { } } - if target_arch == "arm" - && target_os != "ios" - && target_os != "watchos" - && target_os != "tvos" - && target_env != "msvc" - { + if target_arch == "arm" && target_vendor != "apple" && target_env != "msvc" { sources.extend(&[ ("__aeabi_div0", "arm/aeabi_div0.c"), ("__aeabi_drsub", "arm/aeabi_drsub.c"), diff --git a/src/arm.rs b/src/arm.rs index dcae22b73..55cdda1f3 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -3,14 +3,14 @@ use core::intrinsics; -// iOS symbols have a leading underscore. -#[cfg(target_os = "ios")] +// Apple symbols have a leading underscore. +#[cfg(target_vendor = "apple")] macro_rules! bl { ($func:literal) => { concat!("bl _", $func) }; } -#[cfg(not(target_os = "ios"))] +#[cfg(not(target_vendor = "apple"))] macro_rules! bl { ($func:literal) => { concat!("bl ", $func) @@ -82,12 +82,12 @@ intrinsics! 
{ // FIXME: The `*4` and `*8` variants should be defined as aliases. - #[cfg(not(target_os = "ios"))] + #[cfg(not(target_vendor = "apple"))] pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) { crate::mem::memcpy(dest, src, n); } - #[cfg(not(target_os = "ios"))] + #[cfg(not(target_vendor = "apple"))] pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) { // We are guaranteed 4-alignment, so accessing at u32 is okay. let mut dest = dest as *mut u32; @@ -104,33 +104,33 @@ intrinsics! { __aeabi_memcpy(dest as *mut u8, src as *const u8, n); } - #[cfg(not(target_os = "ios"))] + #[cfg(not(target_vendor = "apple"))] pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) { __aeabi_memcpy4(dest, src, n); } - #[cfg(not(target_os = "ios"))] + #[cfg(not(target_vendor = "apple"))] pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) { crate::mem::memmove(dest, src, n); } - #[cfg(not(any(target_os = "ios", target_env = "msvc")))] + #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))] pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) { __aeabi_memmove(dest, src, n); } - #[cfg(not(any(target_os = "ios", target_env = "msvc")))] + #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))] pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) { __aeabi_memmove(dest, src, n); } - #[cfg(not(target_os = "ios"))] + #[cfg(not(target_vendor = "apple"))] pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { // Note the different argument order crate::mem::memset(dest, c, n); } - #[cfg(not(target_os = "ios"))] + #[cfg(not(target_vendor = "apple"))] pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) { let mut dest = dest as *mut u32; let mut n = n; @@ -147,22 +147,22 @@ intrinsics! 
{ __aeabi_memset(dest as *mut u8, n, byte as i32); } - #[cfg(not(target_os = "ios"))] + #[cfg(not(target_vendor = "apple"))] pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) { __aeabi_memset4(dest, n, c); } - #[cfg(not(target_os = "ios"))] + #[cfg(not(target_vendor = "apple"))] pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) { __aeabi_memset(dest, n, 0); } - #[cfg(not(any(target_os = "ios", target_env = "msvc")))] + #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))] pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) { __aeabi_memset4(dest, n, 0); } - #[cfg(not(any(target_os = "ios", target_env = "msvc")))] + #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))] pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { __aeabi_memset4(dest, n, 0); } From 40bf78b2bbcdd3e955153fe600d0102c0d4b36e7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 19 May 2024 05:02:25 -0400 Subject: [PATCH 0764/1459] Add benchmarks for floating point math This adds comparisons among the compiler-builtins function, system functions if available, and optionally handwritten assembly. These also help us identify inconsistencies between this crate and system functions, which may otherwise go unnoticed if intrinsics get lowered to inline operations rather than library calls. 
--- ci/run.sh | 6 +- testcrate/Cargo.toml | 48 +- testcrate/bench-175b45d1-aarch64-macos.txt | 500 +++++++++++++++ testcrate/bench-3cee6376-aarch64-macos.txt | 699 +++++++++++++++++++++ testcrate/benches/float_add.rs | 81 +++ testcrate/benches/float_cmp.rs | 202 ++++++ testcrate/benches/float_conv.rs | 547 ++++++++++++++++ testcrate/benches/float_div.rs | 70 +++ testcrate/benches/float_extend.rs | 93 +++ testcrate/benches/float_mul.rs | 81 +++ testcrate/benches/float_pow.rs | 24 + testcrate/benches/float_sub.rs | 81 +++ testcrate/benches/float_trunc.rs | 127 ++++ testcrate/build.rs | 19 + testcrate/src/bench.rs | 348 ++++++++++ testcrate/src/lib.rs | 6 + 16 files changed, 2930 insertions(+), 2 deletions(-) create mode 100644 testcrate/bench-175b45d1-aarch64-macos.txt create mode 100644 testcrate/bench-3cee6376-aarch64-macos.txt create mode 100644 testcrate/benches/float_add.rs create mode 100644 testcrate/benches/float_cmp.rs create mode 100644 testcrate/benches/float_conv.rs create mode 100644 testcrate/benches/float_div.rs create mode 100644 testcrate/benches/float_extend.rs create mode 100644 testcrate/benches/float_mul.rs create mode 100644 testcrate/benches/float_pow.rs create mode 100644 testcrate/benches/float_sub.rs create mode 100644 testcrate/benches/float_trunc.rs create mode 100644 testcrate/src/bench.rs diff --git a/ci/run.sh b/ci/run.sh index 2512dc633..dcbe1caf4 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -4,7 +4,9 @@ set -eux target="${1:-}" -if [ -z "${1:-}" ]; then +export RUST_BACKTRACE="${RUST_BACKTRACE:-full}" + +if [ -z "$target" ]; then host_target=$(rustc -vV | awk '/^host/ { print $2 }') echo "Defaulted to host target $host_target" target="$host_target" @@ -30,6 +32,8 @@ else $run --features no-asm --release $run --features no-f16-f128 $run --features no-f16-f128 --release + $run --benches + $run --benches --release fi if [ "${TEST_VERBATIM:-}" = "1" ]; then diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index 1de0c3976..6b5c4cf48 
100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -21,6 +21,10 @@ path = ".." default-features = false features = ["public-test-deps"] +[dev-dependencies] +criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] } +paste = "1.0.15" + [target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies] test = { git = "https://github.com/japaric/utest" } utest-cortex-m-qemu = { default-features = false, git = "https://github.com/japaric/utest" } @@ -34,6 +38,48 @@ no-f16-f128 = ["compiler_builtins/no-f16-f128"] mem = ["compiler_builtins/mem"] mangled-names = ["compiler_builtins/mangled-names"] # Skip tests that rely on f128 symbols being available on the system -no-sys-f128 = ["no-sys-f128-int-convert"] +no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"] # Some platforms have some f128 functions but everything except integer conversions no-sys-f128-int-convert = [] +no-sys-f16-f128-convert = [] +# Skip tests that rely on f16 symbols being available on the system +no-sys-f16 = [] + +# Enable report generation without bringing in more dependencies by default +benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] + +[[bench]] +name = "float_add" +harness = false + +[[bench]] +name = "float_sub" +harness = false + +[[bench]] +name = "float_mul" +harness = false + +[[bench]] +name = "float_div" +harness = false + +[[bench]] +name = "float_cmp" +harness = false + +[[bench]] +name = "float_conv" +harness = false + +[[bench]] +name = "float_extend" +harness = false + +[[bench]] +name = "float_trunc" +harness = false + +[[bench]] +name = "float_pow" +harness = false diff --git a/testcrate/bench-175b45d1-aarch64-macos.txt b/testcrate/bench-175b45d1-aarch64-macos.txt new file mode 100644 index 000000000..e79bbe368 --- /dev/null +++ b/testcrate/bench-175b45d1-aarch64-macos.txt @@ -0,0 +1,500 @@ + +running 0 tests + +test result: ok. 
0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s + +add_f32 compiler-builtins + time: [35.804 µs 35.863 µs 35.920 µs] +Found 5 outliers among 100 measurements (5.00%) + 2 (2.00%) high mild + 3 (3.00%) high severe + +add_f32 system time: [39.084 µs 39.127 µs 39.169 µs] +Found 11 outliers among 100 measurements (11.00%) + 7 (7.00%) high mild + 4 (4.00%) high severe + +add_f32 assembly (aarch64 unix) + time: [8.1034 µs 8.1441 µs 8.1866 µs] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild + +add_f64 compiler-builtins + time: [35.647 µs 35.725 µs 35.799 µs] +Found 10 outliers among 100 measurements (10.00%) + 8 (8.00%) high mild + 2 (2.00%) high severe + +add_f64 system time: [39.308 µs 39.322 µs 39.336 µs] +Found 7 outliers among 100 measurements (7.00%) + 4 (4.00%) high mild + 3 (3.00%) high severe + +add_f64 assembly (aarch64 unix) + time: [8.0401 µs 8.0442 µs 8.0499 µs] +Found 11 outliers among 100 measurements (11.00%) + 2 (2.00%) high mild + 9 (9.00%) high severe + +add_f128 compiler-builtins + time: [41.801 µs 41.986 µs 42.201 µs] +Found 7 outliers among 100 measurements (7.00%) + 4 (4.00%) high mild + 3 (3.00%) high severe + +cmp_f32_gt compiler-builtins + time: [13.579 µs 13.675 µs 13.778 µs] +Found 16 outliers among 100 measurements (16.00%) + 6 (6.00%) high mild + 10 (10.00%) high severe + +cmp_f32_gt system time: [12.343 µs 12.348 µs 12.355 µs] +Found 13 outliers among 100 measurements (13.00%) + 1 (1.00%) low mild + 3 (3.00%) high mild + 9 (9.00%) high severe + +cmp_f32_gt assembly (aarch64 unix) + time: [8.2593 µs 8.3185 µs 8.3813 µs] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild + +cmp_f32_unord compiler-builtins + time: [11.977 µs 12.042 µs 12.109 µs] +Found 13 outliers among 100 measurements (13.00%) + 5 (5.00%) low severe + 6 (6.00%) low mild + 2 (2.00%) high mild + +cmp_f32_unord system time: [8.1236 µs 8.1736 µs 8.2350 µs] +Found 18 outliers among 100 measurements (18.00%) + 5 
(5.00%) high mild + 13 (13.00%) high severe + +cmp_f32_unord assembly (aarch64 unix) + time: [8.1446 µs 8.2080 µs 8.2762 µs] +Found 14 outliers among 100 measurements (14.00%) + 6 (6.00%) high mild + 8 (8.00%) high severe + +cmp_f64_gt compiler-builtins + time: [16.073 µs 16.077 µs 16.082 µs] +Found 17 outliers among 100 measurements (17.00%) + 2 (2.00%) low mild + 4 (4.00%) high mild + 11 (11.00%) high severe + +cmp_f64_gt system time: [12.456 µs 12.487 µs 12.522 µs] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +cmp_f64_gt assembly (aarch64 unix) + time: [8.0557 µs 8.0616 µs 8.0685 µs] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +cmp_f64_unord compiler-builtins + time: [10.715 µs 10.724 µs 10.737 µs] +Found 13 outliers among 100 measurements (13.00%) + 3 (3.00%) high mild + 10 (10.00%) high severe + +cmp_f64_unord system time: [8.0692 µs 8.0734 µs 8.0784 µs] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +cmp_f64_unord assembly (aarch64 unix) + time: [8.0569 µs 8.0677 µs 8.0818 µs] +Found 18 outliers among 100 measurements (18.00%) + 4 (4.00%) high mild + 14 (14.00%) high severe + +cmp_f128_gt compiler-builtins + time: [18.234 µs 18.401 µs 18.602 µs] + +cmp_f128_unord compiler-builtins + time: [13.410 µs 13.471 µs 13.542 µs] +Found 7 outliers among 100 measurements (7.00%) + 7 (7.00%) high mild + +conv_u32_f32 compiler-builtins + time: [774.58 ns 776.01 ns 777.59 ns] +Found 9 outliers among 100 measurements (9.00%) + 2 (2.00%) high mild + 7 (7.00%) high severe + +conv_u32_f32 system time: [622.68 ns 625.64 ns 629.26 ns] +Found 16 outliers among 100 measurements (16.00%) + 7 (7.00%) high mild + 9 (9.00%) high severe + +conv_u32_f32 assembly (aarch64 unix) + time: [468.05 ns 469.76 ns 471.46 ns] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +conv_u32_f64 
compiler-builtins + time: [617.61 ns 618.00 ns 618.52 ns] +Found 13 outliers among 100 measurements (13.00%) + 4 (4.00%) high mild + 9 (9.00%) high severe + +conv_u32_f64 system time: [469.56 ns 471.03 ns 472.81 ns] +Found 11 outliers among 100 measurements (11.00%) + 7 (7.00%) high mild + 4 (4.00%) high severe + +conv_u32_f64 assembly (aarch64 unix) + time: [464.43 ns 465.01 ns 465.72 ns] +Found 13 outliers among 100 measurements (13.00%) + 5 (5.00%) high mild + 8 (8.00%) high severe + +conv_u64_f32 compiler-builtins + time: [847.95 ns 848.19 ns 848.46 ns] +Found 19 outliers among 100 measurements (19.00%) + 3 (3.00%) low mild + 9 (9.00%) high mild + 7 (7.00%) high severe + +conv_u64_f32 system time: [701.68 ns 701.95 ns 702.30 ns] +Found 10 outliers among 100 measurements (10.00%) + 4 (4.00%) high mild + 6 (6.00%) high severe + +conv_u64_f32 assembly (aarch64 unix) + time: [511.73 ns 512.43 ns 513.32 ns] +Found 6 outliers among 100 measurements (6.00%) + 6 (6.00%) high mild + +conv_u64_f64 compiler-builtins + time: [681.23 ns 682.55 ns 684.30 ns] +Found 18 outliers among 100 measurements (18.00%) + 1 (1.00%) high mild + 17 (17.00%) high severe + +conv_u64_f64 system time: [679.34 ns 679.57 ns 679.88 ns] +Found 18 outliers among 100 measurements (18.00%) + 1 (1.00%) low mild + 6 (6.00%) high mild + 11 (11.00%) high severe + +conv_u64_f64 assembly (aarch64 unix) + time: [509.90 ns 510.09 ns 510.30 ns] +Found 15 outliers among 100 measurements (15.00%) + 6 (6.00%) high mild + 9 (9.00%) high severe + +conv_u128_f32 compiler-builtins + time: [1.1368 µs 1.1372 µs 1.1377 µs] +Found 14 outliers among 100 measurements (14.00%) + 8 (8.00%) high mild + 6 (6.00%) high severe + +conv_u128_f32 system time: [1.4338 µs 1.4370 µs 1.4410 µs] +Found 7 outliers among 100 measurements (7.00%) + 2 (2.00%) high mild + 5 (5.00%) high severe + +conv_u128_f64 compiler-builtins + time: [1.0133 µs 1.0143 µs 1.0156 µs] +Found 16 outliers among 100 measurements (16.00%) + 2 (2.00%) high mild 
+ 14 (14.00%) high severe + +conv_u128_f64 system time: [1.3473 µs 1.3530 µs 1.3600 µs] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild + +conv_i32_f32 compiler-builtins + time: [906.53 ns 907.86 ns 909.23 ns] +Found 7 outliers among 100 measurements (7.00%) + 4 (4.00%) high mild + 3 (3.00%) high severe + +conv_i32_f32 system time: [914.53 ns 915.69 ns 917.01 ns] +Found 10 outliers among 100 measurements (10.00%) + 6 (6.00%) high mild + 4 (4.00%) high severe + +conv_i32_f32 assembly (aarch64 unix) + time: [464.55 ns 465.10 ns 465.83 ns] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild + +conv_i32_f64 compiler-builtins + time: [617.63 ns 617.92 ns 618.27 ns] +Found 12 outliers among 100 measurements (12.00%) + 3 (3.00%) high mild + 9 (9.00%) high severe + +conv_i32_f64 system time: [622.83 ns 624.19 ns 625.61 ns] +Found 6 outliers among 100 measurements (6.00%) + 5 (5.00%) high mild + 1 (1.00%) high severe + +conv_i32_f64 assembly (aarch64 unix) + time: [465.24 ns 466.04 ns 466.95 ns] +Found 11 outliers among 100 measurements (11.00%) + 4 (4.00%) high mild + 7 (7.00%) high severe + +conv_i64_f32 compiler-builtins + time: [852.67 ns 853.92 ns 855.34 ns] +Found 11 outliers among 100 measurements (11.00%) + 3 (3.00%) high mild + 8 (8.00%) high severe + +conv_i64_f32 system time: [906.94 ns 908.04 ns 909.33 ns] +Found 15 outliers among 100 measurements (15.00%) + 2 (2.00%) high mild + 13 (13.00%) high severe + +conv_i64_f32 assembly (aarch64 unix) + time: [510.84 ns 511.27 ns 511.80 ns] +Found 8 outliers among 100 measurements (8.00%) + 3 (3.00%) high mild + 5 (5.00%) high severe + +conv_i64_f64 compiler-builtins + time: [932.35 ns 932.97 ns 933.76 ns] +Found 10 outliers among 100 measurements (10.00%) + 4 (4.00%) high mild + 6 (6.00%) high severe + +conv_i64_f64 system time: [955.91 ns 958.95 ns 962.05 ns] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe + +conv_i64_f64 assembly 
(aarch64 unix) + time: [510.19 ns 510.72 ns 511.44 ns] +Found 9 outliers among 100 measurements (9.00%) + 5 (5.00%) high mild + 4 (4.00%) high severe + +conv_i128_f32 compiler-builtins + time: [1.4248 µs 1.4285 µs 1.4323 µs] +Found 12 outliers among 100 measurements (12.00%) + 7 (7.00%) high mild + 5 (5.00%) high severe + +conv_i128_f32 system time: [1.6970 µs 1.7017 µs 1.7069 µs] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe + +conv_i128_f64 compiler-builtins + time: [1.3132 µs 1.3161 µs 1.3191 µs] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe + +conv_i128_f64 system time: [1.6071 µs 1.6100 µs 1.6133 µs] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe + +conv_f64_u32 compiler-builtins + time: [640.35 ns 641.00 ns 641.68 ns] +Found 6 outliers among 100 measurements (6.00%) + 4 (4.00%) high mild + 2 (2.00%) high severe + +conv_f64_u32 system time: [640.87 ns 641.63 ns 642.42 ns] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +conv_f64_u32 assembly (aarch64 unix) + time: [482.02 ns 482.67 ns 483.38 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high severe + +conv_f64_u64 compiler-builtins + time: [638.58 ns 638.98 ns 639.45 ns] +Found 15 outliers among 100 measurements (15.00%) + 1 (1.00%) high mild + 14 (14.00%) high severe + +conv_f64_u64 system time: [642.54 ns 644.07 ns 645.59 ns] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe + +conv_f64_u64 assembly (aarch64 unix) + time: [482.65 ns 483.70 ns 484.87 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild + +conv_f64_u128 compiler-builtins + time: [1.0631 µs 1.0652 µs 1.0674 µs] +Found 8 outliers among 100 measurements (8.00%) + 7 (7.00%) high mild + 1 (1.00%) high severe + +conv_f64_u128 system time: [821.41 ns 823.45 ns 825.74 ns] 
+Found 11 outliers among 100 measurements (11.00%) + 8 (8.00%) high mild + 3 (3.00%) high severe + +conv_f64_i32 compiler-builtins + time: [826.76 ns 845.08 ns 870.23 ns] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild + +conv_f64_i32 system time: [764.12 ns 764.63 ns 765.26 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high severe + +conv_f64_i32 assembly (aarch64 unix) + time: [484.50 ns 485.98 ns 487.54 ns] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +conv_f64_i64 compiler-builtins + time: [797.27 ns 798.19 ns 799.84 ns] +Found 9 outliers among 100 measurements (9.00%) + 5 (5.00%) high mild + 4 (4.00%) high severe + +conv_f64_i64 system time: [768.74 ns 769.52 ns 770.23 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high severe + +conv_f64_i64 assembly (aarch64 unix) + time: [480.59 ns 481.03 ns 481.46 ns] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +conv_f64_i128 compiler-builtins + time: [1.0577 µs 1.0591 µs 1.0606 µs] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe + +conv_f64_i128 system time: [1.0181 µs 1.0195 µs 1.0211 µs] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild + +conv_f32_u32 compiler-builtins + time: [800.40 ns 801.39 ns 802.35 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild + +conv_f32_u32 system time: [638.12 ns 638.34 ns 638.63 ns] +Found 11 outliers among 100 measurements (11.00%) + 4 (4.00%) high mild + 7 (7.00%) high severe + +conv_f32_u32 assembly (aarch64 unix) + time: [479.37 ns 480.97 ns 483.32 ns] +Found 13 outliers among 100 measurements (13.00%) + 6 (6.00%) high mild + 7 (7.00%) high severe + +conv_f32_u64 compiler-builtins + time: [801.95 ns 803.64 ns 805.75 ns] + +conv_f32_u64 system time: [638.20 ns 638.56 ns 639.07 ns] +Found 10 outliers among 100 measurements (10.00%) + 
1 (1.00%) high mild + 9 (9.00%) high severe + +conv_f32_u64 assembly (aarch64 unix) + time: [480.07 ns 480.47 ns 480.86 ns] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe + +conv_f32_u128 compiler-builtins + time: [1.1579 µs 1.1623 µs 1.1657 µs] +Found 14 outliers among 100 measurements (14.00%) + 2 (2.00%) low severe + 7 (7.00%) high mild + 5 (5.00%) high severe + +conv_f32_u128 system time: [1.0344 µs 1.0394 µs 1.0450 µs] + +conv_f32_i32 compiler-builtins + time: [800.14 ns 801.52 ns 803.26 ns] +Found 10 outliers among 100 measurements (10.00%) + 8 (8.00%) high mild + 2 (2.00%) high severe + +conv_f32_i32 system time: [741.36 ns 741.74 ns 742.13 ns] +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe + +conv_f32_i32 assembly (aarch64 unix) + time: [484.35 ns 486.08 ns 488.11 ns] +Found 17 outliers among 100 measurements (17.00%) + 9 (9.00%) high mild + 8 (8.00%) high severe + +conv_f32_i64 compiler-builtins + time: [800.94 ns 802.68 ns 804.74 ns] + +conv_f32_i64 system time: [748.60 ns 750.68 ns 753.16 ns] +Found 9 outliers among 100 measurements (9.00%) + 4 (4.00%) high mild + 5 (5.00%) high severe + +conv_f32_i64 assembly (aarch64 unix) + time: [480.70 ns 481.23 ns 481.82 ns] +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe + +conv_f32_i128 compiler-builtins + time: [1.1774 µs 1.1829 µs 1.1887 µs] +Found 11 outliers among 100 measurements (11.00%) + 1 (1.00%) low severe + 7 (7.00%) low mild + 1 (1.00%) high mild + 2 (2.00%) high severe + +conv_f32_i128 system time: [1.1785 µs 1.1853 µs 1.1941 µs] +Found 7 outliers among 100 measurements (7.00%) + 2 (2.00%) high mild + 5 (5.00%) high severe + +div_f32 compiler-builtins + time: [38.852 µs 39.011 µs 39.178 µs] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild + +div_f32 system time: [41.846 µs 41.920 µs 42.005 µs] +Found 3 outliers among 100 measurements 
(3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +div_f32 assembly (aarch64 unix) + time: [8.1309 µs 8.1627 µs 8.2005 µs] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild + +div_f64 compiler-builtins + time: [50.369 µs 50.605 µs 50.857 µs] +Found 15 outliers among 100 measurements (15.00%) + 11 (11.00%) high mild + 4 (4.00%) high severe + +div_f64 system time: [53.506 µs 53.582 µs 53.676 µs] +Found 8 outliers among 100 measurements (8.00%) + 4 (4.00%) high mild + 4 (4.00%) high severe + +div_f64 assembly (aarch64 unix) + time: [8.0695 µs 8.0807 µs 8.0948 µs] +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe + diff --git a/testcrate/bench-3cee6376-aarch64-macos.txt b/testcrate/bench-3cee6376-aarch64-macos.txt new file mode 100644 index 000000000..131e7a85a --- /dev/null +++ b/testcrate/bench-3cee6376-aarch64-macos.txt @@ -0,0 +1,699 @@ + +running 0 tests + +test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s + +add_f32/compiler-builtins + time: [36.813 µs 37.048 µs 37.303 µs] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high mild +add_f32/system time: [39.103 µs 39.142 µs 39.189 µs] +Found 8 outliers among 100 measurements (8.00%) + 2 (2.00%) high mild + 6 (6.00%) high severe +add_f32/assembly (aarch64 unix) + time: [8.3786 µs 8.4680 µs 8.5570 µs] + +add_f64/compiler-builtins + time: [35.784 µs 35.819 µs 35.863 µs] +Found 4 outliers among 100 measurements (4.00%) + 1 (1.00%) high mild + 3 (3.00%) high severe +add_f64/system time: [39.634 µs 39.689 µs 39.746 µs] +Found 16 outliers among 100 measurements (16.00%) + 4 (4.00%) high mild + 12 (12.00%) high severe +add_f64/assembly (aarch64 unix) + time: [8.0533 µs 8.0599 µs 8.0670 µs] +Found 14 outliers among 100 measurements (14.00%) + 6 (6.00%) high mild + 8 (8.00%) high severe + +add_f128/compiler-builtins + time: [41.830 µs 41.920 µs 42.005 µs] + +cmp_f32_gt/compiler-builtins + time: 
[13.405 µs 13.411 µs 13.418 µs] +Found 18 outliers among 100 measurements (18.00%) + 4 (4.00%) high mild + 14 (14.00%) high severe +cmp_f32_gt/system time: [12.348 µs 12.355 µs 12.363 µs] +Found 12 outliers among 100 measurements (12.00%) + 2 (2.00%) high mild + 10 (10.00%) high severe +cmp_f32_gt/assembly (aarch64 unix) + time: [8.1233 µs 8.1625 µs 8.2072 µs] +Found 12 outliers among 100 measurements (12.00%) + 7 (7.00%) high mild + 5 (5.00%) high severe + +cmp_f32_unord/compiler-builtins + time: [11.349 µs 11.467 µs 11.584 µs] +cmp_f32_unord/system time: [8.0714 µs 8.0792 µs 8.0890 µs] +Found 16 outliers among 100 measurements (16.00%) + 4 (4.00%) high mild + 12 (12.00%) high severe +cmp_f32_unord/assembly (aarch64 unix) + time: [8.1121 µs 8.1705 µs 8.2325 µs] +Found 20 outliers among 100 measurements (20.00%) + 3 (3.00%) high mild + 17 (17.00%) high severe + +cmp_f64_gt/compiler-builtins + time: [13.749 µs 13.837 µs 13.934 µs] +Found 20 outliers among 100 measurements (20.00%) + 9 (9.00%) low mild + 7 (7.00%) high mild + 4 (4.00%) high severe +cmp_f64_gt/system time: [12.475 µs 12.515 µs 12.565 µs] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild +cmp_f64_gt/assembly (aarch64 unix) + time: [8.0456 µs 8.0540 µs 8.0653 µs] +Found 12 outliers among 100 measurements (12.00%) + 3 (3.00%) high mild + 9 (9.00%) high severe + +cmp_f64_unord/compiler-builtins + time: [10.723 µs 10.730 µs 10.739 µs] +Found 15 outliers among 100 measurements (15.00%) + 5 (5.00%) high mild + 10 (10.00%) high severe +cmp_f64_unord/system time: [8.0944 µs 8.1296 µs 8.1683 µs] +Found 17 outliers among 100 measurements (17.00%) + 4 (4.00%) high mild + 13 (13.00%) high severe +cmp_f64_unord/assembly (aarch64 unix) + time: [8.1042 µs 8.1337 µs 8.1662 µs] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild + +cmp_f128_gt/compiler-builtins + time: [20.508 µs 20.558 µs 20.615 µs] +Found 8 outliers among 100 measurements (8.00%) + 2 (2.00%) high mild + 6 
(6.00%) high severe + +cmp_f128_unord/compiler-builtins + time: [13.332 µs 13.346 µs 13.360 µs] +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe + +conv_u32_f32/compiler-builtins + time: [621.20 ns 621.89 ns 622.65 ns] +Found 7 outliers among 100 measurements (7.00%) + 4 (4.00%) high mild + 3 (3.00%) high severe +conv_u32_f32/system time: [621.44 ns 622.08 ns 622.74 ns] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe +conv_u32_f32/assembly (aarch64 unix) + time: [465.96 ns 466.65 ns 467.45 ns] +Found 13 outliers among 100 measurements (13.00%) + 3 (3.00%) high mild + 10 (10.00%) high severe + +conv_u32_f64/compiler-builtins + time: [619.71 ns 620.51 ns 621.52 ns] +Found 5 outliers among 100 measurements (5.00%) + 4 (4.00%) high mild + 1 (1.00%) high severe +conv_u32_f64/system time: [466.60 ns 467.14 ns 467.77 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild +conv_u32_f64/assembly (aarch64 unix) + time: [464.02 ns 464.32 ns 464.69 ns] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe + +conv_u64_f32/compiler-builtins + time: [851.24 ns 852.98 ns 854.77 ns] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high mild +conv_u64_f32/system time: [724.35 ns 729.43 ns 735.07 ns] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild +conv_u64_f32/assembly (aarch64 unix) + time: [513.30 ns 514.64 ns 516.16 ns] +Found 8 outliers among 100 measurements (8.00%) + 8 (8.00%) high mild + +conv_u64_f64/compiler-builtins + time: [850.72 ns 853.26 ns 856.54 ns] +Found 15 outliers among 100 measurements (15.00%) + 2 (2.00%) high mild + 13 (13.00%) high severe +conv_u64_f64/system time: [681.43 ns 682.54 ns 683.79 ns] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe +conv_u64_f64/assembly (aarch64 unix) + time: [511.37 ns 511.71 ns 512.02 ns] +Found 
1 outliers among 100 measurements (1.00%) + 1 (1.00%) high severe + +conv_u128_f32/compiler-builtins + time: [1.1395 µs 1.1409 µs 1.1424 µs] +Found 10 outliers among 100 measurements (10.00%) + 6 (6.00%) high mild + 4 (4.00%) high severe +conv_u128_f32/system time: [1.4348 µs 1.4369 µs 1.4390 µs] +Found 5 outliers among 100 measurements (5.00%) + 4 (4.00%) high mild + 1 (1.00%) high severe + +conv_u128_f64/compiler-builtins + time: [1.0148 µs 1.0157 µs 1.0167 µs] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe +conv_u128_f64/system time: [1.3404 µs 1.3423 µs 1.3442 µs] +Found 8 outliers among 100 measurements (8.00%) + 7 (7.00%) high mild + 1 (1.00%) high severe + +conv_i32_f32/compiler-builtins + time: [902.89 ns 903.81 ns 904.84 ns] +Found 7 outliers among 100 measurements (7.00%) + 4 (4.00%) high mild + 3 (3.00%) high severe +conv_i32_f32/system time: [942.62 ns 949.04 ns 955.77 ns] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe +conv_i32_f32/assembly (aarch64 unix) + time: [466.06 ns 466.60 ns 467.27 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high severe + +conv_i32_f64/compiler-builtins + time: [618.98 ns 619.24 ns 619.55 ns] +Found 17 outliers among 100 measurements (17.00%) + 1 (1.00%) low mild + 3 (3.00%) high mild + 13 (13.00%) high severe +conv_i32_f64/system time: [622.18 ns 623.41 ns 624.85 ns] +Found 8 outliers among 100 measurements (8.00%) + 5 (5.00%) high mild + 3 (3.00%) high severe +conv_i32_f64/assembly (aarch64 unix) + time: [466.26 ns 466.76 ns 467.35 ns] +Found 9 outliers among 100 measurements (9.00%) + 5 (5.00%) high mild + 4 (4.00%) high severe + +conv_i64_f32/compiler-builtins + time: [850.11 ns 850.45 ns 850.88 ns] +Found 15 outliers among 100 measurements (15.00%) + 1 (1.00%) low severe + 1 (1.00%) low mild + 3 (3.00%) high mild + 10 (10.00%) high severe +conv_i64_f32/system time: [908.36 ns 908.70 ns 909.10 ns] +Found 12 
outliers among 100 measurements (12.00%) + 3 (3.00%) high mild + 9 (9.00%) high severe +conv_i64_f32/assembly (aarch64 unix) + time: [513.56 ns 514.44 ns 515.38 ns] +Found 8 outliers among 100 measurements (8.00%) + 8 (8.00%) high mild + +conv_i64_f64/compiler-builtins + time: [935.39 ns 935.78 ns 936.26 ns] +Found 13 outliers among 100 measurements (13.00%) + 5 (5.00%) high mild + 8 (8.00%) high severe +conv_i64_f64/system time: [946.56 ns 947.33 ns 948.20 ns] +Found 8 outliers among 100 measurements (8.00%) + 6 (6.00%) high mild + 2 (2.00%) high severe +conv_i64_f64/assembly (aarch64 unix) + time: [511.55 ns 512.03 ns 512.56 ns] +Found 21 outliers among 100 measurements (21.00%) + 4 (4.00%) high mild + 17 (17.00%) high severe + +conv_i128_f32/compiler-builtins + time: [1.4206 µs 1.4218 µs 1.4232 µs] +Found 10 outliers among 100 measurements (10.00%) + 5 (5.00%) high mild + 5 (5.00%) high severe +conv_i128_f32/system time: [1.6863 µs 1.6891 µs 1.6922 µs] +Found 10 outliers among 100 measurements (10.00%) + 9 (9.00%) high mild + 1 (1.00%) high severe + +conv_i128_f64/compiler-builtins + time: [1.3110 µs 1.3122 µs 1.3136 µs] +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe +conv_i128_f64/system time: [1.6022 µs 1.6048 µs 1.6090 µs] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe + +conv_f64_u32/compiler-builtins + time: [798.65 ns 799.42 ns 800.39 ns] +Found 15 outliers among 100 measurements (15.00%) + 6 (6.00%) high mild + 9 (9.00%) high severe +conv_f64_u32/system time: [639.48 ns 639.88 ns 640.40 ns] +Found 16 outliers among 100 measurements (16.00%) + 1 (1.00%) low mild + 5 (5.00%) high mild + 10 (10.00%) high severe +conv_f64_u32/assembly (aarch64 unix) + time: [480.78 ns 481.35 ns 482.17 ns] +Found 7 outliers among 100 measurements (7.00%) + 5 (5.00%) high mild + 2 (2.00%) high severe + +conv_f64_u64/compiler-builtins + time: [799.56 ns 800.54 ns 801.89 ns] +Found 4 
outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe +conv_f64_u64/system time: [640.72 ns 641.24 ns 641.81 ns] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe +conv_f64_u64/assembly (aarch64 unix) + time: [481.54 ns 482.48 ns 483.53 ns] +Found 6 outliers among 100 measurements (6.00%) + 1 (1.00%) low severe + 1 (1.00%) low mild + 3 (3.00%) high mild + 1 (1.00%) high severe + +conv_f64_u128/compiler-builtins + time: [1.0510 µs 1.0515 µs 1.0520 µs] +Found 13 outliers among 100 measurements (13.00%) + 1 (1.00%) low mild + 2 (2.00%) high mild + 10 (10.00%) high severe +conv_f64_u128/system time: [818.45 ns 819.23 ns 820.15 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild + +conv_f64_i32/compiler-builtins + time: [800.56 ns 801.31 ns 802.21 ns] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe +conv_f64_i32/system time: [765.62 ns 766.15 ns 766.80 ns] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe +conv_f64_i32/assembly (aarch64 unix) + time: [471.65 ns 472.77 ns 473.89 ns] +Found 10 outliers among 100 measurements (10.00%) + 1 (1.00%) low mild + 8 (8.00%) high mild + 1 (1.00%) high severe + +conv_f64_i64/compiler-builtins + time: [801.00 ns 804.55 ns 808.72 ns] +Found 18 outliers among 100 measurements (18.00%) + 6 (6.00%) high mild + 12 (12.00%) high severe +conv_f64_i64/system time: [770.28 ns 772.47 ns 775.21 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild +conv_f64_i64/assembly (aarch64 unix) + time: [491.56 ns 494.96 ns 499.19 ns] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +conv_f64_i128/compiler-builtins + time: [1.0637 µs 1.0704 µs 1.0762 µs] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high mild +conv_f64_i128/system time: [1.0022 µs 1.0027 µs 1.0033 µs] +Found 4 
outliers among 100 measurements (4.00%) + 1 (1.00%) low severe + 3 (3.00%) high severe + +conv_f32_u32/compiler-builtins + time: [644.56 ns 647.01 ns 649.95 ns] +Found 15 outliers among 100 measurements (15.00%) + 13 (13.00%) high mild + 2 (2.00%) high severe +conv_f32_u32/system time: [648.12 ns 651.20 ns 654.54 ns] +Found 9 outliers among 100 measurements (9.00%) + 7 (7.00%) high mild + 2 (2.00%) high severe +conv_f32_u32/assembly (aarch64 unix) + time: [481.02 ns 482.71 ns 484.60 ns] +Found 12 outliers among 100 measurements (12.00%) + 1 (1.00%) low mild + 10 (10.00%) high mild + 1 (1.00%) high severe + +conv_f32_u64/compiler-builtins + time: [644.14 ns 646.61 ns 649.53 ns] +Found 11 outliers among 100 measurements (11.00%) + 6 (6.00%) high mild + 5 (5.00%) high severe +conv_f32_u64/system time: [646.21 ns 650.17 ns 654.55 ns] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild +conv_f32_u64/assembly (aarch64 unix) + time: [473.36 ns 474.60 ns 476.00 ns] +Found 9 outliers among 100 measurements (9.00%) + 2 (2.00%) low mild + 5 (5.00%) high mild + 2 (2.00%) high severe + +conv_f32_u128/compiler-builtins + time: [1.0820 µs 1.0828 µs 1.0839 µs] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe +conv_f32_u128/system time: [1.0003 µs 1.0042 µs 1.0076 µs] +Found 21 outliers among 100 measurements (21.00%) + 1 (1.00%) low mild + 3 (3.00%) high mild + 17 (17.00%) high severe + +conv_f32_i32/compiler-builtins + time: [801.13 ns 801.82 ns 802.53 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high severe +conv_f32_i32/system time: [745.17 ns 745.97 ns 746.78 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high severe +conv_f32_i32/assembly (aarch64 unix) + time: [469.87 ns 470.65 ns 471.57 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild + +conv_f32_i64/compiler-builtins + time: [799.44 ns 799.94 ns 800.59 ns] +Found 4 outliers among 100 measurements 
(4.00%) + 1 (1.00%) high mild + 3 (3.00%) high severe +conv_f32_i64/system time: [744.81 ns 745.17 ns 745.62 ns] +Found 14 outliers among 100 measurements (14.00%) + 5 (5.00%) high mild + 9 (9.00%) high severe +conv_f32_i64/assembly (aarch64 unix) + time: [465.06 ns 466.01 ns 467.12 ns] +Found 13 outliers among 100 measurements (13.00%) + 2 (2.00%) low severe + 5 (5.00%) high mild + 6 (6.00%) high severe + +conv_f32_i128/compiler-builtins + time: [1.1390 µs 1.1515 µs 1.1637 µs] +conv_f32_i128/system time: [1.1315 µs 1.1330 µs 1.1347 µs] +Found 6 outliers among 100 measurements (6.00%) + 3 (3.00%) low mild + 2 (2.00%) high mild + 1 (1.00%) high severe + +div_f32/compiler-builtins + time: [39.408 µs 39.676 µs 39.969 µs] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high mild +div_f32/system time: [42.108 µs 42.248 µs 42.528 µs] +Found 11 outliers among 100 measurements (11.00%) + 4 (4.00%) high mild + 7 (7.00%) high severe +div_f32/assembly (aarch64 unix) + time: [8.0724 µs 8.0794 µs 8.0870 µs] +Found 7 outliers among 100 measurements (7.00%) + 5 (5.00%) high mild + 2 (2.00%) high severe + +div_f64/compiler-builtins + time: [49.992 µs 50.014 µs 50.040 µs] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high severe +div_f64/system time: [53.577 µs 53.651 µs 53.743 µs] +Found 6 outliers among 100 measurements (6.00%) + 4 (4.00%) high mild + 2 (2.00%) high severe +div_f64/assembly (aarch64 unix) + time: [8.0976 µs 8.1064 µs 8.1158 µs] +Found 6 outliers among 100 measurements (6.00%) + 3 (3.00%) high mild + 3 (3.00%) high severe + +extend_f16_f32/compiler-builtins + time: [804.09 ns 805.38 ns 807.09 ns] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe +extend_f16_f32/system time: [641.07 ns 641.76 ns 642.60 ns] +Found 12 outliers among 100 measurements (12.00%) + 6 (6.00%) high mild + 6 (6.00%) high severe +extend_f16_f32/assembly (aarch64 unix) + time: [456.69 ns 457.14 ns 457.68 ns] +Found 8 
outliers among 100 measurements (8.00%) + 4 (4.00%) low mild + 2 (2.00%) high mild + 2 (2.00%) high severe + +extend_f16_f128/compiler-builtins + time: [1.1025 µs 1.1035 µs 1.1045 µs] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe + +extend_f32_f64/compiler-builtins + time: [799.30 ns 799.68 ns 800.16 ns] +Found 13 outliers among 100 measurements (13.00%) + 3 (3.00%) high mild + 10 (10.00%) high severe +extend_f32_f64/system time: [992.48 ns 993.27 ns 994.32 ns] +Found 15 outliers among 100 measurements (15.00%) + 3 (3.00%) high mild + 12 (12.00%) high severe +extend_f32_f64/assembly (aarch64 unix) + time: [457.65 ns 460.39 ns 463.78 ns] + +extend_f32_f128/compiler-builtins + time: [1.0295 µs 1.0311 µs 1.0327 µs] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) low mild + 1 (1.00%) high mild + +extend_f64_f128/compiler-builtins + time: [1.0400 µs 1.0412 µs 1.0426 µs] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild + +mul_f32/compiler-builtins + time: [25.604 µs 25.705 µs 25.818 µs] +Found 23 outliers among 100 measurements (23.00%) + 17 (17.00%) low severe + 3 (3.00%) high mild + 3 (3.00%) high severe +mul_f32/system time: [29.914 µs 29.977 µs 30.043 µs] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high mild +mul_f32/assembly (aarch64 unix) + time: [8.1384 µs 8.1964 µs 8.2603 µs] +Found 13 outliers among 100 measurements (13.00%) + 3 (3.00%) high mild + 10 (10.00%) high severe + +mul_f64/compiler-builtins + time: [25.596 µs 25.615 µs 25.637 µs] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe +mul_f64/system time: [30.931 µs 30.963 µs 31.002 µs] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild +mul_f64/assembly (aarch64 unix) + time: [8.0589 µs 8.0638 µs 8.0695 µs] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +mul_f128/compiler-builtins + time: 
[54.242 µs 54.306 µs 54.374 µs] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +powi_f32/compiler-builtins + time: [129.91 µs 130.09 µs 130.24 µs] +powi_f32/system time: [126.97 µs 127.34 µs 127.82 µs] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe + +powi_f64/compiler-builtins + time: [130.08 µs 130.81 µs 131.46 µs] +Found 13 outliers among 100 measurements (13.00%) + 13 (13.00%) high mild +powi_f64/system time: [128.51 µs 128.68 µs 128.88 µs] +Found 21 outliers among 100 measurements (21.00%) + 4 (4.00%) high mild + 17 (17.00%) high severe + +sub_f32/compiler-builtins + time: [37.861 µs 38.012 µs 38.158 µs] +Found 26 outliers among 100 measurements (26.00%) + 18 (18.00%) low mild + 7 (7.00%) high mild + 1 (1.00%) high severe +sub_f32/system time: [39.586 µs 39.628 µs 39.673 µs] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe +sub_f32/assembly (aarch64 unix) + time: [8.0976 µs 8.1584 µs 8.2208 µs] +Found 6 outliers among 100 measurements (6.00%) + 6 (6.00%) high mild + +sub_f64/compiler-builtins + time: [37.755 µs 37.838 µs 37.921 µs] +Found 25 outliers among 100 measurements (25.00%) + 7 (7.00%) low severe + 3 (3.00%) low mild + 4 (4.00%) high mild + 11 (11.00%) high severe +sub_f64/system time: [39.979 µs 40.019 µs 40.064 µs] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe +sub_f64/assembly (aarch64 unix) + time: [8.0669 µs 8.0733 µs 8.0801 µs] +Found 7 outliers among 100 measurements (7.00%) + 3 (3.00%) high mild + 4 (4.00%) high severe + +sub_f128/compiler-builtins + time: [68.618 µs 68.899 µs 69.293 µs] +Found 11 outliers among 100 measurements (11.00%) + 2 (2.00%) high mild + 9 (9.00%) high severe + +trunc_f32_f16/compiler-builtins + time: [1.3343 µs 1.3468 µs 1.3608 µs] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe 
+trunc_f32_f16/system time: [1.2687 µs 1.2714 µs 1.2738 µs] +trunc_f32_f16/assembly (aarch64 unix) + time: [470.06 ns 472.96 ns 475.30 ns] + +trunc_f64_f16/compiler-builtins + time: [1.2729 µs 1.2738 µs 1.2749 µs] +Found 7 outliers among 100 measurements (7.00%) + 2 (2.00%) high mild + 5 (5.00%) high severe +trunc_f64_f16/assembly (aarch64 unix) + time: [455.91 ns 456.61 ns 457.33 ns] +Found 12 outliers among 100 measurements (12.00%) + 1 (1.00%) low severe + 2 (2.00%) low mild + 6 (6.00%) high mild + 3 (3.00%) high severe + +trunc_f64_f32/compiler-builtins + time: [1.2240 µs 1.2325 µs 1.2410 µs] +Found 17 outliers among 100 measurements (17.00%) + 4 (4.00%) low mild + 2 (2.00%) high mild + 11 (11.00%) high severe +trunc_f64_f32/system time: [1.2784 µs 1.2835 µs 1.2884 µs] +Found 10 outliers among 100 measurements (10.00%) + 6 (6.00%) low severe + 1 (1.00%) low mild + 2 (2.00%) high mild + 1 (1.00%) high severe +trunc_f64_f32/assembly (aarch64 unix) + time: [455.64 ns 456.08 ns 456.58 ns] +Found 18 outliers among 100 measurements (18.00%) + 3 (3.00%) low severe + 4 (4.00%) low mild + 8 (8.00%) high mild + 3 (3.00%) high severe + +trunc_f128_f16/compiler-builtins + time: [1.2563 µs 1.2666 µs 1.2776 µs] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild + +trunc_f128_f32/compiler-builtins + time: [1.2459 µs 1.2482 µs 1.2507 µs] +Found 6 outliers among 100 measurements (6.00%) + 2 (2.00%) low mild + 2 (2.00%) high mild + 2 (2.00%) high severe + +trunc_f128_f64/compiler-builtins + time: [1.2821 µs 1.3047 µs 1.3452 µs] +Found 8 outliers among 100 measurements (8.00%) + 4 (4.00%) low severe + 1 (1.00%) low mild + 2 (2.00%) high mild + 1 (1.00%) high severe + + +running 52 tests +test memcmp_builtin_1048576 ... bench: 20,975.52 ns/iter (+/- 239.69) = 49991 MB/s +test memcmp_builtin_16 ... bench: 1.60 ns/iter (+/- 0.05) = 16000 MB/s +test memcmp_builtin_32 ... bench: 1.61 ns/iter (+/- 0.03) = 32000 MB/s +test memcmp_builtin_4096 ... 
bench: 95.84 ns/iter (+/- 2.82) = 43115 MB/s +test memcmp_builtin_64 ... bench: 2.39 ns/iter (+/- 0.09) = 32000 MB/s +test memcmp_builtin_8 ... bench: 1.60 ns/iter (+/- 0.04) = 8000 MB/s +test memcmp_builtin_unaligned_1048575 ... bench: 22,060.00 ns/iter (+/- 873.55) = 47532 MB/s +test memcmp_builtin_unaligned_15 ... bench: 3.19 ns/iter (+/- 0.02) = 5333 MB/s +test memcmp_builtin_unaligned_31 ... bench: 1.61 ns/iter (+/- 0.01) = 32000 MB/s +test memcmp_builtin_unaligned_4095 ... bench: 96.63 ns/iter (+/- 4.58) = 42666 MB/s +test memcmp_builtin_unaligned_63 ... bench: 2.40 ns/iter (+/- 0.11) = 32000 MB/s +test memcmp_builtin_unaligned_7 ... bench: 3.37 ns/iter (+/- 0.05) = 2666 MB/s +test memcmp_rust_1048576 ... bench: 309,647.23 ns/iter (+/- 6,077.35) = 3386 MB/s +test memcmp_rust_16 ... bench: 5.66 ns/iter (+/- 0.30) = 3200 MB/s +test memcmp_rust_32 ... bench: 10.47 ns/iter (+/- 0.14) = 3200 MB/s +test memcmp_rust_4096 ... bench: 1,124.34 ns/iter (+/- 36.92) = 3644 MB/s +test memcmp_rust_64 ... bench: 19.90 ns/iter (+/- 0.36) = 3368 MB/s +test memcmp_rust_8 ... bench: 3.46 ns/iter (+/- 0.11) = 2666 MB/s +test memcmp_rust_unaligned_1048575 ... bench: 308,613.87 ns/iter (+/- 6,613.18) = 3397 MB/s +test memcmp_rust_unaligned_15 ... bench: 5.35 ns/iter (+/- 0.05) = 3200 MB/s +test memcmp_rust_unaligned_31 ... bench: 9.94 ns/iter (+/- 0.06) = 3555 MB/s +test memcmp_rust_unaligned_4095 ... bench: 1,120.06 ns/iter (+/- 5.03) = 3657 MB/s +test memcmp_rust_unaligned_63 ... bench: 19.64 ns/iter (+/- 0.82) = 3368 MB/s +test memcmp_rust_unaligned_7 ... bench: 3.22 ns/iter (+/- 0.10) = 2666 MB/s +test memcpy_builtin_1048576 ... bench: 12,538.05 ns/iter (+/- 354.79) = 83631 MB/s +test memcpy_builtin_1048576_misalign ... bench: 30,092.56 ns/iter (+/- 8,064.04) = 34845 MB/s +test memcpy_builtin_1048576_offset ... bench: 12,538.36 ns/iter (+/- 359.04) = 83631 MB/s +test memcpy_builtin_4096 ... bench: 44.24 ns/iter (+/- 6.80) = 93090 MB/s +test memcpy_builtin_4096_misalign ... 
bench: 45.34 ns/iter (+/- 2.13) = 91022 MB/s +test memcpy_builtin_4096_offset ... bench: 44.71 ns/iter (+/- 0.61) = 93090 MB/s +test memcpy_rust_1048576 ... bench: 17,943.33 ns/iter (+/- 243.18) = 58439 MB/s +test memcpy_rust_1048576_misalign ... bench: 15,004.68 ns/iter (+/- 3,978.65) = 69886 MB/s +test memcpy_rust_1048576_offset ... bench: 14,722.06 ns/iter (+/- 479.54) = 71225 MB/s +test memcpy_rust_4096 ... bench: 44.91 ns/iter (+/- 4.62) = 93090 MB/s +test memcpy_rust_4096_misalign ... bench: 76.21 ns/iter (+/- 8.21) = 53894 MB/s +test memcpy_rust_4096_offset ... bench: 76.27 ns/iter (+/- 4.69) = 53894 MB/s +test memmove_builtin_1048576 ... bench: 18,644.50 ns/iter (+/- 379.84) = 56242 MB/s +test memmove_builtin_1048576_misalign ... bench: 18,947.70 ns/iter (+/- 1,226.26) = 55342 MB/s +test memmove_builtin_4096 ... bench: 44.21 ns/iter (+/- 0.79) = 93090 MB/s +test memmove_builtin_4096_misalign ... bench: 47.21 ns/iter (+/- 3.12) = 87148 MB/s +test memmove_rust_1048576 ... bench: 34,813.33 ns/iter (+/- 3,637.47) = 30120 MB/s +test memmove_rust_1048576_misalign ... bench: 35,067.19 ns/iter (+/- 1,699.63) = 29902 MB/s +test memmove_rust_4096 ... bench: 148.69 ns/iter (+/- 1.31) = 27675 MB/s +test memmove_rust_4096_misalign ... bench: 153.81 ns/iter (+/- 1.71) = 26771 MB/s +test memset_builtin_1048576 ... bench: 15,704.12 ns/iter (+/- 12,113.86) = 66771 MB/s +test memset_builtin_1048576_offset ... bench: 17,894.23 ns/iter (+/- 175.12) = 58599 MB/s +test memset_builtin_4096 ... bench: 39.95 ns/iter (+/- 0.19) = 105025 MB/s +test memset_builtin_4096_offset ... bench: 40.48 ns/iter (+/- 3.11) = 102400 MB/s +test memset_rust_1048576 ... bench: 10,600.66 ns/iter (+/- 1,559.93) = 98922 MB/s +test memset_rust_1048576_offset ... bench: 14,810.85 ns/iter (+/- 575.27) = 70801 MB/s +test memset_rust_4096 ... bench: 37.91 ns/iter (+/- 2.77) = 110702 MB/s +test memset_rust_4096_offset ... bench: 59.99 ns/iter (+/- 10.45) = 69423 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 52 measured; 0 filtered out; finished in 97.74s + diff --git a/testcrate/benches/float_add.rs b/testcrate/benches/float_add.rs new file mode 100644 index 000000000..eef1ecc57 --- /dev/null +++ b/testcrate/benches/float_add.rs @@ -0,0 +1,81 @@ +#![feature(f128)] + +use compiler_builtins::float::add; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: add_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: add::__addsf3, + sys_fn: __addsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "addss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fadd {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: add_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: add::__adddf3, + sys_fn: __adddf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "addsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fadd {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! 
{ + name: add_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: add::__addtf3, + crate_fn_ppc: add::__addkf3, + sys_fn: __addtf3, + sys_fn_ppc: __addkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +criterion_group!(float_add, add_f32, add_f64, add_f128); +criterion_main!(float_add); diff --git a/testcrate/benches/float_cmp.rs b/testcrate/benches/float_cmp.rs new file mode 100644 index 000000000..641eb0ac5 --- /dev/null +++ b/testcrate/benches/float_cmp.rs @@ -0,0 +1,202 @@ +#![feature(f128)] + +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +use compiler_builtins::float::cmp; + +/// `gt` symbols are allowed to return differing results, they just get compared +/// to 0. +fn gt_res_eq(a: i32, b: i32) -> bool { + let a_lt_0 = a <= 0; + let b_lt_0 = b <= 0; + (a_lt_0 && b_lt_0) || (!a_lt_0 && !b_lt_0) +} + +float_bench! { + name: cmp_f32_gt, + sig: (a: f32, b: f32) -> i32, + crate_fn: cmp::__gtsf2, + sys_fn: __gtsf2, + sys_available: all(), + output_eq: gt_res_eq, + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomiss {a}, {b}", + "seta {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:s}, {b:s}", + "cset {ret:w}, gt", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem,nostack), + ); + + ret + }; + ], +} + +float_bench! 
{ + name: cmp_f32_unord, + sig: (a: f32, b: f32) -> i32, + crate_fn: cmp::__unordsf2, + sys_fn: __unordsf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomiss {a}, {b}", + "setp {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:s}, {b:s}", + "cset {ret:w}, vs", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + ], +} + +float_bench! { + name: cmp_f64_gt, + sig: (a: f64, b: f64) -> i32, + crate_fn: cmp::__gtdf2, + sys_fn: __gtdf2, + sys_available: all(), + output_eq: gt_res_eq, + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomisd {a}, {b}", + "seta {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:d}, {b:d}", + "cset {ret:w}, gt", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + ], +} + +float_bench! { + name: cmp_f64_unord, + sig: (a: f64, b: f64) -> i32, + crate_fn: cmp::__unorddf2, + sys_fn: __unorddf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomisd {a}, {b}", + "setp {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:d}, {b:d}", + "cset {ret:w}, vs", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + ], +} + +float_bench! 
{ + name: cmp_f128_gt, + sig: (a: f128, b: f128) -> i32, + crate_fn: cmp::__gttf2, + crate_fn_ppc: cmp::__gtkf2, + sys_fn: __gttf2, + sys_fn_ppc: __gtkf2, + sys_available: not(feature = "no-sys-f128"), + output_eq: gt_res_eq, + asm: [] +} + +float_bench! { + name: cmp_f128_unord, + sig: (a: f128, b: f128) -> i32, + crate_fn: cmp::__unordtf2, + crate_fn_ppc: cmp::__unordkf2, + sys_fn: __unordtf2, + sys_fn_ppc: __unordkf2, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +criterion_group!( + float_cmp, + cmp_f32_gt, + cmp_f32_unord, + cmp_f64_gt, + cmp_f64_unord, + cmp_f128_gt, + cmp_f128_unord +); +criterion_main!(float_cmp); diff --git a/testcrate/benches/float_conv.rs b/testcrate/benches/float_conv.rs new file mode 100644 index 000000000..bbd3a0685 --- /dev/null +++ b/testcrate/benches/float_conv.rs @@ -0,0 +1,547 @@ +#![feature(f128)] +#![allow(improper_ctypes)] + +use compiler_builtins::float::conv; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +/* unsigned int -> float */ + +float_bench! { + name: conv_u32_f32, + sig: (a: u32) -> f32, + crate_fn: conv::__floatunsisf, + sys_fn: __floatunsisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "mov {tmp:e}, {a:e}", + "cvtsi2ss {ret}, {tmp}", + a = in(reg) a, + tmp = out(reg) _, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "ucvtf {ret:s}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! 
{ + name: conv_u32_f64, + sig: (a: u32) -> f64, + crate_fn: conv::__floatunsidf, + sys_fn: __floatunsidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f64; + asm!( + "mov {tmp:e}, {a:e}", + "cvtsi2sd {ret}, {tmp}", + a = in(reg) a, + tmp = out(reg) _, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "ucvtf {ret:d}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_u64_f32, + sig: (a: u64) -> f32, + crate_fn: conv::__floatundisf, + sys_fn: __floatundisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "ucvtf {ret:s}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_u64_f64, + sig: (a: u64) -> f64, + crate_fn: conv::__floatundidf, + sys_fn: __floatundidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "ucvtf {ret:d}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_u128_f32, + sig: (a: u128) -> f32, + crate_fn: conv::__floatuntisf, + sys_fn: __floatuntisf, + sys_available: all(), + asm: [] +} + +float_bench! { + name: conv_u128_f64, + sig: (a: u128) -> f64, + crate_fn: conv::__floatuntidf, + sys_fn: __floatuntidf, + sys_available: all(), + asm: [] +} + +/* signed int -> float */ + +float_bench! 
{ + name: conv_i32_f32, + sig: (a: i32) -> f32, + crate_fn: conv::__floatsisf, + sys_fn: __floatsisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "cvtsi2ss {ret}, {a:e}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "scvtf {ret:s}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_i32_f64, + sig: (a: i32) -> f64, + crate_fn: conv::__floatsidf, + sys_fn: __floatsidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f64; + asm!( + "cvtsi2sd {ret}, {a:e}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "scvtf {ret:d}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_i64_f32, + sig: (a: i64) -> f32, + crate_fn: conv::__floatdisf, + sys_fn: __floatdisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "cvtsi2ss {ret}, {a:r}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "scvtf {ret:s}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! 
{ + name: conv_i64_f64, + sig: (a: i64) -> f64, + crate_fn: conv::__floatdidf, + sys_fn: __floatdidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f64; + asm!( + "cvtsi2sd {ret}, {a:r}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "scvtf {ret:d}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_i128_f32, + sig: (a: i128) -> f32, + crate_fn: conv::__floattisf, + sys_fn: __floattisf, + sys_available: all(), + asm: [] +} + +float_bench! { + name: conv_i128_f64, + sig: (a: i128) -> f64, + crate_fn: conv::__floattidf, + sys_fn: __floattidf, + sys_available: all(), + asm: [] +} + +/* float -> unsigned int */ + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_u32, + sig: (a: f32) -> u32, + crate_fn: conv::__fixunssfsi, + sys_fn: __fixunssfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u32; + asm!( + "fcvtzu {ret:w}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_u64, + sig: (a: f32) -> u64, + crate_fn: conv::__fixunssfdi, + sys_fn: __fixunssfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u64; + asm!( + "fcvtzu {ret:x}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_u128, + sig: (a: f32) -> u128, + crate_fn: conv::__fixunssfti, + sys_fn: __fixunssfti, + sys_available: all(), + asm: [] +} + +float_bench! 
{ + name: conv_f64_u32, + sig: (a: f64) -> u32, + crate_fn: conv::__fixunsdfsi, + sys_fn: __fixunsdfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u32; + asm!( + "fcvtzu {ret:w}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_u64, + sig: (a: f64) -> u64, + crate_fn: conv::__fixunsdfdi, + sys_fn: __fixunsdfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u64; + asm!( + "fcvtzu {ret:x}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_u128, + sig: (a: f64) -> u128, + crate_fn: conv::__fixunsdfti, + sys_fn: __fixunsdfti, + sys_available: all(), + asm: [] +} + +/* float -> signed int */ + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_i32, + sig: (a: f32) -> i32, + crate_fn: conv::__fixsfsi, + sys_fn: __fixsfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcvtzs {ret:w}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_i64, + sig: (a: f32) -> i64, + crate_fn: conv::__fixsfdi, + sys_fn: __fixsfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i64; + asm!( + "fcvtzs {ret:x}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_i128, + sig: (a: f32) -> i128, + crate_fn: conv::__fixsfti, + sys_fn: __fixsfti, + sys_available: all(), + asm: [] +} + +float_bench! 
{ + name: conv_f64_i32, + sig: (a: f64) -> i32, + crate_fn: conv::__fixdfsi, + sys_fn: __fixdfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcvtzs {ret:w}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_i64, + sig: (a: f64) -> i64, + crate_fn: conv::__fixdfdi, + sys_fn: __fixdfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i64; + asm!( + "fcvtzs {ret:x}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_i128, + sig: (a: f64) -> i128, + crate_fn: conv::__fixdfti, + sys_fn: __fixdfti, + sys_available: all(), + asm: [] +} + +criterion_group!( + float_conv, + conv_u32_f32, + conv_u32_f64, + conv_u64_f32, + conv_u64_f64, + conv_u128_f32, + conv_u128_f64, + conv_i32_f32, + conv_i32_f64, + conv_i64_f32, + conv_i64_f64, + conv_i128_f32, + conv_i128_f64, + conv_f64_u32, + conv_f64_u64, + conv_f64_u128, + conv_f64_i32, + conv_f64_i64, + conv_f64_i128, +); + +// FIXME: ppc64le has a sporadic overflow panic in the crate functions +// +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +criterion_group!( + float_conv_not_ppc64le, + conv_f32_u32, + conv_f32_u64, + conv_f32_u128, + conv_f32_i32, + conv_f32_i64, + conv_f32_i128, +); + +#[cfg(all(target_arch = "powerpc64", target_endian = "little"))] +criterion_main!(float_conv); + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +criterion_main!(float_conv, float_conv_not_ppc64le); diff --git a/testcrate/benches/float_div.rs b/testcrate/benches/float_div.rs new file mode 100644 index 000000000..e679f8ccc --- /dev/null +++ b/testcrate/benches/float_div.rs @@ -0,0 +1,70 @@ +#![feature(f128)] + +use compiler_builtins::float::div; +use criterion::{criterion_group, criterion_main, Criterion}; +use 
testcrate::float_bench; + +float_bench! { + name: div_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: div::__divsf3, + sys_fn: __divsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "divss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fdiv {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: div_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: div::__divdf3, + sys_fn: __divdf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "divsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fdiv {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +criterion_group!(float_div, div_f32, div_f64); +criterion_main!(float_div); diff --git a/testcrate/benches/float_extend.rs b/testcrate/benches/float_extend.rs new file mode 100644 index 000000000..9bd8009e9 --- /dev/null +++ b/testcrate/benches/float_extend.rs @@ -0,0 +1,93 @@ +#![allow(unused_variables)] // "unused" f16 registers +#![feature(f128)] +#![feature(f16)] + +use compiler_builtins::float::extend; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: extend_f16_f32, + sig: (a: f16) -> f32, + crate_fn: extend::__extendhfsf2, + sys_fn: __extendhfsf2, + sys_available: not(feature = "no-sys-f16"), + asm: [ + #[cfg(target_arch = "aarch64")] { + // FIXME(f16_f128): remove `to_bits()` after f16 asm support (rust-lang/rust/#116909) + let ret: f32; + asm!( + "fcvt {ret:s}, {a:h}", + a = in(vreg) a.to_bits(), + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! 
{ + name: extend_f16_f128, + sig: (a: f16) -> f128, + crate_fn: extend::__extendhftf2, + crate_fn_ppc: extend::__extendhfkf2, + sys_fn: __extendhftf2, + sys_fn_ppc: __extendhfkf2, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [], +} + +float_bench! { + name: extend_f32_f64, + sig: (a: f32) -> f64, + crate_fn: extend::__extendsfdf2, + sys_fn: __extendsfdf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "fcvt {ret:d}, {a:s}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: extend_f32_f128, + sig: (a: f32) -> f128, + crate_fn: extend::__extendsftf2, + crate_fn_ppc: extend::__extendsfkf2, + sys_fn: __extendsftf2, + sys_fn_ppc: __extendsfkf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +float_bench! { + name: extend_f64_f128, + sig: (a: f64) -> f128, + crate_fn: extend::__extenddftf2, + crate_fn_ppc: extend::__extenddfkf2, + sys_fn: __extenddftf2, + sys_fn_ppc: __extenddfkf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +criterion_group!( + float_extend, + extend_f16_f32, + extend_f16_f128, + extend_f32_f64, + extend_f32_f128, + extend_f64_f128, +); +criterion_main!(float_extend); diff --git a/testcrate/benches/float_mul.rs b/testcrate/benches/float_mul.rs new file mode 100644 index 000000000..efa32b285 --- /dev/null +++ b/testcrate/benches/float_mul.rs @@ -0,0 +1,81 @@ +#![feature(f128)] + +use compiler_builtins::float::mul; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! 
{ + name: mul_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: mul::__mulsf3, + sys_fn: __mulsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "mulss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fmul {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: mul_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: mul::__muldf3, + sys_fn: __muldf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "mulsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fmul {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: mul_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: mul::__multf3, + crate_fn_ppc: mul::__mulkf3, + sys_fn: __multf3, + sys_fn_ppc: __mulkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +criterion_group!(float_mul, mul_f32, mul_f64, mul_f128); +criterion_main!(float_mul); diff --git a/testcrate/benches/float_pow.rs b/testcrate/benches/float_pow.rs new file mode 100644 index 000000000..252f74012 --- /dev/null +++ b/testcrate/benches/float_pow.rs @@ -0,0 +1,24 @@ +use compiler_builtins::float::pow; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: powi_f32, + sig: (a: f32, b: i32) -> f32, + crate_fn: pow::__powisf2, + sys_fn: __powisf2, + sys_available: all(), + asm: [], +} + +float_bench! 
{ + name: powi_f64, + sig: (a: f64, b: i32) -> f64, + crate_fn: pow::__powidf2, + sys_fn: __powidf2, + sys_available: all(), + asm: [], +} + +criterion_group!(float_add, powi_f32, powi_f64); +criterion_main!(float_add); diff --git a/testcrate/benches/float_sub.rs b/testcrate/benches/float_sub.rs new file mode 100644 index 000000000..6d87604aa --- /dev/null +++ b/testcrate/benches/float_sub.rs @@ -0,0 +1,81 @@ +#![feature(f128)] + +use compiler_builtins::float::sub; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: sub_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: sub::__subsf3, + sys_fn: __subsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "subss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fsub {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: sub_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: sub::__subdf3, + sys_fn: __subdf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "subsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fsub {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! 
{ + name: sub_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: sub::__subtf3, + crate_fn_ppc: sub::__subkf3, + sys_fn: __subtf3, + sys_fn_ppc: __subkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +criterion_group!(float_sub, sub_f32, sub_f64, sub_f128); +criterion_main!(float_sub); diff --git a/testcrate/benches/float_trunc.rs b/testcrate/benches/float_trunc.rs new file mode 100644 index 000000000..1553dacee --- /dev/null +++ b/testcrate/benches/float_trunc.rs @@ -0,0 +1,127 @@ +#![feature(f128)] +#![feature(f16)] + +use compiler_builtins::float::trunc; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: trunc_f32_f16, + sig: (a: f32) -> f16, + crate_fn: trunc::__truncsfhf2, + sys_fn: __truncsfhf2, + sys_available: not(feature = "no-sys-f16"), + asm: [ + #[cfg(target_arch = "aarch64")] { + // FIXME(f16_f128): remove `from_bits()` after f16 asm support (rust-lang/rust/#116909) + let ret: u16; + asm!( + "fcvt {ret:h}, {a:s}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + f16::from_bits(ret) + }; + ], +} + +float_bench! { + name: trunc_f64_f16, + sig: (a: f64) -> f16, + crate_fn: trunc::__truncdfhf2, + sys_fn: __truncdfhf2, + sys_available: not(feature = "no-sys-f128"), + asm: [ + #[cfg(target_arch = "aarch64")] { + // FIXME(f16_f128): remove `from_bits()` after f16 asm support (rust-lang/rust/#116909) + let ret: u16; + asm!( + "fcvt {ret:h}, {a:d}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + f16::from_bits(ret) + }; + ], +} + +float_bench! 
{ + name: trunc_f64_f32, + sig: (a: f64) -> f32, + crate_fn: trunc::__truncdfsf2, + sys_fn: __truncdfsf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "cvtsd2ss {ret}, {a}", + a = in(xmm_reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "fcvt {ret:s}, {a:d}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: trunc_f128_f16, + sig: (a: f128) -> f16, + crate_fn: trunc::__trunctfhf2, + crate_fn_ppc: trunc::__trunckfhf2, + sys_fn: __trunctfhf2, + sys_fn_ppc: __trunckfhf2, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [], +} + +float_bench! { + name: trunc_f128_f32, + sig: (a: f128) -> f32, + crate_fn: trunc::__trunctfsf2, + crate_fn_ppc: trunc::__trunckfsf2, + sys_fn: __trunctfsf2, + sys_fn_ppc: __trunckfsf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +float_bench! 
{ + name: trunc_f128_f64, + sig: (a: f128) -> f64, + crate_fn: trunc::__trunctfdf2, + crate_fn_ppc: trunc::__trunckfdf2, + sys_fn: __trunctfdf2, + sys_fn_ppc: __trunckfdf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +criterion_group!( + float_trunc, + trunc_f32_f16, + trunc_f64_f16, + trunc_f64_f32, + trunc_f128_f16, + trunc_f128_f32, + trunc_f128_f64, +); +criterion_main!(float_trunc); diff --git a/testcrate/build.rs b/testcrate/build.rs index 1dad6c5e6..cae83e1fc 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -5,6 +5,8 @@ use std::{collections::HashSet, env}; enum Feature { NoSysF128, NoSysF128IntConvert, + NoSysF16, + NoSysF16F128Convert, } fn main() { @@ -31,6 +33,7 @@ fn main() { { features.insert(Feature::NoSysF128); features.insert(Feature::NoSysF128IntConvert); + features.insert(Feature::NoSysF16F128Convert); } if target.starts_with("i586") || target.starts_with("i686") { @@ -38,6 +41,17 @@ fn main() { features.insert(Feature::NoSysF128IntConvert); } + if target.contains("-unknown-linux-") { + // No `__extendhftf2` on x86, no `__trunctfhf2` on aarch64 + features.insert(Feature::NoSysF16F128Convert); + } + + if target.starts_with("wasm32-") { + // Linking says "error: function signature mismatch: __extendhfsf2" and seems to + // think the signature is either `(i32) -> f32` or `(f32) -> f32` + features.insert(Feature::NoSysF16); + } + for feature in features { let (name, warning) = match feature { Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"), @@ -45,6 +59,11 @@ fn main() { "no-sys-f128-int-convert", "using apfloat fallback for f128 to int conversions", ), + Feature::NoSysF16F128Convert => ( + "no-sys-f16-f128-convert", + "skipping using apfloat fallback for f16 <-> f128 conversions", + ), + Feature::NoSysF16 => ("no-sys-f16", "using apfloat fallback for f16"), }; println!("cargo:warning={warning}"); println!("cargo:rustc-cfg=feature=\"{name}\""); diff --git a/testcrate/src/bench.rs 
b/testcrate/src/bench.rs new file mode 100644 index 000000000..1374d7b4f --- /dev/null +++ b/testcrate/src/bench.rs @@ -0,0 +1,348 @@ +use core::cell::RefCell; + +use alloc::vec::Vec; +use compiler_builtins::float::Float; + +/// Fuzz with this many items to ensure the functions are equal +pub const CHECK_ITER_ITEMS: u32 = 10_000; +/// Benchmark with this many items to get a variety +pub const BENCH_ITER_ITEMS: u32 = 500; + +/// Still run benchmarks/tests but don't check correctness between compiler-builtins and +/// builtin system functions +pub fn skip_sys_checks(test_name: &str) -> bool { + const ALWAYS_SKIPPED: &[&str] = &[ + // FIXME(f16_f128): system symbols have incorrect results + // + "extend_f16_f32", + "trunc_f32_f16", + "trunc_f64_f16", + // FIXME(f16_f128): rounding error + // + "mul_f128", + ]; + + // FIXME(f16_f128): error on LE ppc64. There are more tests that are cfg-ed out completely + // in their benchmark modules due to runtime panics. + // + const PPC64LE_SKIPPED: &[&str] = &["extend_f32_f128"]; + + // FIXME(f16_f128): system symbols have incorrect results + // + const X86_NO_SSE_SKIPPED: &[&str] = &["add_f128", "sub_f128", "powi_f32", "powi_f64"]; + + // FIXME(llvm): system symbols have incorrect results on Windows + // + const WINDOWS_SKIPPED: &[&str] = &[ + "conv_f32_u128", + "conv_f32_i128", + "conv_f64_u128", + "conv_f64_i128", + ]; + + if cfg!(target_arch = "arm") { + // The Arm symbols need a different ABI that our macro doesn't handle, just skip it + return true; + } + + if ALWAYS_SKIPPED.contains(&test_name) { + return true; + } + + if cfg!(all(target_arch = "powerpc64", target_endian = "little")) + && PPC64LE_SKIPPED.contains(&test_name) + { + return true; + } + + if cfg!(all(target_arch = "x86", not(target_feature = "sse"))) + && X86_NO_SSE_SKIPPED.contains(&test_name) + { + return true; + } + + if cfg!(target_family = "windows") && WINDOWS_SKIPPED.contains(&test_name) { + return true; + } + + false +} + +/// Still run
benchmarks/tests but don't check correctness between compiler-builtins and +/// assembly functions +pub fn skip_asm_checks(test_name: &str) -> bool { + // FIXME(f16_f128): rounding error + // + const SKIPPED: &[&str] = &["mul_f32", "mul_f64"]; + + SKIPPED.contains(&test_name) +} + +/// Create a comparison of the system symbol, compiler_builtins, and optionally handwritten +/// assembly. +#[macro_export] +macro_rules! float_bench { + ( + // Name of this benchmark + name: $name:ident, + // The function signature to be tested + sig: ($($arg:ident: $arg_ty:ty),*) -> $ret_ty:ty, + // Path to the function in compiler_builtins + crate_fn: $crate_fn:path, + // Optional alias on ppc + $( crate_fn_ppc: $crate_fn_ppc:path, )? + // Name of the system symbol + sys_fn: $sys_fn:ident, + // Optional alias on ppc + $( sys_fn_ppc: $sys_fn_ppc:path, )? + // Meta saying whether the system symbol is available + sys_available: $sys_available:meta, + // An optional function to validate that the results of two functions are equal; if + // omitted, `$ret_ty::check_eq` is used + $( output_eq: $output_eq:expr, )? + // Assembly implementations, if any. + asm: [ + $( + #[cfg($asm_meta:meta)] { + $($asm_tt:tt)* + } + );* + $(;)? + ] + $(,)? + ) => {paste::paste! { + #[cfg($sys_available)] + extern "C" { + /// Binding for the system function + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty; + + + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + float_bench! { @coalesce_fn $($sys_fn_ppc)?
=> + fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty; + } + } + + fn $name(c: &mut Criterion) { + use core::hint::black_box; + use compiler_builtins::float::Float; + use $crate::bench::TestIO; + + #[inline(never)] // equalize with external calls + fn crate_fn($($arg: $arg_ty),*) -> $ret_ty { + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + let target_crate_fn = $crate_fn; + + // On PPC, use an alias if specified + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + let target_crate_fn = float_bench!(@coalesce $($crate_fn_ppc)?, $crate_fn); + + target_crate_fn( $($arg),* ) + } + + #[inline(always)] // already a branch + #[cfg($sys_available)] + fn sys_fn($($arg: $arg_ty),*) -> $ret_ty { + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + let target_sys_fn = $sys_fn; + + // On PPC, use an alias if specified + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + let target_sys_fn = float_bench!(@coalesce $($sys_fn_ppc)?, $sys_fn); + + unsafe { target_sys_fn( $($arg),* ) } + } + + #[inline(never)] // equalize with external calls + #[cfg(any( $($asm_meta),* ))] + fn asm_fn($(mut $arg: $arg_ty),*) -> $ret_ty { + use core::arch::asm; + $( + #[cfg($asm_meta)] + unsafe { $($asm_tt)* } + )* + } + + let testvec = <($($arg_ty),*)>::make_testvec($crate::bench::CHECK_ITER_ITEMS); + let benchvec = <($($arg_ty),*)>::make_testvec($crate::bench::BENCH_ITER_ITEMS); + let test_name = stringify!($name); + let check_eq = float_bench!(@coalesce $($output_eq)?, $ret_ty::check_eq); + + // Verify math lines up. We run the crate functions even if we don't validate the + // output here to make sure there are no panics or crashes. 
+ + #[cfg($sys_available)] + for ($($arg),*) in testvec.iter().copied() { + let crate_res = crate_fn($($arg),*); + let sys_res = sys_fn($($arg),*); + + if $crate::bench::skip_sys_checks(test_name) { + continue; + } + + assert!( + check_eq(crate_res, sys_res), + "{test_name}{:?}: crate: {crate_res:?}, sys: {sys_res:?}", + ($($arg),* ,) + ); + } + + #[cfg(any( $($asm_meta),* ))] + { + for ($($arg),*) in testvec.iter().copied() { + let crate_res = crate_fn($($arg),*); + let asm_res = asm_fn($($arg),*); + + if $crate::bench::skip_asm_checks(test_name) { + continue; + } + + assert!( + check_eq(crate_res, asm_res), + "{test_name}{:?}: crate: {crate_res:?}, asm: {asm_res:?}", + ($($arg),* ,) + ); + } + } + + let mut group = c.benchmark_group(test_name); + group.bench_function("compiler-builtins", |b| b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(crate_fn( $(black_box($arg)),* )); + } + })); + + #[cfg($sys_available)] + group.bench_function("system", |b| b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(sys_fn( $(black_box($arg)),* )); + } + })); + + #[cfg(any( $($asm_meta),* ))] + group.bench_function(&format!( + "assembly ({} {})", std::env::consts::ARCH, std::env::consts::FAMILY + ), |b| b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(asm_fn( $(black_box($arg)),* )); + } + })); + + group.finish(); + } + }}; + + // Allow overriding a default + (@coalesce $specified:expr, $default:expr) => { $specified }; + (@coalesce, $default:expr) => { $default }; + + // Allow overriding a function name + (@coalesce_fn $specified:ident => fn $default_name:ident $($tt:tt)+) => { + fn $specified $($tt)+ + }; + (@coalesce_fn => fn $default_name:ident $($tt:tt)+) => { + fn $default_name $($tt)+ + }; +} + +/// A type used as either an input or output to/from a benchmark function. +pub trait TestIO: Sized { + fn make_testvec(len: u32) -> Vec; + fn check_eq(a: Self, b: Self) -> bool; +} + +macro_rules! 
impl_testio { + (float $($f_ty:ty),+) => {$( + impl TestIO for $f_ty { + fn make_testvec(len: u32) -> Vec { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_float(len, |a| ret.borrow_mut().push(a)); + ret.into_inner() + } + + fn check_eq(a: Self, b: Self) -> bool { + Float::eq_repr(a, b) + } + } + + impl TestIO for ($f_ty, $f_ty) { + fn make_testvec(len: u32) -> Vec { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_float_2(len, |a, b| ret.borrow_mut().push((a, b))); + ret.into_inner() + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + )*}; + + (int $($i_ty:ty),+) => {$( + impl TestIO for $i_ty { + fn make_testvec(len: u32) -> Vec { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz(len, |a| ret.borrow_mut().push(a)); + ret.into_inner() + } + + fn check_eq(a: Self, b: Self) -> bool { + a == b + } + } + + impl TestIO for ($i_ty, $i_ty) { + fn make_testvec(len: u32) -> Vec { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_2(len, |a, b| ret.borrow_mut().push((a, b))); + ret.into_inner() + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + )*}; + + ((float, int) ($f_ty:ty, $i_ty:ty)) => { + impl TestIO for ($f_ty, $i_ty) { + fn make_testvec(len: u32) -> Vec { + // refcell because fuzz_* takes a `Fn` + let ivec = RefCell::new(Vec::new()); + let fvec = RefCell::new(Vec::new()); + + crate::fuzz(len.isqrt(), |a| ivec.borrow_mut().push(a)); + crate::fuzz_float(len.isqrt(), |a| fvec.borrow_mut().push(a)); + + let mut ret = Vec::new(); + let ivec = ivec.into_inner(); + let fvec = fvec.into_inner(); + + for f in fvec { + for i in &ivec { + ret.push((f, *i)); + } + } + + ret + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + } +} + +#[cfg(not(feature = "no-f16-f128"))] +impl_testio!(float f16, f128); +impl_testio!(float f32, 
f64); +impl_testio!(int i16, i32, i64, i128); +impl_testio!(int u16, u32, u64, u128); +impl_testio!((float, int)(f32, i32)); +impl_testio!((float, int)(f64, i32)); diff --git a/testcrate/src/lib.rs b/testcrate/src/lib.rs index 5ee96ad27..f9b052528 100644 --- a/testcrate/src/lib.rs +++ b/testcrate/src/lib.rs @@ -13,6 +13,12 @@ //! Some floating point tests are disabled for specific architectures, because they do not have //! correct rounding. #![no_std] +#![cfg_attr(not(feature = "no-f16-f128"), feature(f128))] +#![cfg_attr(not(feature = "no-f16-f128"), feature(f16))] +#![feature(isqrt)] + +pub mod bench; +extern crate alloc; use compiler_builtins::float::Float; use compiler_builtins::int::{Int, MinInt}; From 9b066b61647d98fa4f858ad092c3b91087ffacb6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 24 May 2024 05:06:36 -0400 Subject: [PATCH 0765/1459] Enable cache for Cargo components of the build --- .github/workflows/main.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 72d441c4e..98d22c75a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -105,6 +105,9 @@ jobs: shell: bash - run: rustup target add ${{ matrix.target }} - run: rustup component add llvm-tools-preview + - uses: Swatinem/rust-cache@v2 + with: + key: ${{ matrix.target }} - name: Download compiler-rt reference sources run: | curl -L -o code.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/18.0-2024-02-13.tar.gz @@ -147,6 +150,7 @@ jobs: - name: Install nightly `clippy` run: | rustup set profile minimal && rustup default "nightly-$(curl -s https://rust-lang.github.io/rustup-components-history/x86_64-unknown-linux-gnu/clippy)" && rustup component add clippy + - uses: Swatinem/rust-cache@v2 - run: cargo clippy -- -D clippy::all success: From 9f3c865e2ff55d6a95857f8399f43df7976bdaeb Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 24 May 2024 05:23:06 -0400 Subject: [PATCH 0766/1459] 
Enable cache for Docker images --- .github/workflows/main.yml | 21 +++++++++++++++++++++ ci/run-docker.sh | 14 +++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 98d22c75a..9e89c3a98 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -108,6 +108,14 @@ jobs: - uses: Swatinem/rust-cache@v2 with: key: ${{ matrix.target }} + - name: Cache Docker layers + uses: actions/cache@v2 + if: matrix.os == 'ubuntu-latest' + with: + path: /tmp/.buildx-cache + key: ${{ matrix.target }}-buildx-${{ github.sha }} + restore-keys: ${{ matrix.target }}-buildx- + - name: Download compiler-rt reference sources run: | curl -L -o code.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/18.0-2024-02-13.tar.gz @@ -123,10 +131,23 @@ jobs: NO_STD: ${{ matrix.no_std }} TEST_VERBATIM: ${{ matrix.test_verbatim }} + # Configure buildx to use Docker layer caching + - uses: docker/setup-buildx-action@v3 + if: matrix.os == 'ubuntu-latest' + # Otherwise we use our docker containers to run builds - run: cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} if: matrix.os == 'ubuntu-latest' + # Workaround to keep Docker cache smaller + # https://github.com/docker/build-push-action/issues/252 + # https://github.com/moby/buildkit/issues/1896 + - name: Move Docker cache + if: matrix.os == 'ubuntu-latest' + run: | + rm -rf /tmp/.buildx-cache + mv /tmp/.buildx-cache-new /tmp/.buildx-cache + rustfmt: name: Rustfmt runs-on: ubuntu-latest diff --git a/ci/run-docker.sh b/ci/run-docker.sh index e5ff8a46b..14b1a32d7 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -41,7 +41,19 @@ run() { export RUST_COMPILER_RT_ROOT=./compiler-rt fi - docker build \ + if [ "$GITHUB_ACTIONS" = "true" ]; then + # Enable Docker image caching on GHA + + buildx="buildx" + build_args=( + "--cache-from" "type=local,src=/tmp/.buildx-cache" + "--cache-to" "type=local,dest=/tmp/.buildx-cache-new" + 
"${build_args[@]:-}" + "--load" + ) + fi + + docker "${buildx:-}" build \ -t "builtins-$target" \ ${build_args[@]:-} \ "ci/docker/$target" From 13f9ce3e9f2245ed310067399967c317bc28fee0 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 24 May 2024 15:20:31 -0400 Subject: [PATCH 0767/1459] Add caching for downloading compiler-rt --- .github/workflows/main.yml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9e89c3a98..38064543f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -4,6 +4,8 @@ on: [push, pull_request] env: RUSTDOCFLAGS: -Dwarnings RUSTFLAGS: -Dwarnings + RUST_LLVM_VERSION: 18.0-2024-02-13 + RUST_COMPILER_RT_ROOT: ./compiler-rt jobs: test: @@ -115,12 +117,18 @@ jobs: path: /tmp/.buildx-cache key: ${{ matrix.target }}-buildx-${{ github.sha }} restore-keys: ${{ matrix.target }}-buildx- - + + - name: Cache compiler-rt + id: cache-compiler-rt + uses: actions/cache@v4 + with: + path: compiler-rt + key: ${{ runner.os }}-compiler-rt-${{ env.RUST_LLVM_VERSION }} - name: Download compiler-rt reference sources + if: steps.cache-compiler-rt.outputs.cache-hit != 'true' run: | - curl -L -o code.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/18.0-2024-02-13.tar.gz - tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-18.0-2024-02-13/compiler-rt - echo RUST_COMPILER_RT_ROOT=./compiler-rt >> $GITHUB_ENV + curl -L -o code.tar.gz "https://github.com/rust-lang/llvm-project/archive/rustc/${RUST_LLVM_VERSION}.tar.gz" + tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-${RUST_LLVM_VERSION}/compiler-rt shell: bash # Non-linux tests just use our raw script From 9ba77d1583e6de5ab9cf7c9b82827ba8fcb9062f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 17 Jun 2024 19:38:55 -0500 Subject: [PATCH 0768/1459] Disable libm on x86 without sse2 In , symbols for the Rust port of libm were made always weakly available. 
This seems to be causing problems on platforms with ABI issues, as explained at . Disable Rust libm on x86 without sse2 to mitigate this. --- src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 40564178a..0d207a914 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,6 +47,9 @@ mod macros; pub mod float; pub mod int; +// Disabled on x86 without sse2 due to ABI issues +// +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse2"))))] pub mod math; pub mod mem; From 3032f496c56a524a7de756ec7af1a66186e5d7df Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 29 May 2024 03:38:57 -0500 Subject: [PATCH 0769/1459] Update the Ubuntu docker image to the latest version --- ci/docker/aarch64-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/arm-unknown-linux-gnueabi/Dockerfile | 2 +- ci/docker/arm-unknown-linux-gnueabihf/Dockerfile | 2 +- ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile | 2 +- ci/docker/i586-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/i686-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/mips-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile | 2 +- ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile | 2 +- ci/docker/mipsel-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/powerpc-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/powerpc64-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/thumbv6m-none-eabi/Dockerfile | 2 +- ci/docker/thumbv7em-none-eabi/Dockerfile | 2 +- ci/docker/thumbv7em-none-eabihf/Dockerfile | 2 +- ci/docker/thumbv7m-none-eabi/Dockerfile | 2 +- ci/docker/x86_64-unknown-linux-gnu/Dockerfile | 2 +- ci/run-docker.sh | 2 +- 19 files changed, 19 insertions(+), 19 deletions(-) diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index 5de76efc3..1aef14a96 100644 --- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ 
b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/ci/docker/arm-unknown-linux-gnueabi/Dockerfile index dc95da0f3..fc9803777 100644 --- a/ci/docker/arm-unknown-linux-gnueabi/Dockerfile +++ b/ci/docker/arm-unknown-linux-gnueabi/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile index 55e5e3d57..a127f67cb 100644 --- a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile +++ b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile index fd2ad18d1..67a3e51a9 100644 --- a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +++ b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/i586-unknown-linux-gnu/Dockerfile b/ci/docker/i586-unknown-linux-gnu/Dockerfile index f161ec767..15285d9bb 100644 --- a/ci/docker/i586-unknown-linux-gnu/Dockerfile +++ b/ci/docker/i586-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/i686-unknown-linux-gnu/Dockerfile b/ci/docker/i686-unknown-linux-gnu/Dockerfile index f161ec767..15285d9bb 100644 --- a/ci/docker/i686-unknown-linux-gnu/Dockerfile +++ 
b/ci/docker/i686-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/mips-unknown-linux-gnu/Dockerfile b/ci/docker/mips-unknown-linux-gnu/Dockerfile index 042dd4219..a47dd9f19 100644 --- a/ci/docker/mips-unknown-linux-gnu/Dockerfile +++ b/ci/docker/mips-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile index 45b3089c9..688aa1ab2 100644 --- a/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile +++ b/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile index bda6be1d6..27d032a14 100644 --- a/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile +++ b/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/mipsel-unknown-linux-gnu/Dockerfile b/ci/docker/mipsel-unknown-linux-gnu/Dockerfile index 702a26ec1..4d18a6edb 100644 --- a/ci/docker/mipsel-unknown-linux-gnu/Dockerfile +++ b/ci/docker/mipsel-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc-unknown-linux-gnu/Dockerfile index 6bae7cb3b..5225b833c 100644 --- a/ci/docker/powerpc-unknown-linux-gnu/Dockerfile +++ b/ci/docker/powerpc-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG 
IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile index 2c315e509..cbd78eac4 100644 --- a/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile index da8f9db60..bad064297 100644 --- a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile +++ b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/thumbv6m-none-eabi/Dockerfile b/ci/docker/thumbv6m-none-eabi/Dockerfile index d7256a9c5..f966b2b9f 100644 --- a/ci/docker/thumbv6m-none-eabi/Dockerfile +++ b/ci/docker/thumbv6m-none-eabi/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/thumbv7em-none-eabi/Dockerfile b/ci/docker/thumbv7em-none-eabi/Dockerfile index d7256a9c5..f966b2b9f 100644 --- a/ci/docker/thumbv7em-none-eabi/Dockerfile +++ b/ci/docker/thumbv7em-none-eabi/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/thumbv7em-none-eabihf/Dockerfile b/ci/docker/thumbv7em-none-eabihf/Dockerfile index d7256a9c5..f966b2b9f 100644 --- a/ci/docker/thumbv7em-none-eabihf/Dockerfile +++ b/ci/docker/thumbv7em-none-eabihf/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/thumbv7m-none-eabi/Dockerfile b/ci/docker/thumbv7m-none-eabi/Dockerfile index 
d7256a9c5..f966b2b9f 100644 --- a/ci/docker/thumbv7m-none-eabi/Dockerfile +++ b/ci/docker/thumbv7m-none-eabi/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index d495d5044..670c24397 100644 --- a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:18.04 +ARG IMAGE=ubuntu:24.04 FROM $IMAGE RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/ci/run-docker.sh b/ci/run-docker.sh index 14b1a32d7..aff356473 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -76,7 +76,7 @@ if [ "${1:-}" = "--help" ] || [ "$#" -gt 1 ]; then usage: ./ci/run-docker.sh [target] you can also set DOCKER_BASE_IMAGE to use something other than the default - ubuntu:18.04 (or rustlang/rust:nightly). + ubuntu:24.04 (or rustlang/rust:nightly). " exit fi From c7de914f589948629b36270fec664eaaacf1977c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 22 Jun 2024 06:05:25 -0400 Subject: [PATCH 0770/1459] Skip f128 tests on powerpc64le __addkf3 and __mulkf3 seem to hit a nondescript SIGILL. This is most likely just another Qemu limitation. --- testcrate/build.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/testcrate/build.rs b/testcrate/build.rs index cae83e1fc..f18bd90df 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -24,9 +24,10 @@ fn main() { // FIXME(llvm): There is an ABI incompatibility between GCC and Clang on 32-bit x86. // See . || target.starts_with("i686") - // 32-bit PowerPC gets code generated that Qemu cannot handle. See + // 32-bit PowerPC and 64-bit LE gets code generated that Qemu cannot handle. See // . 
|| target.starts_with("powerpc-") + || target.starts_with("powerpc64le-") // FIXME: We get different results from the builtin functions. See // . || target.starts_with("powerpc64-") From e1b7d8f5b70777f165a8e7f92ea78728654c6589 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 22 Jun 2024 04:20:48 -0500 Subject: [PATCH 0771/1459] Fix unset variables in the build script These were preventing building via Docker locally. --- ci/run-docker.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/run-docker.sh b/ci/run-docker.sh index aff356473..50ae9dc83 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -41,10 +41,10 @@ run() { export RUST_COMPILER_RT_ROOT=./compiler-rt fi - if [ "$GITHUB_ACTIONS" = "true" ]; then + if [ "${GITHUB_ACTIONS:-}" = "true" ]; then # Enable Docker image caching on GHA - buildx="buildx" + build_cmd=("buildx" "build") build_args=( "--cache-from" "type=local,src=/tmp/.buildx-cache" "--cache-to" "type=local,dest=/tmp/.buildx-cache-new" @@ -53,7 +53,7 @@ run() { ) fi - docker "${buildx:-}" build \ + docker ${build_cmd[@]:-build} \ -t "builtins-$target" \ ${build_args[@]:-} \ "ci/docker/$target" From 32a6047968991c28fea1cc32b4cf7615e04e6bb0 Mon Sep 17 00:00:00 2001 From: beetrees Date: Sun, 23 Jun 2024 19:19:25 +0100 Subject: [PATCH 0772/1459] Fix building on AVR --- src/float/sub.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/float/sub.rs b/src/float/sub.rs index 1492679f6..3ab46495d 100644 --- a/src/float/sub.rs +++ b/src/float/sub.rs @@ -1,18 +1,16 @@ -use crate::float::add::__adddf3; -use crate::float::add::__addsf3; use crate::float::Float; intrinsics! 
{ #[avr_skip] #[arm_aeabi_alias = __aeabi_fsub] pub extern "C" fn __subsf3(a: f32, b: f32) -> f32 { - __addsf3(a, f32::from_repr(b.repr() ^ f32::SIGN_MASK)) + crate::float::add::__addsf3(a, f32::from_repr(b.repr() ^ f32::SIGN_MASK)) } #[avr_skip] #[arm_aeabi_alias = __aeabi_dsub] pub extern "C" fn __subdf3(a: f64, b: f64) -> f64 { - __adddf3(a, f64::from_repr(b.repr() ^ f64::SIGN_MASK)) + crate::float::add::__adddf3(a, f64::from_repr(b.repr() ^ f64::SIGN_MASK)) } #[ppc_alias = __subkf3] From 5cd852d072b03294ac00f6d2fdc6e409882ba712 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Tue, 25 Jun 2024 22:51:05 +0100 Subject: [PATCH 0773/1459] Release 0.1.113 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4564ba9e5..ab4395159 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.112" +version = "0.1.113" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 101ba13a1a569998b9f9f76b501929b6654cee9a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 22 Jun 2024 13:47:33 -0500 Subject: [PATCH 0774/1459] Enable `f128 -> f16` tests on Linux Since updating the docker images in , it looks like `__extendhftf2` and `__trunctfhf2` are available on all 64-bit Linux platforms. 
--- testcrate/build.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/testcrate/build.rs b/testcrate/build.rs index f18bd90df..12c3e7d13 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -38,12 +38,9 @@ fn main() { } if target.starts_with("i586") || target.starts_with("i686") { - // 32-bit x86 seems to not have `__fixunstfti`, but does have everything else + // 32-bit x86 does not have `__fixunstfti`/`__fixtfti` but does have everything else features.insert(Feature::NoSysF128IntConvert); - } - - if target.contains("-unknown-linux-") { - // No `__extendhftf2` on x86, no `__trunctfhf2` on aarch64 + // FIXME: 32-bit x86 has a bug in `f128 -> f16` system libraries features.insert(Feature::NoSysF16F128Convert); } From 367ba35bb2fbc193c41dbb6681a131c0b20de0dd Mon Sep 17 00:00:00 2001 From: beetrees Date: Sun, 30 Jun 2024 22:02:41 +0100 Subject: [PATCH 0775/1459] Fix incorrect rounding with subnormal/zero results of float multiplication --- src/float/mul.rs | 19 +++++++------------ testcrate/tests/mul.rs | 13 +++++-------- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/src/float/mul.rs b/src/float/mul.rs index 007cc09a4..decf722e2 100644 --- a/src/float/mul.rs +++ b/src/float/mul.rs @@ -149,18 +149,13 @@ where } // Otherwise, shift the significand of the result so that the round - // bit is the high bit of productLo. - if shift < bits { - let sticky = product_low << (bits - shift); - product_low = product_high << (bits - shift) | product_low >> shift | sticky; - product_high >>= shift; - } else if shift < (2 * bits) { - let sticky = product_high << (2 * bits - shift) | product_low; - product_low = product_high >> (shift - bits) | sticky; - product_high = zero; - } else { - product_high = zero; - } + // bit is the high bit of `product_low`. + // Ensure one of the non-highest bits in `product_low` is set if the shifted out bit are + // not all zero so that the result is correctly rounded below. 
+ let sticky = product_low << (bits - shift) != zero; + product_low = + product_high << (bits - shift) | product_low >> shift | (sticky as u32).cast(); + product_high >>= shift; } else { // Result is normal before rounding; insert the exponent. product_high &= significand_mask; diff --git a/testcrate/tests/mul.rs b/testcrate/tests/mul.rs index 5daeadeb2..818ca656a 100644 --- a/testcrate/tests/mul.rs +++ b/testcrate/tests/mul.rs @@ -107,14 +107,11 @@ macro_rules! float_mul { fuzz_float_2(N, |x: $f, y: $f| { let mul0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Mul::mul, x, y); let mul1: $f = $fn(x, y); - // multiplication of subnormals is not currently handled - if !(Float::is_subnormal(mul0) || Float::is_subnormal(mul1)) { - if !Float::eq_repr(mul0, mul1) { - panic!( - "{}({:?}, {:?}): std: {:?}, builtins: {:?}", - stringify!($fn), x, y, mul0, mul1 - ); - } + if !Float::eq_repr(mul0, mul1) { + panic!( + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn), x, y, mul0, mul1 + ); } }); } From 7bc3d6e08215e3cee34c26b5f662bda27bb80d9d Mon Sep 17 00:00:00 2001 From: beetrees Date: Sun, 30 Jun 2024 22:40:57 +0100 Subject: [PATCH 0776/1459] Ignore broken nightly/system builtins --- testcrate/src/bench.rs | 17 ++++++++++++++--- testcrate/tests/mul.rs | 6 ++++-- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/testcrate/src/bench.rs b/testcrate/src/bench.rs index 1374d7b4f..5ab9bac87 100644 --- a/testcrate/src/bench.rs +++ b/testcrate/src/bench.rs @@ -17,9 +17,10 @@ pub fn skip_sys_checks(test_name: &str) -> bool { "extend_f16_f32", "trunc_f32_f16", "trunc_f64_f16", - // FIXME(f16_f128): rounding error + // FIXME(#616): re-enable once fix is in nightly // - "mul_f128", + "mul_f32", + "mul_f64", ]; // FIXME(f16_f128): error on LE ppc64. 
There are more tests that are cfg-ed out completely @@ -29,7 +30,13 @@ pub fn skip_sys_checks(test_name: &str) -> bool { // FIXME(f16_f128): system symbols have incorrect results // - const X86_NO_SSE_SKIPPED: &[&str] = &["add_f128", "sub_f128", "powi_f32", "powi_f64"]; + const X86_NO_SSE_SKIPPED: &[&str] = + &["add_f128", "sub_f128", "mul_f128", "powi_f32", "powi_f64"]; + + // FIXME(f16_f128): Wide multiply carry bug in `compiler-rt`, re-enable when nightly no longer + // uses `compiler-rt` version. + // + const AARCH64_SKIPPED: &[&str] = &["mul_f128"]; // FIXME(llvm): system symbols have incorrect results on Windows // @@ -61,6 +68,10 @@ pub fn skip_sys_checks(test_name: &str) -> bool { return true; } + if cfg!(target_arch = "aarch64") && AARCH64_SKIPPED.contains(&test_name) { + return true; + } + if cfg!(target_family = "windows") && WINDOWS_SKIPPED.contains(&test_name) { return true; } diff --git a/testcrate/tests/mul.rs b/testcrate/tests/mul.rs index 818ca656a..90144bb06 100644 --- a/testcrate/tests/mul.rs +++ b/testcrate/tests/mul.rs @@ -123,9 +123,11 @@ macro_rules! float_mul { mod float_mul { use super::*; + // FIXME(#616): Stop ignoring arches that don't have native support once fix for builtins is in + // nightly. float_mul! { - f32, __mulsf3, Single, all(); - f64, __muldf3, Double, all(); + f32, __mulsf3, Single, not(target_arch = "arm"); + f64, __muldf3, Double, not(target_arch = "arm"); } } From 254edbcad4cfd6a8af32e3297c1037d7984c3c49 Mon Sep 17 00:00:00 2001 From: beetrees Date: Sun, 30 Jun 2024 23:14:19 +0100 Subject: [PATCH 0777/1459] Temporarily `use define_rust_probestack;` --- src/probestack.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/probestack.rs b/src/probestack.rs index 0c30384db..46caf1676 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -120,6 +120,10 @@ macro_rules! 
define_rust_probestack { }; } +// FIXME(rust-lang/rust#126984): Remove allow once lint is fixed +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use define_rust_probestack; + // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, // ensuring that if any pages are unmapped we'll make a page fault. // From 599c8fbd63f813cb9319bbd1439e581e2a55cabf Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 30 Jun 2024 23:03:44 -0400 Subject: [PATCH 0778/1459] Remove unnecessary benchmark files --- testcrate/bench-175b45d1-aarch64-macos.txt | 500 --------------- testcrate/bench-3cee6376-aarch64-macos.txt | 699 --------------------- 2 files changed, 1199 deletions(-) delete mode 100644 testcrate/bench-175b45d1-aarch64-macos.txt delete mode 100644 testcrate/bench-3cee6376-aarch64-macos.txt diff --git a/testcrate/bench-175b45d1-aarch64-macos.txt b/testcrate/bench-175b45d1-aarch64-macos.txt deleted file mode 100644 index e79bbe368..000000000 --- a/testcrate/bench-175b45d1-aarch64-macos.txt +++ /dev/null @@ -1,500 +0,0 @@ - -running 0 tests - -test result: ok. 
0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s - -add_f32 compiler-builtins - time: [35.804 µs 35.863 µs 35.920 µs] -Found 5 outliers among 100 measurements (5.00%) - 2 (2.00%) high mild - 3 (3.00%) high severe - -add_f32 system time: [39.084 µs 39.127 µs 39.169 µs] -Found 11 outliers among 100 measurements (11.00%) - 7 (7.00%) high mild - 4 (4.00%) high severe - -add_f32 assembly (aarch64 unix) - time: [8.1034 µs 8.1441 µs 8.1866 µs] -Found 4 outliers among 100 measurements (4.00%) - 4 (4.00%) high mild - -add_f64 compiler-builtins - time: [35.647 µs 35.725 µs 35.799 µs] -Found 10 outliers among 100 measurements (10.00%) - 8 (8.00%) high mild - 2 (2.00%) high severe - -add_f64 system time: [39.308 µs 39.322 µs 39.336 µs] -Found 7 outliers among 100 measurements (7.00%) - 4 (4.00%) high mild - 3 (3.00%) high severe - -add_f64 assembly (aarch64 unix) - time: [8.0401 µs 8.0442 µs 8.0499 µs] -Found 11 outliers among 100 measurements (11.00%) - 2 (2.00%) high mild - 9 (9.00%) high severe - -add_f128 compiler-builtins - time: [41.801 µs 41.986 µs 42.201 µs] -Found 7 outliers among 100 measurements (7.00%) - 4 (4.00%) high mild - 3 (3.00%) high severe - -cmp_f32_gt compiler-builtins - time: [13.579 µs 13.675 µs 13.778 µs] -Found 16 outliers among 100 measurements (16.00%) - 6 (6.00%) high mild - 10 (10.00%) high severe - -cmp_f32_gt system time: [12.343 µs 12.348 µs 12.355 µs] -Found 13 outliers among 100 measurements (13.00%) - 1 (1.00%) low mild - 3 (3.00%) high mild - 9 (9.00%) high severe - -cmp_f32_gt assembly (aarch64 unix) - time: [8.2593 µs 8.3185 µs 8.3813 µs] -Found 1 outliers among 100 measurements (1.00%) - 1 (1.00%) high mild - -cmp_f32_unord compiler-builtins - time: [11.977 µs 12.042 µs 12.109 µs] -Found 13 outliers among 100 measurements (13.00%) - 5 (5.00%) low severe - 6 (6.00%) low mild - 2 (2.00%) high mild - -cmp_f32_unord system time: [8.1236 µs 8.1736 µs 8.2350 µs] -Found 18 outliers among 100 measurements (18.00%) - 5 
(5.00%) high mild - 13 (13.00%) high severe - -cmp_f32_unord assembly (aarch64 unix) - time: [8.1446 µs 8.2080 µs 8.2762 µs] -Found 14 outliers among 100 measurements (14.00%) - 6 (6.00%) high mild - 8 (8.00%) high severe - -cmp_f64_gt compiler-builtins - time: [16.073 µs 16.077 µs 16.082 µs] -Found 17 outliers among 100 measurements (17.00%) - 2 (2.00%) low mild - 4 (4.00%) high mild - 11 (11.00%) high severe - -cmp_f64_gt system time: [12.456 µs 12.487 µs 12.522 µs] -Found 3 outliers among 100 measurements (3.00%) - 2 (2.00%) high mild - 1 (1.00%) high severe - -cmp_f64_gt assembly (aarch64 unix) - time: [8.0557 µs 8.0616 µs 8.0685 µs] -Found 3 outliers among 100 measurements (3.00%) - 1 (1.00%) high mild - 2 (2.00%) high severe - -cmp_f64_unord compiler-builtins - time: [10.715 µs 10.724 µs 10.737 µs] -Found 13 outliers among 100 measurements (13.00%) - 3 (3.00%) high mild - 10 (10.00%) high severe - -cmp_f64_unord system time: [8.0692 µs 8.0734 µs 8.0784 µs] -Found 3 outliers among 100 measurements (3.00%) - 1 (1.00%) high mild - 2 (2.00%) high severe - -cmp_f64_unord assembly (aarch64 unix) - time: [8.0569 µs 8.0677 µs 8.0818 µs] -Found 18 outliers among 100 measurements (18.00%) - 4 (4.00%) high mild - 14 (14.00%) high severe - -cmp_f128_gt compiler-builtins - time: [18.234 µs 18.401 µs 18.602 µs] - -cmp_f128_unord compiler-builtins - time: [13.410 µs 13.471 µs 13.542 µs] -Found 7 outliers among 100 measurements (7.00%) - 7 (7.00%) high mild - -conv_u32_f32 compiler-builtins - time: [774.58 ns 776.01 ns 777.59 ns] -Found 9 outliers among 100 measurements (9.00%) - 2 (2.00%) high mild - 7 (7.00%) high severe - -conv_u32_f32 system time: [622.68 ns 625.64 ns 629.26 ns] -Found 16 outliers among 100 measurements (16.00%) - 7 (7.00%) high mild - 9 (9.00%) high severe - -conv_u32_f32 assembly (aarch64 unix) - time: [468.05 ns 469.76 ns 471.46 ns] -Found 3 outliers among 100 measurements (3.00%) - 2 (2.00%) high mild - 1 (1.00%) high severe - -conv_u32_f64 
compiler-builtins - time: [617.61 ns 618.00 ns 618.52 ns] -Found 13 outliers among 100 measurements (13.00%) - 4 (4.00%) high mild - 9 (9.00%) high severe - -conv_u32_f64 system time: [469.56 ns 471.03 ns 472.81 ns] -Found 11 outliers among 100 measurements (11.00%) - 7 (7.00%) high mild - 4 (4.00%) high severe - -conv_u32_f64 assembly (aarch64 unix) - time: [464.43 ns 465.01 ns 465.72 ns] -Found 13 outliers among 100 measurements (13.00%) - 5 (5.00%) high mild - 8 (8.00%) high severe - -conv_u64_f32 compiler-builtins - time: [847.95 ns 848.19 ns 848.46 ns] -Found 19 outliers among 100 measurements (19.00%) - 3 (3.00%) low mild - 9 (9.00%) high mild - 7 (7.00%) high severe - -conv_u64_f32 system time: [701.68 ns 701.95 ns 702.30 ns] -Found 10 outliers among 100 measurements (10.00%) - 4 (4.00%) high mild - 6 (6.00%) high severe - -conv_u64_f32 assembly (aarch64 unix) - time: [511.73 ns 512.43 ns 513.32 ns] -Found 6 outliers among 100 measurements (6.00%) - 6 (6.00%) high mild - -conv_u64_f64 compiler-builtins - time: [681.23 ns 682.55 ns 684.30 ns] -Found 18 outliers among 100 measurements (18.00%) - 1 (1.00%) high mild - 17 (17.00%) high severe - -conv_u64_f64 system time: [679.34 ns 679.57 ns 679.88 ns] -Found 18 outliers among 100 measurements (18.00%) - 1 (1.00%) low mild - 6 (6.00%) high mild - 11 (11.00%) high severe - -conv_u64_f64 assembly (aarch64 unix) - time: [509.90 ns 510.09 ns 510.30 ns] -Found 15 outliers among 100 measurements (15.00%) - 6 (6.00%) high mild - 9 (9.00%) high severe - -conv_u128_f32 compiler-builtins - time: [1.1368 µs 1.1372 µs 1.1377 µs] -Found 14 outliers among 100 measurements (14.00%) - 8 (8.00%) high mild - 6 (6.00%) high severe - -conv_u128_f32 system time: [1.4338 µs 1.4370 µs 1.4410 µs] -Found 7 outliers among 100 measurements (7.00%) - 2 (2.00%) high mild - 5 (5.00%) high severe - -conv_u128_f64 compiler-builtins - time: [1.0133 µs 1.0143 µs 1.0156 µs] -Found 16 outliers among 100 measurements (16.00%) - 2 (2.00%) high mild 
- 14 (14.00%) high severe - -conv_u128_f64 system time: [1.3473 µs 1.3530 µs 1.3600 µs] -Found 4 outliers among 100 measurements (4.00%) - 4 (4.00%) high mild - -conv_i32_f32 compiler-builtins - time: [906.53 ns 907.86 ns 909.23 ns] -Found 7 outliers among 100 measurements (7.00%) - 4 (4.00%) high mild - 3 (3.00%) high severe - -conv_i32_f32 system time: [914.53 ns 915.69 ns 917.01 ns] -Found 10 outliers among 100 measurements (10.00%) - 6 (6.00%) high mild - 4 (4.00%) high severe - -conv_i32_f32 assembly (aarch64 unix) - time: [464.55 ns 465.10 ns 465.83 ns] -Found 4 outliers among 100 measurements (4.00%) - 4 (4.00%) high mild - -conv_i32_f64 compiler-builtins - time: [617.63 ns 617.92 ns 618.27 ns] -Found 12 outliers among 100 measurements (12.00%) - 3 (3.00%) high mild - 9 (9.00%) high severe - -conv_i32_f64 system time: [622.83 ns 624.19 ns 625.61 ns] -Found 6 outliers among 100 measurements (6.00%) - 5 (5.00%) high mild - 1 (1.00%) high severe - -conv_i32_f64 assembly (aarch64 unix) - time: [465.24 ns 466.04 ns 466.95 ns] -Found 11 outliers among 100 measurements (11.00%) - 4 (4.00%) high mild - 7 (7.00%) high severe - -conv_i64_f32 compiler-builtins - time: [852.67 ns 853.92 ns 855.34 ns] -Found 11 outliers among 100 measurements (11.00%) - 3 (3.00%) high mild - 8 (8.00%) high severe - -conv_i64_f32 system time: [906.94 ns 908.04 ns 909.33 ns] -Found 15 outliers among 100 measurements (15.00%) - 2 (2.00%) high mild - 13 (13.00%) high severe - -conv_i64_f32 assembly (aarch64 unix) - time: [510.84 ns 511.27 ns 511.80 ns] -Found 8 outliers among 100 measurements (8.00%) - 3 (3.00%) high mild - 5 (5.00%) high severe - -conv_i64_f64 compiler-builtins - time: [932.35 ns 932.97 ns 933.76 ns] -Found 10 outliers among 100 measurements (10.00%) - 4 (4.00%) high mild - 6 (6.00%) high severe - -conv_i64_f64 system time: [955.91 ns 958.95 ns 962.05 ns] -Found 5 outliers among 100 measurements (5.00%) - 3 (3.00%) high mild - 2 (2.00%) high severe - -conv_i64_f64 assembly 
(aarch64 unix) - time: [510.19 ns 510.72 ns 511.44 ns] -Found 9 outliers among 100 measurements (9.00%) - 5 (5.00%) high mild - 4 (4.00%) high severe - -conv_i128_f32 compiler-builtins - time: [1.4248 µs 1.4285 µs 1.4323 µs] -Found 12 outliers among 100 measurements (12.00%) - 7 (7.00%) high mild - 5 (5.00%) high severe - -conv_i128_f32 system time: [1.6970 µs 1.7017 µs 1.7069 µs] -Found 5 outliers among 100 measurements (5.00%) - 3 (3.00%) high mild - 2 (2.00%) high severe - -conv_i128_f64 compiler-builtins - time: [1.3132 µs 1.3161 µs 1.3191 µs] -Found 2 outliers among 100 measurements (2.00%) - 1 (1.00%) high mild - 1 (1.00%) high severe - -conv_i128_f64 system time: [1.6071 µs 1.6100 µs 1.6133 µs] -Found 4 outliers among 100 measurements (4.00%) - 3 (3.00%) high mild - 1 (1.00%) high severe - -conv_f64_u32 compiler-builtins - time: [640.35 ns 641.00 ns 641.68 ns] -Found 6 outliers among 100 measurements (6.00%) - 4 (4.00%) high mild - 2 (2.00%) high severe - -conv_f64_u32 system time: [640.87 ns 641.63 ns 642.42 ns] -Found 3 outliers among 100 measurements (3.00%) - 1 (1.00%) high mild - 2 (2.00%) high severe - -conv_f64_u32 assembly (aarch64 unix) - time: [482.02 ns 482.67 ns 483.38 ns] -Found 1 outliers among 100 measurements (1.00%) - 1 (1.00%) high severe - -conv_f64_u64 compiler-builtins - time: [638.58 ns 638.98 ns 639.45 ns] -Found 15 outliers among 100 measurements (15.00%) - 1 (1.00%) high mild - 14 (14.00%) high severe - -conv_f64_u64 system time: [642.54 ns 644.07 ns 645.59 ns] -Found 4 outliers among 100 measurements (4.00%) - 3 (3.00%) high mild - 1 (1.00%) high severe - -conv_f64_u64 assembly (aarch64 unix) - time: [482.65 ns 483.70 ns 484.87 ns] -Found 1 outliers among 100 measurements (1.00%) - 1 (1.00%) high mild - -conv_f64_u128 compiler-builtins - time: [1.0631 µs 1.0652 µs 1.0674 µs] -Found 8 outliers among 100 measurements (8.00%) - 7 (7.00%) high mild - 1 (1.00%) high severe - -conv_f64_u128 system time: [821.41 ns 823.45 ns 825.74 ns] 
-Found 11 outliers among 100 measurements (11.00%) - 8 (8.00%) high mild - 3 (3.00%) high severe - -conv_f64_i32 compiler-builtins - time: [826.76 ns 845.08 ns 870.23 ns] -Found 4 outliers among 100 measurements (4.00%) - 4 (4.00%) high mild - -conv_f64_i32 system time: [764.12 ns 764.63 ns 765.26 ns] -Found 2 outliers among 100 measurements (2.00%) - 2 (2.00%) high severe - -conv_f64_i32 assembly (aarch64 unix) - time: [484.50 ns 485.98 ns 487.54 ns] -Found 3 outliers among 100 measurements (3.00%) - 1 (1.00%) high mild - 2 (2.00%) high severe - -conv_f64_i64 compiler-builtins - time: [797.27 ns 798.19 ns 799.84 ns] -Found 9 outliers among 100 measurements (9.00%) - 5 (5.00%) high mild - 4 (4.00%) high severe - -conv_f64_i64 system time: [768.74 ns 769.52 ns 770.23 ns] -Found 1 outliers among 100 measurements (1.00%) - 1 (1.00%) high severe - -conv_f64_i64 assembly (aarch64 unix) - time: [480.59 ns 481.03 ns 481.46 ns] -Found 3 outliers among 100 measurements (3.00%) - 2 (2.00%) high mild - 1 (1.00%) high severe - -conv_f64_i128 compiler-builtins - time: [1.0577 µs 1.0591 µs 1.0606 µs] -Found 2 outliers among 100 measurements (2.00%) - 1 (1.00%) high mild - 1 (1.00%) high severe - -conv_f64_i128 system time: [1.0181 µs 1.0195 µs 1.0211 µs] -Found 3 outliers among 100 measurements (3.00%) - 3 (3.00%) high mild - -conv_f32_u32 compiler-builtins - time: [800.40 ns 801.39 ns 802.35 ns] -Found 2 outliers among 100 measurements (2.00%) - 2 (2.00%) high mild - -conv_f32_u32 system time: [638.12 ns 638.34 ns 638.63 ns] -Found 11 outliers among 100 measurements (11.00%) - 4 (4.00%) high mild - 7 (7.00%) high severe - -conv_f32_u32 assembly (aarch64 unix) - time: [479.37 ns 480.97 ns 483.32 ns] -Found 13 outliers among 100 measurements (13.00%) - 6 (6.00%) high mild - 7 (7.00%) high severe - -conv_f32_u64 compiler-builtins - time: [801.95 ns 803.64 ns 805.75 ns] - -conv_f32_u64 system time: [638.20 ns 638.56 ns 639.07 ns] -Found 10 outliers among 100 measurements (10.00%) - 
1 (1.00%) high mild - 9 (9.00%) high severe - -conv_f32_u64 assembly (aarch64 unix) - time: [480.07 ns 480.47 ns 480.86 ns] -Found 2 outliers among 100 measurements (2.00%) - 1 (1.00%) high mild - 1 (1.00%) high severe - -conv_f32_u128 compiler-builtins - time: [1.1579 µs 1.1623 µs 1.1657 µs] -Found 14 outliers among 100 measurements (14.00%) - 2 (2.00%) low severe - 7 (7.00%) high mild - 5 (5.00%) high severe - -conv_f32_u128 system time: [1.0344 µs 1.0394 µs 1.0450 µs] - -conv_f32_i32 compiler-builtins - time: [800.14 ns 801.52 ns 803.26 ns] -Found 10 outliers among 100 measurements (10.00%) - 8 (8.00%) high mild - 2 (2.00%) high severe - -conv_f32_i32 system time: [741.36 ns 741.74 ns 742.13 ns] -Found 4 outliers among 100 measurements (4.00%) - 2 (2.00%) high mild - 2 (2.00%) high severe - -conv_f32_i32 assembly (aarch64 unix) - time: [484.35 ns 486.08 ns 488.11 ns] -Found 17 outliers among 100 measurements (17.00%) - 9 (9.00%) high mild - 8 (8.00%) high severe - -conv_f32_i64 compiler-builtins - time: [800.94 ns 802.68 ns 804.74 ns] - -conv_f32_i64 system time: [748.60 ns 750.68 ns 753.16 ns] -Found 9 outliers among 100 measurements (9.00%) - 4 (4.00%) high mild - 5 (5.00%) high severe - -conv_f32_i64 assembly (aarch64 unix) - time: [480.70 ns 481.23 ns 481.82 ns] -Found 4 outliers among 100 measurements (4.00%) - 2 (2.00%) high mild - 2 (2.00%) high severe - -conv_f32_i128 compiler-builtins - time: [1.1774 µs 1.1829 µs 1.1887 µs] -Found 11 outliers among 100 measurements (11.00%) - 1 (1.00%) low severe - 7 (7.00%) low mild - 1 (1.00%) high mild - 2 (2.00%) high severe - -conv_f32_i128 system time: [1.1785 µs 1.1853 µs 1.1941 µs] -Found 7 outliers among 100 measurements (7.00%) - 2 (2.00%) high mild - 5 (5.00%) high severe - -div_f32 compiler-builtins - time: [38.852 µs 39.011 µs 39.178 µs] -Found 3 outliers among 100 measurements (3.00%) - 3 (3.00%) high mild - -div_f32 system time: [41.846 µs 41.920 µs 42.005 µs] -Found 3 outliers among 100 measurements 
(3.00%) - 1 (1.00%) high mild - 2 (2.00%) high severe - -div_f32 assembly (aarch64 unix) - time: [8.1309 µs 8.1627 µs 8.2005 µs] -Found 2 outliers among 100 measurements (2.00%) - 2 (2.00%) high mild - -div_f64 compiler-builtins - time: [50.369 µs 50.605 µs 50.857 µs] -Found 15 outliers among 100 measurements (15.00%) - 11 (11.00%) high mild - 4 (4.00%) high severe - -div_f64 system time: [53.506 µs 53.582 µs 53.676 µs] -Found 8 outliers among 100 measurements (8.00%) - 4 (4.00%) high mild - 4 (4.00%) high severe - -div_f64 assembly (aarch64 unix) - time: [8.0695 µs 8.0807 µs 8.0948 µs] -Found 4 outliers among 100 measurements (4.00%) - 2 (2.00%) high mild - 2 (2.00%) high severe - diff --git a/testcrate/bench-3cee6376-aarch64-macos.txt b/testcrate/bench-3cee6376-aarch64-macos.txt deleted file mode 100644 index 131e7a85a..000000000 --- a/testcrate/bench-3cee6376-aarch64-macos.txt +++ /dev/null @@ -1,699 +0,0 @@ - -running 0 tests - -test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s - -add_f32/compiler-builtins - time: [36.813 µs 37.048 µs 37.303 µs] -Found 5 outliers among 100 measurements (5.00%) - 5 (5.00%) high mild -add_f32/system time: [39.103 µs 39.142 µs 39.189 µs] -Found 8 outliers among 100 measurements (8.00%) - 2 (2.00%) high mild - 6 (6.00%) high severe -add_f32/assembly (aarch64 unix) - time: [8.3786 µs 8.4680 µs 8.5570 µs] - -add_f64/compiler-builtins - time: [35.784 µs 35.819 µs 35.863 µs] -Found 4 outliers among 100 measurements (4.00%) - 1 (1.00%) high mild - 3 (3.00%) high severe -add_f64/system time: [39.634 µs 39.689 µs 39.746 µs] -Found 16 outliers among 100 measurements (16.00%) - 4 (4.00%) high mild - 12 (12.00%) high severe -add_f64/assembly (aarch64 unix) - time: [8.0533 µs 8.0599 µs 8.0670 µs] -Found 14 outliers among 100 measurements (14.00%) - 6 (6.00%) high mild - 8 (8.00%) high severe - -add_f128/compiler-builtins - time: [41.830 µs 41.920 µs 42.005 µs] - -cmp_f32_gt/compiler-builtins - time: 
[13.405 µs 13.411 µs 13.418 µs] -Found 18 outliers among 100 measurements (18.00%) - 4 (4.00%) high mild - 14 (14.00%) high severe -cmp_f32_gt/system time: [12.348 µs 12.355 µs 12.363 µs] -Found 12 outliers among 100 measurements (12.00%) - 2 (2.00%) high mild - 10 (10.00%) high severe -cmp_f32_gt/assembly (aarch64 unix) - time: [8.1233 µs 8.1625 µs 8.2072 µs] -Found 12 outliers among 100 measurements (12.00%) - 7 (7.00%) high mild - 5 (5.00%) high severe - -cmp_f32_unord/compiler-builtins - time: [11.349 µs 11.467 µs 11.584 µs] -cmp_f32_unord/system time: [8.0714 µs 8.0792 µs 8.0890 µs] -Found 16 outliers among 100 measurements (16.00%) - 4 (4.00%) high mild - 12 (12.00%) high severe -cmp_f32_unord/assembly (aarch64 unix) - time: [8.1121 µs 8.1705 µs 8.2325 µs] -Found 20 outliers among 100 measurements (20.00%) - 3 (3.00%) high mild - 17 (17.00%) high severe - -cmp_f64_gt/compiler-builtins - time: [13.749 µs 13.837 µs 13.934 µs] -Found 20 outliers among 100 measurements (20.00%) - 9 (9.00%) low mild - 7 (7.00%) high mild - 4 (4.00%) high severe -cmp_f64_gt/system time: [12.475 µs 12.515 µs 12.565 µs] -Found 4 outliers among 100 measurements (4.00%) - 4 (4.00%) high mild -cmp_f64_gt/assembly (aarch64 unix) - time: [8.0456 µs 8.0540 µs 8.0653 µs] -Found 12 outliers among 100 measurements (12.00%) - 3 (3.00%) high mild - 9 (9.00%) high severe - -cmp_f64_unord/compiler-builtins - time: [10.723 µs 10.730 µs 10.739 µs] -Found 15 outliers among 100 measurements (15.00%) - 5 (5.00%) high mild - 10 (10.00%) high severe -cmp_f64_unord/system time: [8.0944 µs 8.1296 µs 8.1683 µs] -Found 17 outliers among 100 measurements (17.00%) - 4 (4.00%) high mild - 13 (13.00%) high severe -cmp_f64_unord/assembly (aarch64 unix) - time: [8.1042 µs 8.1337 µs 8.1662 µs] -Found 3 outliers among 100 measurements (3.00%) - 3 (3.00%) high mild - -cmp_f128_gt/compiler-builtins - time: [20.508 µs 20.558 µs 20.615 µs] -Found 8 outliers among 100 measurements (8.00%) - 2 (2.00%) high mild - 6 
(6.00%) high severe - -cmp_f128_unord/compiler-builtins - time: [13.332 µs 13.346 µs 13.360 µs] -Found 4 outliers among 100 measurements (4.00%) - 2 (2.00%) high mild - 2 (2.00%) high severe - -conv_u32_f32/compiler-builtins - time: [621.20 ns 621.89 ns 622.65 ns] -Found 7 outliers among 100 measurements (7.00%) - 4 (4.00%) high mild - 3 (3.00%) high severe -conv_u32_f32/system time: [621.44 ns 622.08 ns 622.74 ns] -Found 4 outliers among 100 measurements (4.00%) - 3 (3.00%) high mild - 1 (1.00%) high severe -conv_u32_f32/assembly (aarch64 unix) - time: [465.96 ns 466.65 ns 467.45 ns] -Found 13 outliers among 100 measurements (13.00%) - 3 (3.00%) high mild - 10 (10.00%) high severe - -conv_u32_f64/compiler-builtins - time: [619.71 ns 620.51 ns 621.52 ns] -Found 5 outliers among 100 measurements (5.00%) - 4 (4.00%) high mild - 1 (1.00%) high severe -conv_u32_f64/system time: [466.60 ns 467.14 ns 467.77 ns] -Found 2 outliers among 100 measurements (2.00%) - 2 (2.00%) high mild -conv_u32_f64/assembly (aarch64 unix) - time: [464.02 ns 464.32 ns 464.69 ns] -Found 2 outliers among 100 measurements (2.00%) - 1 (1.00%) high mild - 1 (1.00%) high severe - -conv_u64_f32/compiler-builtins - time: [851.24 ns 852.98 ns 854.77 ns] -Found 5 outliers among 100 measurements (5.00%) - 5 (5.00%) high mild -conv_u64_f32/system time: [724.35 ns 729.43 ns 735.07 ns] -Found 4 outliers among 100 measurements (4.00%) - 4 (4.00%) high mild -conv_u64_f32/assembly (aarch64 unix) - time: [513.30 ns 514.64 ns 516.16 ns] -Found 8 outliers among 100 measurements (8.00%) - 8 (8.00%) high mild - -conv_u64_f64/compiler-builtins - time: [850.72 ns 853.26 ns 856.54 ns] -Found 15 outliers among 100 measurements (15.00%) - 2 (2.00%) high mild - 13 (13.00%) high severe -conv_u64_f64/system time: [681.43 ns 682.54 ns 683.79 ns] -Found 4 outliers among 100 measurements (4.00%) - 3 (3.00%) high mild - 1 (1.00%) high severe -conv_u64_f64/assembly (aarch64 unix) - time: [511.37 ns 511.71 ns 512.02 ns] -Found 
1 outliers among 100 measurements (1.00%) - 1 (1.00%) high severe - -conv_u128_f32/compiler-builtins - time: [1.1395 µs 1.1409 µs 1.1424 µs] -Found 10 outliers among 100 measurements (10.00%) - 6 (6.00%) high mild - 4 (4.00%) high severe -conv_u128_f32/system time: [1.4348 µs 1.4369 µs 1.4390 µs] -Found 5 outliers among 100 measurements (5.00%) - 4 (4.00%) high mild - 1 (1.00%) high severe - -conv_u128_f64/compiler-builtins - time: [1.0148 µs 1.0157 µs 1.0167 µs] -Found 4 outliers among 100 measurements (4.00%) - 3 (3.00%) high mild - 1 (1.00%) high severe -conv_u128_f64/system time: [1.3404 µs 1.3423 µs 1.3442 µs] -Found 8 outliers among 100 measurements (8.00%) - 7 (7.00%) high mild - 1 (1.00%) high severe - -conv_i32_f32/compiler-builtins - time: [902.89 ns 903.81 ns 904.84 ns] -Found 7 outliers among 100 measurements (7.00%) - 4 (4.00%) high mild - 3 (3.00%) high severe -conv_i32_f32/system time: [942.62 ns 949.04 ns 955.77 ns] -Found 4 outliers among 100 measurements (4.00%) - 3 (3.00%) high mild - 1 (1.00%) high severe -conv_i32_f32/assembly (aarch64 unix) - time: [466.06 ns 466.60 ns 467.27 ns] -Found 1 outliers among 100 measurements (1.00%) - 1 (1.00%) high severe - -conv_i32_f64/compiler-builtins - time: [618.98 ns 619.24 ns 619.55 ns] -Found 17 outliers among 100 measurements (17.00%) - 1 (1.00%) low mild - 3 (3.00%) high mild - 13 (13.00%) high severe -conv_i32_f64/system time: [622.18 ns 623.41 ns 624.85 ns] -Found 8 outliers among 100 measurements (8.00%) - 5 (5.00%) high mild - 3 (3.00%) high severe -conv_i32_f64/assembly (aarch64 unix) - time: [466.26 ns 466.76 ns 467.35 ns] -Found 9 outliers among 100 measurements (9.00%) - 5 (5.00%) high mild - 4 (4.00%) high severe - -conv_i64_f32/compiler-builtins - time: [850.11 ns 850.45 ns 850.88 ns] -Found 15 outliers among 100 measurements (15.00%) - 1 (1.00%) low severe - 1 (1.00%) low mild - 3 (3.00%) high mild - 10 (10.00%) high severe -conv_i64_f32/system time: [908.36 ns 908.70 ns 909.10 ns] -Found 12 
outliers among 100 measurements (12.00%) - 3 (3.00%) high mild - 9 (9.00%) high severe -conv_i64_f32/assembly (aarch64 unix) - time: [513.56 ns 514.44 ns 515.38 ns] -Found 8 outliers among 100 measurements (8.00%) - 8 (8.00%) high mild - -conv_i64_f64/compiler-builtins - time: [935.39 ns 935.78 ns 936.26 ns] -Found 13 outliers among 100 measurements (13.00%) - 5 (5.00%) high mild - 8 (8.00%) high severe -conv_i64_f64/system time: [946.56 ns 947.33 ns 948.20 ns] -Found 8 outliers among 100 measurements (8.00%) - 6 (6.00%) high mild - 2 (2.00%) high severe -conv_i64_f64/assembly (aarch64 unix) - time: [511.55 ns 512.03 ns 512.56 ns] -Found 21 outliers among 100 measurements (21.00%) - 4 (4.00%) high mild - 17 (17.00%) high severe - -conv_i128_f32/compiler-builtins - time: [1.4206 µs 1.4218 µs 1.4232 µs] -Found 10 outliers among 100 measurements (10.00%) - 5 (5.00%) high mild - 5 (5.00%) high severe -conv_i128_f32/system time: [1.6863 µs 1.6891 µs 1.6922 µs] -Found 10 outliers among 100 measurements (10.00%) - 9 (9.00%) high mild - 1 (1.00%) high severe - -conv_i128_f64/compiler-builtins - time: [1.3110 µs 1.3122 µs 1.3136 µs] -Found 4 outliers among 100 measurements (4.00%) - 2 (2.00%) high mild - 2 (2.00%) high severe -conv_i128_f64/system time: [1.6022 µs 1.6048 µs 1.6090 µs] -Found 5 outliers among 100 measurements (5.00%) - 3 (3.00%) high mild - 2 (2.00%) high severe - -conv_f64_u32/compiler-builtins - time: [798.65 ns 799.42 ns 800.39 ns] -Found 15 outliers among 100 measurements (15.00%) - 6 (6.00%) high mild - 9 (9.00%) high severe -conv_f64_u32/system time: [639.48 ns 639.88 ns 640.40 ns] -Found 16 outliers among 100 measurements (16.00%) - 1 (1.00%) low mild - 5 (5.00%) high mild - 10 (10.00%) high severe -conv_f64_u32/assembly (aarch64 unix) - time: [480.78 ns 481.35 ns 482.17 ns] -Found 7 outliers among 100 measurements (7.00%) - 5 (5.00%) high mild - 2 (2.00%) high severe - -conv_f64_u64/compiler-builtins - time: [799.56 ns 800.54 ns 801.89 ns] -Found 4 
outliers among 100 measurements (4.00%) - 2 (2.00%) high mild - 2 (2.00%) high severe -conv_f64_u64/system time: [640.72 ns 641.24 ns 641.81 ns] -Found 5 outliers among 100 measurements (5.00%) - 3 (3.00%) high mild - 2 (2.00%) high severe -conv_f64_u64/assembly (aarch64 unix) - time: [481.54 ns 482.48 ns 483.53 ns] -Found 6 outliers among 100 measurements (6.00%) - 1 (1.00%) low severe - 1 (1.00%) low mild - 3 (3.00%) high mild - 1 (1.00%) high severe - -conv_f64_u128/compiler-builtins - time: [1.0510 µs 1.0515 µs 1.0520 µs] -Found 13 outliers among 100 measurements (13.00%) - 1 (1.00%) low mild - 2 (2.00%) high mild - 10 (10.00%) high severe -conv_f64_u128/system time: [818.45 ns 819.23 ns 820.15 ns] -Found 2 outliers among 100 measurements (2.00%) - 2 (2.00%) high mild - -conv_f64_i32/compiler-builtins - time: [800.56 ns 801.31 ns 802.21 ns] -Found 5 outliers among 100 measurements (5.00%) - 3 (3.00%) high mild - 2 (2.00%) high severe -conv_f64_i32/system time: [765.62 ns 766.15 ns 766.80 ns] -Found 3 outliers among 100 measurements (3.00%) - 2 (2.00%) high mild - 1 (1.00%) high severe -conv_f64_i32/assembly (aarch64 unix) - time: [471.65 ns 472.77 ns 473.89 ns] -Found 10 outliers among 100 measurements (10.00%) - 1 (1.00%) low mild - 8 (8.00%) high mild - 1 (1.00%) high severe - -conv_f64_i64/compiler-builtins - time: [801.00 ns 804.55 ns 808.72 ns] -Found 18 outliers among 100 measurements (18.00%) - 6 (6.00%) high mild - 12 (12.00%) high severe -conv_f64_i64/system time: [770.28 ns 772.47 ns 775.21 ns] -Found 2 outliers among 100 measurements (2.00%) - 2 (2.00%) high mild -conv_f64_i64/assembly (aarch64 unix) - time: [491.56 ns 494.96 ns 499.19 ns] -Found 3 outliers among 100 measurements (3.00%) - 2 (2.00%) high mild - 1 (1.00%) high severe - -conv_f64_i128/compiler-builtins - time: [1.0637 µs 1.0704 µs 1.0762 µs] -Found 5 outliers among 100 measurements (5.00%) - 5 (5.00%) high mild -conv_f64_i128/system time: [1.0022 µs 1.0027 µs 1.0033 µs] -Found 4 
outliers among 100 measurements (4.00%) - 1 (1.00%) low severe - 3 (3.00%) high severe - -conv_f32_u32/compiler-builtins - time: [644.56 ns 647.01 ns 649.95 ns] -Found 15 outliers among 100 measurements (15.00%) - 13 (13.00%) high mild - 2 (2.00%) high severe -conv_f32_u32/system time: [648.12 ns 651.20 ns 654.54 ns] -Found 9 outliers among 100 measurements (9.00%) - 7 (7.00%) high mild - 2 (2.00%) high severe -conv_f32_u32/assembly (aarch64 unix) - time: [481.02 ns 482.71 ns 484.60 ns] -Found 12 outliers among 100 measurements (12.00%) - 1 (1.00%) low mild - 10 (10.00%) high mild - 1 (1.00%) high severe - -conv_f32_u64/compiler-builtins - time: [644.14 ns 646.61 ns 649.53 ns] -Found 11 outliers among 100 measurements (11.00%) - 6 (6.00%) high mild - 5 (5.00%) high severe -conv_f32_u64/system time: [646.21 ns 650.17 ns 654.55 ns] -Found 3 outliers among 100 measurements (3.00%) - 3 (3.00%) high mild -conv_f32_u64/assembly (aarch64 unix) - time: [473.36 ns 474.60 ns 476.00 ns] -Found 9 outliers among 100 measurements (9.00%) - 2 (2.00%) low mild - 5 (5.00%) high mild - 2 (2.00%) high severe - -conv_f32_u128/compiler-builtins - time: [1.0820 µs 1.0828 µs 1.0839 µs] -Found 2 outliers among 100 measurements (2.00%) - 1 (1.00%) high mild - 1 (1.00%) high severe -conv_f32_u128/system time: [1.0003 µs 1.0042 µs 1.0076 µs] -Found 21 outliers among 100 measurements (21.00%) - 1 (1.00%) low mild - 3 (3.00%) high mild - 17 (17.00%) high severe - -conv_f32_i32/compiler-builtins - time: [801.13 ns 801.82 ns 802.53 ns] -Found 2 outliers among 100 measurements (2.00%) - 2 (2.00%) high severe -conv_f32_i32/system time: [745.17 ns 745.97 ns 746.78 ns] -Found 2 outliers among 100 measurements (2.00%) - 2 (2.00%) high severe -conv_f32_i32/assembly (aarch64 unix) - time: [469.87 ns 470.65 ns 471.57 ns] -Found 1 outliers among 100 measurements (1.00%) - 1 (1.00%) high mild - -conv_f32_i64/compiler-builtins - time: [799.44 ns 799.94 ns 800.59 ns] -Found 4 outliers among 100 measurements 
(4.00%) - 1 (1.00%) high mild - 3 (3.00%) high severe -conv_f32_i64/system time: [744.81 ns 745.17 ns 745.62 ns] -Found 14 outliers among 100 measurements (14.00%) - 5 (5.00%) high mild - 9 (9.00%) high severe -conv_f32_i64/assembly (aarch64 unix) - time: [465.06 ns 466.01 ns 467.12 ns] -Found 13 outliers among 100 measurements (13.00%) - 2 (2.00%) low severe - 5 (5.00%) high mild - 6 (6.00%) high severe - -conv_f32_i128/compiler-builtins - time: [1.1390 µs 1.1515 µs 1.1637 µs] -conv_f32_i128/system time: [1.1315 µs 1.1330 µs 1.1347 µs] -Found 6 outliers among 100 measurements (6.00%) - 3 (3.00%) low mild - 2 (2.00%) high mild - 1 (1.00%) high severe - -div_f32/compiler-builtins - time: [39.408 µs 39.676 µs 39.969 µs] -Found 5 outliers among 100 measurements (5.00%) - 5 (5.00%) high mild -div_f32/system time: [42.108 µs 42.248 µs 42.528 µs] -Found 11 outliers among 100 measurements (11.00%) - 4 (4.00%) high mild - 7 (7.00%) high severe -div_f32/assembly (aarch64 unix) - time: [8.0724 µs 8.0794 µs 8.0870 µs] -Found 7 outliers among 100 measurements (7.00%) - 5 (5.00%) high mild - 2 (2.00%) high severe - -div_f64/compiler-builtins - time: [49.992 µs 50.014 µs 50.040 µs] -Found 5 outliers among 100 measurements (5.00%) - 5 (5.00%) high severe -div_f64/system time: [53.577 µs 53.651 µs 53.743 µs] -Found 6 outliers among 100 measurements (6.00%) - 4 (4.00%) high mild - 2 (2.00%) high severe -div_f64/assembly (aarch64 unix) - time: [8.0976 µs 8.1064 µs 8.1158 µs] -Found 6 outliers among 100 measurements (6.00%) - 3 (3.00%) high mild - 3 (3.00%) high severe - -extend_f16_f32/compiler-builtins - time: [804.09 ns 805.38 ns 807.09 ns] -Found 3 outliers among 100 measurements (3.00%) - 1 (1.00%) high mild - 2 (2.00%) high severe -extend_f16_f32/system time: [641.07 ns 641.76 ns 642.60 ns] -Found 12 outliers among 100 measurements (12.00%) - 6 (6.00%) high mild - 6 (6.00%) high severe -extend_f16_f32/assembly (aarch64 unix) - time: [456.69 ns 457.14 ns 457.68 ns] -Found 8 
outliers among 100 measurements (8.00%) - 4 (4.00%) low mild - 2 (2.00%) high mild - 2 (2.00%) high severe - -extend_f16_f128/compiler-builtins - time: [1.1025 µs 1.1035 µs 1.1045 µs] -Found 2 outliers among 100 measurements (2.00%) - 1 (1.00%) high mild - 1 (1.00%) high severe - -extend_f32_f64/compiler-builtins - time: [799.30 ns 799.68 ns 800.16 ns] -Found 13 outliers among 100 measurements (13.00%) - 3 (3.00%) high mild - 10 (10.00%) high severe -extend_f32_f64/system time: [992.48 ns 993.27 ns 994.32 ns] -Found 15 outliers among 100 measurements (15.00%) - 3 (3.00%) high mild - 12 (12.00%) high severe -extend_f32_f64/assembly (aarch64 unix) - time: [457.65 ns 460.39 ns 463.78 ns] - -extend_f32_f128/compiler-builtins - time: [1.0295 µs 1.0311 µs 1.0327 µs] -Found 3 outliers among 100 measurements (3.00%) - 2 (2.00%) low mild - 1 (1.00%) high mild - -extend_f64_f128/compiler-builtins - time: [1.0400 µs 1.0412 µs 1.0426 µs] -Found 2 outliers among 100 measurements (2.00%) - 2 (2.00%) high mild - -mul_f32/compiler-builtins - time: [25.604 µs 25.705 µs 25.818 µs] -Found 23 outliers among 100 measurements (23.00%) - 17 (17.00%) low severe - 3 (3.00%) high mild - 3 (3.00%) high severe -mul_f32/system time: [29.914 µs 29.977 µs 30.043 µs] -Found 5 outliers among 100 measurements (5.00%) - 5 (5.00%) high mild -mul_f32/assembly (aarch64 unix) - time: [8.1384 µs 8.1964 µs 8.2603 µs] -Found 13 outliers among 100 measurements (13.00%) - 3 (3.00%) high mild - 10 (10.00%) high severe - -mul_f64/compiler-builtins - time: [25.596 µs 25.615 µs 25.637 µs] -Found 3 outliers among 100 measurements (3.00%) - 2 (2.00%) high mild - 1 (1.00%) high severe -mul_f64/system time: [30.931 µs 30.963 µs 31.002 µs] -Found 3 outliers among 100 measurements (3.00%) - 3 (3.00%) high mild -mul_f64/assembly (aarch64 unix) - time: [8.0589 µs 8.0638 µs 8.0695 µs] -Found 3 outliers among 100 measurements (3.00%) - 1 (1.00%) high mild - 2 (2.00%) high severe - -mul_f128/compiler-builtins - time: 
[54.242 µs 54.306 µs 54.374 µs] -Found 3 outliers among 100 measurements (3.00%) - 2 (2.00%) high mild - 1 (1.00%) high severe - -powi_f32/compiler-builtins - time: [129.91 µs 130.09 µs 130.24 µs] -powi_f32/system time: [126.97 µs 127.34 µs 127.82 µs] -Found 4 outliers among 100 measurements (4.00%) - 3 (3.00%) high mild - 1 (1.00%) high severe - -powi_f64/compiler-builtins - time: [130.08 µs 130.81 µs 131.46 µs] -Found 13 outliers among 100 measurements (13.00%) - 13 (13.00%) high mild -powi_f64/system time: [128.51 µs 128.68 µs 128.88 µs] -Found 21 outliers among 100 measurements (21.00%) - 4 (4.00%) high mild - 17 (17.00%) high severe - -sub_f32/compiler-builtins - time: [37.861 µs 38.012 µs 38.158 µs] -Found 26 outliers among 100 measurements (26.00%) - 18 (18.00%) low mild - 7 (7.00%) high mild - 1 (1.00%) high severe -sub_f32/system time: [39.586 µs 39.628 µs 39.673 µs] -Found 2 outliers among 100 measurements (2.00%) - 1 (1.00%) high mild - 1 (1.00%) high severe -sub_f32/assembly (aarch64 unix) - time: [8.0976 µs 8.1584 µs 8.2208 µs] -Found 6 outliers among 100 measurements (6.00%) - 6 (6.00%) high mild - -sub_f64/compiler-builtins - time: [37.755 µs 37.838 µs 37.921 µs] -Found 25 outliers among 100 measurements (25.00%) - 7 (7.00%) low severe - 3 (3.00%) low mild - 4 (4.00%) high mild - 11 (11.00%) high severe -sub_f64/system time: [39.979 µs 40.019 µs 40.064 µs] -Found 3 outliers among 100 measurements (3.00%) - 2 (2.00%) high mild - 1 (1.00%) high severe -sub_f64/assembly (aarch64 unix) - time: [8.0669 µs 8.0733 µs 8.0801 µs] -Found 7 outliers among 100 measurements (7.00%) - 3 (3.00%) high mild - 4 (4.00%) high severe - -sub_f128/compiler-builtins - time: [68.618 µs 68.899 µs 69.293 µs] -Found 11 outliers among 100 measurements (11.00%) - 2 (2.00%) high mild - 9 (9.00%) high severe - -trunc_f32_f16/compiler-builtins - time: [1.3343 µs 1.3468 µs 1.3608 µs] -Found 3 outliers among 100 measurements (3.00%) - 1 (1.00%) high mild - 2 (2.00%) high severe 
-trunc_f32_f16/system time: [1.2687 µs 1.2714 µs 1.2738 µs] -trunc_f32_f16/assembly (aarch64 unix) - time: [470.06 ns 472.96 ns 475.30 ns] - -trunc_f64_f16/compiler-builtins - time: [1.2729 µs 1.2738 µs 1.2749 µs] -Found 7 outliers among 100 measurements (7.00%) - 2 (2.00%) high mild - 5 (5.00%) high severe -trunc_f64_f16/assembly (aarch64 unix) - time: [455.91 ns 456.61 ns 457.33 ns] -Found 12 outliers among 100 measurements (12.00%) - 1 (1.00%) low severe - 2 (2.00%) low mild - 6 (6.00%) high mild - 3 (3.00%) high severe - -trunc_f64_f32/compiler-builtins - time: [1.2240 µs 1.2325 µs 1.2410 µs] -Found 17 outliers among 100 measurements (17.00%) - 4 (4.00%) low mild - 2 (2.00%) high mild - 11 (11.00%) high severe -trunc_f64_f32/system time: [1.2784 µs 1.2835 µs 1.2884 µs] -Found 10 outliers among 100 measurements (10.00%) - 6 (6.00%) low severe - 1 (1.00%) low mild - 2 (2.00%) high mild - 1 (1.00%) high severe -trunc_f64_f32/assembly (aarch64 unix) - time: [455.64 ns 456.08 ns 456.58 ns] -Found 18 outliers among 100 measurements (18.00%) - 3 (3.00%) low severe - 4 (4.00%) low mild - 8 (8.00%) high mild - 3 (3.00%) high severe - -trunc_f128_f16/compiler-builtins - time: [1.2563 µs 1.2666 µs 1.2776 µs] -Found 3 outliers among 100 measurements (3.00%) - 3 (3.00%) high mild - -trunc_f128_f32/compiler-builtins - time: [1.2459 µs 1.2482 µs 1.2507 µs] -Found 6 outliers among 100 measurements (6.00%) - 2 (2.00%) low mild - 2 (2.00%) high mild - 2 (2.00%) high severe - -trunc_f128_f64/compiler-builtins - time: [1.2821 µs 1.3047 µs 1.3452 µs] -Found 8 outliers among 100 measurements (8.00%) - 4 (4.00%) low severe - 1 (1.00%) low mild - 2 (2.00%) high mild - 1 (1.00%) high severe - - -running 52 tests -test memcmp_builtin_1048576 ... bench: 20,975.52 ns/iter (+/- 239.69) = 49991 MB/s -test memcmp_builtin_16 ... bench: 1.60 ns/iter (+/- 0.05) = 16000 MB/s -test memcmp_builtin_32 ... bench: 1.61 ns/iter (+/- 0.03) = 32000 MB/s -test memcmp_builtin_4096 ... 
bench: 95.84 ns/iter (+/- 2.82) = 43115 MB/s -test memcmp_builtin_64 ... bench: 2.39 ns/iter (+/- 0.09) = 32000 MB/s -test memcmp_builtin_8 ... bench: 1.60 ns/iter (+/- 0.04) = 8000 MB/s -test memcmp_builtin_unaligned_1048575 ... bench: 22,060.00 ns/iter (+/- 873.55) = 47532 MB/s -test memcmp_builtin_unaligned_15 ... bench: 3.19 ns/iter (+/- 0.02) = 5333 MB/s -test memcmp_builtin_unaligned_31 ... bench: 1.61 ns/iter (+/- 0.01) = 32000 MB/s -test memcmp_builtin_unaligned_4095 ... bench: 96.63 ns/iter (+/- 4.58) = 42666 MB/s -test memcmp_builtin_unaligned_63 ... bench: 2.40 ns/iter (+/- 0.11) = 32000 MB/s -test memcmp_builtin_unaligned_7 ... bench: 3.37 ns/iter (+/- 0.05) = 2666 MB/s -test memcmp_rust_1048576 ... bench: 309,647.23 ns/iter (+/- 6,077.35) = 3386 MB/s -test memcmp_rust_16 ... bench: 5.66 ns/iter (+/- 0.30) = 3200 MB/s -test memcmp_rust_32 ... bench: 10.47 ns/iter (+/- 0.14) = 3200 MB/s -test memcmp_rust_4096 ... bench: 1,124.34 ns/iter (+/- 36.92) = 3644 MB/s -test memcmp_rust_64 ... bench: 19.90 ns/iter (+/- 0.36) = 3368 MB/s -test memcmp_rust_8 ... bench: 3.46 ns/iter (+/- 0.11) = 2666 MB/s -test memcmp_rust_unaligned_1048575 ... bench: 308,613.87 ns/iter (+/- 6,613.18) = 3397 MB/s -test memcmp_rust_unaligned_15 ... bench: 5.35 ns/iter (+/- 0.05) = 3200 MB/s -test memcmp_rust_unaligned_31 ... bench: 9.94 ns/iter (+/- 0.06) = 3555 MB/s -test memcmp_rust_unaligned_4095 ... bench: 1,120.06 ns/iter (+/- 5.03) = 3657 MB/s -test memcmp_rust_unaligned_63 ... bench: 19.64 ns/iter (+/- 0.82) = 3368 MB/s -test memcmp_rust_unaligned_7 ... bench: 3.22 ns/iter (+/- 0.10) = 2666 MB/s -test memcpy_builtin_1048576 ... bench: 12,538.05 ns/iter (+/- 354.79) = 83631 MB/s -test memcpy_builtin_1048576_misalign ... bench: 30,092.56 ns/iter (+/- 8,064.04) = 34845 MB/s -test memcpy_builtin_1048576_offset ... bench: 12,538.36 ns/iter (+/- 359.04) = 83631 MB/s -test memcpy_builtin_4096 ... bench: 44.24 ns/iter (+/- 6.80) = 93090 MB/s -test memcpy_builtin_4096_misalign ... 
bench: 45.34 ns/iter (+/- 2.13) = 91022 MB/s -test memcpy_builtin_4096_offset ... bench: 44.71 ns/iter (+/- 0.61) = 93090 MB/s -test memcpy_rust_1048576 ... bench: 17,943.33 ns/iter (+/- 243.18) = 58439 MB/s -test memcpy_rust_1048576_misalign ... bench: 15,004.68 ns/iter (+/- 3,978.65) = 69886 MB/s -test memcpy_rust_1048576_offset ... bench: 14,722.06 ns/iter (+/- 479.54) = 71225 MB/s -test memcpy_rust_4096 ... bench: 44.91 ns/iter (+/- 4.62) = 93090 MB/s -test memcpy_rust_4096_misalign ... bench: 76.21 ns/iter (+/- 8.21) = 53894 MB/s -test memcpy_rust_4096_offset ... bench: 76.27 ns/iter (+/- 4.69) = 53894 MB/s -test memmove_builtin_1048576 ... bench: 18,644.50 ns/iter (+/- 379.84) = 56242 MB/s -test memmove_builtin_1048576_misalign ... bench: 18,947.70 ns/iter (+/- 1,226.26) = 55342 MB/s -test memmove_builtin_4096 ... bench: 44.21 ns/iter (+/- 0.79) = 93090 MB/s -test memmove_builtin_4096_misalign ... bench: 47.21 ns/iter (+/- 3.12) = 87148 MB/s -test memmove_rust_1048576 ... bench: 34,813.33 ns/iter (+/- 3,637.47) = 30120 MB/s -test memmove_rust_1048576_misalign ... bench: 35,067.19 ns/iter (+/- 1,699.63) = 29902 MB/s -test memmove_rust_4096 ... bench: 148.69 ns/iter (+/- 1.31) = 27675 MB/s -test memmove_rust_4096_misalign ... bench: 153.81 ns/iter (+/- 1.71) = 26771 MB/s -test memset_builtin_1048576 ... bench: 15,704.12 ns/iter (+/- 12,113.86) = 66771 MB/s -test memset_builtin_1048576_offset ... bench: 17,894.23 ns/iter (+/- 175.12) = 58599 MB/s -test memset_builtin_4096 ... bench: 39.95 ns/iter (+/- 0.19) = 105025 MB/s -test memset_builtin_4096_offset ... bench: 40.48 ns/iter (+/- 3.11) = 102400 MB/s -test memset_rust_1048576 ... bench: 10,600.66 ns/iter (+/- 1,559.93) = 98922 MB/s -test memset_rust_1048576_offset ... bench: 14,810.85 ns/iter (+/- 575.27) = 70801 MB/s -test memset_rust_4096 ... bench: 37.91 ns/iter (+/- 2.77) = 110702 MB/s -test memset_rust_4096_offset ... bench: 59.99 ns/iter (+/- 10.45) = 69423 MB/s - -test result: ok. 
0 passed; 0 failed; 0 ignored; 52 measured; 0 filtered out; finished in 97.74s - From 9732496d898d0b8f45f8a5b3b381b84e6fb3f39a Mon Sep 17 00:00:00 2001 From: Andrey Turkin Date: Tue, 25 Jun 2024 13:52:02 +0300 Subject: [PATCH 0779/1459] Get rid of a warning --- Cargo.toml | 2 +- build.rs | 2 -- crates/panic-handler/Cargo.toml | 1 + 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ab4395159..c8d78ec3e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" homepage = "https://github.com/rust-lang/compiler-builtins" documentation = "https://docs.rs/compiler_builtins" -edition = "2018" +edition = "2021" description = """ Compiler intrinsics used by the Rust compiler. Also available for other targets if necessary! diff --git a/build.rs b/build.rs index 0ecd39911..d7e3be594 100644 --- a/build.rs +++ b/build.rs @@ -219,8 +219,6 @@ fn configure_check_cfg() { #[cfg(feature = "c")] mod c { - extern crate cc; - use std::collections::{BTreeMap, HashSet}; use std::env; use std::fs::{self, File}; diff --git a/crates/panic-handler/Cargo.toml b/crates/panic-handler/Cargo.toml index 1dea613d1..4fb81eb82 100644 --- a/crates/panic-handler/Cargo.toml +++ b/crates/panic-handler/Cargo.toml @@ -2,5 +2,6 @@ name = "panic-handler" version = "0.1.0" authors = ["Alex Crichton "] +edition = "2021" [dependencies] From 36417618a47c8a70186b62e331345258c51306df Mon Sep 17 00:00:00 2001 From: Andrey Turkin Date: Sat, 22 Jun 2024 00:01:52 +0300 Subject: [PATCH 0780/1459] Implement __bswap[sdt]i2 intrinsics These can be emitted by gcc, at least if requested specifically via __builtin_bswap{32,64,128}. 
--- README.md | 3 +++ build.rs | 3 +++ src/int/bswap.rs | 22 ++++++++++++++++++++++ src/int/mod.rs | 1 + testcrate/tests/misc.rs | 34 ++++++++++++++++++++++++++++++++++ 5 files changed, 63 insertions(+) create mode 100644 src/int/bswap.rs diff --git a/README.md b/README.md index 9a847da3f..a24c12739 100644 --- a/README.md +++ b/README.md @@ -154,6 +154,9 @@ rely on CI. - [ ] arm/unordsf2vfp.S - [x] ashldi3.c - [x] ashrdi3.c +- [x] bswapdi2.c +- [x] bswapsi2.c +- [x] bswapti2.c - [x] comparedf2.c - [x] comparesf2.c - [x] divdf3.c diff --git a/build.rs b/build.rs index d7e3be594..34467d8f5 100644 --- a/build.rs +++ b/build.rs @@ -161,6 +161,9 @@ fn configure_check_cfg() { "__ashlsi3", "__ashrdi3", "__ashrsi3", + "__bswapsi2", + "__bswapdi2", + "__bswapti2", "__clzsi2", "__divdi3", "__divsi3", diff --git a/src/int/bswap.rs b/src/int/bswap.rs new file mode 100644 index 000000000..9df80204d --- /dev/null +++ b/src/int/bswap.rs @@ -0,0 +1,22 @@ +intrinsics! { + #[maybe_use_optimized_c_shim] + #[avr_skip] + /// Swaps bytes in 32-bit number + pub extern "C" fn __bswapsi2(x: u32) -> u32 { + x.swap_bytes() + } + + #[maybe_use_optimized_c_shim] + #[avr_skip] + /// Swaps bytes in 64-bit number + pub extern "C" fn __bswapdi2(x: u64) -> u64 { + x.swap_bytes() + } + + #[maybe_use_optimized_c_shim] + #[avr_skip] + /// Swaps bytes in 128-bit number + pub extern "C" fn __bswapti2(x: u128) -> u128 { + x.swap_bytes() + } +} diff --git a/src/int/mod.rs b/src/int/mod.rs index 45d383880..ddbffd740 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -4,6 +4,7 @@ mod specialized_div_rem; pub mod addsub; mod big; +pub mod bswap; pub mod leading_zeros; pub mod mul; pub mod sdiv; diff --git a/testcrate/tests/misc.rs b/testcrate/tests/misc.rs index e01223c74..c19923b75 100644 --- a/testcrate/tests/misc.rs +++ b/testcrate/tests/misc.rs @@ -92,6 +92,40 @@ fn leading_zeros() { }) } +#[test] +#[cfg(not(target_arch = "avr"))] +fn bswap() { + use compiler_builtins::int::bswap::{__bswapdi2, 
__bswapsi2}; + fuzz(N, |x: u32| { + assert_eq!(x.swap_bytes(), __bswapsi2(x)); + }); + fuzz(N, |x: u64| { + assert_eq!(x.swap_bytes(), __bswapdi2(x)); + }); + + assert_eq!(__bswapsi2(0x12345678u32), 0x78563412u32); + assert_eq!(__bswapsi2(0x00000001u32), 0x01000000u32); + assert_eq!(__bswapdi2(0x123456789ABCDEF0u64), 0xF0DEBC9A78563412u64); + assert_eq!(__bswapdi2(0x0200000001000000u64), 0x0000000100000002u64); + + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + { + use compiler_builtins::int::bswap::__bswapti2; + fuzz(N, |x: u128| { + assert_eq!(x.swap_bytes(), __bswapti2(x)); + }); + + assert_eq!( + __bswapti2(0x123456789ABCDEF013579BDF02468ACEu128), + 0xCE8A4602DF9B5713F0DEBC9A78563412u128 + ); + assert_eq!( + __bswapti2(0x04000000030000000200000001000000u128), + 0x00000001000000020000000300000004u128 + ); + } +} + // This is approximate because of issues related to // https://github.com/rust-lang/rust/issues/73920. // TODO how do we resolve this indeterminacy? From 6c37129ba4f0002b6d0552ea2ef183314b84cb2f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 18 Jul 2024 03:25:03 -0500 Subject: [PATCH 0781/1459] Fix missing `extern "C"` for unsafe functions `unsafe` functions were being matched in a different block that did not include `extern $abi`. This means that some intrinsics were getting generated with the Rust ABI rather than C. Combine the last two blocks using an optional token matcher, which fixes this problem and is cleaner. --- src/macros.rs | 33 +++------------------------------ 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/src/macros.rs b/src/macros.rs index f537c1a96..fb14660af 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -449,41 +449,14 @@ macro_rules! intrinsics { // input we were given. ( $(#[$($attr:tt)*])* - pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? 
{ - $($body:tt)* - } - - $($rest:tt)* - ) => ( - $(#[$($attr)*])* - pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { - $($body)* - } - - #[cfg(not(feature = "mangled-names"))] - mod $name { - $(#[$($attr)*])* - #[no_mangle] - #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] - extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { - super::$name($($argname),*) - } - } - - intrinsics!($($rest)*); - ); - - // Same as the above for unsafe functions. - ( - $(#[$($attr:tt)*])* - pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + pub $(unsafe $(@ $empty:tt)?)? extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } $($rest:tt)* ) => ( $(#[$($attr)*])* - pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + pub $(unsafe $($empty)?)? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } @@ -492,7 +465,7 @@ macro_rules! intrinsics { $(#[$($attr)*])* #[no_mangle] #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] - unsafe fn $name( $($argname: $ty),* ) $(-> $ret)? { + $(unsafe $($empty)?)? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } } From 3041451e0a6b8e5bc96fe8bb495546fad7d70901 Mon Sep 17 00:00:00 2001 From: Andrey Turkin Date: Sat, 22 Jun 2024 00:01:52 +0300 Subject: [PATCH 0782/1459] Implement remaining __clz*i2 intrinsics --- README.md | 6 +-- build.rs | 2 +- src/int/leading_zeros.rs | 92 ++++++++++++++++++++++++---------------- src/int/mod.rs | 1 - testcrate/tests/misc.rs | 89 +++++++++++++++++++++++++++----------- 5 files changed, 123 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index a24c12739..6ff0de190 100644 --- a/README.md +++ b/README.md @@ -157,6 +157,9 @@ rely on CI. 
- [x] bswapdi2.c - [x] bswapsi2.c - [x] bswapti2.c +- [x] clzdi2.c +- [x] clzsi2.c +- [x] clzti2.c - [x] comparedf2.c - [x] comparesf2.c - [x] divdf3.c @@ -325,9 +328,6 @@ These builtins are never called by LLVM. - ~~arm/switch32.S~~ - ~~arm/switch8.S~~ - ~~arm/switchu8.S~~ -- ~~clzdi2.c~~ -- ~~clzsi2.c~~ -- ~~clzti2.c~~ - ~~cmpdi2.c~~ - ~~cmpti2.c~~ - ~~ctzdi2.c~~ diff --git a/build.rs b/build.rs index 34467d8f5..79f994456 100644 --- a/build.rs +++ b/build.rs @@ -165,6 +165,7 @@ fn configure_check_cfg() { "__bswapdi2", "__bswapti2", "__clzsi2", + "__clzdi2", "__divdi3", "__divsi3", "__divmoddi4", @@ -382,7 +383,6 @@ mod c { sources.extend(&[ ("__absvti2", "absvti2.c"), ("__addvti3", "addvti3.c"), - ("__clzti2", "clzti2.c"), ("__cmpti2", "cmpti2.c"), ("__ctzti2", "ctzti2.c"), ("__ffsti2", "ffsti2.c"), diff --git a/src/int/leading_zeros.rs b/src/int/leading_zeros.rs index 9e60ab0d7..68ac55ac7 100644 --- a/src/int/leading_zeros.rs +++ b/src/int/leading_zeros.rs @@ -3,10 +3,12 @@ // adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`. // Compilers will insert the check for zero in cases where it is needed. +use crate::int::{CastInto, Int}; + public_test_dep! { /// Returns the number of leading binary zeros in `x`. #[allow(dead_code)] -pub(crate) fn usize_leading_zeros_default(x: usize) -> usize { +pub(crate) fn leading_zeros_default>(x: T) -> usize { // The basic idea is to test if the higher bits of `x` are zero and bisect the number // of leading zeros. It is possible for all branches of the bisection to use the same // code path by conditionally shifting the higher parts down to let the next bisection @@ -16,46 +18,47 @@ pub(crate) fn usize_leading_zeros_default(x: usize) -> usize { // because it simplifies the final bisection step. 
let mut x = x; // the number of potential leading zeros - let mut z = usize::MAX.count_ones() as usize; + let mut z = T::BITS as usize; // a temporary - let mut t: usize; - #[cfg(target_pointer_width = "64")] - { + let mut t: T; + + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { t = x >> 32; - if t != 0 { + if t != T::ZERO { z -= 32; x = t; } } - #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] - { + if T::BITS >= 32 { t = x >> 16; - if t != 0 { + if t != T::ZERO { z -= 16; x = t; } } + const { assert!(T::BITS >= 16) }; t = x >> 8; - if t != 0 { + if t != T::ZERO { z -= 8; x = t; } t = x >> 4; - if t != 0 { + if t != T::ZERO { z -= 4; x = t; } t = x >> 2; - if t != 0 { + if t != T::ZERO { z -= 2; x = t; } // the last two bisections are combined into one conditional t = x >> 1; - if t != 0 { + if t != T::ZERO { z - 2 } else { - z - x + z - x.cast() } // We could potentially save a few cycles by using the LUT trick from @@ -80,12 +83,12 @@ pub(crate) fn usize_leading_zeros_default(x: usize) -> usize { public_test_dep! { /// Returns the number of leading binary zeros in `x`. #[allow(dead_code)] -pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize { +pub(crate) fn leading_zeros_riscv>(x: T) -> usize { let mut x = x; // the number of potential leading zeros - let mut z = usize::MAX.count_ones() as usize; + let mut z = T::BITS; // a temporary - let mut t: usize; + let mut t: u32; // RISC-V does not have a set-if-greater-than-or-equal instruction and // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is @@ -95,11 +98,11 @@ pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize { // right). If we try to save an instruction by using `x < imm` for each bisection, we // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, // but the immediate will never fit into 12 bits and never save an instruction. 
- #[cfg(target_pointer_width = "64")] - { + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise // `t` is set to 0. - t = ((x >= (1 << 32)) as usize) << 5; + t = ((x >= (T::ONE << 32)) as u32) << 5; // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the // next step to process. x >>= t; @@ -107,43 +110,58 @@ pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize { // leading zeros z -= t; } - #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] - { - t = ((x >= (1 << 16)) as usize) << 4; + if T::BITS >= 32 { + t = ((x >= (T::ONE << 16)) as u32) << 4; x >>= t; z -= t; } - t = ((x >= (1 << 8)) as usize) << 3; + const { assert!(T::BITS >= 16) }; + t = ((x >= (T::ONE << 8)) as u32) << 3; x >>= t; z -= t; - t = ((x >= (1 << 4)) as usize) << 2; + t = ((x >= (T::ONE << 4)) as u32) << 2; x >>= t; z -= t; - t = ((x >= (1 << 2)) as usize) << 1; + t = ((x >= (T::ONE << 2)) as u32) << 1; x >>= t; z -= t; - t = (x >= (1 << 1)) as usize; + t = (x >= (T::ONE << 1)) as u32; x >>= t; z -= t; // All bits except the LSB are guaranteed to be zero for this final bisection step. // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. - z - x + z as usize - x.cast() } } intrinsics! { #[maybe_use_optimized_c_shim] - #[cfg(any( - target_pointer_width = "16", - target_pointer_width = "32", - target_pointer_width = "64" - ))] - /// Returns the number of leading binary zeros in `x`. 
- pub extern "C" fn __clzsi2(x: usize) -> usize { + /// Returns the number of leading binary zeros in `x` + pub extern "C" fn __clzsi2(x: u32) -> usize { + if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { + leading_zeros_riscv(x) + } else { + leading_zeros_default(x) + } + } + + #[maybe_use_optimized_c_shim] + /// Returns the number of leading binary zeros in `x` + pub extern "C" fn __clzdi2(x: u64) -> usize { if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { - usize_leading_zeros_riscv(x) + leading_zeros_riscv(x) + } else { + leading_zeros_default(x) + } + } + + /// Returns the number of leading binary zeros in `x` + pub extern "C" fn __clzti2(x: u128) -> usize { + let hi = (x >> 64) as u64; + if hi == 0 { + 64 + __clzdi2(x as u64) } else { - usize_leading_zeros_default(x) + __clzdi2(hi) } } } diff --git a/src/int/mod.rs b/src/int/mod.rs index ddbffd740..728ce1d57 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -12,7 +12,6 @@ pub mod shift; pub mod udiv; pub use big::{i256, u256}; -pub use leading_zeros::__clzsi2; public_test_dep! { /// Minimal integer implementations needed on all integer types, including wide integers. 
diff --git a/testcrate/tests/misc.rs b/testcrate/tests/misc.rs index c19923b75..f830a5423 100644 --- a/testcrate/tests/misc.rs +++ b/testcrate/tests/misc.rs @@ -65,31 +65,70 @@ fn fuzz_values() { #[test] fn leading_zeros() { - use compiler_builtins::int::__clzsi2; - use compiler_builtins::int::leading_zeros::{ - usize_leading_zeros_default, usize_leading_zeros_riscv, - }; - fuzz(N, |x: usize| { - let lz = x.leading_zeros() as usize; - let lz0 = __clzsi2(x); - let lz1 = usize_leading_zeros_default(x); - let lz2 = usize_leading_zeros_riscv(x); - if lz0 != lz { - panic!("__clzsi2({}): std: {}, builtins: {}", x, lz, lz0); - } - if lz1 != lz { - panic!( - "usize_leading_zeros_default({}): std: {}, builtins: {}", - x, lz, lz1 - ); - } - if lz2 != lz { - panic!( - "usize_leading_zeros_riscv({}): std: {}, builtins: {}", - x, lz, lz2 - ); - } - }) + use compiler_builtins::int::leading_zeros::{leading_zeros_default, leading_zeros_riscv}; + { + use compiler_builtins::int::leading_zeros::__clzsi2; + fuzz(N, |x: u32| { + if x == 0 { + return; // undefined value for an intrinsic + } + let lz = x.leading_zeros() as usize; + let lz0 = __clzsi2(x); + let lz1 = leading_zeros_default(x); + let lz2 = leading_zeros_riscv(x); + if lz0 != lz { + panic!("__clzsi2({}): std: {}, builtins: {}", x, lz, lz0); + } + if lz1 != lz { + panic!( + "leading_zeros_default({}): std: {}, builtins: {}", + x, lz, lz1 + ); + } + if lz2 != lz { + panic!("leading_zeros_riscv({}): std: {}, builtins: {}", x, lz, lz2); + } + }); + } + + { + use compiler_builtins::int::leading_zeros::__clzdi2; + fuzz(N, |x: u64| { + if x == 0 { + return; // undefined value for an intrinsic + } + let lz = x.leading_zeros() as usize; + let lz0 = __clzdi2(x); + let lz1 = leading_zeros_default(x); + let lz2 = leading_zeros_riscv(x); + if lz0 != lz { + panic!("__clzdi2({}): std: {}, builtins: {}", x, lz, lz0); + } + if lz1 != lz { + panic!( + "leading_zeros_default({}): std: {}, builtins: {}", + x, lz, lz1 + ); + } + if lz2 != lz { + 
panic!("leading_zeros_riscv({}): std: {}, builtins: {}", x, lz, lz2); + } + }); + } + + { + use compiler_builtins::int::leading_zeros::__clzti2; + fuzz(N, |x: u128| { + if x == 0 { + return; // undefined value for an intrinsic + } + let lz = x.leading_zeros() as usize; + let lz0 = __clzti2(x); + if lz0 != lz { + panic!("__clzti2({}): std: {}, builtins: {}", x, lz, lz0); + } + }); + } } #[test] From 29d3466950080607b77c70ca3e7b7f5acf5512e1 Mon Sep 17 00:00:00 2001 From: Andrey Turkin Date: Tue, 16 Jul 2024 14:03:43 +0300 Subject: [PATCH 0783/1459] Never use C version of clz builtins --- build.rs | 9 --------- src/int/leading_zeros.rs | 2 -- 2 files changed, 11 deletions(-) diff --git a/build.rs b/build.rs index 79f994456..b8da7cc56 100644 --- a/build.rs +++ b/build.rs @@ -164,8 +164,6 @@ fn configure_check_cfg() { "__bswapsi2", "__bswapdi2", "__bswapti2", - "__clzsi2", - "__clzdi2", "__divdi3", "__divsi3", "__divmoddi4", @@ -346,8 +344,6 @@ mod c { ("__absvsi2", "absvsi2.c"), ("__addvdi3", "addvdi3.c"), ("__addvsi3", "addvsi3.c"), - ("__clzdi2", "clzdi2.c"), - ("__clzsi2", "clzsi2.c"), ("__cmpdi2", "cmpdi2.c"), ("__ctzdi2", "ctzdi2.c"), ("__ctzsi2", "ctzsi2.c"), @@ -435,8 +431,6 @@ mod c { ("__aeabi_frsub", "arm/aeabi_frsub.c"), ("__bswapdi2", "arm/bswapdi2.S"), ("__bswapsi2", "arm/bswapsi2.S"), - ("__clzdi2", "arm/clzdi2.S"), - ("__clzsi2", "arm/clzsi2.S"), ("__divmodsi4", "arm/divmodsi4.S"), ("__divsi3", "arm/divsi3.S"), ("__modsi3", "arm/modsi3.S"), @@ -572,9 +566,6 @@ mod c { } } sources.remove(&to_remove); - - // But use some generic implementations where possible - sources.extend(&[("__clzdi2", "clzdi2.c"), ("__clzsi2", "clzsi2.c")]) } if llvm_target[0] == "thumbv7m" || llvm_target[0] == "thumbv7em" { diff --git a/src/int/leading_zeros.rs b/src/int/leading_zeros.rs index 68ac55ac7..1fee9fcf5 100644 --- a/src/int/leading_zeros.rs +++ b/src/int/leading_zeros.rs @@ -135,7 +135,6 @@ pub(crate) fn leading_zeros_riscv>(x: T) -> usize { } intrinsics! 
{ - #[maybe_use_optimized_c_shim] /// Returns the number of leading binary zeros in `x` pub extern "C" fn __clzsi2(x: u32) -> usize { if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { @@ -145,7 +144,6 @@ intrinsics! { } } - #[maybe_use_optimized_c_shim] /// Returns the number of leading binary zeros in `x` pub extern "C" fn __clzdi2(x: u64) -> usize { if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { From 70f566b6401afacb37cef7f51d05b0066b4ea3d2 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 25 Jun 2024 02:34:17 -0400 Subject: [PATCH 0784/1459] Remove a broken link in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6ff0de190..b5f061706 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ features = ["c"] 5. Add the intrinsic to `examples/intrinsics.rs` to verify it can be linked on all targets. 6. Send a Pull Request (PR). -7. Once the PR passes our extensive [testing infrastructure][4], we'll merge it! +7. Once the PR passes our extensive testing infrastructure, we'll merge it! 8. 
Celebrate :tada: [1]: https://github.com/rust-lang/llvm-project/tree/9e3de9490ff580cd484fbfa2908292b4838d56e7/compiler-rt/test/builtins/Unit From 8f0543579a8a1bbe0fe71178859c73b092beff83 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 25 Jun 2024 02:51:45 -0400 Subject: [PATCH 0785/1459] Move IBM extended double to the unneeded section --- README.md | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index b5f061706..b33a7d6f7 100644 --- a/README.md +++ b/README.md @@ -257,10 +257,6 @@ These builtins are needed to support `f16` and `f128`, which are in the process - [ ] floatunsitf.c - [x] multf3.c - [ ] powitf2.c -- [ ] ppc/fixtfdi.c -- [ ] ppc/fixunstfdi.c -- [ ] ppc/floatditf.c -- [ ] ppc/floatunditf.c - [x] subtf3.c - [x] truncdfhf2.c - [x] truncsfhf2.c @@ -270,12 +266,9 @@ These builtins are needed to support `f16` and `f128`, which are in the process ## Unimplemented functions -These builtins involve floating-point types ("`f80`" and complex numbers) that are not supported by Rust. +These builtins are for x87 `f80` floating-point numbers that are not supported +by Rust. -- ~~divdc3.c~~ -- ~~divsc3.c~~ -- ~~divtc3.c~~ -- ~~divxc3.c~~ - ~~fixunsxfdi.c~~ - ~~fixunsxfsi.c~~ - ~~fixunsxfti.c~~ @@ -287,19 +280,35 @@ These builtins involve floating-point types ("`f80`" and complex numbers) that a - ~~floatuntixf.c~~ - ~~i386/floatdixf.S~~ - ~~i386/floatundixf.S~~ -- ~~muldc3.c~~ -- ~~mulsc3.c~~ -- ~~multc3.c~~ -- ~~mulxc3.c~~ -- ~~powixf2.c~~ +- ~~x86_64/floatdixf.c~~ +- ~~x86_64/floatundixf.S~~ + +These builtins are for IBM "extended double" non-IEEE 128-bit floating-point +numbers. 
+ - ~~ppc/divtc3.c~~ +- ~~ppc/fixtfdi.c~~ +- ~~ppc/fixunstfdi.c~~ +- ~~ppc/floatditf.c~~ +- ~~ppc/floatunditf.c~~ - ~~ppc/gcc_qadd.c~~ - ~~ppc/gcc_qdiv.c~~ - ~~ppc/gcc_qmul.c~~ - ~~ppc/gcc_qsub.c~~ - ~~ppc/multc3.c~~ -- ~~x86_64/floatdixf.c~~ -- ~~x86_64/floatundixf.S~~ + +These builtins involve complex floating-point types that are not supported by +Rust. + +- ~~divdc3.c~~ +- ~~divsc3.c~~ +- ~~divtc3.c~~ +- ~~divxc3.c~~ +- ~~muldc3.c~~ +- ~~mulsc3.c~~ +- ~~multc3.c~~ +- ~~mulxc3.c~~ +- ~~powixf2.c~~ These builtins are never called by LLVM. From 686511e9aca2b1137b1727df4aac899bcb71bafd Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 25 Jun 2024 03:56:21 -0400 Subject: [PATCH 0786/1459] Add missing symbols from compiler-rt to the README --- README.md | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index b33a7d6f7..2d7aa3378 100644 --- a/README.md +++ b/README.md @@ -16,13 +16,13 @@ you can use this crate to get those intrinsics and solve the linker errors. To do that, add this crate somewhere in the dependency graph of the crate you are building: -``` toml +```toml # Cargo.toml [dependencies] compiler_builtins = { git = "https://github.com/rust-lang/compiler-builtins" } ``` -``` rust +```rust extern crate compiler_builtins; // ... @@ -91,7 +91,7 @@ obtained with the following: ```sh curl -L -o rustc-llvm-18.0.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/18.0-2024-02-13.tar.gz tar xzf rustc-llvm-18.0.tar.gz --strip-components 1 llvm-project-rustc-18.0-2024-02-13/compiler-rt -```` +``` Local targets may also be tested with `./ci/run.sh [target]`. @@ -100,9 +100,11 @@ rely on CI. ## Progress +- [x] aarch64/chkstk.S - [x] adddf3.c - [x] addsf3.c - [x] arm/adddf3vfp.S +- [x] arm/addsf3.S - [x] arm/addsf3vfp.S - [x] arm/aeabi_dcmp.S - [x] arm/aeabi_fcmp.S @@ -113,6 +115,7 @@ rely on CI. 
- [x] arm/aeabi_memset.S - [x] arm/aeabi_uidivmod.S - [x] arm/aeabi_uldivmod.S +- [ ] arm/chkstk.S - [x] arm/divdf3vfp.S - [ ] arm/divmodsi4.S (generic version is done) - [x] arm/divsf3vfp.S @@ -154,6 +157,12 @@ rely on CI. - [ ] arm/unordsf2vfp.S - [x] ashldi3.c - [x] ashrdi3.c +- [ ] avr/divmodhi4.S +- [ ] avr/divmodqi4.S +- [ ] avr/mulhi3.S +- [ ] avr/mulqi3.S +- [ ] avr/udivmodhi4.S +- [ ] avr/udivmodqi4.S - [x] bswapdi2.c - [x] bswapsi2.c - [x] bswapti2.c @@ -166,6 +175,7 @@ rely on CI. - [x] divdi3.c - [x] divmoddi4.c - [x] divmodsi4.c +- [x] divmodti4.c - [x] divsf3.c - [x] divsi3.c - [x] extendsfdf2.c @@ -204,6 +214,8 @@ rely on CI. - [x] mulsf3.c - [x] powidf2.c - [x] powisf2.c +- [ ] riscv/muldi3.S +- [ ] riscv/mulsi3.S - [x] subdf3.c - [x] subsf3.c - [x] truncdfsf2.c @@ -236,7 +248,8 @@ These builtins are needed to support 128-bit integers. - [x] udivti3.c - [x] umodti3.c -These builtins are needed to support `f16` and `f128`, which are in the process of being added to Rust. +These builtins are needed to support `f16` and `f128`, which are in the process +of being added to Rust. 
- [x] addtf3.c - [x] comparetf2.c @@ -253,8 +266,10 @@ These builtins are needed to support `f16` and `f128`, which are in the process - [x] fixunstfti.c - [ ] floatditf.c - [ ] floatsitf.c +- [ ] floattitf.c - [ ] floatunditf.c - [ ] floatunsitf.c +- [ ] floatuntitf.c - [x] multf3.c - [ ] powitf2.c - [x] subtf3.c @@ -264,11 +279,42 @@ These builtins are needed to support `f16` and `f128`, which are in the process - [x] trunctfhf2.c - [x] trunctfsf2.c + +These builtins are used by the Hexagon DSP + +- [ ] hexagon/common_entry_exit_abi1.S +- [ ] hexagon/common_entry_exit_abi2.S +- [ ] hexagon/common_entry_exit_legacy.S +- [x] hexagon/dfaddsub.S~~ +- [x] hexagon/dfdiv.S~~ +- [x] hexagon/dffma.S~~ +- [x] hexagon/dfminmax.S~~ +- [x] hexagon/dfmul.S~~ +- [x] hexagon/dfsqrt.S~~ +- [x] hexagon/divdi3.S~~ +- [x] hexagon/divsi3.S~~ +- [x] hexagon/fastmath2_dlib_asm.S~~ +- [x] hexagon/fastmath2_ldlib_asm.S~~ +- [x] hexagon/fastmath_dlib_asm.S~~ +- [x] hexagon/memcpy_forward_vp4cp4n2.S~~ +- [x] hexagon/memcpy_likely_aligned.S~~ +- [x] hexagon/moddi3.S~~ +- [x] hexagon/modsi3.S~~ +- [x] hexagon/sfdiv_opt.S~~ +- [x] hexagon/sfsqrt_opt.S~~ +- [x] hexagon/udivdi3.S~~ +- [x] hexagon/udivmoddi4.S~~ +- [x] hexagon/udivmodsi4.S~~ +- [x] hexagon/udivsi3.S~~ +- [x] hexagon/umoddi3.S~~ +- [x] hexagon/umodsi3.S~~ + ## Unimplemented functions These builtins are for x87 `f80` floating-point numbers that are not supported by Rust. +- ~~extendxftf2.c~~ - ~~fixunsxfdi.c~~ - ~~fixunsxfsi.c~~ - ~~fixunsxfti.c~~ @@ -288,8 +334,11 @@ numbers. - ~~ppc/divtc3.c~~ - ~~ppc/fixtfdi.c~~ +- ~~ppc/fixtfti.c~~ - ~~ppc/fixunstfdi.c~~ +- ~~ppc/fixunstfti.c~~ - ~~ppc/floatditf.c~~ +- ~~ppc/floattitf.c~~ - ~~ppc/floatunditf.c~~ - ~~ppc/gcc_qadd.c~~ - ~~ppc/gcc_qdiv.c~~ @@ -297,6 +346,13 @@ numbers. - ~~ppc/gcc_qsub.c~~ - ~~ppc/multc3.c~~ +These builtins are for 16-bit brain floating-point numbers that are not +supported by Rust. 
+ +- ~~truncdfbf2.c~~ +- ~~truncsfbf2.c~~ +- ~~trunctfxf2.c~~ + These builtins involve complex floating-point types that are not supported by Rust. @@ -342,6 +398,7 @@ These builtins are never called by LLVM. - ~~ctzdi2.c~~ - ~~ctzsi2.c~~ - ~~ctzti2.c~~ +- ~~ffssi2.c~~ - ~~ffsdi2.c~~ - this is [called by gcc][jemalloc-fail] though! - ~~ffsti2.c~~ - ~~mulvdi3.c~~ @@ -404,13 +461,34 @@ Rust only exposes atomic types on platforms that support them, and therefore doe Miscellaneous functionality that is not used by Rust. +- ~~aarch64/fp_mode.c~~ +- ~~aarch64/lse.S~~ (LSE atomics) +- ~~aarch64/sme-abi-init.c~~ (matrix extension) +- ~~aarch64/sme-abi.S~~ (matrix extension) +- ~~aarch64/sme-libc-routines.c~~ (matrix extension) - ~~apple_versioning.c~~ +- ~~arm/fp_mode.c~~ +- ~~avr/exit.S~~ - ~~clear_cache.c~~ +- ~~cpu_model/aarch64.c~~ +- ~~cpu_model/x86.c~~ +- ~~crtbegin.c~~ +- ~~crtend.c~~ - ~~emutls.c~~ - ~~enable_execute_stack.c~~ - ~~eprintf.c~~ +- ~~fp_mode.c~~ (float exception handling) - ~~gcc_personality_v0.c~~ +- ~~i386/fp_mode.c~~ +- ~~int_util.c~~ +- ~~loongarch/fp_mode.c~~ +- ~~os_version_check.c~~ +- ~~riscv/fp_mode.c~~ +- ~~riscv/restore.S~~ (callee-saved registers) +- ~~riscv/save.S~~ (callee-saved registers) - ~~trampoline_setup.c~~ +- ~~ve/grow_stack.S~~ +- ~~ve/grow_stack_align.S~~ Floating-point implementations of builtins that are only called from soft-float code. It would be better to simply use the generic soft-float versions in this case. 
From 133705eaa9d7b38a317a19cc91daa3179229cc9e Mon Sep 17 00:00:00 2001 From: Andrey Turkin Date: Sat, 22 Jun 2024 00:01:52 +0300 Subject: [PATCH 0787/1459] Implement __ctz*i2 intrinsics --- README.md | 6 ++-- build.rs | 3 -- src/int/mod.rs | 1 + src/int/trailing_zeros.rs | 64 +++++++++++++++++++++++++++++++++++++++ testcrate/tests/misc.rs | 43 ++++++++++++++++++++++++++ 5 files changed, 111 insertions(+), 6 deletions(-) create mode 100644 src/int/trailing_zeros.rs diff --git a/README.md b/README.md index 2d7aa3378..51bef5e2e 100644 --- a/README.md +++ b/README.md @@ -171,6 +171,9 @@ rely on CI. - [x] clzti2.c - [x] comparedf2.c - [x] comparesf2.c +- [x] ctzdi2.c +- [x] ctzsi2.c +- [x] ctzti2.c - [x] divdf3.c - [x] divdi3.c - [x] divmoddi4.c @@ -395,9 +398,6 @@ These builtins are never called by LLVM. - ~~arm/switchu8.S~~ - ~~cmpdi2.c~~ - ~~cmpti2.c~~ -- ~~ctzdi2.c~~ -- ~~ctzsi2.c~~ -- ~~ctzti2.c~~ - ~~ffssi2.c~~ - ~~ffsdi2.c~~ - this is [called by gcc][jemalloc-fail] though! - ~~ffsti2.c~~ diff --git a/build.rs b/build.rs index b8da7cc56..d267d3cc4 100644 --- a/build.rs +++ b/build.rs @@ -345,8 +345,6 @@ mod c { ("__addvdi3", "addvdi3.c"), ("__addvsi3", "addvsi3.c"), ("__cmpdi2", "cmpdi2.c"), - ("__ctzdi2", "ctzdi2.c"), - ("__ctzsi2", "ctzsi2.c"), ("__int_util", "int_util.c"), ("__mulvdi3", "mulvdi3.c"), ("__mulvsi3", "mulvsi3.c"), @@ -380,7 +378,6 @@ mod c { ("__absvti2", "absvti2.c"), ("__addvti3", "addvti3.c"), ("__cmpti2", "cmpti2.c"), - ("__ctzti2", "ctzti2.c"), ("__ffsti2", "ffsti2.c"), ("__mulvti3", "mulvti3.c"), ("__negti2", "negti2.c"), diff --git a/src/int/mod.rs b/src/int/mod.rs index 728ce1d57..5f56c6b6e 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -9,6 +9,7 @@ pub mod leading_zeros; pub mod mul; pub mod sdiv; pub mod shift; +pub mod trailing_zeros; pub mod udiv; pub use big::{i256, u256}; diff --git a/src/int/trailing_zeros.rs b/src/int/trailing_zeros.rs new file mode 100644 index 000000000..cea366b07 --- /dev/null +++ 
b/src/int/trailing_zeros.rs @@ -0,0 +1,64 @@ +use crate::int::{CastInto, Int}; + +public_test_dep! { +/// Returns number of trailing binary zeros in `x`. +#[allow(dead_code)] +pub(crate) fn trailing_zeros + CastInto + CastInto>(x: T) -> usize { + let mut x = x; + let mut r: u32 = 0; + let mut t: u32; + + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + r += ((CastInto::::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0 + x >>= r; // remove 32 zero bits + } + + if T::BITS >= 32 { + t = ((CastInto::::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0 + r += t; + x >>= t; // x = [0 - 0xFFFF] + higher garbage bits + } + + const { assert!(T::BITS >= 16) }; + t = ((CastInto::::cast(x) == 0) as u32) << 3; + x >>= t; // x = [0 - 0xFF] + higher garbage bits + r += t; + + let mut x: u8 = x.cast(); + + t = (((x & 0x0F) == 0) as u32) << 2; + x >>= t; // x = [0 - 0xF] + higher garbage bits + r += t; + + t = (((x & 0x3) == 0) as u32) << 1; + x >>= t; // x = [0 - 0x3] + higher garbage bits + r += t; + + x &= 3; + + r as usize + ((2 - (x >> 1) as usize) & (((x & 1) == 0) as usize).wrapping_neg()) +} +} + +intrinsics! { + /// Returns the number of trailing binary zeros in `x` (32 bit version). + pub extern "C" fn __ctzsi2(x: u32) -> usize { + trailing_zeros(x) + } + + /// Returns the number of trailing binary zeros in `x` (64 bit version). + pub extern "C" fn __ctzdi2(x: u64) -> usize { + trailing_zeros(x) + } + + /// Returns the number of trailing binary zeros in `x` (128 bit version). 
+ pub extern "C" fn __ctzti2(x: u128) -> usize { + let lo = x as u64; + if lo == 0 { + 64 + __ctzdi2((x >> 64) as u64) + } else { + __ctzdi2(lo) + } + } +} diff --git a/testcrate/tests/misc.rs b/testcrate/tests/misc.rs index f830a5423..f9431915b 100644 --- a/testcrate/tests/misc.rs +++ b/testcrate/tests/misc.rs @@ -131,6 +131,49 @@ fn leading_zeros() { } } +#[test] +fn trailing_zeros() { + use compiler_builtins::int::trailing_zeros::{__ctzdi2, __ctzsi2, __ctzti2, trailing_zeros}; + fuzz(N, |x: u32| { + if x == 0 { + return; // undefined value for an intrinsic + } + let tz = x.trailing_zeros() as usize; + let tz0 = __ctzsi2(x); + let tz1 = trailing_zeros(x); + if tz0 != tz { + panic!("__ctzsi2({}): std: {}, builtins: {}", x, tz, tz0); + } + if tz1 != tz { + panic!("trailing_zeros({}): std: {}, builtins: {}", x, tz, tz1); + } + }); + fuzz(N, |x: u64| { + if x == 0 { + return; // undefined value for an intrinsic + } + let tz = x.trailing_zeros() as usize; + let tz0 = __ctzdi2(x); + let tz1 = trailing_zeros(x); + if tz0 != tz { + panic!("__ctzdi2({}): std: {}, builtins: {}", x, tz, tz0); + } + if tz1 != tz { + panic!("trailing_zeros({}): std: {}, builtins: {}", x, tz, tz1); + } + }); + fuzz(N, |x: u128| { + if x == 0 { + return; // undefined value for an intrinsic + } + let tz = x.trailing_zeros() as usize; + let tz0 = __ctzti2(x); + if tz0 != tz { + panic!("__ctzti2({}): std: {}, builtins: {}", x, tz, tz0); + } + }); +} + #[test] #[cfg(not(target_arch = "avr"))] fn bswap() { From 237f65a57313afdca531308b70c1d5224b239a36 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 18 Jul 2024 00:29:04 -0500 Subject: [PATCH 0788/1459] Add `release-plz` for automatic releases. This is what `cc-rs` is using and should create a release PR whenever a change to `master` is made. If the branch is merged, it should publish the new version. Includes configuration to disable semver checks and not keep a changelog since this is an implementation detail. 
--- .github/workflows/publish.yml | 30 ++++++++++++++++++++++++++++++ .release-plz.toml | 3 +++ crates/panic-handler/Cargo.toml | 1 + testcrate/Cargo.toml | 1 + 4 files changed, 35 insertions(+) create mode 100644 .github/workflows/publish.yml create mode 100644 .release-plz.toml diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 000000000..d568f3757 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,30 @@ +name: Release-plz + +permissions: + pull-requests: write + contents: write + +on: + push: + branches: + - master + +jobs: + release-plz: + name: Release-plz + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: true + - name: Install Rust (rustup) + run: rustup update nightly --no-self-update && rustup default nightly + - name: Publish `libm` as part of builtins, rather than its own crate + run: rm libm/Cargo.toml + - name: Run release-plz + uses: MarcoIeni/release-plz-action@v0.5 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} diff --git a/.release-plz.toml b/.release-plz.toml new file mode 100644 index 000000000..affc10265 --- /dev/null +++ b/.release-plz.toml @@ -0,0 +1,3 @@ +[workspace] +changelog_update = false +semver_check = false diff --git a/crates/panic-handler/Cargo.toml b/crates/panic-handler/Cargo.toml index 4fb81eb82..2ad858409 100644 --- a/crates/panic-handler/Cargo.toml +++ b/crates/panic-handler/Cargo.toml @@ -3,5 +3,6 @@ name = "panic-handler" version = "0.1.0" authors = ["Alex Crichton "] edition = "2021" +publish = false [dependencies] diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index 6b5c4cf48..e39c35b67 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -3,6 +3,7 @@ name = "testcrate" version = "0.1.0" authors = ["Alex Crichton "] edition = "2021" +publish = false [lib] test = false From 
7516ff24839c3d09a59f6c9ff9f9fa4b688e17b0 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 28 Jul 2024 17:00:41 -0400 Subject: [PATCH 0789/1459] Set allow_dirty to release-plz This should resolve the issue in --- .release-plz.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.release-plz.toml b/.release-plz.toml index affc10265..fce19d157 100644 --- a/.release-plz.toml +++ b/.release-plz.toml @@ -1,3 +1,8 @@ [workspace] changelog_update = false semver_check = false + +# As part of the release process, we delete `libm/Cargo.toml`. Since +# this is only run in CI, we shouldn't need to worry about it. +allow_dirty = true +publish_allow_dirty = true From ed743643c44d2c9c16636a2a2d84b6fea7cb2d0b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 28 Jul 2024 21:08:01 +0000 Subject: [PATCH 0790/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c8d78ec3e..706031bbf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.113" +version = "0.1.114" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From b7af6078c9e9ffa43041124cab5dddb6a1ac0706 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 2 Aug 2024 13:41:08 -0400 Subject: [PATCH 0791/1459] Switch to a target structure in build.rs --- build.rs | 118 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 78 insertions(+), 40 deletions(-) diff --git a/build.rs b/build.rs index d267d3cc4..80454815f 100644 --- a/build.rs +++ b/build.rs @@ -1,10 +1,50 @@ use std::{collections::BTreeMap, env, path::PathBuf, sync::atomic::Ordering}; +#[allow(dead_code)] +struct Target { + triple: String, + os: String, + arch: String, + vendor: String, + env: String, + pointer_width: u8, + little_endian: bool, + features: 
Vec, +} + +impl Target { + fn from_env() -> Self { + let little_endian = match env::var("CARGO_CFG_TARGET_ENDIAN").unwrap().as_str() { + "little" => true, + "big" => false, + x => panic!("unknown endian {x}"), + }; + + Self { + triple: env::var("TARGET").unwrap(), + os: env::var("CARGO_CFG_TARGET_OS").unwrap(), + arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(), + vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), + env: env::var("CARGO_CFG_TARGET_ENV").unwrap(), + pointer_width: env::var("CARGO_CFG_TARGET_POINTER_WIDTH") + .unwrap() + .parse() + .unwrap(), + little_endian, + features: env::var("CARGO_CFG_TARGET_FEATURE") + .unwrap_or_default() + .split(",") + .map(ToOwned::to_owned) + .collect(), + } + } +} + fn main() { println!("cargo:rerun-if-changed=build.rs"); configure_check_cfg(); - let target = env::var("TARGET").unwrap(); + let target = Target::from_env(); let cwd = env::current_dir().unwrap(); println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display()); @@ -14,12 +54,12 @@ fn main() { println!("cargo:rustc-cfg=feature=\"unstable\""); // Emscripten's runtime includes all the builtins - if target.contains("emscripten") { + if target.env == "emscripten" { return; } // OpenBSD provides compiler_rt by default, use it instead of rebuilding it from source - if target.contains("openbsd") { + if target.os == "openbsd" { println!("cargo:rustc-link-search=native=/usr/lib"); println!("cargo:rustc-link-lib=compiler_rt"); return; @@ -27,22 +67,22 @@ fn main() { // Forcibly enable memory intrinsics on wasm & SGX as we don't have a libc to // provide them. 
- if (target.contains("wasm") && !target.contains("wasi")) - || (target.contains("sgx") && target.contains("fortanix")) - || target.contains("-none") - || target.contains("nvptx") - || target.contains("uefi") - || target.contains("xous") + if (target.triple.contains("wasm") && !target.triple.contains("wasi")) + || (target.triple.contains("sgx") && target.triple.contains("fortanix")) + || target.triple.contains("-none") + || target.triple.contains("nvptx") + || target.triple.contains("uefi") + || target.triple.contains("xous") { println!("cargo:rustc-cfg=feature=\"mem\""); } // These targets have hardware unaligned access support. println!("cargo::rustc-check-cfg=cfg(feature, values(\"mem-unaligned\"))"); - if target.contains("x86_64") - || target.contains("i686") - || target.contains("aarch64") - || target.contains("bpf") + if target.arch.contains("x86_64") + || target.arch.contains("i686") + || target.arch.contains("aarch64") + || target.arch.contains("bpf") { println!("cargo:rustc-cfg=feature=\"mem-unaligned\""); } @@ -50,7 +90,7 @@ fn main() { // NOTE we are going to assume that llvm-target, what determines our codegen option, matches the // target triple. This is usually correct for our built-in targets but can break in presence of // custom targets, which can have arbitrary names. - let llvm_target = target.split('-').collect::>(); + let llvm_target = target.triple.split('-').collect::>(); // Build missing intrinsics from compiler-rt C source code. 
If we're // mangling names though we assume that we're also in test mode so we don't @@ -60,7 +100,7 @@ fn main() { // Don't use a C compiler for these targets: // // * nvptx - everything is bitcode, not compatible with mixed C/Rust - if !target.contains("nvptx") { + if !target.arch.contains("nvptx") { #[cfg(feature = "c")] c::compile(&llvm_target, &target); } @@ -86,7 +126,7 @@ fn main() { println!("cargo::rustc-check-cfg=cfg(kernel_user_helpers)"); if llvm_target[0] == "armv4t" || llvm_target[0] == "armv5te" - || target == "arm-linux-androideabi" + || target.triple == "arm-linux-androideabi" { println!("cargo:rustc-cfg=kernel_user_helpers") } @@ -227,6 +267,8 @@ mod c { use std::io::Write; use std::path::{Path, PathBuf}; + use super::Target; + struct Sources { // SYMBOL -> PATH TO SOURCE map: BTreeMap<&'static str, &'static str>, @@ -267,11 +309,7 @@ mod c { } /// Compile intrinsics from the compiler-rt C source code - pub fn compile(llvm_target: &[&str], target: &String) { - let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); - let target_env = env::var("CARGO_CFG_TARGET_ENV").unwrap(); - let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap(); - let target_vendor = env::var("CARGO_CFG_TARGET_VENDOR").unwrap(); + pub fn compile(llvm_target: &[&str], target: &Target) { let mut consider_float_intrinsics = true; let cfg = &mut cc::Build::new(); @@ -280,8 +318,8 @@ mod c { // // Therefore, evaluate if those flags are present and set a boolean that causes any // compiler-rt intrinsics that contain floating point source to be excluded for this target. 
- if target_arch == "aarch64" { - let cflags_key = String::from("CFLAGS_") + &(target.to_owned().replace("-", "_")); + if target.arch == "aarch64" { + let cflags_key = String::from("CFLAGS_") + &(target.triple.replace("-", "_")); if let Ok(cflags_value) = env::var(cflags_key) { if cflags_value.contains("+nofp") || cflags_value.contains("+nosimd") { consider_float_intrinsics = false; @@ -299,7 +337,7 @@ mod c { cfg.warnings(false); - if target_env == "msvc" { + if target.env == "msvc" { // Don't pull in extra libraries on MSVC cfg.flag("/Zl"); @@ -328,7 +366,7 @@ mod c { // at odds with compiling with `-ffreestanding`, as the header // may be incompatible or not present. Create a minimal stub // header to use instead. - if target_os == "uefi" { + if target.os == "uefi" { let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); let include_dir = out_dir.join("include"); if !include_dir.exists() { @@ -373,7 +411,7 @@ mod c { // On iOS and 32-bit OSX these are all just empty intrinsics, no need to // include them. 
- if target_vendor != "apple" || target_arch != "x86" { + if target.vendor != "apple" || target.arch != "x86" { sources.extend(&[ ("__absvti2", "absvti2.c"), ("__addvti3", "addvti3.c"), @@ -392,7 +430,7 @@ mod c { } } - if target_vendor == "apple" { + if target.vendor == "apple" { sources.extend(&[ ("atomic_flag_clear", "atomic_flag_clear.c"), ("atomic_flag_clear_explicit", "atomic_flag_clear_explicit.c"), @@ -406,8 +444,8 @@ mod c { ]); } - if target_env != "msvc" { - if target_arch == "x86" { + if target.env != "msvc" { + if target.arch == "x86" { sources.extend(&[ ("__ashldi3", "i386/ashldi3.S"), ("__ashrdi3", "i386/ashrdi3.S"), @@ -421,7 +459,7 @@ mod c { } } - if target_arch == "arm" && target_vendor != "apple" && target_env != "msvc" { + if target.arch == "arm" && target.vendor != "apple" && target.env != "msvc" { sources.extend(&[ ("__aeabi_div0", "arm/aeabi_div0.c"), ("__aeabi_drsub", "arm/aeabi_drsub.c"), @@ -441,7 +479,7 @@ mod c { ("__umodsi3", "arm/umodsi3.S"), ]); - if target_os == "freebsd" { + if target.os == "freebsd" { sources.extend(&[("__clear_cache", "clear_cache.c")]); } @@ -513,7 +551,7 @@ mod c { ]); } - if (target_arch == "aarch64" || target_arch == "arm64ec") && consider_float_intrinsics { + if (target.arch == "aarch64" || target.arch == "arm64ec") && consider_float_intrinsics { sources.extend(&[ ("__comparetf2", "comparetf2.c"), ("__floatditf", "floatditf.c"), @@ -526,16 +564,16 @@ mod c { ("__fe_raise_inexact", "fp_mode.c"), ]); - if target_os != "windows" { + if target.os != "windows" { sources.extend(&[("__multc3", "multc3.c")]); } } - if target_arch == "mips" || target_arch == "riscv32" || target_arch == "riscv64" { + if target.arch == "mips" || target.arch == "riscv32" || target.arch == "riscv64" { sources.extend(&[("__bswapsi2", "bswapsi2.c")]); } - if target_arch == "mips64" { + if target.arch == "mips64" { sources.extend(&[ ("__netf2", "comparetf2.c"), ("__floatsitf", "floatsitf.c"), @@ -544,7 +582,7 @@ mod c { ]); } - if 
target_arch == "loongarch64" { + if target.arch == "loongarch64" { sources.extend(&[ ("__netf2", "comparetf2.c"), ("__floatsitf", "floatsitf.c"), @@ -554,7 +592,7 @@ mod c { } // Remove the assembly implementations that won't compile for the target - if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" || target_os == "uefi" + if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" || target.os == "uefi" { let mut to_remove = Vec::new(); for (k, v) in sources.map.iter() { @@ -570,7 +608,7 @@ mod c { } // Android uses emulated TLS so we need a runtime support function. - if target_os == "android" { + if target.os == "android" { sources.extend(&[("__emutls_get_address", "emutls.c")]); // Work around a bug in the NDK headers (fixed in @@ -580,7 +618,7 @@ mod c { } // OpenHarmony also uses emulated TLS. - if target_env == "ohos" { + if target.env == "ohos" { sources.extend(&[("__emutls_get_address", "emutls.c")]); } @@ -607,7 +645,7 @@ mod c { // sets of flags to the same source file. // Note: Out-of-line aarch64 atomics are not supported by the msvc toolchain (#430). let src_dir = root.join("lib/builtins"); - if target_arch == "aarch64" && target_env != "msvc" { + if target.arch == "aarch64" && target.env != "msvc" { // See below for why we're building these as separate libraries. build_aarch64_out_of_line_atomics_libraries(&src_dir, cfg); From e3de4abc858e11dc6bc6d38928f3056436482732 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 2 Aug 2024 17:42:36 -0400 Subject: [PATCH 0792/1459] Add back remaining config on `arm_aeabi_alias` Intrinsics marked with `arm_aeabi_alias = ...` were having the rest of their attributes eaten. Add them back. --- src/macros.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/macros.rs b/src/macros.rs index fb14660af..42c83ee55 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -290,6 +290,7 @@ macro_rules! 
intrinsics { $($rest:tt)* ) => ( #[cfg(target_arch = "arm")] + $(#[$($attr)*])* pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } @@ -298,6 +299,7 @@ macro_rules! intrinsics { mod $name { #[no_mangle] #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] + $(#[$($attr)*])* extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } @@ -307,6 +309,7 @@ macro_rules! intrinsics { mod $alias { #[no_mangle] #[cfg_attr(all(not(windows), not(target_vendor="apple")), linkage = "weak")] + $(#[$($attr)*])* extern "aapcs" fn $alias( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } From d91dcf46423a176cc7c716838ef489171025e591 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 2 Aug 2024 13:48:13 -0400 Subject: [PATCH 0793/1459] Configure which platforms get `f16` and `f128` enabled by default By moving the logic for which platforms get symbols to `compiler_builtins` rather than rust-lang/rust, we can control where symbols get enabled without relying on Cargo features. Using Cargo features turned out to be a problem in [1]. This will help resolve errors like [2]. [1]: https://github.com/rust-lang/rust/issues/128358 [2]: https://github.com/rust-lang/rust/issues/128401 --- build.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/build.rs b/build.rs index 80454815f..749a08a5a 100644 --- a/build.rs +++ b/build.rs @@ -42,11 +42,12 @@ impl Target { fn main() { println!("cargo:rerun-if-changed=build.rs"); - configure_check_cfg(); - let target = Target::from_env(); let cwd = env::current_dir().unwrap(); + configure_check_cfg(); + configure_f16_f128(&target); + println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display()); // Activate libm's unstable features to make full use of Nightly. 
@@ -259,6 +260,47 @@ fn configure_check_cfg() { println!("cargo::rustc-check-cfg=cfg(assert_no_panic)"); } +/// Configure whether or not `f16` and `f128` support should be enabled. +fn configure_f16_f128(target: &Target) { + // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means + // that the backend will not crash when using these types. This does not mean that the + // backend does the right thing, or that the platform doesn't have ABI bugs. + // + // We do this here rather than in `rust-lang/rust` because configuring via cargo features is + // not straightforward. + // + // Original source of this list: + // + let (f16_ok, f128_ok) = match target.arch.as_str() { + // `f16` and `f128` both crash + "arm64ec" => (false, false), + // `f16` crashes + "s390x" => (false, true), + // `f128` crashes + "mips64" | "mips64r6" => (true, false), + // `f128` crashes + "powerpc64" if &target.os == "aix" => (true, false), + // `f128` crashes + "sparc" | "sparcv9" => (true, false), + // Most everything else works as of LLVM 19 + _ => (true, true), + }; + + // If the feature is set, disable these types. + let disable_both = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some(); + + println!("cargo::rustc-check-cfg=cfg(f16_enabled)"); + println!("cargo::rustc-check-cfg=cfg(f128_enabled)"); + + if f16_ok && !disable_both { + println!("cargo::rustc-cfg=f16_enabled"); + } + + if f128_ok && !disable_both { + println!("cargo::rustc-cfg=f128_enabled"); + } +} + #[cfg(feature = "c")] mod c { use std::collections::{BTreeMap, HashSet}; From ce16d53a2eb7556942d30a504b50aa5d48e2c551 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 2 Aug 2024 15:58:29 -0400 Subject: [PATCH 0794/1459] Make use of new `f16` and `f128` config options Change from `not(feature = "no-f16-f128")` to `f16_enabled` or `f128_enabled`, as applicable.
--- src/float/add.rs | 2 +- src/float/cmp.rs | 2 +- src/float/conv.rs | 12 ++++++------ src/float/extend.rs | 6 +++++- src/float/mod.rs | 4 ++-- src/float/mul.rs | 2 +- src/float/sub.rs | 2 +- src/float/trunc.rs | 7 ++++++- src/lib.rs | 4 ++-- 9 files changed, 25 insertions(+), 16 deletions(-) diff --git a/src/float/add.rs b/src/float/add.rs index 7e8529f3e..03ed131af 100644 --- a/src/float/add.rs +++ b/src/float/add.rs @@ -204,7 +204,7 @@ intrinsics! { } #[ppc_alias = __addkf3] - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] pub extern "C" fn __addtf3(a: f128, b: f128) -> f128 { add(a, b) } diff --git a/src/float/cmp.rs b/src/float/cmp.rs index 5c431304c..1901ca4b3 100644 --- a/src/float/cmp.rs +++ b/src/float/cmp.rs @@ -172,7 +172,7 @@ intrinsics! { } } -#[cfg(not(feature = "no-f16-f128",))] +#[cfg(f128_enabled)] intrinsics! { #[avr_skip] #[ppc_alias = __lekf2] diff --git a/src/float/conv.rs b/src/float/conv.rs index 52119f3e8..d275f982b 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -263,19 +263,19 @@ intrinsics! { } #[ppc_alias = __fixunskfsi] - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] pub extern "C" fn __fixunstfsi(f: f128) -> u32 { float_to_unsigned_int(f) } #[ppc_alias = __fixunskfdi] - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] pub extern "C" fn __fixunstfdi(f: f128) -> u64 { float_to_unsigned_int(f) } #[ppc_alias = __fixunskfti] - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] pub extern "C" fn __fixunstfti(f: f128) -> u128 { float_to_unsigned_int(f) } @@ -314,19 +314,19 @@ intrinsics! 
{ } #[ppc_alias = __fixkfsi] - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] pub extern "C" fn __fixtfsi(f: f128) -> i32 { float_to_signed_int(f) } #[ppc_alias = __fixkfdi] - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] pub extern "C" fn __fixtfdi(f: f128) -> i64 { float_to_signed_int(f) } #[ppc_alias = __fixkfti] - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] pub extern "C" fn __fixtfti(f: f128) -> i128 { float_to_signed_int(f) } diff --git a/src/float/extend.rs b/src/float/extend.rs index 556048991..2f392255f 100644 --- a/src/float/extend.rs +++ b/src/float/extend.rs @@ -83,17 +83,18 @@ intrinsics! { } } -#[cfg(not(feature = "no-f16-f128"))] intrinsics! { #[avr_skip] #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_h2f] + #[cfg(f16_enabled)] pub extern "C" fn __extendhfsf2(a: f16) -> f32 { extend(a) } #[avr_skip] #[aapcs_on_arm] + #[cfg(f16_enabled)] pub extern "C" fn __gnu_h2f_ieee(a: f16) -> f32 { extend(a) } @@ -101,6 +102,7 @@ intrinsics! { #[avr_skip] #[aapcs_on_arm] #[ppc_alias = __extendhfkf2] + #[cfg(all(f16_enabled, f128_enabled))] pub extern "C" fn __extendhftf2(a: f16) -> f128 { extend(a) } @@ -108,6 +110,7 @@ intrinsics! { #[avr_skip] #[aapcs_on_arm] #[ppc_alias = __extendsfkf2] + #[cfg(f128_enabled)] pub extern "C" fn __extendsftf2(a: f32) -> f128 { extend(a) } @@ -115,6 +118,7 @@ intrinsics! { #[avr_skip] #[aapcs_on_arm] #[ppc_alias = __extenddfkf2] + #[cfg(f128_enabled)] pub extern "C" fn __extenddftf2(a: f64) -> f128 { extend(a) } diff --git a/src/float/mod.rs b/src/float/mod.rs index 5fef1df32..847373205 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -188,9 +188,9 @@ macro_rules! 
float_impl { }; } -#[cfg(not(feature = "no-f16-f128"))] +#[cfg(f16_enabled)] float_impl!(f16, u16, i16, i8, 16, 10); float_impl!(f32, u32, i32, i16, 32, 23); float_impl!(f64, u64, i64, i16, 64, 52); -#[cfg(not(feature = "no-f16-f128"))] +#[cfg(f128_enabled)] float_impl!(f128, u128, i128, i16, 128, 112); diff --git a/src/float/mul.rs b/src/float/mul.rs index decf722e2..cb0fcdfa8 100644 --- a/src/float/mul.rs +++ b/src/float/mul.rs @@ -195,7 +195,7 @@ intrinsics! { } #[ppc_alias = __mulkf3] - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] pub extern "C" fn __multf3(a: f128, b: f128) -> f128 { mul(a, b) } diff --git a/src/float/sub.rs b/src/float/sub.rs index 3ab46495d..d33016ead 100644 --- a/src/float/sub.rs +++ b/src/float/sub.rs @@ -14,7 +14,7 @@ intrinsics! { } #[ppc_alias = __subkf3] - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] pub extern "C" fn __subtf3(a: f128, b: f128) -> f128 { #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] use crate::float::add::__addkf3 as __addtf3; diff --git a/src/float/trunc.rs b/src/float/trunc.rs index 9aea6f91e..c54ff7805 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -131,17 +131,18 @@ intrinsics! { } } -#[cfg(not(feature = "no-f16-f128"))] intrinsics! { #[avr_skip] #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_f2h] + #[cfg(f16_enabled)] pub extern "C" fn __truncsfhf2(a: f32) -> f16 { trunc(a) } #[avr_skip] #[aapcs_on_arm] + #[cfg(f16_enabled)] pub extern "C" fn __gnu_f2h_ieee(a: f32) -> f16 { trunc(a) } @@ -149,6 +150,7 @@ intrinsics! { #[avr_skip] #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_d2h] + #[cfg(f16_enabled)] pub extern "C" fn __truncdfhf2(a: f64) -> f16 { trunc(a) } @@ -156,6 +158,7 @@ intrinsics! { #[avr_skip] #[aapcs_on_arm] #[ppc_alias = __trunckfhf2] + #[cfg(all(f16_enabled, f128_enabled))] pub extern "C" fn __trunctfhf2(a: f128) -> f16 { trunc(a) } @@ -163,6 +166,7 @@ intrinsics! 
{ #[avr_skip] #[aapcs_on_arm] #[ppc_alias = __trunckfsf2] + #[cfg(f128_enabled)] pub extern "C" fn __trunctfsf2(a: f128) -> f32 { trunc(a) } @@ -170,6 +174,7 @@ intrinsics! { #[avr_skip] #[aapcs_on_arm] #[ppc_alias = __trunckfdf2] + #[cfg(f128_enabled)] pub extern "C" fn __trunctfdf2(a: f128) -> f64 { trunc(a) } diff --git a/src/lib.rs b/src/lib.rs index 0d207a914..0d44fdf96 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,8 +13,8 @@ #![feature(naked_functions)] #![feature(repr_simd)] #![feature(c_unwind)] -#![cfg_attr(not(feature = "no-f16-f128"), feature(f16))] -#![cfg_attr(not(feature = "no-f16-f128"), feature(f128))] +#![cfg_attr(f16_enabled, feature(f16))] +#![cfg_attr(f128_enabled, feature(f128))] #![no_builtins] #![no_std] #![allow(unused_features)] From f2de60946651a18ed7f0edaac4da584e3a0285d7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 2 Aug 2024 19:20:26 -0400 Subject: [PATCH 0795/1459] Update which platforms have no `f16` symbols Previously we were building the C versions of these symbols. Since we added the Rust version and updated compiler builtins, these are no longer available by default. This is unintentional, but it gives a better indicator of which symbol versions are not actually provided by the system. Use the list of build failures to correct the list of platforms that do not have `f16` symbols. --- testcrate/build.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/testcrate/build.rs b/testcrate/build.rs index 12c3e7d13..fc01f3f8d 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -44,10 +44,20 @@ fn main() { features.insert(Feature::NoSysF16F128Convert); } - if target.starts_with("wasm32-") { + // These platforms do not have f16 symbols available in their system libraries, so + // skip related tests. Most of these are missing `f16 <-> f32` conversion routines. 
+ if (target.starts_with("aarch64-") && target.contains("linux")) + || target.starts_with("arm") + || target.starts_with("powerpc-") + || target.starts_with("powerpc64-") + || target.starts_with("powerpc64le-") + || target.contains("windows-") // Linking says "error: function signature mismatch: __extendhfsf2" and seems to // think the signature is either `(i32) -> f32` or `(f32) -> f32` + || target.starts_with("wasm32-") + { features.insert(Feature::NoSysF16); + features.insert(Feature::NoSysF16F128Convert); } for feature in features { From 662a2242580aa86ff3121f3635cb93a98f8da1c2 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 2 Aug 2024 19:29:37 -0400 Subject: [PATCH 0796/1459] Correct `sys_available` for `f64` -> `f16` truncation The `sys_available` gate was incorrect. Update it. --- testcrate/benches/float_trunc.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testcrate/benches/float_trunc.rs b/testcrate/benches/float_trunc.rs index 1553dacee..70869bcaf 100644 --- a/testcrate/benches/float_trunc.rs +++ b/testcrate/benches/float_trunc.rs @@ -32,7 +32,7 @@ float_bench! { sig: (a: f64) -> f16, crate_fn: trunc::__truncdfhf2, sys_fn: __truncdfhf2, - sys_available: not(feature = "no-sys-f128"), + sys_available: not(feature = "no-sys-f16"), asm: [ #[cfg(target_arch = "aarch64")] { // FIXME(f16_f128): remove `from_bits()` after f16 asm support (rust-lang/rust/#116909) From 614f96bb43c7906a194e7f633daf0da991552603 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 2 Aug 2024 21:44:36 -0400 Subject: [PATCH 0797/1459] Update to the latest `rustc_apfloat` The latest version has a convenient `.unwrap()`. Increase the version so we can use this.
--- testcrate/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index e39c35b67..21cec1701 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -15,7 +15,7 @@ doctest = false # `xoshiro128**` is used for its quality, size, and speed at generating `u32` shift amounts. rand_xoshiro = "0.6" # To compare float builtins against -rustc_apfloat = "0.2.0" +rustc_apfloat = "0.2.1" [dependencies.compiler_builtins] path = ".." From c6f95d7477dd1e72ed889a6cae5f6b4feabc3ce4 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 2 Aug 2024 21:45:59 -0400 Subject: [PATCH 0798/1459] Update float conversion tests Since there are more platforms that do not have symbols present, we need to use `rustc_apfloat` for more conversion tests. Make use of the fallback like other tests, and refactor so each test gets its own function. Previously we were testing both apfloat and system conversion methods when possible. This changes to only test one or the other, depending on whether or not the system version is available. This seems reasonable because it is consistent with all other tests, but we should consider updating all tests to check both at some point. This also includes an adjustment of PowerPC configuration to account for the linking errors at [1]. [1]: https://github.com/rust-lang/compiler-builtins/issues/655 --- testcrate/benches/float_extend.rs | 11 ++ testcrate/benches/float_trunc.rs | 6 + testcrate/build.rs | 1 + testcrate/src/lib.rs | 6 +- testcrate/tests/conv.rs | 252 +++++++++--------------------- 5 files changed, 100 insertions(+), 176 deletions(-) diff --git a/testcrate/benches/float_extend.rs b/testcrate/benches/float_extend.rs index 9bd8009e9..bf136f49a 100644 --- a/testcrate/benches/float_extend.rs +++ b/testcrate/benches/float_extend.rs @@ -82,6 +82,7 @@ float_bench! 
{ asm: [], } +#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] criterion_group!( float_extend, extend_f16_f32, @@ -90,4 +91,14 @@ criterion_group!( extend_f32_f128, extend_f64_f128, ); + +// FIXME(#655): `f16` tests disabled until we can bootstrap symbols +#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] +criterion_group!( + float_extend, + extend_f32_f64, + extend_f32_f128, + extend_f64_f128, +); + criterion_main!(float_extend); diff --git a/testcrate/benches/float_trunc.rs b/testcrate/benches/float_trunc.rs index 70869bcaf..74b43dfc8 100644 --- a/testcrate/benches/float_trunc.rs +++ b/testcrate/benches/float_trunc.rs @@ -115,6 +115,7 @@ float_bench! { asm: [], } +#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] criterion_group!( float_trunc, trunc_f32_f16, @@ -124,4 +125,9 @@ criterion_group!( trunc_f128_f32, trunc_f128_f64, ); + +// FIXME(#655): `f16` tests disabled until we can bootstrap symbols +#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] +criterion_group!(float_trunc, trunc_f64_f32, trunc_f128_f32, trunc_f128_f64,); + criterion_main!(float_trunc); diff --git a/testcrate/build.rs b/testcrate/build.rs index fc01f3f8d..8c441de8a 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -51,6 +51,7 @@ fn main() { || target.starts_with("powerpc-") || target.starts_with("powerpc64-") || target.starts_with("powerpc64le-") + || target.starts_with("i586-") || target.contains("windows-") // Linking says "error: function signature mismatch: __extendhfsf2" and seems to // think the signature is either `(i32) -> f32` or `(f32) -> f32` diff --git a/testcrate/src/lib.rs b/testcrate/src/lib.rs index f9b052528..5458c9ab6 100644 --- a/testcrate/src/lib.rs +++ b/testcrate/src/lib.rs @@ -282,6 +282,8 @@ macro_rules! apfloat_fallback { // The expression to run. This expression may use `FloatTy` for its signature. 
// Optionally, the final conversion back to a float can be suppressed using // `=> no_convert` (for e.g. operations that return a bool). + // + // If the apfloat needs a different operation, it can be provided here. $op:expr $(=> $convert:ident)? $(; $apfloat_op:expr)?, // Arguments that get passed to `$op` after converting to a float $($arg:expr),+ @@ -318,7 +320,7 @@ macro_rules! apfloat_fallback { // Some apfloat operations return a `StatusAnd` that we need to extract the value from. This // is the default. - (@inner fty: $float_ty:ty, op_res: $val:expr, args: $($_arg:expr),+) => {{ + (@inner fty: $float_ty:ty, op_res: $val:expr, args: $($_arg:expr),+) => {{ // ignore the status, just get the value let unwrapped = $val.value; @@ -326,7 +328,7 @@ macro_rules! apfloat_fallback { }}; // This is the case where we can't use the same expression for the default builtin and - // nonstandard apfloat fallbac (e.g. `as` casts in std are normal functions in apfloat, so + // nonstandard apfloat fallback (e.g. `as` casts in std are normal functions in apfloat, so // two separate expressions must be specified. (@inner fty: $float_ty:ty, op_res: $_val:expr, diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 1425b49ce..e394183cf 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -206,212 +206,116 @@ mod f_to_i { } } -macro_rules! conv { - ($fX:ident, $fD:ident, $fn:ident, $apfloatX:ident, $apfloatD:ident) => { - fuzz_float(N, |x: $fX| { - let tmp0: $apfloatD = $apfloatX::from_bits(x.to_bits().into()) - .convert(&mut false) - .value; - let tmp0 = $fD::from_bits(tmp0.to_bits().try_into().unwrap()); - let tmp1: $fD = $fn(x); - if !Float::eq_repr(tmp0, tmp1) { - panic!( - "{}({x:?}): apfloat: {tmp0:?}, builtins: {tmp1:?}", - stringify!($fn) - ); - } - }) - }; -} - -macro_rules! extend { - ($fX:ident, $fD:ident, $fn:ident) => { +macro_rules! 
f_to_f { + ( + $mod:ident, + $( + $from_ty:ty => $to_ty:ty, + $from_ap_ty:ident => $to_ap_ty:ident, + $fn:ident, $sys_available:meta + );+; + ) => {$( #[test] fn $fn() { - use compiler_builtins::float::extend::$fn; + use compiler_builtins::float::{$mod::$fn, Float}; + use rustc_apfloat::ieee::{$from_ap_ty, $to_ap_ty}; + + fuzz_float(N, |x: $from_ty| { + let tmp0: $to_ty = apfloat_fallback!( + $from_ty, + $from_ap_ty, + $sys_available, + |x: $from_ty| x as $to_ty; + |x: $from_ty| { + let from_apf = FloatTy::from_bits(x.to_bits().into()); + // Get `value` directly to ignore INVALID_OP + let to_apf: $to_ap_ty = from_apf.convert(&mut false).value; + <$to_ty>::from_bits(to_apf.to_bits().try_into().unwrap()) + }, + x + ); + let tmp1: $to_ty = $fn(x); - fuzz_float(N, |x: $fX| { - let tmp0 = x as $fD; - let tmp1: $fD = $fn(x); if !Float::eq_repr(tmp0, tmp1) { panic!( - "{}({}): std: {}, builtins: {}", + "{}({:?}): std: {:?}, builtins: {:?}", stringify!($fn), x, tmp0, tmp1 ); } - }); + }) } - }; + )+}; } -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -mod float_extend { +mod extend { use super::*; - extend!(f32, f64, __extendsfdf2); - - #[test] - fn conv() { - use compiler_builtins::float::extend::__extendsfdf2; - use rustc_apfloat::ieee::{Double, Single}; - - conv!(f32, f64, __extendsfdf2, Single, Double); + f_to_f! 
{ + extend, + f32 => f64, Single => Double, __extendsfdf2, all(); } -} - -#[cfg(not(feature = "no-f16-f128"))] -#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] -mod float_extend_f128 { - use super::*; - - #[test] - fn conv() { - use compiler_builtins::float::extend::{ - __extenddftf2, __extendhfsf2, __extendhftf2, __extendsftf2, __gnu_h2f_ieee, - }; - use rustc_apfloat::ieee::{Double, Half, Quad, Single}; - // FIXME(f16_f128): Also do extend!() for `f16` and `f128` when builtins are in nightly - conv!(f16, f32, __extendhfsf2, Half, Single); - conv!(f16, f32, __gnu_h2f_ieee, Half, Single); - conv!(f16, f128, __extendhftf2, Half, Quad); - conv!(f32, f128, __extendsftf2, Single, Quad); - conv!(f64, f128, __extenddftf2, Double, Quad); + #[cfg(target_arch = "arm")] + f_to_f! { + extend, + f32 => f64, Single => Double, __extendsfdf2vfp, all(); } -} -#[cfg(not(feature = "no-f16-f128"))] -#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] -mod float_extend_f128_ppc { - use super::*; - - #[test] - fn conv() { - use compiler_builtins::float::extend::{ - __extenddfkf2, __extendhfkf2, __extendhfsf2, __extendsfkf2, __gnu_h2f_ieee, - }; - use rustc_apfloat::ieee::{Double, Half, Quad, Single}; - - // FIXME(f16_f128): Also do extend!() for `f16` and `f128` when builtins are in nightly - conv!(f16, f32, __extendhfsf2, Half, Single); - conv!(f16, f32, __gnu_h2f_ieee, Half, Single); - conv!(f16, f128, __extendhfkf2, Half, Quad); - conv!(f32, f128, __extendsfkf2, Single, Quad); - conv!(f64, f128, __extenddfkf2, Double, Quad); + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + f_to_f! 
{ + extend, + f16 => f32, Half => Single, __extendhfsf2, not(feature = "no-sys-f16"); + f16 => f32, Half => Single, __gnu_h2f_ieee, not(feature = "no-sys-f16"); + f16 => f128, Half => Quad, __extendhftf2, not(feature = "no-sys-f16-f128-convert"); + f32 => f128, Single => Quad, __extendsftf2, not(feature = "no-sys-f128"); + f64 => f128, Double => Quad, __extenddftf2, not(feature = "no-sys-f128"); } -} - -#[cfg(target_arch = "arm")] -mod float_extend_arm { - use super::*; - extend!(f32, f64, __extendsfdf2vfp); - - #[test] - fn conv() { - use compiler_builtins::float::extend::__extendsfdf2vfp; - use rustc_apfloat::ieee::{Double, Single}; - - conv!(f32, f64, __extendsfdf2vfp, Single, Double); + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + f_to_f! { + extend, + // FIXME(#655): `f16` tests disabled until we can bootstrap symbols + f32 => f128, Single => Quad, __extendsfkf2, not(feature = "no-sys-f128"); + f64 => f128, Double => Quad, __extenddfkf2, not(feature = "no-sys-f128"); } } -macro_rules! trunc { - ($fX:ident, $fD:ident, $fn:ident) => { - #[test] - fn $fn() { - use compiler_builtins::float::trunc::$fn; - - fuzz_float(N, |x: $fX| { - let tmp0 = x as $fD; - let tmp1: $fD = $fn(x); - if !Float::eq_repr(tmp0, tmp1) { - panic!( - "{}({}): std: {}, builtins: {}", - stringify!($fn), - x, - tmp0, - tmp1 - ); - } - }); - } - }; -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -mod float_trunc { +mod trunc { use super::*; - trunc!(f64, f32, __truncdfsf2); - - #[test] - fn conv() { - use compiler_builtins::float::trunc::__truncdfsf2; - use rustc_apfloat::ieee::{Double, Single}; - - conv!(f64, f32, __truncdfsf2, Double, Single); + f_to_f! 
{ + trunc, + f64 => f32, Double => Single, __truncdfsf2, all(); } -} -#[cfg(not(feature = "no-f16-f128"))] -#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] -mod float_trunc_f128 { - use super::*; - - #[test] - fn conv() { - use compiler_builtins::float::trunc::{__gnu_f2h_ieee, __truncdfhf2, __truncsfhf2}; - use compiler_builtins::float::trunc::{__trunctfdf2, __trunctfhf2, __trunctfsf2}; - use rustc_apfloat::ieee::{Double, Half, Quad, Single}; - - // FIXME(f16_f128): Also do trunc!() for `f16` and `f128` when builtins are in nightly - conv!(f32, f16, __truncsfhf2, Single, Half); - conv!(f32, f16, __gnu_f2h_ieee, Single, Half); - conv!(f64, f16, __truncdfhf2, Double, Half); - conv!(f128, f16, __trunctfhf2, Quad, Half); - conv!(f128, f32, __trunctfsf2, Quad, Single); - conv!(f128, f64, __trunctfdf2, Quad, Double); + #[cfg(target_arch = "arm")] + f_to_f! { + trunc, + f64 => f32, Double => Single, __truncdfsf2vfp, all(); } -} - -#[cfg(not(feature = "no-f16-f128"))] -#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] -mod float_trunc_f128_ppc { - use super::*; - - #[test] - fn conv() { - use compiler_builtins::float::trunc::{__gnu_f2h_ieee, __truncdfhf2, __truncsfhf2}; - use compiler_builtins::float::trunc::{__trunckfdf2, __trunckfhf2, __trunckfsf2}; - use rustc_apfloat::ieee::{Double, Half, Quad, Single}; - // FIXME(f16_f128): Also do trunc!() for `f16` and `f128` when builtins are in nightly - conv!(f32, f16, __truncsfhf2, Single, Half); - conv!(f32, f16, __gnu_f2h_ieee, Single, Half); - conv!(f64, f16, __truncdfhf2, Double, Half); - conv!(f128, f16, __trunckfhf2, Quad, Half); - conv!(f128, f32, __trunckfsf2, Quad, Single); - conv!(f128, f64, __trunckfdf2, Quad, Double); + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + f_to_f! 
{ + trunc, + f32 => f16, Single => Half, __truncsfhf2, not(feature = "no-sys-f16"); + f32 => f16, Single => Half, __gnu_f2h_ieee, not(feature = "no-sys-f16"); + f128 => f16, Quad => Half, __trunctfhf2, not(feature = "no-sys-f16-f128-convert"); + f128 => f32, Quad => Single, __trunctfsf2, not(feature = "no-sys-f128"); + f128 => f64, Quad => Double, __trunctfdf2, not(feature = "no-sys-f128"); } -} - -#[cfg(target_arch = "arm")] -mod float_trunc_arm { - use super::*; - trunc!(f64, f32, __truncdfsf2vfp); - - #[test] - fn conv() { - use compiler_builtins::float::trunc::__truncdfsf2vfp; - use rustc_apfloat::ieee::{Double, Single}; - - conv!(f64, f32, __truncdfsf2vfp, Double, Single) + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + f_to_f! { + trunc, + // FIXME(#655): `f16` tests disabled until we can bootstrap symbols + f128 => f32, Quad => Single, __trunckfsf2, not(feature = "no-sys-f128"); + f128 => f64, Quad => Double, __trunckfdf2, not(feature = "no-sys-f128"); } } From ae3c532f6497c3581d0cb4b375f7d77499313001 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 4 Aug 2024 23:17:25 +0000 Subject: [PATCH 0799/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 706031bbf..805b44cad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.114" +version = "0.1.115" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 59b13486ce68e953745811810fa8aa7ec4a9a0db Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 5 Aug 2024 16:26:27 -0500 Subject: [PATCH 0800/1459] Fix emscripten as `os` rather than `env`. 
b7af6078 ("Switch to a target structure...") is checking whether the target environment is emscripten, but it seems emscripten is the OS. Fix this, which should resolve the issue in . --- build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.rs b/build.rs index 749a08a5a..bbae9e286 100644 --- a/build.rs +++ b/build.rs @@ -55,7 +55,7 @@ fn main() { println!("cargo:rustc-cfg=feature=\"unstable\""); // Emscripten's runtime includes all the builtins - if target.env == "emscripten" { + if target.os == "emscripten" { return; } From 2e2a9255cca54b9f58c3f5f8a03b484893a2c7b6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 3 Aug 2024 21:29:58 -0400 Subject: [PATCH 0801/1459] Eliminate the use of `public_test_dep!` Replace `public_test_dep!` by placing optionally public items into new modules, then controlling what is exported with the `public-test-deps` feature. This is nicer for automatic formatting and diagnostics. --- src/float/mod.rs | 190 +---------- src/float/traits.rs | 184 +++++++++++ src/int/leading_zeros.rs | 228 ++++++------- src/int/mod.rs | 417 +----------------------- src/int/specialized_div_rem/delegate.rs | 4 +- src/int/trailing_zeros.rs | 70 ++-- src/int/traits.rs | 402 +++++++++++++++++++++++ src/macros.rs | 16 - 8 files changed, 752 insertions(+), 759 deletions(-) create mode 100644 src/float/traits.rs create mode 100644 src/int/traits.rs diff --git a/src/float/mod.rs b/src/float/mod.rs index 847373205..41b308626 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -1,7 +1,3 @@ -use core::ops; - -use crate::int::{DInt, Int, MinInt}; - pub mod add; pub mod cmp; pub mod conv; @@ -10,187 +6,11 @@ pub mod extend; pub mod mul; pub mod pow; pub mod sub; +pub(crate) mod traits; pub mod trunc; -/// Wrapper to extract the integer type half of the float's size -pub(crate) type HalfRep = <::Int as DInt>::H; - -public_test_dep! 
{ -/// Trait for some basic operations on floats -#[allow(dead_code)] -pub(crate) trait Float: - Copy - + core::fmt::Debug - + PartialEq - + PartialOrd - + ops::AddAssign - + ops::MulAssign - + ops::Add - + ops::Sub - + ops::Div - + ops::Rem -{ - /// A uint of the same width as the float - type Int: Int; - - /// A int of the same width as the float - type SignedInt: Int; - - /// An int capable of containing the exponent bits plus a sign bit. This is signed. - type ExpInt: Int; - - const ZERO: Self; - const ONE: Self; - - /// The bitwidth of the float type - const BITS: u32; - - /// The bitwidth of the significand - const SIGNIFICAND_BITS: u32; - - /// The bitwidth of the exponent - const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1; - - /// The maximum value of the exponent - const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1; - - /// The exponent bias value - const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1; - - /// A mask for the sign bit - const SIGN_MASK: Self::Int; - - /// A mask for the significand - const SIGNIFICAND_MASK: Self::Int; - - /// The implicit bit of the float format - const IMPLICIT_BIT: Self::Int; - - /// A mask for the exponent - const EXPONENT_MASK: Self::Int; - - /// Returns `self` transmuted to `Self::Int` - fn repr(self) -> Self::Int; - - /// Returns `self` transmuted to `Self::SignedInt` - fn signed_repr(self) -> Self::SignedInt; - - /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be - /// represented in multiple different ways. This method returns `true` if two NaNs are - /// compared. 
- fn eq_repr(self, rhs: Self) -> bool; - - /// Returns true if the sign is negative - fn is_sign_negative(self) -> bool; - - /// Returns the exponent with bias - fn exp(self) -> Self::ExpInt; - - /// Returns the significand with no implicit bit (or the "fractional" part) - fn frac(self) -> Self::Int; - - /// Returns the significand with implicit bit - fn imp_frac(self) -> Self::Int; - - /// Returns a `Self::Int` transmuted back to `Self` - fn from_repr(a: Self::Int) -> Self; - - /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. - fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self; - - /// Returns (normalized exponent, normalized significand) - fn normalize(significand: Self::Int) -> (i32, Self::Int); - - /// Returns if `self` is subnormal - fn is_subnormal(self) -> bool; -} -} - -macro_rules! float_impl { - ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { - impl Float for $ty { - type Int = $ity; - type SignedInt = $sity; - type ExpInt = $expty; - - const ZERO: Self = 0.0; - const ONE: Self = 1.0; - - const BITS: u32 = $bits; - const SIGNIFICAND_BITS: u32 = $significand_bits; - - const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); - const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1; - const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS; - const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK); - - fn repr(self) -> Self::Int { - self.to_bits() - } - fn signed_repr(self) -> Self::SignedInt { - self.to_bits() as Self::SignedInt - } - fn eq_repr(self, rhs: Self) -> bool { - #[cfg(feature = "mangled-names")] - fn is_nan(x: $ty) -> bool { - // When using mangled-names, the "real" compiler-builtins might not have the - // necessary builtin (__unordtf2) to test whether `f128` is NaN. 
- // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin - // x is NaN if all the bits of the exponent are set and the significand is non-0 - x.repr() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK - && x.repr() & $ty::SIGNIFICAND_MASK != 0 - } - #[cfg(not(feature = "mangled-names"))] - fn is_nan(x: $ty) -> bool { - x.is_nan() - } - if is_nan(self) && is_nan(rhs) { - true - } else { - self.repr() == rhs.repr() - } - } - fn is_sign_negative(self) -> bool { - self.is_sign_negative() - } - fn exp(self) -> Self::ExpInt { - ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt - } - fn frac(self) -> Self::Int { - self.to_bits() & Self::SIGNIFICAND_MASK - } - fn imp_frac(self) -> Self::Int { - self.frac() | Self::IMPLICIT_BIT - } - fn from_repr(a: Self::Int) -> Self { - Self::from_bits(a) - } - fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self { - Self::from_repr( - ((sign as Self::Int) << (Self::BITS - 1)) - | ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK) - | (significand & Self::SIGNIFICAND_MASK), - ) - } - fn normalize(significand: Self::Int) -> (i32, Self::Int) { - let shift = significand - .leading_zeros() - .wrapping_sub((Self::Int::ONE << Self::SIGNIFICAND_BITS).leading_zeros()); - ( - 1i32.wrapping_sub(shift as i32), - significand << shift as Self::Int, - ) - } - fn is_subnormal(self) -> bool { - (self.repr() & Self::EXPONENT_MASK) == Self::Int::ZERO - } - } - }; -} +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use traits::{Float, HalfRep}; -#[cfg(f16_enabled)] -float_impl!(f16, u16, i16, i8, 16, 10); -float_impl!(f32, u32, i32, i16, 32, 23); -float_impl!(f64, u64, i64, i16, 64, 52); -#[cfg(f128_enabled)] -float_impl!(f128, u128, i128, i16, 128, 112); +#[cfg(feature = "public-test-deps")] +pub use traits::{Float, HalfRep}; diff --git a/src/float/traits.rs b/src/float/traits.rs new file mode 100644 index 000000000..e57bd1b98 --- /dev/null +++ 
b/src/float/traits.rs @@ -0,0 +1,184 @@ +use core::ops; + +use crate::int::{DInt, Int, MinInt}; + +/// Wrapper to extract the integer type half of the float's size +pub type HalfRep = <::Int as DInt>::H; + +/// Trait for some basic operations on floats +#[allow(dead_code)] +pub trait Float: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::MulAssign + + ops::Add + + ops::Sub + + ops::Div + + ops::Rem +{ + /// A uint of the same width as the float + type Int: Int; + + /// A int of the same width as the float + type SignedInt: Int; + + /// An int capable of containing the exponent bits plus a sign bit. This is signed. + type ExpInt: Int; + + const ZERO: Self; + const ONE: Self; + + /// The bitwidth of the float type + const BITS: u32; + + /// The bitwidth of the significand + const SIGNIFICAND_BITS: u32; + + /// The bitwidth of the exponent + const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1; + + /// The maximum value of the exponent + const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1; + + /// The exponent bias value + const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1; + + /// A mask for the sign bit + const SIGN_MASK: Self::Int; + + /// A mask for the significand + const SIGNIFICAND_MASK: Self::Int; + + /// The implicit bit of the float format + const IMPLICIT_BIT: Self::Int; + + /// A mask for the exponent + const EXPONENT_MASK: Self::Int; + + /// Returns `self` transmuted to `Self::Int` + fn repr(self) -> Self::Int; + + /// Returns `self` transmuted to `Self::SignedInt` + fn signed_repr(self) -> Self::SignedInt; + + /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be + /// represented in multiple different ways. This method returns `true` if two NaNs are + /// compared. 
+ fn eq_repr(self, rhs: Self) -> bool; + + /// Returns true if the sign is negative + fn is_sign_negative(self) -> bool; + + /// Returns the exponent with bias + fn exp(self) -> Self::ExpInt; + + /// Returns the significand with no implicit bit (or the "fractional" part) + fn frac(self) -> Self::Int; + + /// Returns the significand with implicit bit + fn imp_frac(self) -> Self::Int; + + /// Returns a `Self::Int` transmuted back to `Self` + fn from_repr(a: Self::Int) -> Self; + + /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. + fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self; + + /// Returns (normalized exponent, normalized significand) + fn normalize(significand: Self::Int) -> (i32, Self::Int); + + /// Returns if `self` is subnormal + fn is_subnormal(self) -> bool; +} + +macro_rules! float_impl { + ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { + impl Float for $ty { + type Int = $ity; + type SignedInt = $sity; + type ExpInt = $expty; + + const ZERO: Self = 0.0; + const ONE: Self = 1.0; + + const BITS: u32 = $bits; + const SIGNIFICAND_BITS: u32 = $significand_bits; + + const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); + const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1; + const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS; + const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK); + + fn repr(self) -> Self::Int { + self.to_bits() + } + fn signed_repr(self) -> Self::SignedInt { + self.to_bits() as Self::SignedInt + } + fn eq_repr(self, rhs: Self) -> bool { + #[cfg(feature = "mangled-names")] + fn is_nan(x: $ty) -> bool { + // When using mangled-names, the "real" compiler-builtins might not have the + // necessary builtin (__unordtf2) to test whether `f128` is NaN. 
+ // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin + // x is NaN if all the bits of the exponent are set and the significand is non-0 + x.repr() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK + && x.repr() & $ty::SIGNIFICAND_MASK != 0 + } + #[cfg(not(feature = "mangled-names"))] + fn is_nan(x: $ty) -> bool { + x.is_nan() + } + if is_nan(self) && is_nan(rhs) { + true + } else { + self.repr() == rhs.repr() + } + } + fn is_sign_negative(self) -> bool { + self.is_sign_negative() + } + fn exp(self) -> Self::ExpInt { + ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt + } + fn frac(self) -> Self::Int { + self.to_bits() & Self::SIGNIFICAND_MASK + } + fn imp_frac(self) -> Self::Int { + self.frac() | Self::IMPLICIT_BIT + } + fn from_repr(a: Self::Int) -> Self { + Self::from_bits(a) + } + fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self { + Self::from_repr( + ((sign as Self::Int) << (Self::BITS - 1)) + | ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK) + | (significand & Self::SIGNIFICAND_MASK), + ) + } + fn normalize(significand: Self::Int) -> (i32, Self::Int) { + let shift = significand + .leading_zeros() + .wrapping_sub((Self::Int::ONE << Self::SIGNIFICAND_BITS).leading_zeros()); + ( + 1i32.wrapping_sub(shift as i32), + significand << shift as Self::Int, + ) + } + fn is_subnormal(self) -> bool { + (self.repr() & Self::EXPONENT_MASK) == Self::Int::ZERO + } + } + }; +} + +#[cfg(not(feature = "no-f16-f128"))] +float_impl!(f16, u16, i16, i8, 16, 10); +float_impl!(f32, u32, i32, i16, 32, 23); +float_impl!(f64, u64, i64, i16, 64, 52); +#[cfg(not(feature = "no-f16-f128"))] +float_impl!(f128, u128, i128, i16, 128, 112); diff --git a/src/int/leading_zeros.rs b/src/int/leading_zeros.rs index 1fee9fcf5..eede1ebe6 100644 --- a/src/int/leading_zeros.rs +++ b/src/int/leading_zeros.rs @@ -3,137 +3,141 @@ // adding a zero check at the beginning, but `__clzsi2` has a precondition that `x 
!= 0`. // Compilers will insert the check for zero in cases where it is needed. -use crate::int::{CastInto, Int}; +mod implementation { + use crate::int::{CastInto, Int}; -public_test_dep! { -/// Returns the number of leading binary zeros in `x`. -#[allow(dead_code)] -pub(crate) fn leading_zeros_default>(x: T) -> usize { - // The basic idea is to test if the higher bits of `x` are zero and bisect the number - // of leading zeros. It is possible for all branches of the bisection to use the same - // code path by conditionally shifting the higher parts down to let the next bisection - // step work on the higher or lower parts of `x`. Instead of starting with `z == 0` - // and adding to the number of zeros, it is slightly faster to start with - // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros, - // because it simplifies the final bisection step. - let mut x = x; - // the number of potential leading zeros - let mut z = T::BITS as usize; - // a temporary - let mut t: T; + /// Returns the number of leading binary zeros in `x`. + #[allow(dead_code)] + pub fn leading_zeros_default>(x: T) -> usize { + // The basic idea is to test if the higher bits of `x` are zero and bisect the number + // of leading zeros. It is possible for all branches of the bisection to use the same + // code path by conditionally shifting the higher parts down to let the next bisection + // step work on the higher or lower parts of `x`. Instead of starting with `z == 0` + // and adding to the number of zeros, it is slightly faster to start with + // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros, + // because it simplifies the final bisection step. 
+ let mut x = x; + // the number of potential leading zeros + let mut z = T::BITS as usize; + // a temporary + let mut t: T; - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { - t = x >> 32; + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + t = x >> 32; + if t != T::ZERO { + z -= 32; + x = t; + } + } + if T::BITS >= 32 { + t = x >> 16; + if t != T::ZERO { + z -= 16; + x = t; + } + } + const { assert!(T::BITS >= 16) }; + t = x >> 8; if t != T::ZERO { - z -= 32; + z -= 8; x = t; } - } - if T::BITS >= 32 { - t = x >> 16; + t = x >> 4; if t != T::ZERO { - z -= 16; + z -= 4; x = t; } - } - const { assert!(T::BITS >= 16) }; - t = x >> 8; - if t != T::ZERO { - z -= 8; - x = t; - } - t = x >> 4; - if t != T::ZERO { - z -= 4; - x = t; - } - t = x >> 2; - if t != T::ZERO { - z -= 2; - x = t; - } - // the last two bisections are combined into one conditional - t = x >> 1; - if t != T::ZERO { - z - 2 - } else { - z - x.cast() - } + t = x >> 2; + if t != T::ZERO { + z -= 2; + x = t; + } + // the last two bisections are combined into one conditional + t = x >> 1; + if t != T::ZERO { + z - 2 + } else { + z - x.cast() + } - // We could potentially save a few cycles by using the LUT trick from - // "https://embeddedgurus.com/state-space/2014/09/ - // fast-deterministic-and-portable-counting-leading-zeros/". - // However, 256 bytes for a LUT is too large for embedded use cases. We could remove - // the last 3 bisections and use this 16 byte LUT for the rest of the work: - //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]; - //z -= LUT[x] as usize; - //z - // However, it ends up generating about the same number of instructions. When benchmarked - // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO - // execution effects. Changing to using a LUT and branching is risky for smaller cores. 
-} -} + // We could potentially save a few cycles by using the LUT trick from + // "https://embeddedgurus.com/state-space/2014/09/ + // fast-deterministic-and-portable-counting-leading-zeros/". + // However, 256 bytes for a LUT is too large for embedded use cases. We could remove + // the last 3 bisections and use this 16 byte LUT for the rest of the work: + //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]; + //z -= LUT[x] as usize; + //z + // However, it ends up generating about the same number of instructions. When benchmarked + // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO + // execution effects. Changing to using a LUT and branching is risky for smaller cores. + } -// The above method does not compile well on RISC-V (because of the lack of predicated -// instructions), producing code with many branches or using an excessively long -// branchless solution. This method takes advantage of the set-if-less-than instruction on -// RISC-V that allows `(x >= power-of-two) as usize` to be branchless. + // The above method does not compile well on RISC-V (because of the lack of predicated + // instructions), producing code with many branches or using an excessively long + // branchless solution. This method takes advantage of the set-if-less-than instruction on + // RISC-V that allows `(x >= power-of-two) as usize` to be branchless. -public_test_dep! { -/// Returns the number of leading binary zeros in `x`. -#[allow(dead_code)] -pub(crate) fn leading_zeros_riscv>(x: T) -> usize { - let mut x = x; - // the number of potential leading zeros - let mut z = T::BITS; - // a temporary - let mut t: u32; + /// Returns the number of leading binary zeros in `x`. 
+ #[allow(dead_code)] + pub fn leading_zeros_riscv>(x: T) -> usize { + let mut x = x; + // the number of potential leading zeros + let mut z = T::BITS; + // a temporary + let mut t: u32; - // RISC-V does not have a set-if-greater-than-or-equal instruction and - // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is - // still the most optimal method. A conditional set can only be turned into a single - // immediate instruction if `x` is compared with an immediate `imm` (that can fit into - // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the - // right). If we try to save an instruction by using `x < imm` for each bisection, we - // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, - // but the immediate will never fit into 12 bits and never save an instruction. - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { - // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise - // `t` is set to 0. - t = ((x >= (T::ONE << 32)) as u32) << 5; - // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the - // next step to process. + // RISC-V does not have a set-if-greater-than-or-equal instruction and + // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is + // still the most optimal method. A conditional set can only be turned into a single + // immediate instruction if `x` is compared with an immediate `imm` (that can fit into + // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the + // right). If we try to save an instruction by using `x < imm` for each bisection, we + // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, + // but the immediate will never fit into 12 bits and never save an instruction. 
+ const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise + // `t` is set to 0. + t = ((x >= (T::ONE << 32)) as u32) << 5; + // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the + // next step to process. + x >>= t; + // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential + // leading zeros + z -= t; + } + if T::BITS >= 32 { + t = ((x >= (T::ONE << 16)) as u32) << 4; + x >>= t; + z -= t; + } + const { assert!(T::BITS >= 16) }; + t = ((x >= (T::ONE << 8)) as u32) << 3; x >>= t; - // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential - // leading zeros z -= t; - } - if T::BITS >= 32 { - t = ((x >= (T::ONE << 16)) as u32) << 4; + t = ((x >= (T::ONE << 4)) as u32) << 2; x >>= t; z -= t; + t = ((x >= (T::ONE << 2)) as u32) << 1; + x >>= t; + z -= t; + t = (x >= (T::ONE << 1)) as u32; + x >>= t; + z -= t; + // All bits except the LSB are guaranteed to be zero for this final bisection step. + // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. + z as usize - x.cast() } - const { assert!(T::BITS >= 16) }; - t = ((x >= (T::ONE << 8)) as u32) << 3; - x >>= t; - z -= t; - t = ((x >= (T::ONE << 4)) as u32) << 2; - x >>= t; - z -= t; - t = ((x >= (T::ONE << 2)) as u32) << 1; - x >>= t; - z -= t; - t = (x >= (T::ONE << 1)) as u32; - x >>= t; - z -= t; - // All bits except the LSB are guaranteed to be zero for this final bisection step. - // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. - z as usize - x.cast() -} } +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use implementation::*; + +#[cfg(feature = "public-test-deps")] +pub use implementation::*; + intrinsics! 
{ /// Returns the number of leading binary zeros in `x` pub extern "C" fn __clzsi2(x: u32) -> usize { diff --git a/src/int/mod.rs b/src/int/mod.rs index 5f56c6b6e..a0d992e13 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -1,6 +1,4 @@ -use core::ops; - -mod specialized_div_rem; +pub(crate) mod specialized_div_rem; pub mod addsub; mod big; @@ -10,416 +8,13 @@ pub mod mul; pub mod sdiv; pub mod shift; pub mod trailing_zeros; +mod traits; pub mod udiv; pub use big::{i256, u256}; -public_test_dep! { -/// Minimal integer implementations needed on all integer types, including wide integers. -#[allow(dead_code)] -pub(crate) trait MinInt: Copy - + core::fmt::Debug - + ops::BitOr - + ops::Not - + ops::Shl -{ - - /// Type with the same width but other signedness - type OtherSign: MinInt; - /// Unsigned version of Self - type UnsignedInt: MinInt; - - /// If `Self` is a signed integer - const SIGNED: bool; - - /// The bitwidth of the int type - const BITS: u32; - - const ZERO: Self; - const ONE: Self; - const MIN: Self; - const MAX: Self; -} -} - -public_test_dep! { -/// Trait for some basic operations on integers -#[allow(dead_code)] -pub(crate) trait Int: MinInt - + PartialEq - + PartialOrd - + ops::AddAssign - + ops::SubAssign - + ops::BitAndAssign - + ops::BitOrAssign - + ops::BitXorAssign - + ops::ShlAssign - + ops::ShrAssign - + ops::Add - + ops::Sub - + ops::Mul - + ops::Div - + ops::Shr - + ops::BitXor - + ops::BitAnd -{ - /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing - /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, - /// 112,119,120,125,126,127]. - const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(::BITS); - - /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. 
- const FUZZ_NUM: usize = { - let log2 = (::BITS - 1).count_ones() as usize; - if log2 == 3 { - // case for u8 - 6 - } else { - // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate - // boundaries. - 8 + (4 * (log2 - 4)) - } - }; - - fn unsigned(self) -> Self::UnsignedInt; - fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; - - fn from_bool(b: bool) -> Self; - - /// Prevents the need for excessive conversions between signed and unsigned - fn logical_shr(self, other: u32) -> Self; - - /// Absolute difference between two integers. - fn abs_diff(self, other: Self) -> Self::UnsignedInt; - - // copied from primitive integers, but put in a trait - fn is_zero(self) -> bool; - fn wrapping_neg(self) -> Self; - fn wrapping_add(self, other: Self) -> Self; - fn wrapping_mul(self, other: Self) -> Self; - fn wrapping_sub(self, other: Self) -> Self; - fn wrapping_shl(self, other: u32) -> Self; - fn wrapping_shr(self, other: u32) -> Self; - fn rotate_left(self, other: u32) -> Self; - fn overflowing_add(self, other: Self) -> (Self, bool); - fn leading_zeros(self) -> u32; - fn ilog2(self) -> u32; -} -} - -pub(crate) const fn make_fuzz_lengths(bits: u32) -> [u8; 20] { - let mut v = [0u8; 20]; - v[0] = 0; - v[1] = 1; - v[2] = 2; // important for parity and the iX::MIN case when reversed - let mut i = 3; - - // No need for any more until the byte boundary, because there should be no algorithms - // that are sensitive to anything not next to byte boundaries after 2. We also scale - // in powers of two, which is important to prevent u128 corner tests from getting too - // big. 
- let mut l = 8; - loop { - if l >= ((bits / 2) as u8) { - break; - } - // get both sides of the byte boundary - v[i] = l - 1; - i += 1; - v[i] = l; - i += 1; - l *= 2; - } - - if bits != 8 { - // add the lower side of the middle boundary - v[i] = ((bits / 2) - 1) as u8; - i += 1; - } - - // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS - // boundary because of algorithms that split the high part up. We reverse the scaling - // as we go to Self::BITS. - let mid = i; - let mut j = 1; - loop { - v[i] = (bits as u8) - (v[mid - j]) - 1; - if j == mid { - break; - } - i += 1; - j += 1; - } - v -} - -macro_rules! int_impl_common { - ($ty:ty) => { - fn from_bool(b: bool) -> Self { - b as $ty - } - - fn logical_shr(self, other: u32) -> Self { - Self::from_unsigned(self.unsigned().wrapping_shr(other)) - } - - fn is_zero(self) -> bool { - self == Self::ZERO - } - - fn wrapping_neg(self) -> Self { - ::wrapping_neg(self) - } - - fn wrapping_add(self, other: Self) -> Self { - ::wrapping_add(self, other) - } - - fn wrapping_mul(self, other: Self) -> Self { - ::wrapping_mul(self, other) - } - - fn wrapping_sub(self, other: Self) -> Self { - ::wrapping_sub(self, other) - } - - fn wrapping_shl(self, other: u32) -> Self { - ::wrapping_shl(self, other) - } - - fn wrapping_shr(self, other: u32) -> Self { - ::wrapping_shr(self, other) - } - - fn rotate_left(self, other: u32) -> Self { - ::rotate_left(self, other) - } - - fn overflowing_add(self, other: Self) -> (Self, bool) { - ::overflowing_add(self, other) - } - - fn leading_zeros(self) -> u32 { - ::leading_zeros(self) - } - - fn ilog2(self) -> u32 { - ::ilog2(self) - } - }; -} - -macro_rules! 
int_impl { - ($ity:ty, $uty:ty) => { - impl MinInt for $uty { - type OtherSign = $ity; - type UnsignedInt = $uty; - - const BITS: u32 = ::ZERO.count_zeros(); - const SIGNED: bool = Self::MIN != Self::ZERO; - - const ZERO: Self = 0; - const ONE: Self = 1; - const MIN: Self = ::MIN; - const MAX: Self = ::MAX; - } - - impl Int for $uty { - fn unsigned(self) -> $uty { - self - } - - // It makes writing macros easier if this is implemented for both signed and unsigned - #[allow(clippy::wrong_self_convention)] - fn from_unsigned(me: $uty) -> Self { - me - } - - fn abs_diff(self, other: Self) -> Self { - if self < other { - other.wrapping_sub(self) - } else { - self.wrapping_sub(other) - } - } - - int_impl_common!($uty); - } - - impl MinInt for $ity { - type OtherSign = $uty; - type UnsignedInt = $uty; - - const BITS: u32 = ::ZERO.count_zeros(); - const SIGNED: bool = Self::MIN != Self::ZERO; - - const ZERO: Self = 0; - const ONE: Self = 1; - const MIN: Self = ::MIN; - const MAX: Self = ::MAX; - } - - impl Int for $ity { - fn unsigned(self) -> $uty { - self as $uty - } - - fn from_unsigned(me: $uty) -> Self { - me as $ity - } - - fn abs_diff(self, other: Self) -> $uty { - self.wrapping_sub(other).wrapping_abs() as $uty - } - - int_impl_common!($ity); - } - }; -} - -int_impl!(isize, usize); -int_impl!(i8, u8); -int_impl!(i16, u16); -int_impl!(i32, u32); -int_impl!(i64, u64); -int_impl!(i128, u128); - -public_test_dep! { -/// Trait for integers twice the bit width of another integer. This is implemented for all -/// primitives except for `u8`, because there is not a smaller primitive. 
-pub(crate) trait DInt: MinInt { - /// Integer that is half the bit width of the integer this trait is implemented for - type H: HInt; - - /// Returns the low half of `self` - fn lo(self) -> Self::H; - /// Returns the high half of `self` - fn hi(self) -> Self::H; - /// Returns the low and high halves of `self` as a tuple - fn lo_hi(self) -> (Self::H, Self::H) { - (self.lo(), self.hi()) - } - /// Constructs an integer using lower and higher half parts - fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { - lo.zero_widen() | hi.widen_hi() - } -} -} - -public_test_dep! { -/// Trait for integers half the bit width of another integer. This is implemented for all -/// primitives except for `u128`, because it there is not a larger primitive. -pub(crate) trait HInt: Int { - /// Integer that is double the bit width of the integer this trait is implemented for - type D: DInt + MinInt; - - /// Widens (using default extension) the integer to have double bit width - fn widen(self) -> Self::D; - /// Widens (zero extension only) the integer to have double bit width. This is needed to get - /// around problems with associated type bounds (such as `Int`) being unstable - fn zero_widen(self) -> Self::D; - /// Widens the integer to have double bit width and shifts the integer into the higher bits - fn widen_hi(self) -> Self::D { - self.widen() << ::BITS - } - /// Widening multiplication with zero widening. This cannot overflow. - fn zero_widen_mul(self, rhs: Self) -> Self::D; - /// Widening multiplication. This cannot overflow. - fn widen_mul(self, rhs: Self) -> Self::D; -} -} - -macro_rules! impl_d_int { - ($($X:ident $D:ident),*) => { - $( - impl DInt for $D { - type H = $X; - - fn lo(self) -> Self::H { - self as $X - } - fn hi(self) -> Self::H { - (self >> <$X as MinInt>::BITS) as $X - } - } - )* - }; -} - -macro_rules! 
impl_h_int { - ($($H:ident $uH:ident $X:ident),*) => { - $( - impl HInt for $H { - type D = $X; - - fn widen(self) -> Self::D { - self as $X - } - fn zero_widen(self) -> Self::D { - (self as $uH) as $X - } - fn zero_widen_mul(self, rhs: Self) -> Self::D { - self.zero_widen().wrapping_mul(rhs.zero_widen()) - } - fn widen_mul(self, rhs: Self) -> Self::D { - self.widen().wrapping_mul(rhs.widen()) - } - } - )* - }; -} - -impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); -impl_h_int!( - u8 u8 u16, - u16 u16 u32, - u32 u32 u64, - u64 u64 u128, - i8 u8 i16, - i16 u16 i32, - i32 u32 i64, - i64 u64 i128 -); - -public_test_dep! { -/// Trait to express (possibly lossy) casting of integers -pub(crate) trait CastInto: Copy { - fn cast(self) -> T; -} - -pub(crate) trait CastFrom:Copy { - fn cast_from(value: T) -> Self; -} -} - -impl + Copy> CastFrom for T { - fn cast_from(value: U) -> Self { - value.cast() - } -} - -macro_rules! cast_into { - ($ty:ty) => { - cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); - }; - ($ty:ty; $($into:ty),*) => {$( - impl CastInto<$into> for $ty { - fn cast(self) -> $into { - self as $into - } - } - )*}; -} +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; -cast_into!(usize); -cast_into!(isize); -cast_into!(u8); -cast_into!(i8); -cast_into!(u16); -cast_into!(i16); -cast_into!(u32); -cast_into!(i32); -cast_into!(u64); -cast_into!(i64); -cast_into!(u128); -cast_into!(i128); +#[cfg(feature = "public-test-deps")] +pub use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; diff --git a/src/int/specialized_div_rem/delegate.rs b/src/int/specialized_div_rem/delegate.rs index 330c6e4f8..f5c6e5023 100644 --- a/src/int/specialized_div_rem/delegate.rs +++ b/src/int/specialized_div_rem/delegate.rs @@ -185,7 +185,6 @@ macro_rules! impl_delegate { }; } -public_test_dep! { /// Returns `n / d` and sets `*rem = n % d`. 
/// /// This specialization exists because: @@ -195,7 +194,7 @@ public_test_dep! { /// delegate algorithm strategy the only reasonably fast way to perform `u128` division. // used on SPARC #[allow(dead_code)] -pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { +pub fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { use super::*; let duo_lo = duo as u64; let duo_hi = (duo >> 64) as u64; @@ -316,4 +315,3 @@ pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { } } } -} diff --git a/src/int/trailing_zeros.rs b/src/int/trailing_zeros.rs index cea366b07..9878a1687 100644 --- a/src/int/trailing_zeros.rs +++ b/src/int/trailing_zeros.rs @@ -1,46 +1,52 @@ -use crate::int::{CastInto, Int}; +mod implementation { + use crate::int::{CastInto, Int}; -public_test_dep! { -/// Returns number of trailing binary zeros in `x`. -#[allow(dead_code)] -pub(crate) fn trailing_zeros + CastInto + CastInto>(x: T) -> usize { - let mut x = x; - let mut r: u32 = 0; - let mut t: u32; + /// Returns number of trailing binary zeros in `x`. 
+ #[allow(dead_code)] + pub fn trailing_zeros + CastInto + CastInto>(x: T) -> usize { + let mut x = x; + let mut r: u32 = 0; + let mut t: u32; - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { - r += ((CastInto::::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0 - x >>= r; // remove 32 zero bits - } + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + r += ((CastInto::::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0 + x >>= r; // remove 32 zero bits + } - if T::BITS >= 32 { - t = ((CastInto::::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0 - r += t; - x >>= t; // x = [0 - 0xFFFF] + higher garbage bits - } + if T::BITS >= 32 { + t = ((CastInto::::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0 + r += t; + x >>= t; // x = [0 - 0xFFFF] + higher garbage bits + } - const { assert!(T::BITS >= 16) }; - t = ((CastInto::::cast(x) == 0) as u32) << 3; - x >>= t; // x = [0 - 0xFF] + higher garbage bits - r += t; + const { assert!(T::BITS >= 16) }; + t = ((CastInto::::cast(x) == 0) as u32) << 3; + x >>= t; // x = [0 - 0xFF] + higher garbage bits + r += t; - let mut x: u8 = x.cast(); + let mut x: u8 = x.cast(); - t = (((x & 0x0F) == 0) as u32) << 2; - x >>= t; // x = [0 - 0xF] + higher garbage bits - r += t; + t = (((x & 0x0F) == 0) as u32) << 2; + x >>= t; // x = [0 - 0xF] + higher garbage bits + r += t; - t = (((x & 0x3) == 0) as u32) << 1; - x >>= t; // x = [0 - 0x3] + higher garbage bits - r += t; + t = (((x & 0x3) == 0) as u32) << 1; + x >>= t; // x = [0 - 0x3] + higher garbage bits + r += t; - x &= 3; + x &= 3; - r as usize + ((2 - (x >> 1) as usize) & (((x & 1) == 0) as usize).wrapping_neg()) -} + r as usize + ((2 - (x >> 1) as usize) & (((x & 1) == 0) as usize).wrapping_neg()) + } } +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use implementation::*; + +#[cfg(feature = "public-test-deps")] +pub use implementation::*; + intrinsics! 
{ /// Returns the number of trailing binary zeros in `x` (32 bit version). pub extern "C" fn __ctzsi2(x: u32) -> usize { diff --git a/src/int/traits.rs b/src/int/traits.rs new file mode 100644 index 000000000..e9d879627 --- /dev/null +++ b/src/int/traits.rs @@ -0,0 +1,402 @@ +use core::ops; + +/// Minimal integer implementations needed on all integer types, including wide integers. +#[allow(dead_code)] +pub trait MinInt: + Copy + + core::fmt::Debug + + ops::BitOr + + ops::Not + + ops::Shl +{ + /// Type with the same width but other signedness + type OtherSign: MinInt; + /// Unsigned version of Self + type UnsignedInt: MinInt; + + /// If `Self` is a signed integer + const SIGNED: bool; + + /// The bitwidth of the int type + const BITS: u32; + + const ZERO: Self; + const ONE: Self; + const MIN: Self; + const MAX: Self; +} + +/// Trait for some basic operations on integers +#[allow(dead_code)] +pub trait Int: + MinInt + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::SubAssign + + ops::BitAndAssign + + ops::BitOrAssign + + ops::BitXorAssign + + ops::ShlAssign + + ops::ShrAssign + + ops::Add + + ops::Sub + + ops::Mul + + ops::Div + + ops::Shr + + ops::BitXor + + ops::BitAnd +{ + /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing + /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, + /// 112,119,120,125,126,127]. + const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(::BITS); + + /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. + const FUZZ_NUM: usize = { + let log2 = (::BITS - 1).count_ones() as usize; + if log2 == 3 { + // case for u8 + 6 + } else { + // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate + // boundaries. 
+ 8 + (4 * (log2 - 4)) + } + }; + + fn unsigned(self) -> Self::UnsignedInt; + fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; + + fn from_bool(b: bool) -> Self; + + /// Prevents the need for excessive conversions between signed and unsigned + fn logical_shr(self, other: u32) -> Self; + + /// Absolute difference between two integers. + fn abs_diff(self, other: Self) -> Self::UnsignedInt; + + // copied from primitive integers, but put in a trait + fn is_zero(self) -> bool; + fn wrapping_neg(self) -> Self; + fn wrapping_add(self, other: Self) -> Self; + fn wrapping_mul(self, other: Self) -> Self; + fn wrapping_sub(self, other: Self) -> Self; + fn wrapping_shl(self, other: u32) -> Self; + fn wrapping_shr(self, other: u32) -> Self; + fn rotate_left(self, other: u32) -> Self; + fn overflowing_add(self, other: Self) -> (Self, bool); + fn leading_zeros(self) -> u32; + fn ilog2(self) -> u32; +} + +const fn make_fuzz_lengths(bits: u32) -> [u8; 20] { + let mut v = [0u8; 20]; + v[0] = 0; + v[1] = 1; + v[2] = 2; // important for parity and the iX::MIN case when reversed + let mut i = 3; + + // No need for any more until the byte boundary, because there should be no algorithms + // that are sensitive to anything not next to byte boundaries after 2. We also scale + // in powers of two, which is important to prevent u128 corner tests from getting too + // big. + let mut l = 8; + loop { + if l >= ((bits / 2) as u8) { + break; + } + // get both sides of the byte boundary + v[i] = l - 1; + i += 1; + v[i] = l; + i += 1; + l *= 2; + } + + if bits != 8 { + // add the lower side of the middle boundary + v[i] = ((bits / 2) - 1) as u8; + i += 1; + } + + // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS + // boundary because of algorithms that split the high part up. We reverse the scaling + // as we go to Self::BITS. 
+ let mid = i; + let mut j = 1; + loop { + v[i] = (bits as u8) - (v[mid - j]) - 1; + if j == mid { + break; + } + i += 1; + j += 1; + } + v +} + +macro_rules! int_impl_common { + ($ty:ty) => { + fn from_bool(b: bool) -> Self { + b as $ty + } + + fn logical_shr(self, other: u32) -> Self { + Self::from_unsigned(self.unsigned().wrapping_shr(other)) + } + + fn is_zero(self) -> bool { + self == Self::ZERO + } + + fn wrapping_neg(self) -> Self { + ::wrapping_neg(self) + } + + fn wrapping_add(self, other: Self) -> Self { + ::wrapping_add(self, other) + } + + fn wrapping_mul(self, other: Self) -> Self { + ::wrapping_mul(self, other) + } + + fn wrapping_sub(self, other: Self) -> Self { + ::wrapping_sub(self, other) + } + + fn wrapping_shl(self, other: u32) -> Self { + ::wrapping_shl(self, other) + } + + fn wrapping_shr(self, other: u32) -> Self { + ::wrapping_shr(self, other) + } + + fn rotate_left(self, other: u32) -> Self { + ::rotate_left(self, other) + } + + fn overflowing_add(self, other: Self) -> (Self, bool) { + ::overflowing_add(self, other) + } + + fn leading_zeros(self) -> u32 { + ::leading_zeros(self) + } + + fn ilog2(self) -> u32 { + ::ilog2(self) + } + }; +} + +macro_rules! 
int_impl { + ($ity:ty, $uty:ty) => { + impl MinInt for $uty { + type OtherSign = $ity; + type UnsignedInt = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $uty { + fn unsigned(self) -> $uty { + self + } + + // It makes writing macros easier if this is implemented for both signed and unsigned + #[allow(clippy::wrong_self_convention)] + fn from_unsigned(me: $uty) -> Self { + me + } + + fn abs_diff(self, other: Self) -> Self { + if self < other { + other.wrapping_sub(self) + } else { + self.wrapping_sub(other) + } + } + + int_impl_common!($uty); + } + + impl MinInt for $ity { + type OtherSign = $uty; + type UnsignedInt = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $ity { + fn unsigned(self) -> $uty { + self as $uty + } + + fn from_unsigned(me: $uty) -> Self { + me as $ity + } + + fn abs_diff(self, other: Self) -> $uty { + self.wrapping_sub(other).wrapping_abs() as $uty + } + + int_impl_common!($ity); + } + }; +} + +int_impl!(isize, usize); +int_impl!(i8, u8); +int_impl!(i16, u16); +int_impl!(i32, u32); +int_impl!(i64, u64); +int_impl!(i128, u128); + +/// Trait for integers twice the bit width of another integer. This is implemented for all +/// primitives except for `u8`, because there is not a smaller primitive. 
+pub trait DInt: MinInt { + /// Integer that is half the bit width of the integer this trait is implemented for + type H: HInt; + + /// Returns the low half of `self` + fn lo(self) -> Self::H; + /// Returns the high half of `self` + fn hi(self) -> Self::H; + /// Returns the low and high halves of `self` as a tuple + fn lo_hi(self) -> (Self::H, Self::H) { + (self.lo(), self.hi()) + } + /// Constructs an integer using lower and higher half parts + fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { + lo.zero_widen() | hi.widen_hi() + } +} + +/// Trait for integers half the bit width of another integer. This is implemented for all +/// primitives except for `u128`, because it there is not a larger primitive. +pub trait HInt: Int { + /// Integer that is double the bit width of the integer this trait is implemented for + type D: DInt + MinInt; + + /// Widens (using default extension) the integer to have double bit width + fn widen(self) -> Self::D; + /// Widens (zero extension only) the integer to have double bit width. This is needed to get + /// around problems with associated type bounds (such as `Int`) being unstable + fn zero_widen(self) -> Self::D; + /// Widens the integer to have double bit width and shifts the integer into the higher bits + fn widen_hi(self) -> Self::D { + self.widen() << ::BITS + } + /// Widening multiplication with zero widening. This cannot overflow. + fn zero_widen_mul(self, rhs: Self) -> Self::D; + /// Widening multiplication. This cannot overflow. + fn widen_mul(self, rhs: Self) -> Self::D; +} + +macro_rules! impl_d_int { + ($($X:ident $D:ident),*) => { + $( + impl DInt for $D { + type H = $X; + + fn lo(self) -> Self::H { + self as $X + } + fn hi(self) -> Self::H { + (self >> <$X as MinInt>::BITS) as $X + } + } + )* + }; +} + +macro_rules! 
impl_h_int { + ($($H:ident $uH:ident $X:ident),*) => { + $( + impl HInt for $H { + type D = $X; + + fn widen(self) -> Self::D { + self as $X + } + fn zero_widen(self) -> Self::D { + (self as $uH) as $X + } + fn zero_widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen().wrapping_mul(rhs.zero_widen()) + } + fn widen_mul(self, rhs: Self) -> Self::D { + self.widen().wrapping_mul(rhs.widen()) + } + } + )* + }; +} + +impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); +impl_h_int!( + u8 u8 u16, + u16 u16 u32, + u32 u32 u64, + u64 u64 u128, + i8 u8 i16, + i16 u16 i32, + i32 u32 i64, + i64 u64 i128 +); + +/// Trait to express (possibly lossy) casting of integers +pub trait CastInto: Copy { + fn cast(self) -> T; +} + +pub trait CastFrom: Copy { + fn cast_from(value: T) -> Self; +} + +impl + Copy> CastFrom for T { + fn cast_from(value: U) -> Self { + value.cast() + } +} + +macro_rules! cast_into { + ($ty:ty) => { + cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); + }; + ($ty:ty; $($into:ty),*) => {$( + impl CastInto<$into> for $ty { + fn cast(self) -> $into { + self as $into + } + } + )*}; +} + +cast_into!(usize); +cast_into!(isize); +cast_into!(u8); +cast_into!(i8); +cast_into!(u16); +cast_into!(i16); +cast_into!(u32); +cast_into!(i32); +cast_into!(u64); +cast_into!(i64); +cast_into!(u128); +cast_into!(i128); diff --git a/src/macros.rs b/src/macros.rs index 42c83ee55..9f951d515 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -1,21 +1,5 @@ //! Macros shared throughout the compiler-builtins implementation -/// Changes the visibility to `pub` if feature "public-test-deps" is set -#[cfg(not(feature = "public-test-deps"))] -macro_rules! public_test_dep { - ($(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*) => { - $(#[$($meta)*])* pub(crate) $ident $($tokens)* - }; -} - -/// Changes the visibility to `pub` if feature "public-test-deps" is set -#[cfg(feature = "public-test-deps")] -macro_rules! 
public_test_dep { - {$(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*} => { - $(#[$($meta)*])* pub $ident $($tokens)* - }; -} - /// The "main macro" used for defining intrinsics. /// /// The compiler-builtins library is super platform-specific with tons of crazy From 359fb67d22397ac67a755b9b1947ca9a4fa8df6b Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Tue, 6 Aug 2024 17:16:03 +0200 Subject: [PATCH 0802/1459] Add `only-soft-floats` feature to prevent using any intrinsics or arch-specific code --- libm/Cargo.toml | 3 +++ libm/crates/compiler-builtins-smoke-test/Cargo.toml | 1 + libm/src/math/mod.rs | 2 +- libm/src/math/sqrt.rs | 4 ++-- libm/src/math/sqrtf.rs | 4 ++-- 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index d33ca61cd..893a2e19d 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -23,6 +23,9 @@ unstable = [] # musl libc. musl-reference-tests = ['rand'] +# Used to prevent using any intrinsics or arch-specific code. +only-soft-floats = [] + [workspace] members = [ "crates/compiler-builtins-smoke-test", diff --git a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml index 695b710ff..ec48ca206 100644 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -10,3 +10,4 @@ bench = false [features] unstable = [] checked = [] +only-soft-floats = [] diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 05ebb708c..04d3bbb62 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -76,7 +76,7 @@ macro_rules! div { macro_rules! 
llvm_intrinsically_optimized { (#[cfg($($clause:tt)*)] $e:expr) => { - #[cfg(all(feature = "unstable", $($clause)*))] + #[cfg(all(feature = "unstable", not(feature = "only-soft-floats"), $($clause)*))] { if true { // thwart the dead code lint $e diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index baa0db9f8..a0003cb05 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -92,7 +92,7 @@ pub fn sqrt(x: f64) -> f64 { } } } - #[cfg(target_feature = "sse2")] + #[cfg(all(target_feature = "sse2", not(feature = "only-soft-floats")))] { // Note: This path is unlikely since LLVM will usually have already // optimized sqrt calls into hardware instructions if sse2 is available, @@ -107,7 +107,7 @@ pub fn sqrt(x: f64) -> f64 { _mm_cvtsd_f64(m_sqrt) } } - #[cfg(not(target_feature = "sse2"))] + #[cfg(any(not(target_feature = "sse2"), feature = "only-soft-floats"))] { use core::num::Wrapping; diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index 12bd60028..0cef073ea 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -27,7 +27,7 @@ pub fn sqrtf(x: f32) -> f32 { } } } - #[cfg(target_feature = "sse")] + #[cfg(all(target_feature = "sse", not(feature = "only-soft-floats")))] { // Note: This path is unlikely since LLVM will usually have already // optimized sqrt calls into hardware instructions if sse is available, @@ -42,7 +42,7 @@ pub fn sqrtf(x: f32) -> f32 { _mm_cvtss_f32(m_sqrt) } } - #[cfg(not(target_feature = "sse"))] + #[cfg(any(not(target_feature = "sse"), feature = "only-soft-floats"))] { const TINY: f32 = 1.0e-30; From cca0d36005016ede39c3b91de42191ae87d4bb04 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 6 Aug 2024 21:48:51 +0000 Subject: [PATCH 0803/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 805b44cad..6c07fd555 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 
+1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.115" +version = "0.1.116" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 47e50fd2467af6553418a93d5bd05339ef6f07c3 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 6 Aug 2024 22:11:19 -0400 Subject: [PATCH 0804/1459] Revert "Eliminate the use of `public_test_dep!`" --- src/float/mod.rs | 190 ++++++++++- src/float/traits.rs | 184 ----------- src/int/leading_zeros.rs | 228 +++++++------ src/int/mod.rs | 417 +++++++++++++++++++++++- src/int/specialized_div_rem/delegate.rs | 4 +- src/int/trailing_zeros.rs | 70 ++-- src/int/traits.rs | 402 ----------------------- src/macros.rs | 16 + 8 files changed, 759 insertions(+), 752 deletions(-) delete mode 100644 src/float/traits.rs delete mode 100644 src/int/traits.rs diff --git a/src/float/mod.rs b/src/float/mod.rs index 41b308626..847373205 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -1,3 +1,7 @@ +use core::ops; + +use crate::int::{DInt, Int, MinInt}; + pub mod add; pub mod cmp; pub mod conv; @@ -6,11 +10,187 @@ pub mod extend; pub mod mul; pub mod pow; pub mod sub; -pub(crate) mod traits; pub mod trunc; -#[cfg(not(feature = "public-test-deps"))] -pub(crate) use traits::{Float, HalfRep}; +/// Wrapper to extract the integer type half of the float's size +pub(crate) type HalfRep = <::Int as DInt>::H; + +public_test_dep! { +/// Trait for some basic operations on floats +#[allow(dead_code)] +pub(crate) trait Float: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::MulAssign + + ops::Add + + ops::Sub + + ops::Div + + ops::Rem +{ + /// A uint of the same width as the float + type Int: Int; + + /// A int of the same width as the float + type SignedInt: Int; + + /// An int capable of containing the exponent bits plus a sign bit. This is signed. 
+ type ExpInt: Int; + + const ZERO: Self; + const ONE: Self; + + /// The bitwidth of the float type + const BITS: u32; + + /// The bitwidth of the significand + const SIGNIFICAND_BITS: u32; + + /// The bitwidth of the exponent + const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1; + + /// The maximum value of the exponent + const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1; + + /// The exponent bias value + const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1; + + /// A mask for the sign bit + const SIGN_MASK: Self::Int; + + /// A mask for the significand + const SIGNIFICAND_MASK: Self::Int; + + /// The implicit bit of the float format + const IMPLICIT_BIT: Self::Int; + + /// A mask for the exponent + const EXPONENT_MASK: Self::Int; + + /// Returns `self` transmuted to `Self::Int` + fn repr(self) -> Self::Int; + + /// Returns `self` transmuted to `Self::SignedInt` + fn signed_repr(self) -> Self::SignedInt; + + /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be + /// represented in multiple different ways. This method returns `true` if two NaNs are + /// compared. + fn eq_repr(self, rhs: Self) -> bool; + + /// Returns true if the sign is negative + fn is_sign_negative(self) -> bool; + + /// Returns the exponent with bias + fn exp(self) -> Self::ExpInt; + + /// Returns the significand with no implicit bit (or the "fractional" part) + fn frac(self) -> Self::Int; + + /// Returns the significand with implicit bit + fn imp_frac(self) -> Self::Int; + + /// Returns a `Self::Int` transmuted back to `Self` + fn from_repr(a: Self::Int) -> Self; + + /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. 
+ fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self; + + /// Returns (normalized exponent, normalized significand) + fn normalize(significand: Self::Int) -> (i32, Self::Int); + + /// Returns if `self` is subnormal + fn is_subnormal(self) -> bool; +} +} + +macro_rules! float_impl { + ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { + impl Float for $ty { + type Int = $ity; + type SignedInt = $sity; + type ExpInt = $expty; + + const ZERO: Self = 0.0; + const ONE: Self = 1.0; + + const BITS: u32 = $bits; + const SIGNIFICAND_BITS: u32 = $significand_bits; + + const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); + const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1; + const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS; + const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK); + + fn repr(self) -> Self::Int { + self.to_bits() + } + fn signed_repr(self) -> Self::SignedInt { + self.to_bits() as Self::SignedInt + } + fn eq_repr(self, rhs: Self) -> bool { + #[cfg(feature = "mangled-names")] + fn is_nan(x: $ty) -> bool { + // When using mangled-names, the "real" compiler-builtins might not have the + // necessary builtin (__unordtf2) to test whether `f128` is NaN. 
+ // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin + // x is NaN if all the bits of the exponent are set and the significand is non-0 + x.repr() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK + && x.repr() & $ty::SIGNIFICAND_MASK != 0 + } + #[cfg(not(feature = "mangled-names"))] + fn is_nan(x: $ty) -> bool { + x.is_nan() + } + if is_nan(self) && is_nan(rhs) { + true + } else { + self.repr() == rhs.repr() + } + } + fn is_sign_negative(self) -> bool { + self.is_sign_negative() + } + fn exp(self) -> Self::ExpInt { + ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt + } + fn frac(self) -> Self::Int { + self.to_bits() & Self::SIGNIFICAND_MASK + } + fn imp_frac(self) -> Self::Int { + self.frac() | Self::IMPLICIT_BIT + } + fn from_repr(a: Self::Int) -> Self { + Self::from_bits(a) + } + fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self { + Self::from_repr( + ((sign as Self::Int) << (Self::BITS - 1)) + | ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK) + | (significand & Self::SIGNIFICAND_MASK), + ) + } + fn normalize(significand: Self::Int) -> (i32, Self::Int) { + let shift = significand + .leading_zeros() + .wrapping_sub((Self::Int::ONE << Self::SIGNIFICAND_BITS).leading_zeros()); + ( + 1i32.wrapping_sub(shift as i32), + significand << shift as Self::Int, + ) + } + fn is_subnormal(self) -> bool { + (self.repr() & Self::EXPONENT_MASK) == Self::Int::ZERO + } + } + }; +} -#[cfg(feature = "public-test-deps")] -pub use traits::{Float, HalfRep}; +#[cfg(f16_enabled)] +float_impl!(f16, u16, i16, i8, 16, 10); +float_impl!(f32, u32, i32, i16, 32, 23); +float_impl!(f64, u64, i64, i16, 64, 52); +#[cfg(f128_enabled)] +float_impl!(f128, u128, i128, i16, 128, 112); diff --git a/src/float/traits.rs b/src/float/traits.rs deleted file mode 100644 index e57bd1b98..000000000 --- a/src/float/traits.rs +++ /dev/null @@ -1,184 +0,0 @@ -use core::ops; - -use crate::int::{DInt, Int, MinInt}; - 
-/// Wrapper to extract the integer type half of the float's size -pub type HalfRep = <::Int as DInt>::H; - -/// Trait for some basic operations on floats -#[allow(dead_code)] -pub trait Float: - Copy - + core::fmt::Debug - + PartialEq - + PartialOrd - + ops::AddAssign - + ops::MulAssign - + ops::Add - + ops::Sub - + ops::Div - + ops::Rem -{ - /// A uint of the same width as the float - type Int: Int; - - /// A int of the same width as the float - type SignedInt: Int; - - /// An int capable of containing the exponent bits plus a sign bit. This is signed. - type ExpInt: Int; - - const ZERO: Self; - const ONE: Self; - - /// The bitwidth of the float type - const BITS: u32; - - /// The bitwidth of the significand - const SIGNIFICAND_BITS: u32; - - /// The bitwidth of the exponent - const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1; - - /// The maximum value of the exponent - const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1; - - /// The exponent bias value - const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1; - - /// A mask for the sign bit - const SIGN_MASK: Self::Int; - - /// A mask for the significand - const SIGNIFICAND_MASK: Self::Int; - - /// The implicit bit of the float format - const IMPLICIT_BIT: Self::Int; - - /// A mask for the exponent - const EXPONENT_MASK: Self::Int; - - /// Returns `self` transmuted to `Self::Int` - fn repr(self) -> Self::Int; - - /// Returns `self` transmuted to `Self::SignedInt` - fn signed_repr(self) -> Self::SignedInt; - - /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be - /// represented in multiple different ways. This method returns `true` if two NaNs are - /// compared. 
- fn eq_repr(self, rhs: Self) -> bool; - - /// Returns true if the sign is negative - fn is_sign_negative(self) -> bool; - - /// Returns the exponent with bias - fn exp(self) -> Self::ExpInt; - - /// Returns the significand with no implicit bit (or the "fractional" part) - fn frac(self) -> Self::Int; - - /// Returns the significand with implicit bit - fn imp_frac(self) -> Self::Int; - - /// Returns a `Self::Int` transmuted back to `Self` - fn from_repr(a: Self::Int) -> Self; - - /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. - fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self; - - /// Returns (normalized exponent, normalized significand) - fn normalize(significand: Self::Int) -> (i32, Self::Int); - - /// Returns if `self` is subnormal - fn is_subnormal(self) -> bool; -} - -macro_rules! float_impl { - ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { - impl Float for $ty { - type Int = $ity; - type SignedInt = $sity; - type ExpInt = $expty; - - const ZERO: Self = 0.0; - const ONE: Self = 1.0; - - const BITS: u32 = $bits; - const SIGNIFICAND_BITS: u32 = $significand_bits; - - const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); - const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1; - const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS; - const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK); - - fn repr(self) -> Self::Int { - self.to_bits() - } - fn signed_repr(self) -> Self::SignedInt { - self.to_bits() as Self::SignedInt - } - fn eq_repr(self, rhs: Self) -> bool { - #[cfg(feature = "mangled-names")] - fn is_nan(x: $ty) -> bool { - // When using mangled-names, the "real" compiler-builtins might not have the - // necessary builtin (__unordtf2) to test whether `f128` is NaN. 
- // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin - // x is NaN if all the bits of the exponent are set and the significand is non-0 - x.repr() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK - && x.repr() & $ty::SIGNIFICAND_MASK != 0 - } - #[cfg(not(feature = "mangled-names"))] - fn is_nan(x: $ty) -> bool { - x.is_nan() - } - if is_nan(self) && is_nan(rhs) { - true - } else { - self.repr() == rhs.repr() - } - } - fn is_sign_negative(self) -> bool { - self.is_sign_negative() - } - fn exp(self) -> Self::ExpInt { - ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt - } - fn frac(self) -> Self::Int { - self.to_bits() & Self::SIGNIFICAND_MASK - } - fn imp_frac(self) -> Self::Int { - self.frac() | Self::IMPLICIT_BIT - } - fn from_repr(a: Self::Int) -> Self { - Self::from_bits(a) - } - fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self { - Self::from_repr( - ((sign as Self::Int) << (Self::BITS - 1)) - | ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK) - | (significand & Self::SIGNIFICAND_MASK), - ) - } - fn normalize(significand: Self::Int) -> (i32, Self::Int) { - let shift = significand - .leading_zeros() - .wrapping_sub((Self::Int::ONE << Self::SIGNIFICAND_BITS).leading_zeros()); - ( - 1i32.wrapping_sub(shift as i32), - significand << shift as Self::Int, - ) - } - fn is_subnormal(self) -> bool { - (self.repr() & Self::EXPONENT_MASK) == Self::Int::ZERO - } - } - }; -} - -#[cfg(not(feature = "no-f16-f128"))] -float_impl!(f16, u16, i16, i8, 16, 10); -float_impl!(f32, u32, i32, i16, 32, 23); -float_impl!(f64, u64, i64, i16, 64, 52); -#[cfg(not(feature = "no-f16-f128"))] -float_impl!(f128, u128, i128, i16, 128, 112); diff --git a/src/int/leading_zeros.rs b/src/int/leading_zeros.rs index eede1ebe6..1fee9fcf5 100644 --- a/src/int/leading_zeros.rs +++ b/src/int/leading_zeros.rs @@ -3,140 +3,136 @@ // adding a zero check at the beginning, but `__clzsi2` has a precondition that `x 
!= 0`. // Compilers will insert the check for zero in cases where it is needed. -mod implementation { - use crate::int::{CastInto, Int}; +use crate::int::{CastInto, Int}; - /// Returns the number of leading binary zeros in `x`. - #[allow(dead_code)] - pub fn leading_zeros_default>(x: T) -> usize { - // The basic idea is to test if the higher bits of `x` are zero and bisect the number - // of leading zeros. It is possible for all branches of the bisection to use the same - // code path by conditionally shifting the higher parts down to let the next bisection - // step work on the higher or lower parts of `x`. Instead of starting with `z == 0` - // and adding to the number of zeros, it is slightly faster to start with - // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros, - // because it simplifies the final bisection step. - let mut x = x; - // the number of potential leading zeros - let mut z = T::BITS as usize; - // a temporary - let mut t: T; +public_test_dep! { +/// Returns the number of leading binary zeros in `x`. +#[allow(dead_code)] +pub(crate) fn leading_zeros_default>(x: T) -> usize { + // The basic idea is to test if the higher bits of `x` are zero and bisect the number + // of leading zeros. It is possible for all branches of the bisection to use the same + // code path by conditionally shifting the higher parts down to let the next bisection + // step work on the higher or lower parts of `x`. Instead of starting with `z == 0` + // and adding to the number of zeros, it is slightly faster to start with + // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros, + // because it simplifies the final bisection step. 
+ let mut x = x; + // the number of potential leading zeros + let mut z = T::BITS as usize; + // a temporary + let mut t: T; - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { - t = x >> 32; - if t != T::ZERO { - z -= 32; - x = t; - } - } - if T::BITS >= 32 { - t = x >> 16; - if t != T::ZERO { - z -= 16; - x = t; - } - } - const { assert!(T::BITS >= 16) }; - t = x >> 8; + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + t = x >> 32; if t != T::ZERO { - z -= 8; + z -= 32; x = t; } - t = x >> 4; - if t != T::ZERO { - z -= 4; - x = t; - } - t = x >> 2; + } + if T::BITS >= 32 { + t = x >> 16; if t != T::ZERO { - z -= 2; + z -= 16; x = t; } - // the last two bisections are combined into one conditional - t = x >> 1; - if t != T::ZERO { - z - 2 - } else { - z - x.cast() - } - - // We could potentially save a few cycles by using the LUT trick from - // "https://embeddedgurus.com/state-space/2014/09/ - // fast-deterministic-and-portable-counting-leading-zeros/". - // However, 256 bytes for a LUT is too large for embedded use cases. We could remove - // the last 3 bisections and use this 16 byte LUT for the rest of the work: - //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]; - //z -= LUT[x] as usize; - //z - // However, it ends up generating about the same number of instructions. When benchmarked - // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO - // execution effects. Changing to using a LUT and branching is risky for smaller cores. 
+ } + const { assert!(T::BITS >= 16) }; + t = x >> 8; + if t != T::ZERO { + z -= 8; + x = t; + } + t = x >> 4; + if t != T::ZERO { + z -= 4; + x = t; + } + t = x >> 2; + if t != T::ZERO { + z -= 2; + x = t; + } + // the last two bisections are combined into one conditional + t = x >> 1; + if t != T::ZERO { + z - 2 + } else { + z - x.cast() } - // The above method does not compile well on RISC-V (because of the lack of predicated - // instructions), producing code with many branches or using an excessively long - // branchless solution. This method takes advantage of the set-if-less-than instruction on - // RISC-V that allows `(x >= power-of-two) as usize` to be branchless. + // We could potentially save a few cycles by using the LUT trick from + // "https://embeddedgurus.com/state-space/2014/09/ + // fast-deterministic-and-portable-counting-leading-zeros/". + // However, 256 bytes for a LUT is too large for embedded use cases. We could remove + // the last 3 bisections and use this 16 byte LUT for the rest of the work: + //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]; + //z -= LUT[x] as usize; + //z + // However, it ends up generating about the same number of instructions. When benchmarked + // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO + // execution effects. Changing to using a LUT and branching is risky for smaller cores. +} +} - /// Returns the number of leading binary zeros in `x`. - #[allow(dead_code)] - pub fn leading_zeros_riscv>(x: T) -> usize { - let mut x = x; - // the number of potential leading zeros - let mut z = T::BITS; - // a temporary - let mut t: u32; +// The above method does not compile well on RISC-V (because of the lack of predicated +// instructions), producing code with many branches or using an excessively long +// branchless solution. This method takes advantage of the set-if-less-than instruction on +// RISC-V that allows `(x >= power-of-two) as usize` to be branchless. 
- // RISC-V does not have a set-if-greater-than-or-equal instruction and - // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is - // still the most optimal method. A conditional set can only be turned into a single - // immediate instruction if `x` is compared with an immediate `imm` (that can fit into - // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the - // right). If we try to save an instruction by using `x < imm` for each bisection, we - // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, - // but the immediate will never fit into 12 bits and never save an instruction. - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { - // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise - // `t` is set to 0. - t = ((x >= (T::ONE << 32)) as u32) << 5; - // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the - // next step to process. - x >>= t; - // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential - // leading zeros - z -= t; - } - if T::BITS >= 32 { - t = ((x >= (T::ONE << 16)) as u32) << 4; - x >>= t; - z -= t; - } - const { assert!(T::BITS >= 16) }; - t = ((x >= (T::ONE << 8)) as u32) << 3; - x >>= t; - z -= t; - t = ((x >= (T::ONE << 4)) as u32) << 2; - x >>= t; - z -= t; - t = ((x >= (T::ONE << 2)) as u32) << 1; +public_test_dep! { +/// Returns the number of leading binary zeros in `x`. +#[allow(dead_code)] +pub(crate) fn leading_zeros_riscv>(x: T) -> usize { + let mut x = x; + // the number of potential leading zeros + let mut z = T::BITS; + // a temporary + let mut t: u32; + + // RISC-V does not have a set-if-greater-than-or-equal instruction and + // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is + // still the most optimal method. 
A conditional set can only be turned into a single + // immediate instruction if `x` is compared with an immediate `imm` (that can fit into + // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the + // right). If we try to save an instruction by using `x < imm` for each bisection, we + // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, + // but the immediate will never fit into 12 bits and never save an instruction. + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise + // `t` is set to 0. + t = ((x >= (T::ONE << 32)) as u32) << 5; + // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the + // next step to process. x >>= t; + // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential + // leading zeros z -= t; - t = (x >= (T::ONE << 1)) as u32; + } + if T::BITS >= 32 { + t = ((x >= (T::ONE << 16)) as u32) << 4; x >>= t; z -= t; - // All bits except the LSB are guaranteed to be zero for this final bisection step. - // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. - z as usize - x.cast() } + const { assert!(T::BITS >= 16) }; + t = ((x >= (T::ONE << 8)) as u32) << 3; + x >>= t; + z -= t; + t = ((x >= (T::ONE << 4)) as u32) << 2; + x >>= t; + z -= t; + t = ((x >= (T::ONE << 2)) as u32) << 1; + x >>= t; + z -= t; + t = (x >= (T::ONE << 1)) as u32; + x >>= t; + z -= t; + // All bits except the LSB are guaranteed to be zero for this final bisection step. + // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. + z as usize - x.cast() +} } - -#[cfg(not(feature = "public-test-deps"))] -pub(crate) use implementation::*; - -#[cfg(feature = "public-test-deps")] -pub use implementation::*; intrinsics! 
{ /// Returns the number of leading binary zeros in `x` diff --git a/src/int/mod.rs b/src/int/mod.rs index a0d992e13..5f56c6b6e 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -1,4 +1,6 @@ -pub(crate) mod specialized_div_rem; +use core::ops; + +mod specialized_div_rem; pub mod addsub; mod big; @@ -8,13 +10,416 @@ pub mod mul; pub mod sdiv; pub mod shift; pub mod trailing_zeros; -mod traits; pub mod udiv; pub use big::{i256, u256}; -#[cfg(not(feature = "public-test-deps"))] -pub(crate) use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; +public_test_dep! { +/// Minimal integer implementations needed on all integer types, including wide integers. +#[allow(dead_code)] +pub(crate) trait MinInt: Copy + + core::fmt::Debug + + ops::BitOr + + ops::Not + + ops::Shl +{ + + /// Type with the same width but other signedness + type OtherSign: MinInt; + /// Unsigned version of Self + type UnsignedInt: MinInt; + + /// If `Self` is a signed integer + const SIGNED: bool; + + /// The bitwidth of the int type + const BITS: u32; + + const ZERO: Self; + const ONE: Self; + const MIN: Self; + const MAX: Self; +} +} + +public_test_dep! { +/// Trait for some basic operations on integers +#[allow(dead_code)] +pub(crate) trait Int: MinInt + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::SubAssign + + ops::BitAndAssign + + ops::BitOrAssign + + ops::BitXorAssign + + ops::ShlAssign + + ops::ShrAssign + + ops::Add + + ops::Sub + + ops::Mul + + ops::Div + + ops::Shr + + ops::BitXor + + ops::BitAnd +{ + /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing + /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, + /// 112,119,120,125,126,127]. + const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(::BITS); + + /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. 
+ const FUZZ_NUM: usize = { + let log2 = (::BITS - 1).count_ones() as usize; + if log2 == 3 { + // case for u8 + 6 + } else { + // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate + // boundaries. + 8 + (4 * (log2 - 4)) + } + }; + + fn unsigned(self) -> Self::UnsignedInt; + fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; + + fn from_bool(b: bool) -> Self; + + /// Prevents the need for excessive conversions between signed and unsigned + fn logical_shr(self, other: u32) -> Self; + + /// Absolute difference between two integers. + fn abs_diff(self, other: Self) -> Self::UnsignedInt; + + // copied from primitive integers, but put in a trait + fn is_zero(self) -> bool; + fn wrapping_neg(self) -> Self; + fn wrapping_add(self, other: Self) -> Self; + fn wrapping_mul(self, other: Self) -> Self; + fn wrapping_sub(self, other: Self) -> Self; + fn wrapping_shl(self, other: u32) -> Self; + fn wrapping_shr(self, other: u32) -> Self; + fn rotate_left(self, other: u32) -> Self; + fn overflowing_add(self, other: Self) -> (Self, bool); + fn leading_zeros(self) -> u32; + fn ilog2(self) -> u32; +} +} + +pub(crate) const fn make_fuzz_lengths(bits: u32) -> [u8; 20] { + let mut v = [0u8; 20]; + v[0] = 0; + v[1] = 1; + v[2] = 2; // important for parity and the iX::MIN case when reversed + let mut i = 3; + + // No need for any more until the byte boundary, because there should be no algorithms + // that are sensitive to anything not next to byte boundaries after 2. We also scale + // in powers of two, which is important to prevent u128 corner tests from getting too + // big. 
+ let mut l = 8; + loop { + if l >= ((bits / 2) as u8) { + break; + } + // get both sides of the byte boundary + v[i] = l - 1; + i += 1; + v[i] = l; + i += 1; + l *= 2; + } + + if bits != 8 { + // add the lower side of the middle boundary + v[i] = ((bits / 2) - 1) as u8; + i += 1; + } + + // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS + // boundary because of algorithms that split the high part up. We reverse the scaling + // as we go to Self::BITS. + let mid = i; + let mut j = 1; + loop { + v[i] = (bits as u8) - (v[mid - j]) - 1; + if j == mid { + break; + } + i += 1; + j += 1; + } + v +} + +macro_rules! int_impl_common { + ($ty:ty) => { + fn from_bool(b: bool) -> Self { + b as $ty + } + + fn logical_shr(self, other: u32) -> Self { + Self::from_unsigned(self.unsigned().wrapping_shr(other)) + } + + fn is_zero(self) -> bool { + self == Self::ZERO + } + + fn wrapping_neg(self) -> Self { + ::wrapping_neg(self) + } + + fn wrapping_add(self, other: Self) -> Self { + ::wrapping_add(self, other) + } + + fn wrapping_mul(self, other: Self) -> Self { + ::wrapping_mul(self, other) + } + + fn wrapping_sub(self, other: Self) -> Self { + ::wrapping_sub(self, other) + } + + fn wrapping_shl(self, other: u32) -> Self { + ::wrapping_shl(self, other) + } + + fn wrapping_shr(self, other: u32) -> Self { + ::wrapping_shr(self, other) + } + + fn rotate_left(self, other: u32) -> Self { + ::rotate_left(self, other) + } + + fn overflowing_add(self, other: Self) -> (Self, bool) { + ::overflowing_add(self, other) + } + + fn leading_zeros(self) -> u32 { + ::leading_zeros(self) + } + + fn ilog2(self) -> u32 { + ::ilog2(self) + } + }; +} + +macro_rules! 
int_impl { + ($ity:ty, $uty:ty) => { + impl MinInt for $uty { + type OtherSign = $ity; + type UnsignedInt = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $uty { + fn unsigned(self) -> $uty { + self + } + + // It makes writing macros easier if this is implemented for both signed and unsigned + #[allow(clippy::wrong_self_convention)] + fn from_unsigned(me: $uty) -> Self { + me + } + + fn abs_diff(self, other: Self) -> Self { + if self < other { + other.wrapping_sub(self) + } else { + self.wrapping_sub(other) + } + } + + int_impl_common!($uty); + } + + impl MinInt for $ity { + type OtherSign = $uty; + type UnsignedInt = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $ity { + fn unsigned(self) -> $uty { + self as $uty + } + + fn from_unsigned(me: $uty) -> Self { + me as $ity + } + + fn abs_diff(self, other: Self) -> $uty { + self.wrapping_sub(other).wrapping_abs() as $uty + } + + int_impl_common!($ity); + } + }; +} + +int_impl!(isize, usize); +int_impl!(i8, u8); +int_impl!(i16, u16); +int_impl!(i32, u32); +int_impl!(i64, u64); +int_impl!(i128, u128); + +public_test_dep! { +/// Trait for integers twice the bit width of another integer. This is implemented for all +/// primitives except for `u8`, because there is not a smaller primitive. 
+pub(crate) trait DInt: MinInt { + /// Integer that is half the bit width of the integer this trait is implemented for + type H: HInt; + + /// Returns the low half of `self` + fn lo(self) -> Self::H; + /// Returns the high half of `self` + fn hi(self) -> Self::H; + /// Returns the low and high halves of `self` as a tuple + fn lo_hi(self) -> (Self::H, Self::H) { + (self.lo(), self.hi()) + } + /// Constructs an integer using lower and higher half parts + fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { + lo.zero_widen() | hi.widen_hi() + } +} +} + +public_test_dep! { +/// Trait for integers half the bit width of another integer. This is implemented for all +/// primitives except for `u128`, because it there is not a larger primitive. +pub(crate) trait HInt: Int { + /// Integer that is double the bit width of the integer this trait is implemented for + type D: DInt + MinInt; + + /// Widens (using default extension) the integer to have double bit width + fn widen(self) -> Self::D; + /// Widens (zero extension only) the integer to have double bit width. This is needed to get + /// around problems with associated type bounds (such as `Int`) being unstable + fn zero_widen(self) -> Self::D; + /// Widens the integer to have double bit width and shifts the integer into the higher bits + fn widen_hi(self) -> Self::D { + self.widen() << ::BITS + } + /// Widening multiplication with zero widening. This cannot overflow. + fn zero_widen_mul(self, rhs: Self) -> Self::D; + /// Widening multiplication. This cannot overflow. + fn widen_mul(self, rhs: Self) -> Self::D; +} +} + +macro_rules! impl_d_int { + ($($X:ident $D:ident),*) => { + $( + impl DInt for $D { + type H = $X; + + fn lo(self) -> Self::H { + self as $X + } + fn hi(self) -> Self::H { + (self >> <$X as MinInt>::BITS) as $X + } + } + )* + }; +} + +macro_rules! 
impl_h_int { + ($($H:ident $uH:ident $X:ident),*) => { + $( + impl HInt for $H { + type D = $X; + + fn widen(self) -> Self::D { + self as $X + } + fn zero_widen(self) -> Self::D { + (self as $uH) as $X + } + fn zero_widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen().wrapping_mul(rhs.zero_widen()) + } + fn widen_mul(self, rhs: Self) -> Self::D { + self.widen().wrapping_mul(rhs.widen()) + } + } + )* + }; +} + +impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); +impl_h_int!( + u8 u8 u16, + u16 u16 u32, + u32 u32 u64, + u64 u64 u128, + i8 u8 i16, + i16 u16 i32, + i32 u32 i64, + i64 u64 i128 +); + +public_test_dep! { +/// Trait to express (possibly lossy) casting of integers +pub(crate) trait CastInto: Copy { + fn cast(self) -> T; +} + +pub(crate) trait CastFrom:Copy { + fn cast_from(value: T) -> Self; +} +} + +impl + Copy> CastFrom for T { + fn cast_from(value: U) -> Self { + value.cast() + } +} + +macro_rules! cast_into { + ($ty:ty) => { + cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); + }; + ($ty:ty; $($into:ty),*) => {$( + impl CastInto<$into> for $ty { + fn cast(self) -> $into { + self as $into + } + } + )*}; +} -#[cfg(feature = "public-test-deps")] -pub use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; +cast_into!(usize); +cast_into!(isize); +cast_into!(u8); +cast_into!(i8); +cast_into!(u16); +cast_into!(i16); +cast_into!(u32); +cast_into!(i32); +cast_into!(u64); +cast_into!(i64); +cast_into!(u128); +cast_into!(i128); diff --git a/src/int/specialized_div_rem/delegate.rs b/src/int/specialized_div_rem/delegate.rs index f5c6e5023..330c6e4f8 100644 --- a/src/int/specialized_div_rem/delegate.rs +++ b/src/int/specialized_div_rem/delegate.rs @@ -185,6 +185,7 @@ macro_rules! impl_delegate { }; } +public_test_dep! { /// Returns `n / d` and sets `*rem = n % d`. /// /// This specialization exists because: @@ -194,7 +195,7 @@ macro_rules! 
impl_delegate { /// delegate algorithm strategy the only reasonably fast way to perform `u128` division. // used on SPARC #[allow(dead_code)] -pub fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { +pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { use super::*; let duo_lo = duo as u64; let duo_hi = (duo >> 64) as u64; @@ -315,3 +316,4 @@ pub fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { } } } +} diff --git a/src/int/trailing_zeros.rs b/src/int/trailing_zeros.rs index 9878a1687..cea366b07 100644 --- a/src/int/trailing_zeros.rs +++ b/src/int/trailing_zeros.rs @@ -1,51 +1,45 @@ -mod implementation { - use crate::int::{CastInto, Int}; +use crate::int::{CastInto, Int}; - /// Returns number of trailing binary zeros in `x`. - #[allow(dead_code)] - pub fn trailing_zeros + CastInto + CastInto>(x: T) -> usize { - let mut x = x; - let mut r: u32 = 0; - let mut t: u32; +public_test_dep! { +/// Returns number of trailing binary zeros in `x`. 
+#[allow(dead_code)] +pub(crate) fn trailing_zeros + CastInto + CastInto>(x: T) -> usize { + let mut x = x; + let mut r: u32 = 0; + let mut t: u32; - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { - r += ((CastInto::::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0 - x >>= r; // remove 32 zero bits - } - - if T::BITS >= 32 { - t = ((CastInto::::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0 - r += t; - x >>= t; // x = [0 - 0xFFFF] + higher garbage bits - } + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + r += ((CastInto::::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0 + x >>= r; // remove 32 zero bits + } - const { assert!(T::BITS >= 16) }; - t = ((CastInto::::cast(x) == 0) as u32) << 3; - x >>= t; // x = [0 - 0xFF] + higher garbage bits + if T::BITS >= 32 { + t = ((CastInto::::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0 r += t; + x >>= t; // x = [0 - 0xFFFF] + higher garbage bits + } - let mut x: u8 = x.cast(); - - t = (((x & 0x0F) == 0) as u32) << 2; - x >>= t; // x = [0 - 0xF] + higher garbage bits - r += t; + const { assert!(T::BITS >= 16) }; + t = ((CastInto::::cast(x) == 0) as u32) << 3; + x >>= t; // x = [0 - 0xFF] + higher garbage bits + r += t; - t = (((x & 0x3) == 0) as u32) << 1; - x >>= t; // x = [0 - 0x3] + higher garbage bits - r += t; + let mut x: u8 = x.cast(); - x &= 3; + t = (((x & 0x0F) == 0) as u32) << 2; + x >>= t; // x = [0 - 0xF] + higher garbage bits + r += t; - r as usize + ((2 - (x >> 1) as usize) & (((x & 1) == 0) as usize).wrapping_neg()) - } -} + t = (((x & 0x3) == 0) as u32) << 1; + x >>= t; // x = [0 - 0x3] + higher garbage bits + r += t; -#[cfg(not(feature = "public-test-deps"))] -pub(crate) use implementation::*; + x &= 3; -#[cfg(feature = "public-test-deps")] -pub use implementation::*; + r as usize + ((2 - (x >> 1) as usize) & (((x & 1) == 0) as usize).wrapping_neg()) +} +} intrinsics! 
{ /// Returns the number of trailing binary zeros in `x` (32 bit version). diff --git a/src/int/traits.rs b/src/int/traits.rs deleted file mode 100644 index e9d879627..000000000 --- a/src/int/traits.rs +++ /dev/null @@ -1,402 +0,0 @@ -use core::ops; - -/// Minimal integer implementations needed on all integer types, including wide integers. -#[allow(dead_code)] -pub trait MinInt: - Copy - + core::fmt::Debug - + ops::BitOr - + ops::Not - + ops::Shl -{ - /// Type with the same width but other signedness - type OtherSign: MinInt; - /// Unsigned version of Self - type UnsignedInt: MinInt; - - /// If `Self` is a signed integer - const SIGNED: bool; - - /// The bitwidth of the int type - const BITS: u32; - - const ZERO: Self; - const ONE: Self; - const MIN: Self; - const MAX: Self; -} - -/// Trait for some basic operations on integers -#[allow(dead_code)] -pub trait Int: - MinInt - + PartialEq - + PartialOrd - + ops::AddAssign - + ops::SubAssign - + ops::BitAndAssign - + ops::BitOrAssign - + ops::BitXorAssign - + ops::ShlAssign - + ops::ShrAssign - + ops::Add - + ops::Sub - + ops::Mul - + ops::Div - + ops::Shr - + ops::BitXor - + ops::BitAnd -{ - /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing - /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, - /// 112,119,120,125,126,127]. - const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(::BITS); - - /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. - const FUZZ_NUM: usize = { - let log2 = (::BITS - 1).count_ones() as usize; - if log2 == 3 { - // case for u8 - 6 - } else { - // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate - // boundaries. 
- 8 + (4 * (log2 - 4)) - } - }; - - fn unsigned(self) -> Self::UnsignedInt; - fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; - - fn from_bool(b: bool) -> Self; - - /// Prevents the need for excessive conversions between signed and unsigned - fn logical_shr(self, other: u32) -> Self; - - /// Absolute difference between two integers. - fn abs_diff(self, other: Self) -> Self::UnsignedInt; - - // copied from primitive integers, but put in a trait - fn is_zero(self) -> bool; - fn wrapping_neg(self) -> Self; - fn wrapping_add(self, other: Self) -> Self; - fn wrapping_mul(self, other: Self) -> Self; - fn wrapping_sub(self, other: Self) -> Self; - fn wrapping_shl(self, other: u32) -> Self; - fn wrapping_shr(self, other: u32) -> Self; - fn rotate_left(self, other: u32) -> Self; - fn overflowing_add(self, other: Self) -> (Self, bool); - fn leading_zeros(self) -> u32; - fn ilog2(self) -> u32; -} - -const fn make_fuzz_lengths(bits: u32) -> [u8; 20] { - let mut v = [0u8; 20]; - v[0] = 0; - v[1] = 1; - v[2] = 2; // important for parity and the iX::MIN case when reversed - let mut i = 3; - - // No need for any more until the byte boundary, because there should be no algorithms - // that are sensitive to anything not next to byte boundaries after 2. We also scale - // in powers of two, which is important to prevent u128 corner tests from getting too - // big. - let mut l = 8; - loop { - if l >= ((bits / 2) as u8) { - break; - } - // get both sides of the byte boundary - v[i] = l - 1; - i += 1; - v[i] = l; - i += 1; - l *= 2; - } - - if bits != 8 { - // add the lower side of the middle boundary - v[i] = ((bits / 2) - 1) as u8; - i += 1; - } - - // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS - // boundary because of algorithms that split the high part up. We reverse the scaling - // as we go to Self::BITS. 
- let mid = i; - let mut j = 1; - loop { - v[i] = (bits as u8) - (v[mid - j]) - 1; - if j == mid { - break; - } - i += 1; - j += 1; - } - v -} - -macro_rules! int_impl_common { - ($ty:ty) => { - fn from_bool(b: bool) -> Self { - b as $ty - } - - fn logical_shr(self, other: u32) -> Self { - Self::from_unsigned(self.unsigned().wrapping_shr(other)) - } - - fn is_zero(self) -> bool { - self == Self::ZERO - } - - fn wrapping_neg(self) -> Self { - ::wrapping_neg(self) - } - - fn wrapping_add(self, other: Self) -> Self { - ::wrapping_add(self, other) - } - - fn wrapping_mul(self, other: Self) -> Self { - ::wrapping_mul(self, other) - } - - fn wrapping_sub(self, other: Self) -> Self { - ::wrapping_sub(self, other) - } - - fn wrapping_shl(self, other: u32) -> Self { - ::wrapping_shl(self, other) - } - - fn wrapping_shr(self, other: u32) -> Self { - ::wrapping_shr(self, other) - } - - fn rotate_left(self, other: u32) -> Self { - ::rotate_left(self, other) - } - - fn overflowing_add(self, other: Self) -> (Self, bool) { - ::overflowing_add(self, other) - } - - fn leading_zeros(self) -> u32 { - ::leading_zeros(self) - } - - fn ilog2(self) -> u32 { - ::ilog2(self) - } - }; -} - -macro_rules! 
int_impl { - ($ity:ty, $uty:ty) => { - impl MinInt for $uty { - type OtherSign = $ity; - type UnsignedInt = $uty; - - const BITS: u32 = ::ZERO.count_zeros(); - const SIGNED: bool = Self::MIN != Self::ZERO; - - const ZERO: Self = 0; - const ONE: Self = 1; - const MIN: Self = ::MIN; - const MAX: Self = ::MAX; - } - - impl Int for $uty { - fn unsigned(self) -> $uty { - self - } - - // It makes writing macros easier if this is implemented for both signed and unsigned - #[allow(clippy::wrong_self_convention)] - fn from_unsigned(me: $uty) -> Self { - me - } - - fn abs_diff(self, other: Self) -> Self { - if self < other { - other.wrapping_sub(self) - } else { - self.wrapping_sub(other) - } - } - - int_impl_common!($uty); - } - - impl MinInt for $ity { - type OtherSign = $uty; - type UnsignedInt = $uty; - - const BITS: u32 = ::ZERO.count_zeros(); - const SIGNED: bool = Self::MIN != Self::ZERO; - - const ZERO: Self = 0; - const ONE: Self = 1; - const MIN: Self = ::MIN; - const MAX: Self = ::MAX; - } - - impl Int for $ity { - fn unsigned(self) -> $uty { - self as $uty - } - - fn from_unsigned(me: $uty) -> Self { - me as $ity - } - - fn abs_diff(self, other: Self) -> $uty { - self.wrapping_sub(other).wrapping_abs() as $uty - } - - int_impl_common!($ity); - } - }; -} - -int_impl!(isize, usize); -int_impl!(i8, u8); -int_impl!(i16, u16); -int_impl!(i32, u32); -int_impl!(i64, u64); -int_impl!(i128, u128); - -/// Trait for integers twice the bit width of another integer. This is implemented for all -/// primitives except for `u8`, because there is not a smaller primitive. 
-pub trait DInt: MinInt { - /// Integer that is half the bit width of the integer this trait is implemented for - type H: HInt; - - /// Returns the low half of `self` - fn lo(self) -> Self::H; - /// Returns the high half of `self` - fn hi(self) -> Self::H; - /// Returns the low and high halves of `self` as a tuple - fn lo_hi(self) -> (Self::H, Self::H) { - (self.lo(), self.hi()) - } - /// Constructs an integer using lower and higher half parts - fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { - lo.zero_widen() | hi.widen_hi() - } -} - -/// Trait for integers half the bit width of another integer. This is implemented for all -/// primitives except for `u128`, because it there is not a larger primitive. -pub trait HInt: Int { - /// Integer that is double the bit width of the integer this trait is implemented for - type D: DInt + MinInt; - - /// Widens (using default extension) the integer to have double bit width - fn widen(self) -> Self::D; - /// Widens (zero extension only) the integer to have double bit width. This is needed to get - /// around problems with associated type bounds (such as `Int`) being unstable - fn zero_widen(self) -> Self::D; - /// Widens the integer to have double bit width and shifts the integer into the higher bits - fn widen_hi(self) -> Self::D { - self.widen() << ::BITS - } - /// Widening multiplication with zero widening. This cannot overflow. - fn zero_widen_mul(self, rhs: Self) -> Self::D; - /// Widening multiplication. This cannot overflow. - fn widen_mul(self, rhs: Self) -> Self::D; -} - -macro_rules! impl_d_int { - ($($X:ident $D:ident),*) => { - $( - impl DInt for $D { - type H = $X; - - fn lo(self) -> Self::H { - self as $X - } - fn hi(self) -> Self::H { - (self >> <$X as MinInt>::BITS) as $X - } - } - )* - }; -} - -macro_rules! 
impl_h_int { - ($($H:ident $uH:ident $X:ident),*) => { - $( - impl HInt for $H { - type D = $X; - - fn widen(self) -> Self::D { - self as $X - } - fn zero_widen(self) -> Self::D { - (self as $uH) as $X - } - fn zero_widen_mul(self, rhs: Self) -> Self::D { - self.zero_widen().wrapping_mul(rhs.zero_widen()) - } - fn widen_mul(self, rhs: Self) -> Self::D { - self.widen().wrapping_mul(rhs.widen()) - } - } - )* - }; -} - -impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); -impl_h_int!( - u8 u8 u16, - u16 u16 u32, - u32 u32 u64, - u64 u64 u128, - i8 u8 i16, - i16 u16 i32, - i32 u32 i64, - i64 u64 i128 -); - -/// Trait to express (possibly lossy) casting of integers -pub trait CastInto: Copy { - fn cast(self) -> T; -} - -pub trait CastFrom: Copy { - fn cast_from(value: T) -> Self; -} - -impl + Copy> CastFrom for T { - fn cast_from(value: U) -> Self { - value.cast() - } -} - -macro_rules! cast_into { - ($ty:ty) => { - cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); - }; - ($ty:ty; $($into:ty),*) => {$( - impl CastInto<$into> for $ty { - fn cast(self) -> $into { - self as $into - } - } - )*}; -} - -cast_into!(usize); -cast_into!(isize); -cast_into!(u8); -cast_into!(i8); -cast_into!(u16); -cast_into!(i16); -cast_into!(u32); -cast_into!(i32); -cast_into!(u64); -cast_into!(i64); -cast_into!(u128); -cast_into!(i128); diff --git a/src/macros.rs b/src/macros.rs index 9f951d515..42c83ee55 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -1,5 +1,21 @@ //! Macros shared throughout the compiler-builtins implementation +/// Changes the visibility to `pub` if feature "public-test-deps" is set +#[cfg(not(feature = "public-test-deps"))] +macro_rules! public_test_dep { + ($(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*) => { + $(#[$($meta)*])* pub(crate) $ident $($tokens)* + }; +} + +/// Changes the visibility to `pub` if feature "public-test-deps" is set +#[cfg(feature = "public-test-deps")] +macro_rules! 
public_test_dep { + {$(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*} => { + $(#[$($meta)*])* pub $ident $($tokens)* + }; +} + /// The "main macro" used for defining intrinsics. /// /// The compiler-builtins library is super platform-specific with tons of crazy From 9bd55390927997b541a32200789889128b39da56 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 7 Aug 2024 02:18:44 +0000 Subject: [PATCH 0805/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6c07fd555..e0a4abc63 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.116" +version = "0.1.117" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 3a73197dc9121b3e603e099660f6c6091559d4cf Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Wed, 7 Aug 2024 11:29:47 +0200 Subject: [PATCH 0806/1459] Rename `only-soft-floats` feature into `force-soft-floats` --- libm/Cargo.toml | 2 +- libm/crates/compiler-builtins-smoke-test/Cargo.toml | 2 +- libm/src/math/mod.rs | 2 +- libm/src/math/sqrt.rs | 4 ++-- libm/src/math/sqrtf.rs | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 893a2e19d..c2388083b 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -24,7 +24,7 @@ unstable = [] musl-reference-tests = ['rand'] # Used to prevent using any intrinsics or arch-specific code. 
-only-soft-floats = [] +force-soft-floats = [] [workspace] members = [ diff --git a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml index ec48ca206..481d386a4 100644 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -10,4 +10,4 @@ bench = false [features] unstable = [] checked = [] -only-soft-floats = [] +force-soft-floats = [] diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 04d3bbb62..a56532ddd 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -76,7 +76,7 @@ macro_rules! div { macro_rules! llvm_intrinsically_optimized { (#[cfg($($clause:tt)*)] $e:expr) => { - #[cfg(all(feature = "unstable", not(feature = "only-soft-floats"), $($clause)*))] + #[cfg(all(feature = "unstable", not(feature = "force-soft-floats"), $($clause)*))] { if true { // thwart the dead code lint $e diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index a0003cb05..66cb7659c 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -92,7 +92,7 @@ pub fn sqrt(x: f64) -> f64 { } } } - #[cfg(all(target_feature = "sse2", not(feature = "only-soft-floats")))] + #[cfg(all(target_feature = "sse2", not(feature = "force-soft-floats")))] { // Note: This path is unlikely since LLVM will usually have already // optimized sqrt calls into hardware instructions if sse2 is available, @@ -107,7 +107,7 @@ pub fn sqrt(x: f64) -> f64 { _mm_cvtsd_f64(m_sqrt) } } - #[cfg(any(not(target_feature = "sse2"), feature = "only-soft-floats"))] + #[cfg(any(not(target_feature = "sse2"), feature = "force-soft-floats"))] { use core::num::Wrapping; diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index 0cef073ea..16cbb2f97 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -27,7 +27,7 @@ pub fn sqrtf(x: f32) -> f32 { } } } - #[cfg(all(target_feature = "sse", not(feature = "only-soft-floats")))] + #[cfg(all(target_feature = "sse", 
not(feature = "force-soft-floats")))] { // Note: This path is unlikely since LLVM will usually have already // optimized sqrt calls into hardware instructions if sse is available, @@ -42,7 +42,7 @@ pub fn sqrtf(x: f32) -> f32 { _mm_cvtss_f32(m_sqrt) } } - #[cfg(any(not(target_feature = "sse"), feature = "only-soft-floats"))] + #[cfg(any(not(target_feature = "sse"), feature = "force-soft-floats"))] { const TINY: f32 = 1.0e-30; From 2207206fbd4a7f15df3bdab05f3a536c4427d613 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Thu, 8 Aug 2024 14:00:59 +0200 Subject: [PATCH 0807/1459] Update `libm` submodule --- libm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm b/libm index 279e5f6ab..300edb325 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 279e5f6abe0a2ca9066962d9ec894f0df1f417ac +Subproject commit 300edb32520b1673e16d2411a0e2e6273959eb46 From ed34a6cca4ad68b07c8b41280ef9650beb73f8af Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Thu, 8 Aug 2024 14:09:07 +0200 Subject: [PATCH 0808/1459] Activate `force-soft-floats` feature in `build.rs` --- build.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build.rs b/build.rs index bbae9e286..93e323e6b 100644 --- a/build.rs +++ b/build.rs @@ -51,8 +51,9 @@ fn main() { println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display()); // Activate libm's unstable features to make full use of Nightly. 
- println!("cargo::rustc-check-cfg=cfg(feature, values(\"unstable\"))"); + println!("cargo::rustc-check-cfg=cfg(feature, values(\"unstable\", \"force-soft-floats\"))"); println!("cargo:rustc-cfg=feature=\"unstable\""); + println!("cargo:rustc-cfg=feature=\"force-soft-floats\""); // Emscripten's runtime includes all the builtins if target.os == "emscripten" { From 015113d2bfa6314708fd9f3922a8a66763a8f5fa Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 8 Aug 2024 12:17:09 +0000 Subject: [PATCH 0809/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e0a4abc63..49b315779 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.117" +version = "0.1.118" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 6a80b908461f693e6777a3eafb095af73845c74e Mon Sep 17 00:00:00 2001 From: Kleis Auke Wolthuizen Date: Sun, 11 Aug 2024 11:36:25 +0200 Subject: [PATCH 0810/1459] Configure `f16` and `f128` support for WebAssembly --- build.rs | 2 ++ testcrate/build.rs | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/build.rs b/build.rs index 93e323e6b..56716b906 100644 --- a/build.rs +++ b/build.rs @@ -283,6 +283,8 @@ fn configure_f16_f128(target: &Target) { "powerpc64" if &target.os == "aix" => (true, false), // `f128` crashes "sparc" | "sparcv9" => (true, false), + // `f16` miscompiles + "wasm32" | "wasm64" => (false, true), // Most everything else works as of LLVM 19 _ => (true, true), }; diff --git a/testcrate/build.rs b/testcrate/build.rs index 8c441de8a..74d74559e 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -54,8 +54,9 @@ fn main() { || target.starts_with("i586-") || target.contains("windows-") // Linking says "error: function signature mismatch: 
__extendhfsf2" and seems to - // think the signature is either `(i32) -> f32` or `(f32) -> f32` - || target.starts_with("wasm32-") + // think the signature is either `(i32) -> f32` or `(f32) -> f32`. See + // . + || target.starts_with("wasm") { features.insert(Feature::NoSysF16); features.insert(Feature::NoSysF16F128Convert); From 35c5554083e74e55e72611f7f597c9056d665bc7 Mon Sep 17 00:00:00 2001 From: Kleis Auke Wolthuizen Date: Mon, 12 Aug 2024 08:08:41 +0200 Subject: [PATCH 0811/1459] Fix CI for targets that conditionally disable `f16` or `f128` support --- build.rs | 83 +---------------------------- configure.rs | 86 +++++++++++++++++++++++++++++++ testcrate/benches/float_add.rs | 18 +++++-- testcrate/benches/float_cmp.rs | 28 ++++++---- testcrate/benches/float_conv.rs | 1 - testcrate/benches/float_div.rs | 2 - testcrate/benches/float_extend.rs | 49 +++++++++++------- testcrate/benches/float_mul.rs | 18 +++++-- testcrate/benches/float_sub.rs | 18 +++++-- testcrate/benches/float_trunc.rs | 51 ++++++++++++------ testcrate/build.rs | 45 +++++++++------- testcrate/src/bench.rs | 6 ++- testcrate/src/lib.rs | 4 +- testcrate/tests/addsub.rs | 7 ++- testcrate/tests/cmp.rs | 5 +- testcrate/tests/conv.rs | 14 ++--- testcrate/tests/mul.rs | 7 ++- 17 files changed, 260 insertions(+), 182 deletions(-) create mode 100644 configure.rs diff --git a/build.rs b/build.rs index 56716b906..894508b54 100644 --- a/build.rs +++ b/build.rs @@ -1,44 +1,8 @@ use std::{collections::BTreeMap, env, path::PathBuf, sync::atomic::Ordering}; -#[allow(dead_code)] -struct Target { - triple: String, - os: String, - arch: String, - vendor: String, - env: String, - pointer_width: u8, - little_endian: bool, - features: Vec, -} - -impl Target { - fn from_env() -> Self { - let little_endian = match env::var("CARGO_CFG_TARGET_ENDIAN").unwrap().as_str() { - "little" => true, - "big" => false, - x => panic!("unknown endian {x}"), - }; +mod configure; - Self { - triple: env::var("TARGET").unwrap(), - os: 
env::var("CARGO_CFG_TARGET_OS").unwrap(), - arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(), - vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), - env: env::var("CARGO_CFG_TARGET_ENV").unwrap(), - pointer_width: env::var("CARGO_CFG_TARGET_POINTER_WIDTH") - .unwrap() - .parse() - .unwrap(), - little_endian, - features: env::var("CARGO_CFG_TARGET_FEATURE") - .unwrap_or_default() - .split(",") - .map(ToOwned::to_owned) - .collect(), - } - } -} +use configure::{configure_f16_f128, Target}; fn main() { println!("cargo:rerun-if-changed=build.rs"); @@ -261,49 +225,6 @@ fn configure_check_cfg() { println!("cargo::rustc-check-cfg=cfg(assert_no_panic)"); } -/// Configure whether or not `f16` and `f128` support should be enabled. -fn configure_f16_f128(target: &Target) { - // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means - // that the backend will not crash when using these types. This does not mean that the - // backend does the right thing, or that the platform doesn't have ABI bugs. - // - // We do this here rather than in `rust-lang/rust` because configuring via cargo features is - // not straightforward. - // - // Original source of this list: - // - let (f16_ok, f128_ok) = match target.arch.as_str() { - // `f16` and `f128` both crash - "arm64ec" => (false, false), - // `f16` crashes - "s390x" => (false, true), - // `f128` crashes - "mips64" | "mips64r6" => (true, false), - // `f128` crashes - "powerpc64" if &target.os == "aix" => (true, false), - // `f128` crashes - "sparc" | "sparcv9" => (true, false), - // `f16` miscompiles - "wasm32" | "wasm64" => (false, true), - // Most everything else works as of LLVM 19 - _ => (true, true), - }; - - // If the feature is set, disable these types. 
- let disable_both = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some(); - - println!("cargo::rustc-check-cfg=cfg(f16_enabled)"); - println!("cargo::rustc-check-cfg=cfg(f128_enabled)"); - - if f16_ok && !disable_both { - println!("cargo::rustc-cfg=f16_enabled"); - } - - if f128_ok && !disable_both { - println!("cargo::rustc-cfg=f128_enabled"); - } -} - #[cfg(feature = "c")] mod c { use std::collections::{BTreeMap, HashSet}; diff --git a/configure.rs b/configure.rs new file mode 100644 index 000000000..676c88f3a --- /dev/null +++ b/configure.rs @@ -0,0 +1,86 @@ +// Configuration that is shared between `compiler_builtins` and `testcrate`. + +use std::env; + +#[allow(dead_code)] +pub struct Target { + pub triple: String, + pub os: String, + pub arch: String, + pub vendor: String, + pub env: String, + pub pointer_width: u8, + pub little_endian: bool, + pub features: Vec, +} + +impl Target { + pub fn from_env() -> Self { + let little_endian = match env::var("CARGO_CFG_TARGET_ENDIAN").unwrap().as_str() { + "little" => true, + "big" => false, + x => panic!("unknown endian {x}"), + }; + + Self { + triple: env::var("TARGET").unwrap(), + os: env::var("CARGO_CFG_TARGET_OS").unwrap(), + arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(), + vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), + env: env::var("CARGO_CFG_TARGET_ENV").unwrap(), + pointer_width: env::var("CARGO_CFG_TARGET_POINTER_WIDTH") + .unwrap() + .parse() + .unwrap(), + little_endian, + features: env::var("CARGO_CFG_TARGET_FEATURE") + .unwrap_or_default() + .split(",") + .map(ToOwned::to_owned) + .collect(), + } + } +} + +/// Configure whether or not `f16` and `f128` support should be enabled. +pub fn configure_f16_f128(target: &Target) { + // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means + // that the backend will not crash when using these types. This does not mean that the + // backend does the right thing, or that the platform doesn't have ABI bugs. 
+ // + // We do this here rather than in `rust-lang/rust` because configuring via cargo features is + // not straightforward. + // + // Original source of this list: + // + let (f16_ok, f128_ok) = match target.arch.as_str() { + // `f16` and `f128` both crash + "arm64ec" => (false, false), + // `f16` crashes + "s390x" => (false, true), + // `f128` crashes + "mips64" | "mips64r6" => (true, false), + // `f128` crashes + "powerpc64" if &target.os == "aix" => (true, false), + // `f128` crashes + "sparc" | "sparcv9" => (true, false), + // `f16` miscompiles + "wasm32" | "wasm64" => (false, true), + // Most everything else works as of LLVM 19 + _ => (true, true), + }; + + // If the feature is set, disable these types. + let disable_both = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some(); + + println!("cargo::rustc-check-cfg=cfg(f16_enabled)"); + println!("cargo::rustc-check-cfg=cfg(f128_enabled)"); + + if f16_ok && !disable_both { + println!("cargo::rustc-cfg=f16_enabled"); + } + + if f128_ok && !disable_both { + println!("cargo::rustc-cfg=f128_enabled"); + } +} diff --git a/testcrate/benches/float_add.rs b/testcrate/benches/float_add.rs index eef1ecc57..3311e7b5b 100644 --- a/testcrate/benches/float_add.rs +++ b/testcrate/benches/float_add.rs @@ -1,7 +1,7 @@ -#![feature(f128)] +#![cfg_attr(f128_enabled, feature(f128))] use compiler_builtins::float::add; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{criterion_main, Criterion}; use testcrate::float_bench; float_bench! { @@ -66,6 +66,7 @@ float_bench! { ], } +#[cfg(f128_enabled)] float_bench! { name: add_f128, sig: (a: f128, b: f128) -> f128, @@ -77,5 +78,16 @@ float_bench! 
{ asm: [] } -criterion_group!(float_add, add_f32, add_f64, add_f128); +pub fn float_add() { + let mut criterion = Criterion::default().configure_from_args(); + + add_f32(&mut criterion); + add_f64(&mut criterion); + + #[cfg(f128_enabled)] + { + add_f128(&mut criterion); + } +} + criterion_main!(float_add); diff --git a/testcrate/benches/float_cmp.rs b/testcrate/benches/float_cmp.rs index 641eb0ac5..400c09b42 100644 --- a/testcrate/benches/float_cmp.rs +++ b/testcrate/benches/float_cmp.rs @@ -1,6 +1,6 @@ -#![feature(f128)] +#![cfg_attr(f128_enabled, feature(f128))] -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{criterion_main, Criterion}; use testcrate::float_bench; use compiler_builtins::float::cmp; @@ -190,13 +190,19 @@ float_bench! { asm: [] } -criterion_group!( - float_cmp, - cmp_f32_gt, - cmp_f32_unord, - cmp_f64_gt, - cmp_f64_unord, - cmp_f128_gt, - cmp_f128_unord -); +pub fn float_cmp() { + let mut criterion = Criterion::default().configure_from_args(); + + cmp_f32_gt(&mut criterion); + cmp_f32_unord(&mut criterion); + cmp_f64_gt(&mut criterion); + cmp_f64_unord(&mut criterion); + + #[cfg(f128_enabled)] + { + cmp_f128_gt(&mut criterion); + cmp_f128_unord(&mut criterion); + } +} + criterion_main!(float_cmp); diff --git a/testcrate/benches/float_conv.rs b/testcrate/benches/float_conv.rs index bbd3a0685..de2043b04 100644 --- a/testcrate/benches/float_conv.rs +++ b/testcrate/benches/float_conv.rs @@ -1,4 +1,3 @@ -#![feature(f128)] #![allow(improper_ctypes)] use compiler_builtins::float::conv; diff --git a/testcrate/benches/float_div.rs b/testcrate/benches/float_div.rs index e679f8ccc..6ba439b04 100644 --- a/testcrate/benches/float_div.rs +++ b/testcrate/benches/float_div.rs @@ -1,5 +1,3 @@ -#![feature(f128)] - use compiler_builtins::float::div; use criterion::{criterion_group, criterion_main, Criterion}; use testcrate::float_bench; diff --git a/testcrate/benches/float_extend.rs b/testcrate/benches/float_extend.rs index 
bf136f49a..a9563741a 100644 --- a/testcrate/benches/float_extend.rs +++ b/testcrate/benches/float_extend.rs @@ -1,11 +1,12 @@ #![allow(unused_variables)] // "unused" f16 registers -#![feature(f128)] -#![feature(f16)] +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg_attr(f16_enabled, feature(f16))] use compiler_builtins::float::extend; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{criterion_main, Criterion}; use testcrate::float_bench; +#[cfg(f16_enabled)] float_bench! { name: extend_f16_f32, sig: (a: f16) -> f32, @@ -28,6 +29,7 @@ float_bench! { ], } +#[cfg(all(f16_enabled, f128_enabled))] float_bench! { name: extend_f16_f128, sig: (a: f16) -> f128, @@ -60,6 +62,7 @@ float_bench! { ], } +#[cfg(f128_enabled)] float_bench! { name: extend_f32_f128, sig: (a: f32) -> f128, @@ -71,6 +74,7 @@ float_bench! { asm: [], } +#[cfg(f128_enabled)] float_bench! { name: extend_f64_f128, sig: (a: f64) -> f128, @@ -82,23 +86,28 @@ float_bench! { asm: [], } -#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] -criterion_group!( - float_extend, - extend_f16_f32, - extend_f16_f128, - extend_f32_f64, - extend_f32_f128, - extend_f64_f128, -); +pub fn float_extend() { + let mut criterion = Criterion::default().configure_from_args(); -// FIXME(#655): `f16` tests disabled until we can bootstrap symbols -#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] -criterion_group!( - float_extend, - extend_f32_f64, - extend_f32_f128, - extend_f64_f128, -); + // FIXME(#655): `f16` tests disabled until we can bootstrap symbols + #[cfg(f16_enabled)] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + { + extend_f16_f32(&mut criterion); + + #[cfg(f128_enabled)] + { + extend_f16_f128(&mut criterion); + } + } + + extend_f32_f64(&mut criterion); + + #[cfg(f128_enabled)] + { + extend_f32_f128(&mut criterion); + extend_f64_f128(&mut criterion); + } +} criterion_main!(float_extend); diff --git a/testcrate/benches/float_mul.rs 
b/testcrate/benches/float_mul.rs index efa32b285..6e30b7866 100644 --- a/testcrate/benches/float_mul.rs +++ b/testcrate/benches/float_mul.rs @@ -1,7 +1,7 @@ -#![feature(f128)] +#![cfg_attr(f128_enabled, feature(f128))] use compiler_builtins::float::mul; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{criterion_main, Criterion}; use testcrate::float_bench; float_bench! { @@ -66,6 +66,7 @@ float_bench! { ], } +#[cfg(f128_enabled)] float_bench! { name: mul_f128, sig: (a: f128, b: f128) -> f128, @@ -77,5 +78,16 @@ float_bench! { asm: [] } -criterion_group!(float_mul, mul_f32, mul_f64, mul_f128); +pub fn float_mul() { + let mut criterion = Criterion::default().configure_from_args(); + + mul_f32(&mut criterion); + mul_f64(&mut criterion); + + #[cfg(f128_enabled)] + { + mul_f128(&mut criterion); + } +} + criterion_main!(float_mul); diff --git a/testcrate/benches/float_sub.rs b/testcrate/benches/float_sub.rs index 6d87604aa..cdb678eef 100644 --- a/testcrate/benches/float_sub.rs +++ b/testcrate/benches/float_sub.rs @@ -1,7 +1,7 @@ -#![feature(f128)] +#![cfg_attr(f128_enabled, feature(f128))] use compiler_builtins::float::sub; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{criterion_main, Criterion}; use testcrate::float_bench; float_bench! { @@ -66,6 +66,7 @@ float_bench! { ], } +#[cfg(f128_enabled)] float_bench! { name: sub_f128, sig: (a: f128, b: f128) -> f128, @@ -77,5 +78,16 @@ float_bench! 
{ asm: [] } -criterion_group!(float_sub, sub_f32, sub_f64, sub_f128); +pub fn float_sub() { + let mut criterion = Criterion::default().configure_from_args(); + + sub_f32(&mut criterion); + sub_f64(&mut criterion); + + #[cfg(f128_enabled)] + { + sub_f128(&mut criterion); + } +} + criterion_main!(float_sub); diff --git a/testcrate/benches/float_trunc.rs b/testcrate/benches/float_trunc.rs index 74b43dfc8..8d874e4b2 100644 --- a/testcrate/benches/float_trunc.rs +++ b/testcrate/benches/float_trunc.rs @@ -1,10 +1,11 @@ -#![feature(f128)] -#![feature(f16)] +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg_attr(f16_enabled, feature(f16))] use compiler_builtins::float::trunc; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{criterion_main, Criterion}; use testcrate::float_bench; +#[cfg(f16_enabled)] float_bench! { name: trunc_f32_f16, sig: (a: f32) -> f16, @@ -27,6 +28,7 @@ float_bench! { ], } +#[cfg(f16_enabled)] float_bench! { name: trunc_f64_f16, sig: (a: f64) -> f16, @@ -82,6 +84,7 @@ float_bench! { ], } +#[cfg(all(f16_enabled, f128_enabled))] float_bench! { name: trunc_f128_f16, sig: (a: f128) -> f16, @@ -93,6 +96,7 @@ float_bench! { asm: [], } +#[cfg(f128_enabled)] float_bench! { name: trunc_f128_f32, sig: (a: f128) -> f32, @@ -104,6 +108,7 @@ float_bench! { asm: [], } +#[cfg(f128_enabled)] float_bench! { name: trunc_f128_f64, sig: (a: f128) -> f64, @@ -115,19 +120,31 @@ float_bench! 
{ asm: [], } -#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] -criterion_group!( - float_trunc, - trunc_f32_f16, - trunc_f64_f16, - trunc_f64_f32, - trunc_f128_f16, - trunc_f128_f32, - trunc_f128_f64, -); - -// FIXME(#655): `f16` tests disabled until we can bootstrap symbols -#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] -criterion_group!(float_trunc, trunc_f64_f32, trunc_f128_f32, trunc_f128_f64,); +pub fn float_trunc() { + let mut criterion = Criterion::default().configure_from_args(); + + // FIXME(#655): `f16` tests disabled until we can bootstrap symbols + #[cfg(f16_enabled)] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + { + trunc_f32_f16(&mut criterion); + trunc_f64_f16(&mut criterion); + } + + trunc_f64_f32(&mut criterion); + + #[cfg(f128_enabled)] + { + // FIXME(#655): `f16` tests disabled until we can bootstrap symbols + #[cfg(f16_enabled)] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + { + trunc_f128_f16(&mut criterion); + } + + trunc_f128_f32(&mut criterion); + trunc_f128_f64(&mut criterion); + } +} criterion_main!(float_trunc); diff --git a/testcrate/build.rs b/testcrate/build.rs index 74d74559e..fde4e5b56 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -1,4 +1,4 @@ -use std::{collections::HashSet, env}; +use std::collections::HashSet; /// Features to enable #[derive(Debug, PartialEq, Eq, Hash)] @@ -9,35 +9,39 @@ enum Feature { NoSysF16F128Convert, } +mod builtins_configure { + include!("../configure.rs"); +} + fn main() { - let target = env::var("TARGET").unwrap(); + let target = builtins_configure::Target::from_env(); let mut features = HashSet::new(); // These platforms do not have f128 symbols available in their system libraries, so // skip related tests. 
- if target.starts_with("arm-") - || target.contains("apple-darwin") - || target.contains("windows-msvc") + if target.arch == "arm" + || target.vendor == "apple" + || target.env == "msvc" // GCC and LLVM disagree on the ABI of `f16` and `f128` with MinGW. See // . - || target.contains("windows-gnu") + || (target.os == "windows" && target.env == "gnu") // FIXME(llvm): There is an ABI incompatibility between GCC and Clang on 32-bit x86. // See . - || target.starts_with("i686") + || target.arch == "i686" // 32-bit PowerPC and 64-bit LE gets code generated that Qemu cannot handle. See // . - || target.starts_with("powerpc-") - || target.starts_with("powerpc64le-") + || target.arch == "powerpc" + || target.arch == "powerpc64le" // FIXME: We get different results from the builtin functions. See // . - || target.starts_with("powerpc64-") + || target.arch == "powerpc64" { features.insert(Feature::NoSysF128); features.insert(Feature::NoSysF128IntConvert); features.insert(Feature::NoSysF16F128Convert); } - if target.starts_with("i586") || target.starts_with("i686") { + if target.arch == "i586" || target.arch == "i686" { // 32-bit x86 does not have `__fixunstfti`/`__fixtfti` but does have everything else features.insert(Feature::NoSysF128IntConvert); // FIXME: 32-bit x86 has a bug in `f128 -> f16` system libraries @@ -46,17 +50,18 @@ fn main() { // These platforms do not have f16 symbols available in their system libraries, so // skip related tests. Most of these are missing `f16 <-> f32` conversion routines. 
- if (target.starts_with("aarch64-") && target.contains("linux")) - || target.starts_with("arm") - || target.starts_with("powerpc-") - || target.starts_with("powerpc64-") - || target.starts_with("powerpc64le-") - || target.starts_with("i586-") - || target.contains("windows-") + if (target.arch == "aarch64" && target.os == "linux") + || target.arch.starts_with("arm") + || target.arch == "powerpc" + || target.arch == "powerpc64" + || target.arch == "powerpc64le" + || target.arch == "i586" + || target.os == "windows" // Linking says "error: function signature mismatch: __extendhfsf2" and seems to // think the signature is either `(i32) -> f32` or `(f32) -> f32`. See // . - || target.starts_with("wasm") + || target.arch == "wasm32" + || target.arch == "wasm64" { features.insert(Feature::NoSysF16); features.insert(Feature::NoSysF16F128Convert); @@ -78,4 +83,6 @@ fn main() { println!("cargo:warning={warning}"); println!("cargo:rustc-cfg=feature=\"{name}\""); } + + builtins_configure::configure_f16_f128(&target); } diff --git a/testcrate/src/bench.rs b/testcrate/src/bench.rs index 5ab9bac87..1d571a6cf 100644 --- a/testcrate/src/bench.rs +++ b/testcrate/src/bench.rs @@ -350,9 +350,11 @@ macro_rules! impl_testio { } } -#[cfg(not(feature = "no-f16-f128"))] -impl_testio!(float f16, f128); +#[cfg(f16_enabled)] +impl_testio!(float f16); impl_testio!(float f32, f64); +#[cfg(f128_enabled)] +impl_testio!(float f128); impl_testio!(int i16, i32, i64, i128); impl_testio!(int u16, u32, u64, u128); impl_testio!((float, int)(f32, i32)); diff --git a/testcrate/src/lib.rs b/testcrate/src/lib.rs index 5458c9ab6..cc9e73938 100644 --- a/testcrate/src/lib.rs +++ b/testcrate/src/lib.rs @@ -13,8 +13,8 @@ //! Some floating point tests are disabled for specific architectures, because they do not have //! correct rounding. 
#![no_std] -#![cfg_attr(not(feature = "no-f16-f128"), feature(f128))] -#![cfg_attr(not(feature = "no-f16-f128"), feature(f16))] +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg_attr(f16_enabled, feature(f16))] #![feature(isqrt)] pub mod bench; diff --git a/testcrate/tests/addsub.rs b/testcrate/tests/addsub.rs index f21f61ff6..1ba2df741 100644 --- a/testcrate/tests/addsub.rs +++ b/testcrate/tests/addsub.rs @@ -1,6 +1,5 @@ #![allow(unused_macros)] -#![feature(f128)] -#![feature(f16)] +#![cfg_attr(f128_enabled, feature(f128))] use testcrate::*; @@ -120,7 +119,7 @@ mod float_addsub { } } -#[cfg(not(feature = "no-f16-f128"))] +#[cfg(f128_enabled)] #[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] mod float_addsub_f128 { @@ -131,7 +130,7 @@ mod float_addsub_f128 { } } -#[cfg(not(feature = "no-f16-f128"))] +#[cfg(f128_enabled)] #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] mod float_addsub_f128_ppc { use super::*; diff --git a/testcrate/tests/cmp.rs b/testcrate/tests/cmp.rs index e8a0eb165..7e973e7e5 100644 --- a/testcrate/tests/cmp.rs +++ b/testcrate/tests/cmp.rs @@ -1,7 +1,6 @@ #![allow(unused_macros)] #![allow(unreachable_code)] -#![feature(f128)] -#![feature(f16)] +#![cfg_attr(f128_enabled, feature(f128))] #[cfg(not(target_arch = "powerpc64"))] use testcrate::*; @@ -94,7 +93,7 @@ mod float_comparisons { } #[test] - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] fn cmp_f128() { #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] use compiler_builtins::float::cmp::{ diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index e394183cf..ce1f64e67 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -1,5 +1,5 @@ -#![cfg_attr(not(feature = "no-f16-f128"), feature(f16))] -#![cfg_attr(not(feature = "no-f16-f128"), feature(f128))] +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg_attr(f16_enabled, feature(f16))] // makes 
configuration easier #![allow(unused_macros)] #![allow(unused_imports)] @@ -176,7 +176,7 @@ mod f_to_i { } #[test] - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] fn f128_to_int() { #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] use compiler_builtins::float::conv::{ @@ -264,7 +264,7 @@ mod extend { f32 => f64, Single => Double, __extendsfdf2vfp, all(); } - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(all(f16_enabled, f128_enabled))] #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] f_to_f! { extend, @@ -275,7 +275,7 @@ mod extend { f64 => f128, Double => Quad, __extenddftf2, not(feature = "no-sys-f128"); } - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] f_to_f! { extend, @@ -299,7 +299,7 @@ mod trunc { f64 => f32, Double => Single, __truncdfsf2vfp, all(); } - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(all(f16_enabled, f128_enabled))] #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] f_to_f! { trunc, @@ -310,7 +310,7 @@ mod trunc { f128 => f64, Quad => Double, __trunctfdf2, not(feature = "no-sys-f128"); } - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] f_to_f! 
{ trunc, diff --git a/testcrate/tests/mul.rs b/testcrate/tests/mul.rs index 90144bb06..867622fdf 100644 --- a/testcrate/tests/mul.rs +++ b/testcrate/tests/mul.rs @@ -1,6 +1,5 @@ #![allow(unused_macros)] -#![feature(f128)] -#![feature(f16)] +#![cfg_attr(f128_enabled, feature(f128))] use testcrate::*; @@ -131,7 +130,7 @@ mod float_mul { } } -#[cfg(not(feature = "no-f16-f128"))] +#[cfg(f128_enabled)] #[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] mod float_mul_f128 { @@ -145,7 +144,7 @@ mod float_mul_f128 { } } -#[cfg(not(feature = "no-f16-f128"))] +#[cfg(f128_enabled)] #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] mod float_mul_f128_ppc { use super::*; From 0b364b10eaad760a7b40feeeefc64468f96928bc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 12 Aug 2024 16:42:17 +0000 Subject: [PATCH 0812/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 49b315779..44cc3994b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.118" +version = "0.1.119" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 7fc5c94451a2477975da7360950cd2c5040a38d7 Mon Sep 17 00:00:00 2001 From: Kleis Auke Wolthuizen Date: Mon, 12 Aug 2024 19:45:28 +0200 Subject: [PATCH 0813/1459] Add `configure.rs` to the `include` list --- Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.toml b/Cargo.toml index 44cc3994b..8f2d8a569 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ if necessary! 
include = [ '/Cargo.toml', '/build.rs', + '/configure.rs', '/src/*', '/examples/*', '/LICENSE.txt', From 0f809f63aea59721df3842aec5a66a7c254cb50d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 19 Aug 2024 07:04:07 -0500 Subject: [PATCH 0814/1459] Apply fixes to `build.rs` files Make the following changes: - Add `rerun-if-changed` to the new `configure.rs`, it seems this was causing incorrect caching. - Change from matching `i686` to `x86`. The target triple starts with `i686` so that is what we were checking before, but the architecture is `x86`. This change should have been made when we added `struct Target`, update it now instead. --- build.rs | 6 ++++-- configure.rs | 6 ++++++ testcrate/build.rs | 12 +++++++----- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/build.rs b/build.rs index 894508b54..5ccff76e7 100644 --- a/build.rs +++ b/build.rs @@ -5,7 +5,9 @@ mod configure; use configure::{configure_f16_f128, Target}; fn main() { - println!("cargo:rerun-if-changed=build.rs"); + println!("cargo::rerun-if-changed=build.rs"); + println!("cargo::rerun-if-changed=configure.rs"); + let target = Target::from_env(); let cwd = env::current_dir().unwrap(); @@ -46,7 +48,7 @@ fn main() { // These targets have hardware unaligned access support. println!("cargo::rustc-check-cfg=cfg(feature, values(\"mem-unaligned\"))"); if target.arch.contains("x86_64") - || target.arch.contains("i686") + || target.arch.contains("x86") || target.arch.contains("aarch64") || target.arch.contains("bpf") { diff --git a/configure.rs b/configure.rs index 676c88f3a..e23c0e839 100644 --- a/configure.rs +++ b/configure.rs @@ -2,6 +2,7 @@ use std::env; +#[derive(Debug)] #[allow(dead_code)] pub struct Target { pub triple: String, @@ -40,6 +41,11 @@ impl Target { .collect(), } } + + #[allow(dead_code)] + pub fn has_feature(&self, feature: &str) -> bool { + self.features.iter().any(|f| f == feature) + } } /// Configure whether or not `f16` and `f128` support should be enabled. 
diff --git a/testcrate/build.rs b/testcrate/build.rs index fde4e5b56..6205c7ac6 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -14,6 +14,8 @@ mod builtins_configure { } fn main() { + println!("cargo::rerun-if-changed=../configure.rs"); + let target = builtins_configure::Target::from_env(); let mut features = HashSet::new(); @@ -27,7 +29,7 @@ fn main() { || (target.os == "windows" && target.env == "gnu") // FIXME(llvm): There is an ABI incompatibility between GCC and Clang on 32-bit x86. // See . - || target.arch == "i686" + || target.arch == "x86" // 32-bit PowerPC and 64-bit LE gets code generated that Qemu cannot handle. See // . || target.arch == "powerpc" @@ -41,7 +43,7 @@ fn main() { features.insert(Feature::NoSysF16F128Convert); } - if target.arch == "i586" || target.arch == "i686" { + if target.arch == "x86" { // 32-bit x86 does not have `__fixunstfti`/`__fixtfti` but does have everything else features.insert(Feature::NoSysF128IntConvert); // FIXME: 32-bit x86 has a bug in `f128 -> f16` system libraries @@ -55,7 +57,7 @@ fn main() { || target.arch == "powerpc" || target.arch == "powerpc64" || target.arch == "powerpc64le" - || target.arch == "i586" + || (target.arch == "x86" && !target.has_feature("sse")) || target.os == "windows" // Linking says "error: function signature mismatch: __extendhfsf2" and seems to // think the signature is either `(i32) -> f32` or `(f32) -> f32`. 
See @@ -72,11 +74,11 @@ fn main() { Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"), Feature::NoSysF128IntConvert => ( "no-sys-f128-int-convert", - "using apfloat fallback for f128 to int conversions", + "using apfloat fallback for f128 <-> int conversions", ), Feature::NoSysF16F128Convert => ( "no-sys-f16-f128-convert", - "skipping using apfloat fallback for f16 <-> f128 conversions", + "using apfloat fallback for f16 <-> f128 conversions", ), Feature::NoSysF16 => ("no-sys-f16", "using apfloat fallback for f16"), }; From 6651d112ac393a8436099f10cdaac33e5392712d Mon Sep 17 00:00:00 2001 From: Amjad Alsharafi <26300843+Amjad50@users.noreply.github.com> Date: Thu, 22 Aug 2024 10:59:06 +0800 Subject: [PATCH 0815/1459] Fix weak linkage on windows and apple platforms There were some issues regarding windows and apple platforms, we were exporting symbols that are already provided by the compiler but weren't marked as `weak` which resulted in conflicting symbols in the linking process. Initially, we didn't add `weak` because we thought it is not supported on windows and apple platforms, but it looks like it's only not supported on windows-gnu platforms. Signed-off-by: Amjad Alsharafi <26300843+Amjad50@users.noreply.github.com> --- src/macros.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/macros.rs b/src/macros.rs index 42c83ee55..18535d633 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -256,7 +256,7 @@ macro_rules! intrinsics { #[cfg(all(any(windows, target_os = "uefi"), target_arch = "x86_64", not(feature = "mangled-names")))] mod $name { #[no_mangle] - #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] + #[cfg_attr(not(all(windows, target_env = "gnu")), linkage = "weak")] extern $abi fn $name( $($argname: $ty),* ) -> $crate::macros::win64_128bit_abi_hack::U64x2 { @@ -298,7 +298,7 @@ macro_rules! 
intrinsics { #[cfg(all(target_arch = "arm", not(feature = "mangled-names")))] mod $name { #[no_mangle] - #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] + #[cfg_attr(not(all(windows, target_env = "gnu")), linkage = "weak")] $(#[$($attr)*])* extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) @@ -308,7 +308,7 @@ macro_rules! intrinsics { #[cfg(all(target_arch = "arm", not(feature = "mangled-names")))] mod $alias { #[no_mangle] - #[cfg_attr(all(not(windows), not(target_vendor="apple")), linkage = "weak")] + #[cfg_attr(not(all(windows, target_env = "gnu")), linkage = "weak")] $(#[$($attr)*])* extern "aapcs" fn $alias( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) @@ -375,7 +375,7 @@ macro_rules! intrinsics { mod $name { $(#[$($attr)*])* #[no_mangle] - #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] + #[cfg_attr(not(all(windows, target_env = "gnu")), linkage = "weak")] unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } @@ -400,7 +400,7 @@ macro_rules! intrinsics { #[naked] $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] + #[cfg_attr(not(all(windows, target_env = "gnu")), linkage = "weak")] pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } @@ -467,7 +467,7 @@ macro_rules! intrinsics { mod $name { $(#[$($attr)*])* #[no_mangle] - #[cfg_attr(all(not(windows), not(target_vendor = "apple")), linkage = "weak")] + #[cfg_attr(not(all(windows, target_env = "gnu")), linkage = "weak")] $(unsafe $($empty)?)? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? 
{ super::$name($($argname),*) } From 71e9468f27067c5ce89154dc38c4ae0a315517ef Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 22 Aug 2024 06:03:55 +0000 Subject: [PATCH 0816/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8f2d8a569..370512181 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.119" +version = "0.1.120" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From f3882e4e44af7840eab5f7d0e18df6ed78d8d42d Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Thu, 22 Aug 2024 00:43:02 -0700 Subject: [PATCH 0817/1459] Use array simd in `U64x2` --- src/macros.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/macros.rs b/src/macros.rs index 18535d633..f7c6d0bfe 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -482,20 +482,20 @@ macro_rules! 
intrinsics { #[cfg(all(any(windows, target_os = "uefi"), target_pointer_width = "64"))] pub mod win64_128bit_abi_hack { #[repr(simd)] - pub struct U64x2(u64, u64); + pub struct U64x2([u64; 2]); impl From for U64x2 { fn from(i: i128) -> U64x2 { use crate::int::DInt; let j = i as u128; - U64x2(j.lo(), j.hi()) + U64x2([j.lo(), j.hi()]) } } impl From for U64x2 { fn from(i: u128) -> U64x2 { use crate::int::DInt; - U64x2(i.lo(), i.hi()) + U64x2([i.lo(), i.hi()]) } } } From 2fcb07e2e483f4bef03fbb06cda5d08b4f8403ec Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 22 Aug 2024 08:01:13 +0000 Subject: [PATCH 0818/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 370512181..3f104b037 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.120" +version = "0.1.121" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 7093c297c58edbbac436d7806efdfcf9c9678a53 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 22 Aug 2024 03:43:13 -0500 Subject: [PATCH 0819/1459] Add a `success` job to CI This will allow us to enable auto merge once CI completes. --- libm/.github/workflows/main.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 2f2e46822..322043d85 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -71,3 +71,20 @@ jobs: - name: Install Rust run: rustup update nightly && rustup default nightly - run: cargo bench --all + + success: + needs: + - docker + - rustfmt + - wasm + - cb + - benchmarks + runs-on: ubuntu-latest + # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency + # failed" as success. 
So we have to do some contortions to ensure the job fails if any of its + # dependencies fails. + if: always() # make sure this is never "skipped" + steps: + # Manually check the status of all dependencies. `if: failure()` does not work. + - name: check if any dependency failed + run: jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}' From fb6cb2af515ee63d063b4d207da21f96edfa08ec Mon Sep 17 00:00:00 2001 From: beetrees Date: Sat, 24 Aug 2024 11:24:45 +0100 Subject: [PATCH 0820/1459] Use `macos-latest` in CI for `aarch64-apple-darwin` --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 38064543f..affb3a824 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,7 +16,7 @@ jobs: matrix: include: - target: aarch64-apple-darwin - os: macos-14 + os: macos-latest rust: nightly - target: aarch64-unknown-linux-gnu os: ubuntu-latest From b44d52f6fd35ad8bd13cdb2440effe09acc1d490 Mon Sep 17 00:00:00 2001 From: beetrees Date: Sat, 24 Aug 2024 11:52:41 +0100 Subject: [PATCH 0821/1459] Use `macos-13` in CI for `x86_64-apple-darwin` --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index affb3a824..fddb5973e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -80,7 +80,7 @@ jobs: os: ubuntu-latest rust: nightly - target: x86_64-apple-darwin - os: macos-latest + os: macos-13 rust: nightly - target: i686-pc-windows-msvc os: windows-latest From a113c7c36c568fe62cf7eefaa52a14e3543b8785 Mon Sep 17 00:00:00 2001 From: beetrees Date: Fri, 23 Aug 2024 12:39:31 +0100 Subject: [PATCH 0822/1459] Fix ABI for `f16` builtins on Intel Apple targets --- src/float/extend.rs | 2 + src/float/trunc.rs | 3 ++ src/macros.rs | 100 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+) diff --git 
a/src/float/extend.rs b/src/float/extend.rs index 2f392255f..9fabcde25 100644 --- a/src/float/extend.rs +++ b/src/float/extend.rs @@ -86,6 +86,7 @@ intrinsics! { intrinsics! { #[avr_skip] #[aapcs_on_arm] + #[apple_f16_arg_abi] #[arm_aeabi_alias = __aeabi_h2f] #[cfg(f16_enabled)] pub extern "C" fn __extendhfsf2(a: f16) -> f32 { @@ -94,6 +95,7 @@ intrinsics! { #[avr_skip] #[aapcs_on_arm] + #[apple_f16_arg_abi] #[cfg(f16_enabled)] pub extern "C" fn __gnu_h2f_ieee(a: f16) -> f32 { extend(a) diff --git a/src/float/trunc.rs b/src/float/trunc.rs index c54ff7805..5c17cd96a 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -134,6 +134,7 @@ intrinsics! { intrinsics! { #[avr_skip] #[aapcs_on_arm] + #[apple_f16_ret_abi] #[arm_aeabi_alias = __aeabi_f2h] #[cfg(f16_enabled)] pub extern "C" fn __truncsfhf2(a: f32) -> f16 { @@ -142,6 +143,7 @@ intrinsics! { #[avr_skip] #[aapcs_on_arm] + #[apple_f16_ret_abi] #[cfg(f16_enabled)] pub extern "C" fn __gnu_f2h_ieee(a: f32) -> f16 { trunc(a) @@ -149,6 +151,7 @@ intrinsics! { #[avr_skip] #[aapcs_on_arm] + #[apple_f16_ret_abi] #[arm_aeabi_alias = __aeabi_d2h] #[cfg(f16_enabled)] pub extern "C" fn __truncdfhf2(a: f64) -> f16 { diff --git a/src/macros.rs b/src/macros.rs index f7c6d0bfe..f51e49e98 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -276,6 +276,106 @@ macro_rules! intrinsics { intrinsics!($($rest)*); ); + // `arm_aeabi_alias` would conflict with `f16_apple_{arg,ret}_abi` not handled here. Avoid macro ambiguity by combining in a + // single `#[]`. + ( + #[apple_f16_arg_abi] + #[arm_aeabi_alias = $alias:ident] + $($t:tt)* + ) => { + intrinsics! { + #[apple_f16_arg_abi, arm_aeabi_alias = $alias] + $($t)* + } + }; + ( + #[apple_f16_ret_abi] + #[arm_aeabi_alias = $alias:ident] + $($t:tt)* + ) => { + intrinsics! { + #[apple_f16_ret_abi, arm_aeabi_alias = $alias] + $($t)* + } + }; + + // On x86 (32-bit and 64-bit) Apple platforms, `f16` is passed and returned like a `u16` unless + // the builtin involves `f128`. 
+ ( + // `arm_aeabi_alias` would conflict if not handled here. Avoid macro ambiguity by combining + // in a single `#[]`. + #[apple_f16_arg_abi $(, arm_aeabi_alias = $alias:ident)?] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64")))] + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"), not(feature = "mangled-names")))] + mod $name { + #[no_mangle] + #[cfg_attr(not(all(windows, target_env = "gnu")), linkage = "weak")] + $(#[$($attr)*])* + extern $abi fn $name( $($argname: u16),* ) $(-> $ret)? { + super::$name($(f16::from_bits($argname)),*) + } + } + + #[cfg(not(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"))))] + intrinsics! { + $(#[arm_aeabi_alias = $alias])? + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + ( + #[apple_f16_ret_abi $(, arm_aeabi_alias = $alias:ident)?] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64")))] + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? 
{ + $($body)* + } + + #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"), not(feature = "mangled-names")))] + mod $name { + #[no_mangle] + #[cfg_attr(not(all(windows, target_env = "gnu")), linkage = "weak")] + $(#[$($attr)*])* + extern $abi fn $name( $($argname: $ty),* ) -> u16 { + super::$name($($argname),*).to_bits() + } + } + + #[cfg(not(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"))))] + intrinsics! { + $(#[arm_aeabi_alias = $alias])? + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + // A bunch of intrinsics on ARM are aliased in the standard compiler-rt // build under `__aeabi_*` aliases, and LLVM will call these instead of the // original function. The aliasing here is used to generate these symbols in From 6b5328969224cc4a588cc41a30c9d36b9fee0551 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 25 Aug 2024 04:08:06 +0000 Subject: [PATCH 0823/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 3f104b037..b9de326d9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.121" +version = "0.1.122" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 0fab77e8d72cf232af4977642b52544f0e4ab521 Mon Sep 17 00:00:00 2001 From: Amjad Alsharafi <26300843+Amjad50@users.noreply.github.com> Date: Tue, 27 Aug 2024 12:55:05 +0800 Subject: [PATCH 0824/1459] Don't include `math` for `unix` and `wasi` targets This fixes such as (https://github.com/rust-lang/rust/issues/128386) where, our implementation is being used on systems where there is already `math` library and its more performant and accurate. 
So with this change, linux will go back to the previous behavior and not include these functions, windows and apple were generally not affected. Looking at the targets we have builtin now in rust, everything else is probably good to have the math symbols. > A note on the above, the `hermit` os uses `libm` directly for itself, > but I think its Ok to keep providing math in `compiler_builtin` for it, > its technically the same implementation either from `compiler_builtin` > or `hermit-builtins`. Signed-off-by: Amjad Alsharafi <26300843+Amjad50@users.noreply.github.com> --- src/lib.rs | 13 ++++++++++--- src/math.rs | 3 ++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0d44fdf96..b85f789fd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,9 +47,16 @@ mod macros; pub mod float; pub mod int; -// Disabled on x86 without sse2 due to ABI issues -// -#[cfg(not(all(target_arch = "x86", not(target_feature = "sse2"))))] +// Disable for any of the following: +// - x86 without sse2 due to ABI issues +// - +// - All unix targets (linux, macos, freebsd, android, etc) +// - wasm with known target_os +#[cfg(not(any( + all(target_arch = "x86", not(target_feature = "sse2")), + unix, + all(target_family = "wasm", not(target_os = "unknown")) +)))] pub mod math; pub mod mem; diff --git a/src/math.rs b/src/math.rs index 7d4d17876..477dfe365 100644 --- a/src/math.rs +++ b/src/math.rs @@ -17,7 +17,7 @@ macro_rules! no_mangle { } } -#[cfg(all(not(windows), not(target_vendor = "apple")))] +#[cfg(not(windows))] no_mangle! { fn acos(x: f64) -> f64; fn asin(x: f64) -> f64; @@ -92,6 +92,7 @@ no_mangle! { fn fmodf(x: f32, y: f32) -> f32; } +// allow for windows (and other targets) intrinsics! 
{ pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 { let r = self::libm::lgamma_r(x); From 570faeb067cda4629b012522b8c776c86c6edb8d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 28 Aug 2024 16:15:09 +0000 Subject: [PATCH 0825/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b9de326d9..525095b17 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.122" +version = "0.1.123" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 95c62361a092e2e7f87c2d929ce23dcf0e4ba434 Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Sat, 31 Aug 2024 12:54:21 -0500 Subject: [PATCH 0826/1459] [hexagon] Remove aliases w/o leading __ These hexagon builtins incorrectly created aliases in the global namespace which can (and in at least one case, did) conflict with symbols defined by other programs. This should address the issue reported as https://github.com/rust-lang/rust/issues/129823: Compiling compiler_builtins v0.1.123 Compiling core v0.0.0 (/home/ben/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core) Compiling rustc-std-workspace-core v1.99.0 (/home/ben/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/rustc-std-workspace-core) Compiling byteorder v1.5.0 Compiling zerocopy v0.7.34 error: symbol 'fma' is already defined error: could not compile `compiler_builtins` (lib) due to 1 previous error Also: some of the symbols defined were not just aliases, so those are now qualified with `__hexagon_`. The compiler does not yet emit calls to these particular ones, but if/when it does, it can use the new names. 
--- src/hexagon/dffma.s | 2 -- src/hexagon/dfminmax.s | 6 ------ src/hexagon/fastmath2_dlib_asm.s | 30 +++++++++++++++--------------- src/hexagon/fastmath2_ldlib_asm.s | 18 +++++++++--------- 4 files changed, 24 insertions(+), 32 deletions(-) diff --git a/src/hexagon/dffma.s b/src/hexagon/dffma.s index 043a1d294..97d05eb18 100644 --- a/src/hexagon/dffma.s +++ b/src/hexagon/dffma.s @@ -3,8 +3,6 @@ .type __hexagon_fmadf4,@function .global __hexagon_fmadf5 .type __hexagon_fmadf5,@function - .global fma - .type fma,@function .global __qdsp_fmadf5 ; .set __qdsp_fmadf5, __hexagon_fmadf5 .p2align 5 __hexagon_fmadf4: diff --git a/src/hexagon/dfminmax.s b/src/hexagon/dfminmax.s index 3337a3223..953e773bf 100644 --- a/src/hexagon/dfminmax.s +++ b/src/hexagon/dfminmax.s @@ -1,17 +1,12 @@ .text .global __hexagon_mindf3 .global __hexagon_maxdf3 - .global fmin - .type fmin,@function - .global fmax - .type fmax,@function .type __hexagon_mindf3,@function .type __hexagon_maxdf3,@function .global __qdsp_mindf3 ; .set __qdsp_mindf3, __hexagon_mindf3 .global __qdsp_maxdf3 ; .set __qdsp_maxdf3, __hexagon_maxdf3 .p2align 5 __hexagon_mindf3: -fmin: { p0 = dfclass(r1:0,#0x10) p1 = dfcmp.gt(r1:0,r3:2) @@ -31,7 +26,6 @@ fmin: .size __hexagon_mindf3,.-__hexagon_mindf3 .falign __hexagon_maxdf3: -fmax: { p0 = dfclass(r1:0,#0x10) p1 = dfcmp.gt(r3:2,r1:0) diff --git a/src/hexagon/fastmath2_dlib_asm.s b/src/hexagon/fastmath2_dlib_asm.s index 15c387846..e77b7db03 100644 --- a/src/hexagon/fastmath2_dlib_asm.s +++ b/src/hexagon/fastmath2_dlib_asm.s @@ -1,7 +1,7 @@ .text - .global fast2_dadd_asm - .type fast2_dadd_asm, @function -fast2_dadd_asm: + .global __hexagon_fast2_dadd_asm + .type __hexagon_fast2_dadd_asm, @function +__hexagon_fast2_dadd_asm: .falign { R7:6 = VABSDIFFH(R1:0, R3:2) @@ -49,9 +49,9 @@ fast2_dadd_asm: jumpr r31 } .text - .global fast2_dsub_asm - .type fast2_dsub_asm, @function -fast2_dsub_asm: + .global __hexagon_fast2_dsub_asm + .type __hexagon_fast2_dsub_asm, @function 
+__hexagon_fast2_dsub_asm: .falign { R7:6 = VABSDIFFH(R1:0, R3:2) @@ -99,9 +99,9 @@ fast2_dsub_asm: jumpr r31 } .text - .global fast2_dmpy_asm - .type fast2_dmpy_asm, @function -fast2_dmpy_asm: + .global __hexagon_fast2_dmpy_asm + .type __hexagon_fast2_dmpy_asm, @function +__hexagon_fast2_dmpy_asm: .falign { R13= lsr(R2, #16) @@ -167,9 +167,9 @@ fast2_dmpy_asm: jumpr r31 } .text - .global fast2_qd2f_asm - .type fast2_qd2f_asm, @function -fast2_qd2f_asm: + .global __hexagon_fast2_qd2f_asm + .type __hexagon_fast2_qd2f_asm, @function +__hexagon_fast2_qd2f_asm: .falign { R3 = abs(R1):sat @@ -225,9 +225,9 @@ fast2_qd2f_asm: jumpr r31 } .text - .global fast2_f2qd_asm - .type fast2_f2qd_asm, @function -fast2_f2qd_asm: + .global __hexagon_fast2_f2qd_asm + .type __hexagon_fast2_f2qd_asm, @function +__hexagon_fast2_f2qd_asm: diff --git a/src/hexagon/fastmath2_ldlib_asm.s b/src/hexagon/fastmath2_ldlib_asm.s index b72b7550a..3251057d7 100644 --- a/src/hexagon/fastmath2_ldlib_asm.s +++ b/src/hexagon/fastmath2_ldlib_asm.s @@ -1,7 +1,7 @@ .text - .global fast2_ldadd_asm - .type fast2_ldadd_asm, @function -fast2_ldadd_asm: + .global __hexagon_fast2ldadd_asm + .type __hexagon_fast2ldadd_asm, @function +__hexagon_fast2ldadd_asm: .falign { R4 = memw(r29+#8) @@ -54,9 +54,9 @@ fast2_ldadd_asm: jumpr r31 } .text - .global fast2_ldsub_asm - .type fast2_ldsub_asm, @function -fast2_ldsub_asm: + .global __hexagon_fast2ldsub_asm + .type __hexagon_fast2ldsub_asm, @function +__hexagon_fast2ldsub_asm: .falign { R4 = memw(r29+#8) @@ -109,9 +109,9 @@ fast2_ldsub_asm: jumpr r31 } .text - .global fast2_ldmpy_asm - .type fast2_ldmpy_asm, @function -fast2_ldmpy_asm: + .global __hexagon_fast2ldmpy_asm + .type __hexagon_fast2ldmpy_asm, @function +__hexagon_fast2ldmpy_asm: .falign { R15:14 = memd(r29+#0) From 3442c02c01d05e406c23330fd66cd5187ffe8a2f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 1 Sep 2024 03:20:30 +0000 Subject: 
[PATCH 0827/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 525095b17..f5a04747a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.123" +version = "0.1.124" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 729ba06ab1d1f43be757ad1df48025efc69dba48 Mon Sep 17 00:00:00 2001 From: Sbstn Bcht Date: Wed, 4 Sep 2024 04:55:44 +0200 Subject: [PATCH 0828/1459] Remove unsupported *vfp functions (#678) Remove all *vfp functions and related tests since LLVM no longer emits them. Link: https://github.com/rust-lang/compiler-builtins/pull/626 [ Reword commit message - Trevor ] --- README.md | 71 ++++++++++++++++++++------------------ src/float/add.rs | 10 ------ src/float/cmp.rs | 51 --------------------------- src/float/div.rs | 10 ------ src/float/extend.rs | 5 --- src/float/mul.rs | 10 ------ src/float/sub.rs | 10 ------ src/float/trunc.rs | 5 --- testcrate/tests/addsub.rs | 10 ------ testcrate/tests/cmp.rs | 14 -------- testcrate/tests/conv.rs | 12 ------- testcrate/tests/div_rem.rs | 10 ------ testcrate/tests/mul.rs | 10 ------ 13 files changed, 37 insertions(+), 191 deletions(-) diff --git a/README.md b/README.md index 51bef5e2e..46983a281 100644 --- a/README.md +++ b/README.md @@ -103,9 +103,7 @@ rely on CI. - [x] aarch64/chkstk.S - [x] adddf3.c - [x] addsf3.c -- [x] arm/adddf3vfp.S - [x] arm/addsf3.S -- [x] arm/addsf3vfp.S - [x] arm/aeabi_dcmp.S - [x] arm/aeabi_fcmp.S - [x] arm/aeabi_idivmod.S @@ -116,45 +114,13 @@ rely on CI. 
- [x] arm/aeabi_uidivmod.S - [x] arm/aeabi_uldivmod.S - [ ] arm/chkstk.S -- [x] arm/divdf3vfp.S - [ ] arm/divmodsi4.S (generic version is done) -- [x] arm/divsf3vfp.S - [ ] arm/divsi3.S (generic version is done) -- [x] arm/eqdf2vfp.S -- [x] arm/eqsf2vfp.S -- [x] arm/extendsfdf2vfp.S -- [ ] arm/fixdfsivfp.S -- [ ] arm/fixsfsivfp.S -- [ ] arm/fixunsdfsivfp.S -- [ ] arm/fixunssfsivfp.S -- [ ] arm/floatsidfvfp.S -- [ ] arm/floatsisfvfp.S -- [ ] arm/floatunssidfvfp.S -- [ ] arm/floatunssisfvfp.S -- [x] arm/gedf2vfp.S -- [x] arm/gesf2vfp.S -- [x] arm/gtdf2vfp.S -- [x] arm/gtsf2vfp.S -- [x] arm/ledf2vfp.S -- [x] arm/lesf2vfp.S -- [x] arm/ltdf2vfp.S -- [x] arm/ltsf2vfp.S - [ ] arm/modsi3.S (generic version is done) -- [x] arm/muldf3vfp.S -- [x] arm/mulsf3vfp.S -- [x] arm/nedf2vfp.S -- [ ] arm/negdf2vfp.S -- [ ] arm/negsf2vfp.S -- [x] arm/nesf2vfp.S - [x] arm/softfloat-alias.list -- [x] arm/subdf3vfp.S -- [x] arm/subsf3vfp.S -- [x] arm/truncdfsf2vfp.S - [ ] arm/udivmodsi4.S (generic version is done) - [ ] arm/udivsi3.S (generic version is done) - [ ] arm/umodsi3.S (generic version is done) -- [ ] arm/unorddf2vfp.S -- [ ] arm/unordsf2vfp.S - [x] ashldi3.c - [x] ashrdi3.c - [ ] avr/divmodhi4.S @@ -501,6 +467,43 @@ Floating-point implementations of builtins that are only called from soft-float - ~~x86_64/floatdidf.c~~ - ~~x86_64/floatdisf.c~~ +Unsupported in any current target: used on old versions of 32-bit iOS with ARMv5. 
+ +- ~~arm/adddf3vfp.S~~ +- ~~arm/addsf3vfp.S~~ +- ~~arm/divdf3vfp.S~~ +- ~~arm/divsf3vfp.S~~ +- ~~arm/eqdf2vfp.S~~ +- ~~arm/eqsf2vfp.S~~ +- ~~arm/extendsfdf2vfp.S~~ +- ~~arm/fixdfsivfp.S~~ +- ~~arm/fixsfsivfp.S~~ +- ~~arm/fixunsdfsivfp.S~~ +- ~~arm/fixunssfsivfp.S~~ +- ~~arm/floatsidfvfp.S~~ +- ~~arm/floatsisfvfp.S~~ +- ~~arm/floatunssidfvfp.S~~ +- ~~arm/floatunssisfvfp.S~~ +- ~~arm/gedf2vfp.S~~ +- ~~arm/gesf2vfp.S~~ +- ~~arm/gtdf2vfp.S~~ +- ~~arm/gtsf2vfp.S~~ +- ~~arm/ledf2vfp.S~~ +- ~~arm/lesf2vfp.S~~ +- ~~arm/ltdf2vfp.S~~ +- ~~arm/ltsf2vfp.S~~ +- ~~arm/muldf3vfp.S~~ +- ~~arm/mulsf3vfp.S~~ +- ~~arm/nedf2vfp.S~~ +- ~~arm/negdf2vfp.S~~ +- ~~arm/negsf2vfp.S~~ +- ~~arm/nesf2vfp.S~~ +- ~~arm/subdf3vfp.S~~ +- ~~arm/subsf3vfp.S~~ +- ~~arm/truncdfsf2vfp.S~~ +- ~~arm/unorddf2vfp.S~~ +- ~~arm/unordsf2vfp.S~~ + ## License The compiler-builtins crate is dual licensed under both the University of diff --git a/src/float/add.rs b/src/float/add.rs index 03ed131af..bceef7b0e 100644 --- a/src/float/add.rs +++ b/src/float/add.rs @@ -208,14 +208,4 @@ intrinsics! { pub extern "C" fn __addtf3(a: f128, b: f128) -> f128 { add(a, b) } - - #[cfg(target_arch = "arm")] - pub extern "C" fn __addsf3vfp(a: f32, b: f32) -> f32 { - a + b - } - - #[cfg(target_arch = "arm")] - pub extern "C" fn __adddf3vfp(a: f64, b: f64) -> f64 { - a + b - } } diff --git a/src/float/cmp.rs b/src/float/cmp.rs index 1901ca4b3..bb7d4b498 100644 --- a/src/float/cmp.rs +++ b/src/float/cmp.rs @@ -258,55 +258,4 @@ intrinsics! 
{ pub extern "aapcs" fn __aeabi_dcmpgt(a: f64, b: f64) -> i32 { (__gtdf2(a, b) > 0) as i32 } - - // On hard-float targets LLVM will use native instructions - // for all VFP intrinsics below - - pub extern "C" fn __gesf2vfp(a: f32, b: f32) -> i32 { - (a >= b) as i32 - } - - pub extern "C" fn __gedf2vfp(a: f64, b: f64) -> i32 { - (a >= b) as i32 - } - - pub extern "C" fn __gtsf2vfp(a: f32, b: f32) -> i32 { - (a > b) as i32 - } - - pub extern "C" fn __gtdf2vfp(a: f64, b: f64) -> i32 { - (a > b) as i32 - } - - pub extern "C" fn __ltsf2vfp(a: f32, b: f32) -> i32 { - (a < b) as i32 - } - - pub extern "C" fn __ltdf2vfp(a: f64, b: f64) -> i32 { - (a < b) as i32 - } - - pub extern "C" fn __lesf2vfp(a: f32, b: f32) -> i32 { - (a <= b) as i32 - } - - pub extern "C" fn __ledf2vfp(a: f64, b: f64) -> i32 { - (a <= b) as i32 - } - - pub extern "C" fn __nesf2vfp(a: f32, b: f32) -> i32 { - (a != b) as i32 - } - - pub extern "C" fn __nedf2vfp(a: f64, b: f64) -> i32 { - (a != b) as i32 - } - - pub extern "C" fn __eqsf2vfp(a: f32, b: f32) -> i32 { - (a == b) as i32 - } - - pub extern "C" fn __eqdf2vfp(a: f64, b: f64) -> i32 { - (a == b) as i32 - } } diff --git a/src/float/div.rs b/src/float/div.rs index c0d780b66..2a57ee1a0 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -926,14 +926,4 @@ intrinsics! { pub extern "C" fn __divdf3(a: f64, b: f64) -> f64 { div64(a, b) } - - #[cfg(target_arch = "arm")] - pub extern "C" fn __divsf3vfp(a: f32, b: f32) -> f32 { - a / b - } - - #[cfg(target_arch = "arm")] - pub extern "C" fn __divdf3vfp(a: f64, b: f64) -> f64 { - a / b - } } diff --git a/src/float/extend.rs b/src/float/extend.rs index 9fabcde25..997475c8e 100644 --- a/src/float/extend.rs +++ b/src/float/extend.rs @@ -76,11 +76,6 @@ intrinsics! { pub extern "C" fn __extendsfdf2(a: f32) -> f64 { extend(a) } - - #[cfg(target_arch = "arm")] - pub extern "C" fn __extendsfdf2vfp(a: f32) -> f64 { - a as f64 // LLVM generate 'fcvtds' - } } intrinsics! 
{ diff --git a/src/float/mul.rs b/src/float/mul.rs index cb0fcdfa8..a4c69ea87 100644 --- a/src/float/mul.rs +++ b/src/float/mul.rs @@ -199,14 +199,4 @@ intrinsics! { pub extern "C" fn __multf3(a: f128, b: f128) -> f128 { mul(a, b) } - - #[cfg(target_arch = "arm")] - pub extern "C" fn __mulsf3vfp(a: f32, b: f32) -> f32 { - a * b - } - - #[cfg(target_arch = "arm")] - pub extern "C" fn __muldf3vfp(a: f64, b: f64) -> f64 { - a * b - } } diff --git a/src/float/sub.rs b/src/float/sub.rs index d33016ead..7e8a89458 100644 --- a/src/float/sub.rs +++ b/src/float/sub.rs @@ -23,14 +23,4 @@ intrinsics! { __addtf3(a, f128::from_repr(b.repr() ^ f128::SIGN_MASK)) } - - #[cfg(target_arch = "arm")] - pub extern "C" fn __subsf3vfp(a: f32, b: f32) -> f32 { - a - b - } - - #[cfg(target_arch = "arm")] - pub extern "C" fn __subdf3vfp(a: f64, b: f64) -> f64 { - a - b - } } diff --git a/src/float/trunc.rs b/src/float/trunc.rs index 5c17cd96a..a25b6eabc 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -124,11 +124,6 @@ intrinsics! { pub extern "C" fn __truncdfsf2(a: f64) -> f32 { trunc(a) } - - #[cfg(target_arch = "arm")] - pub extern "C" fn __truncdfsf2vfp(a: f64) -> f32 { - a as f32 - } } intrinsics! { diff --git a/testcrate/tests/addsub.rs b/testcrate/tests/addsub.rs index 1ba2df741..284a2bf5a 100644 --- a/testcrate/tests/addsub.rs +++ b/testcrate/tests/addsub.rs @@ -139,13 +139,3 @@ mod float_addsub_f128_ppc { f128, __addkf3, __subkf3, Quad, not(feature = "no-sys-f128"); } } - -#[cfg(target_arch = "arm")] -mod float_addsub_arm { - use super::*; - - float_sum! 
{ - f32, __addsf3vfp, __subsf3vfp, Single, all(); - f64, __adddf3vfp, __subdf3vfp, Double, all(); - } -} diff --git a/testcrate/tests/cmp.rs b/testcrate/tests/cmp.rs index 7e973e7e5..e3161f374 100644 --- a/testcrate/tests/cmp.rs +++ b/testcrate/tests/cmp.rs @@ -156,7 +156,6 @@ mod float_comparisons_arm { fn cmp_f32() { use compiler_builtins::float::cmp::{ __aeabi_fcmpeq, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmple, __aeabi_fcmplt, - __eqsf2vfp, __gesf2vfp, __gtsf2vfp, __lesf2vfp, __ltsf2vfp, __nesf2vfp, }; fuzz_float_2(N, |x: f32, y: f32| { @@ -166,12 +165,6 @@ mod float_comparisons_arm { 0, x == y, __aeabi_fcmpeq; 0, x >= y, __aeabi_fcmpge; 0, x > y, __aeabi_fcmpgt; - 0, x < y, __ltsf2vfp; - 0, x <= y, __lesf2vfp; - 0, x == y, __eqsf2vfp; - 0, x >= y, __gesf2vfp; - 0, x > y, __gtsf2vfp; - 1, x != y, __nesf2vfp; ); }); } @@ -180,7 +173,6 @@ mod float_comparisons_arm { fn cmp_f64() { use compiler_builtins::float::cmp::{ __aeabi_dcmpeq, __aeabi_dcmpge, __aeabi_dcmpgt, __aeabi_dcmple, __aeabi_dcmplt, - __eqdf2vfp, __gedf2vfp, __gtdf2vfp, __ledf2vfp, __ltdf2vfp, __nedf2vfp, }; fuzz_float_2(N, |x: f64, y: f64| { @@ -190,12 +182,6 @@ mod float_comparisons_arm { 0, x == y, __aeabi_dcmpeq; 0, x >= y, __aeabi_dcmpge; 0, x > y, __aeabi_dcmpgt; - 0, x < y, __ltdf2vfp; - 0, x <= y, __ledf2vfp; - 0, x == y, __eqdf2vfp; - 0, x >= y, __gedf2vfp; - 0, x > y, __gtdf2vfp; - 1, x != y, __nedf2vfp; ); }); } diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index ce1f64e67..24f3a04a4 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -258,12 +258,6 @@ mod extend { f32 => f64, Single => Double, __extendsfdf2, all(); } - #[cfg(target_arch = "arm")] - f_to_f! { - extend, - f32 => f64, Single => Double, __extendsfdf2vfp, all(); - } - #[cfg(all(f16_enabled, f128_enabled))] #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] f_to_f! 
{ @@ -293,12 +287,6 @@ mod trunc { f64 => f32, Double => Single, __truncdfsf2, all(); } - #[cfg(target_arch = "arm")] - f_to_f! { - trunc, - f64 => f32, Double => Single, __truncdfsf2vfp, all(); - } - #[cfg(all(f16_enabled, f128_enabled))] #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] f_to_f! { diff --git a/testcrate/tests/div_rem.rs b/testcrate/tests/div_rem.rs index ff78b4f54..418e9c189 100644 --- a/testcrate/tests/div_rem.rs +++ b/testcrate/tests/div_rem.rs @@ -156,13 +156,3 @@ mod float_div { f64, __divdf3, Double, all(); } } - -#[cfg(target_arch = "arm")] -mod float_div_arm { - use super::*; - - float! { - f32, __divsf3vfp, Single, all(); - f64, __divdf3vfp, Double, all(); - } -} diff --git a/testcrate/tests/mul.rs b/testcrate/tests/mul.rs index 867622fdf..449d19480 100644 --- a/testcrate/tests/mul.rs +++ b/testcrate/tests/mul.rs @@ -153,13 +153,3 @@ mod float_mul_f128_ppc { f128, __mulkf3, Quad, not(feature = "no-sys-f128"); } } - -#[cfg(target_arch = "arm")] -mod float_mul_arm { - use super::*; - - float_mul! { - f32, __mulsf3vfp, Single, all(); - f64, __muldf3vfp, Double, all(); - } -} From d840fb6b9298d2e3364bd2f243b47e48d795fd95 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 5 Sep 2024 09:03:28 -0700 Subject: [PATCH 0829/1459] Use the trifecta div algorithm for 128-bit div on wasm This commit updates the `#[cfg]` annotations used to select the implementation of 128-bit division in compiler-builtins on wasm targets. This is done with relation to https://github.com/WebAssembly/128-bit-arithmetic where performance of 128-bit operations is being investigated on WebAssembly. While I don't know much about the particulars of the two algorithms involved here the comments indicate that the "trifecta" variant is preferred if possible but it's not selected on 32-bit architectures. 
This rationale isn't as applicable to WebAssembly targets because despite the 32-bit pointer width there are often wider-than-pointer operations available as it's typically run on 64-bit machines. Locally in testing a benchmark that performs division with a Rust-based bignum library went from 350% slower-than-native to 220% slower-than-native with this change, a nice increase in speed. While this was tested with Wasmtime other runtimes are likely to see an improvement as well. --- src/int/specialized_div_rem/mod.rs | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs index 760f5f5b7..a91fe6632 100644 --- a/src/int/specialized_div_rem/mod.rs +++ b/src/int/specialized_div_rem/mod.rs @@ -136,9 +136,15 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) { // Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a // microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is -// faster if the target pointer width is at least 64. +// faster if the target pointer width is at least 64. Note that this +// implementation is additionally included on WebAssembly despite the typical +// pointer width there being 32 because it's typically run on a 64-bit machine +// that has access to faster 64-bit operations. #[cfg(all( - not(any(target_pointer_width = "16", target_pointer_width = "32")), + any( + target_family = "wasm", + not(any(target_pointer_width = "16", target_pointer_width = "32")), + ), not(all(not(feature = "no-asm"), target_arch = "x86_64")), not(any(target_arch = "sparc", target_arch = "sparc64")) ))] @@ -152,10 +158,14 @@ impl_trifecta!( u128 ); -// If the pointer width less than 64, then the target architecture almost certainly does not have -// the fast 64 to 128 bit widening multiplication needed for `trifecta` to be faster.
+// If the pointer width less than 64 and this isn't wasm, then the target +// architecture almost certainly does not have the fast 64 to 128 bit widening +// multiplication needed for `trifecta` to be faster. #[cfg(all( - any(target_pointer_width = "16", target_pointer_width = "32"), + not(any( + target_family = "wasm", + not(any(target_pointer_width = "16", target_pointer_width = "32")), + )), not(all(not(feature = "no-asm"), target_arch = "x86_64")), not(any(target_arch = "sparc", target_arch = "sparc64")) ))] From 6d7ff4257b69d0268acb9380b294abc065bdd502 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 5 Sep 2024 16:14:31 +0000 Subject: [PATCH 0830/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f5a04747a..1c7bc5db0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.124" +version = "0.1.125" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From a863636e67bd5d898a92af112c9d17488a648eb3 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Mon, 9 Sep 2024 16:44:50 +0200 Subject: [PATCH 0831/1459] use `naked_asm!` in `#[naked]` functions --- src/aarch64.rs | 3 +-- src/aarch64_linux.rs | 12 ++++-------- src/arm.rs | 12 ++++-------- src/x86.rs | 8 ++++---- src/x86_64.rs | 4 ++-- 5 files changed, 15 insertions(+), 24 deletions(-) diff --git a/src/aarch64.rs b/src/aarch64.rs index e5747d525..cce485c46 100644 --- a/src/aarch64.rs +++ b/src/aarch64.rs @@ -6,7 +6,7 @@ intrinsics! { #[naked] #[cfg(all(target_os = "uefi", not(feature = "no-asm")))] pub unsafe extern "C" fn __chkstk() { - core::arch::asm!( + core::arch::naked_asm!( ".p2align 2", "lsl x16, x15, #4", "mov x17, sp", @@ -16,7 +16,6 @@ intrinsics! 
{ "ldr xzr, [x17]", "b.gt 1b", "ret", - options(noreturn) ); } } diff --git a/src/aarch64_linux.rs b/src/aarch64_linux.rs index 62144e531..caac3e602 100644 --- a/src/aarch64_linux.rs +++ b/src/aarch64_linux.rs @@ -136,7 +136,7 @@ macro_rules! compare_and_swap { expected: int_ty!($bytes), desired: int_ty!($bytes), ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { // We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap. - unsafe { core::arch::asm! { + unsafe { core::arch::naked_asm! { // UXT s(tmp0), s(0) concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)), "0:", @@ -150,7 +150,6 @@ macro_rules! compare_and_swap { "cbnz w17, 0b", "1:", "ret", - options(noreturn) } } } } @@ -166,7 +165,7 @@ macro_rules! compare_and_swap_i128 { pub unsafe extern "C" fn $name ( expected: i128, desired: i128, ptr: *mut i128 ) -> i128 { - unsafe { core::arch::asm! { + unsafe { core::arch::naked_asm! { "mov x16, x0", "mov x17, x1", "0:", @@ -180,7 +179,6 @@ macro_rules! compare_and_swap_i128 { "cbnz w15, 0b", "1:", "ret", - options(noreturn) } } } } @@ -196,7 +194,7 @@ macro_rules! swap { pub unsafe extern "C" fn $name ( left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { - unsafe { core::arch::asm! { + unsafe { core::arch::naked_asm! { // mov s(tmp0), s(0) concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)), "0:", @@ -206,7 +204,6 @@ macro_rules! swap { concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"), "cbnz w17, 0b", "ret", - options(noreturn) } } } } @@ -222,7 +219,7 @@ macro_rules! fetch_op { pub unsafe extern "C" fn $name ( val: int_ty!($bytes), ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { - unsafe { core::arch::asm! { + unsafe { core::arch::naked_asm! { // mov s(tmp0), s(0) concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)), "0:", @@ -234,7 +231,6 @@ macro_rules! 
fetch_op { concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"), "cbnz w15, 0b", "ret", - options(noreturn) } } } } diff --git a/src/arm.rs b/src/arm.rs index 55cdda1f3..9e6608397 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -23,7 +23,7 @@ intrinsics! { #[naked] #[cfg(not(target_env = "msvc"))] pub unsafe extern "C" fn __aeabi_uidivmod() { - core::arch::asm!( + core::arch::naked_asm!( "push {{lr}}", "sub sp, sp, #4", "mov r2, sp", @@ -31,13 +31,12 @@ intrinsics! { "ldr r1, [sp]", "add sp, sp, #4", "pop {{pc}}", - options(noreturn) ); } #[naked] pub unsafe extern "C" fn __aeabi_uldivmod() { - core::arch::asm!( + core::arch::naked_asm!( "push {{r4, lr}}", "sub sp, sp, #16", "add r4, sp, #8", @@ -47,26 +46,24 @@ intrinsics! { "ldr r3, [sp, #12]", "add sp, sp, #16", "pop {{r4, pc}}", - options(noreturn) ); } #[naked] pub unsafe extern "C" fn __aeabi_idivmod() { - core::arch::asm!( + core::arch::naked_asm!( "push {{r0, r1, r4, lr}}", bl!("__aeabi_idiv"), "pop {{r1, r2}}", "muls r2, r2, r0", "subs r1, r1, r2", "pop {{r4, pc}}", - options(noreturn) ); } #[naked] pub unsafe extern "C" fn __aeabi_ldivmod() { - core::arch::asm!( + core::arch::naked_asm!( "push {{r4, lr}}", "sub sp, sp, #16", "add r4, sp, #8", @@ -76,7 +73,6 @@ intrinsics! { "ldr r3, [sp, #12]", "add sp, sp, #16", "pop {{r4, pc}}", - options(noreturn) ); } diff --git a/src/x86.rs b/src/x86.rs index ceec3912e..ad04d2108 100644 --- a/src/x86.rs +++ b/src/x86.rs @@ -14,9 +14,9 @@ intrinsics! { not(feature = "no-asm") ))] pub unsafe extern "C" fn __chkstk() { - core::arch::asm!( + core::arch::naked_asm!( "jmp __alloca", // Jump to __alloca since fallthrough may be unreliable" - options(noreturn, att_syntax) + options(att_syntax) ); } @@ -27,7 +27,7 @@ intrinsics! 
{ ))] pub unsafe extern "C" fn _alloca() { // __chkstk and _alloca are the same function - core::arch::asm!( + core::arch::naked_asm!( "push %ecx", "cmp $0x1000,%eax", "lea 8(%esp),%ecx", // esp before calling this routine -> ecx @@ -47,7 +47,7 @@ intrinsics! { "push (%eax)", // push return address onto the stack "sub %esp,%eax", // restore the original value in eax "ret", - options(noreturn, att_syntax) + options(att_syntax) ); } } diff --git a/src/x86_64.rs b/src/x86_64.rs index 8048f85c8..9c91a4556 100644 --- a/src/x86_64.rs +++ b/src/x86_64.rs @@ -14,7 +14,7 @@ intrinsics! { not(feature = "no-asm") ))] pub unsafe extern "C" fn ___chkstk_ms() { - core::arch::asm!( + core::arch::naked_asm!( "push %rcx", "push %rax", "cmp $0x1000,%rax", @@ -32,7 +32,7 @@ intrinsics! { "pop %rax", "pop %rcx", "ret", - options(noreturn, att_syntax) + options(att_syntax) ); } } From 30603f235ec529c42aef3b50b3a49d3624d5e720 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 11 Sep 2024 17:09:04 +0000 Subject: [PATCH 0832/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1c7bc5db0..1aa6e5668 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.125" +version = "0.1.126" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 02d5e44b9d030fa09473cc79ec6c7c10427e4ebf Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 19 Aug 2024 15:33:05 -0500 Subject: [PATCH 0833/1459] Add `Shr` to `u256` Float division requires some shift operations on big integers; implement right shift here. 
--- src/int/big.rs | 37 ++++++++++++++++++++- testcrate/tests/big.rs | 73 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 1 deletion(-) diff --git a/src/int/big.rs b/src/int/big.rs index 019dd728b..e565da897 100644 --- a/src/int/big.rs +++ b/src/int/big.rs @@ -93,7 +93,7 @@ macro_rules! impl_common { type Output = Self; fn shl(self, rhs: u32) -> Self::Output { - todo!() + unimplemented!("only used to meet trait bounds") } } }; @@ -102,6 +102,41 @@ macro_rules! impl_common { impl_common!(i256); impl_common!(u256); +impl ops::Shr for u256 { + type Output = Self; + + fn shr(self, rhs: u32) -> Self::Output { + assert!(rhs < Self::BITS, "attempted to shift right with overflow"); + + if rhs == 0 { + return self; + } + + let mut ret = self; + let byte_shift = rhs / 64; + let bit_shift = rhs % 64; + + for idx in 0..4 { + let base_idx = idx + byte_shift as usize; + + let Some(base) = ret.0.get(base_idx) else { + ret.0[idx] = 0; + continue; + }; + + let mut new_val = base >> bit_shift; + + if let Some(new) = ret.0.get(base_idx + 1) { + new_val |= new.overflowing_shl(64 - bit_shift).0; + } + + ret.0[idx] = new_val; + } + + ret + } +} + macro_rules! 
word { (1, $val:expr) => { (($val >> (32 * 3)) & Self::from(WORD_LO_MASK)) as u64 diff --git a/testcrate/tests/big.rs b/testcrate/tests/big.rs index 128b5ddfd..595f62256 100644 --- a/testcrate/tests/big.rs +++ b/testcrate/tests/big.rs @@ -59,3 +59,76 @@ fn widen_mul_u128() { } assert!(errors.is_empty()); } + +#[test] +fn not_u128() { + assert_eq!(!u256::ZERO, u256::MAX); +} + +#[test] +fn shr_u128() { + let only_low = [ + 1, + u16::MAX.into(), + u32::MAX.into(), + u64::MAX.into(), + u128::MAX, + ]; + + let mut errors = Vec::new(); + + for a in only_low { + for perturb in 0..10 { + let a = a.saturating_add(perturb); + for shift in 0..128 { + let res = a.widen() >> shift; + let expected = (a >> shift).widen(); + if res != expected { + errors.push((a.widen(), shift, res, expected)); + } + } + } + } + + let check = [ + ( + u256::MAX, + 1, + u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1]), + ), + ( + u256::MAX, + 5, + u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 5]), + ), + (u256::MAX, 63, u256([u64::MAX, u64::MAX, u64::MAX, 1])), + (u256::MAX, 64, u256([u64::MAX, u64::MAX, u64::MAX, 0])), + (u256::MAX, 65, u256([u64::MAX, u64::MAX, u64::MAX >> 1, 0])), + (u256::MAX, 127, u256([u64::MAX, u64::MAX, 1, 0])), + (u256::MAX, 128, u256([u64::MAX, u64::MAX, 0, 0])), + (u256::MAX, 129, u256([u64::MAX, u64::MAX >> 1, 0, 0])), + (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])), + (u256::MAX, 192, u256([u64::MAX, 0, 0, 0])), + (u256::MAX, 193, u256([u64::MAX >> 1, 0, 0, 0])), + (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])), + (u256::MAX, 254, u256([0b11, 0, 0, 0])), + (u256::MAX, 255, u256([1, 0, 0, 0])), + ]; + + for (input, shift, expected) in check { + let res = input >> shift; + if res != expected { + errors.push((input, shift, res, expected)); + } + } + + for (a, b, res, expected) in &errors { + eprintln!( + "FAILURE: {} >> {b} = {} got {}", + hexu(*a), + hexu(*expected), + hexu(*res), + ); + } + assert!(errors.is_empty()); +} From accd88fdc6bda37af8581f486a23b10cf649d58e Mon 
Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 19 Aug 2024 15:35:49 -0500 Subject: [PATCH 0834/1459] Update bounds and docs for the `Float` trait Add some bounds to integer types that allow our function trait bounds to be slightly less verbose. Also clarify documentation and remove a redundant operation. --- src/float/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/float/mod.rs b/src/float/mod.rs index 847373205..704bba0c0 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -31,10 +31,10 @@ pub(crate) trait Float: + ops::Rem { /// A uint of the same width as the float - type Int: Int; + type Int: Int; /// A int of the same width as the float - type SignedInt: Int; + type SignedInt: Int + MinInt; /// An int capable of containing the exponent bits plus a sign bit. This is signed. type ExpInt: Int; @@ -51,7 +51,7 @@ pub(crate) trait Float: /// The bitwidth of the exponent const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1; - /// The maximum value of the exponent + /// The saturated value of the exponent (infinite representation), in the rightmost postiion. const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1; /// The exponent bias value @@ -83,7 +83,7 @@ pub(crate) trait Float: /// Returns true if the sign is negative fn is_sign_negative(self) -> bool; - /// Returns the exponent with bias + /// Returns the exponent, not adjusting for bias. fn exp(self) -> Self::ExpInt; /// Returns the significand with no implicit bit (or the "fractional" part) @@ -175,7 +175,7 @@ macro_rules! 
float_impl { fn normalize(significand: Self::Int) -> (i32, Self::Int) { let shift = significand .leading_zeros() - .wrapping_sub((Self::Int::ONE << Self::SIGNIFICAND_BITS).leading_zeros()); + .wrapping_sub(Self::EXPONENT_BITS); ( 1i32.wrapping_sub(shift as i32), significand << shift as Self::Int, From 8990af83a8356ef4347b65cad770e27d1e4bdd88 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 19 Aug 2024 15:40:43 -0500 Subject: [PATCH 0835/1459] Make float division generic Float division currently has a separate `div32` and `div64` for `f32` and `f64`, respectively. Combine these to make use of generics. This will make it easier to support `f128` division, and reduces a lot of redundant code. This includes a simplification of division tests. --- src/float/div.rs | 1027 +++++++++++++----------------------- testcrate/tests/div_rem.rs | 23 +- 2 files changed, 366 insertions(+), 684 deletions(-) diff --git a/src/float/div.rs b/src/float/div.rs index 2a57ee1a0..4aec3418f 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -1,518 +1,149 @@ -// The functions are complex with many branches, and explicit -// `return`s makes it clear where function exit points are -#![allow(clippy::needless_return)] - -use crate::float::Float; -use crate::int::{CastInto, DInt, HInt, Int, MinInt}; +//! Floating point division routines. +//! +//! This module documentation gives an overview of the method used. More documentation is inline. +//! +//! # Relevant notation +//! +//! - `m_a`: the mantissa of `a`, in base 2 +//! - `p_a`: the exponent of `a`, in base 2. I.e. `a = m_a * 2^p_a` +//! - `uqN` (e.g. `uq1`): this refers to Q notation for fixed-point numbers. UQ1.31 is an unsigned +//! fixed-point number with 1 integral bit, and 31 decimal bits. A `uqN` variable of type `uM` +//! will have N bits of integer and M-N bits of fraction. +//! - `hw`: half width, i.e. for `f64` this will be a `u32`. +//! - `x` is the best estimate of `1/m_b` +//! +//! # Method Overview +//! +//! 
Division routines must solve for `a / b`, which is `res = m_a*2^p_a / m_b*2^p_b`. The basic +//! process is as follows: +//! +//! - Rearange the exponent and significand to simplify the operations: +//! `res = (m_a / m_b) * 2^{p_a - p_b}`. +//! - Check for early exits (infinity, zero, etc). +//! - If `a` or `b` are subnormal, normalize by shifting the mantissa and adjusting the exponent. +//! - Set the implicit bit so math is correct. +//! - Shift mantissa significant digits (with implicit bit) fully left such that fixed-point UQ1 +//! or UQ0 numbers can be used for mantissa math. These will have greater precision than the +//! actual mantissa, which is important for correct rounding. +//! - Calculate the reciprocal of `m_b`, `x`. +//! - Use the reciprocal to multiply rather than divide: `res = m_a * x_b * 2^{p_a - p_b}`. +//! - Reapply rounding. +//! +//! # Reciprocal calculation +//! +//! Calculating the reciprocal is the most complicated part of this process. It uses the +//! [Newton-Raphson method], which picks an initial estimation (of the reciprocal) and performs +//! a number of iterations to increase its precision. +//! +//! In general, Newton's method takes the following form: +//! +//! ```text +//! `x_n` is a guess or the result of a previous iteration. Increasing `n` converges to the +//! desired result. +//! +//! The result approaches a zero of `f(x)` by applying a correction to the previous gues. +//! +//! x_{n+1} = x_n - f(x_n) / f'(x_n) +//! ``` +//! +//! Applying this to find the reciprocal: +//! +//! ```text +//! 1 / x = b +//! +//! Rearrange so we can solve by finding a zero +//! 0 = (1 / x) - b = f(x) +//! +//! f'(x) = -x^{-2} +//! +//! x_{n+1} = 2*x_n - b*x_n^2 +//! ``` +//! +//! This is a process that can be repeated to calculate the reciprocal with enough precision to +//! achieve a correctly rounded result for the overall division operation. The maximum required +//! number of iterations is known since precision doubles with each iteration. 
+//! +//! # Half-width operations +//! +//! Calculating the reciprocal requires widening multiplication and performing arithmetic on the +//! results, meaning that emulated integer arithmetic on `u128` (for `f64`) and `u256` (for `f128`) +//! gets used instead of native math. +//! +//! To make this more efficient, all but the final operation can be computed using half-width +//! integers. For example, rather than computing four iterations using 128-bit integers for `f64`, +//! we can instead perform three iterations using native 64-bit integers and only one final +//! iteration using the full 128 bits. +//! +//! This works because of precision doubling. Some leeway is allowed here because the fixed-point +//! number has more bits than the final mantissa will. +//! +//! [Newton-Raphson method]: https://en.wikipedia.org/wiki/Newton%27s_method use super::HalfRep; +use crate::float::Float; +use crate::int::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; +use core::mem::size_of; +use core::ops; -fn div32(a: F, b: F) -> F -where - u32: CastInto, - F::Int: CastInto, - i32: CastInto, - F::Int: CastInto, - F::Int: HInt, - ::Int: core::ops::Mul, -{ - const NUMBER_OF_HALF_ITERATIONS: usize = 0; - const NUMBER_OF_FULL_ITERATIONS: usize = 3; - const USE_NATIVE_FULL_ITERATIONS: bool = true; - - let one = F::Int::ONE; - let zero = F::Int::ZERO; - let hw = F::BITS / 2; - let lo_mask = u32::MAX >> hw; - - let significand_bits = F::SIGNIFICAND_BITS; - let max_exponent = F::EXPONENT_MAX; - - let exponent_bias = F::EXPONENT_BIAS; - - let implicit_bit = F::IMPLICIT_BIT; - let significand_mask = F::SIGNIFICAND_MASK; - let sign_bit = F::SIGN_MASK as F::Int; - let abs_mask = sign_bit - one; - let exponent_mask = F::EXPONENT_MASK; - let inf_rep = exponent_mask; - let quiet_bit = implicit_bit >> 1; - let qnan_rep = exponent_mask | quiet_bit; - - #[inline(always)] - fn negate_u32(a: u32) -> u32 { - (::wrapping_neg(a as i32)) as u32 - } - - let a_rep = a.repr(); - let b_rep = b.repr(); - - let 
a_exponent = (a_rep >> significand_bits) & max_exponent.cast(); - let b_exponent = (b_rep >> significand_bits) & max_exponent.cast(); - let quotient_sign = (a_rep ^ b_rep) & sign_bit; - - let mut a_significand = a_rep & significand_mask; - let mut b_significand = b_rep & significand_mask; - let mut scale = 0; - - // Detect if a or b is zero, denormal, infinity, or NaN. - if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() - || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() - { - let a_abs = a_rep & abs_mask; - let b_abs = b_rep & abs_mask; - - // NaN / anything = qNaN - if a_abs > inf_rep { - return F::from_repr(a_rep | quiet_bit); - } - // anything / NaN = qNaN - if b_abs > inf_rep { - return F::from_repr(b_rep | quiet_bit); - } - - if a_abs == inf_rep { - if b_abs == inf_rep { - // infinity / infinity = NaN - return F::from_repr(qnan_rep); - } else { - // infinity / anything else = +/- infinity - return F::from_repr(a_abs | quotient_sign); - } - } - - // anything else / infinity = +/- 0 - if b_abs == inf_rep { - return F::from_repr(quotient_sign); - } - - if a_abs == zero { - if b_abs == zero { - // zero / zero = NaN - return F::from_repr(qnan_rep); - } else { - // zero / anything else = +/- zero - return F::from_repr(quotient_sign); - } - } - - // anything else / zero = +/- infinity - if b_abs == zero { - return F::from_repr(inf_rep | quotient_sign); - } - - // one or both of a or b is denormal, the other (if applicable) is a - // normal number. Renormalize one or both of a and b, and set scale to - // include the necessary exponent adjustment. - if a_abs < implicit_bit { - let (exponent, significand) = F::normalize(a_significand); - scale += exponent; - a_significand = significand; - } - - if b_abs < implicit_bit { - let (exponent, significand) = F::normalize(b_significand); - scale -= exponent; - b_significand = significand; - } - } - - // Set the implicit significand bit. 
If we fell through from the - // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything. - a_significand |= implicit_bit; - b_significand |= implicit_bit; - - let written_exponent: i32 = CastInto::::cast( - a_exponent - .wrapping_sub(b_exponent) - .wrapping_add(scale.cast()), - ) - .wrapping_add(exponent_bias) as i32; - let b_uq1 = b_significand << (F::BITS - significand_bits - 1); - - // Align the significand of b as a UQ1.(n-1) fixed-point number in the range - // [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax - // polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2. - // The max error for this approximation is achieved at endpoints, so - // abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289..., - // which is about 4.5 bits. - // The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571... - - // Then, refine the reciprocal estimate using a quadratically converging - // Newton-Raphson iteration: - // x_{n+1} = x_n * (2 - x_n * b) - // - // Let b be the original divisor considered "in infinite precision" and - // obtained from IEEE754 representation of function argument (with the - // implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in - // UQ1.(W-1). - // - // Let b_hw be an infinitely precise number obtained from the highest (HW-1) - // bits of divisor significand (with the implicit bit set). Corresponds to - // half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated** - // version of b_UQ1. - // - // Let e_n := x_n - 1/b_hw - // E_n := x_n - 1/b - // abs(E_n) <= abs(e_n) + (1/b_hw - 1/b) - // = abs(e_n) + (b - b_hw) / (b*b_hw) - // <= abs(e_n) + 2 * 2^-HW - - // rep_t-sized iterations may be slower than the corresponding half-width - // variant depending on the handware and whether single/double/quad precision - // is selected. 
- // NB: Using half-width iterations increases computation errors due to - // rounding, so error estimations have to be computed taking the selected - // mode into account! - - #[allow(clippy::absurd_extreme_comparisons)] - let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 { - // Starting with (n-1) half-width iterations - let b_uq1_hw: u16 = - (CastInto::::cast(b_significand) >> (significand_bits + 1 - hw)) as u16; - - // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW - // with W0 being either 16 or 32 and W0 <= HW. - // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which - // b/2 is subtracted to obtain x0) wrapped to [0, 1) range. - - // HW is at least 32. Shifting into the highest bits if needed. - let c_hw = (0x7504_u32 as u16).wrapping_shl(hw.wrapping_sub(32)); - - // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572, - // so x0 fits to UQ0.HW without wrapping. - let x_uq0_hw: u16 = { - let mut x_uq0_hw: u16 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */); - // An e_0 error is comprised of errors due to - // * x0 being an inherently imprecise first approximation of 1/b_hw - // * C_hw being some (irrational) number **truncated** to W0 bits - // Please note that e_0 is calculated against the infinitely precise - // reciprocal of b_hw (that is, **truncated** version of b). - // - // e_0 <= 3/4 - 1/sqrt(2) + 2^-W0 - - // By construction, 1 <= b < 2 - // f(x) = x * (2 - b*x) = 2*x - b*x^2 - // f'(x) = 2 * (1 - b*x) - // - // On the [0, 1] interval, f(0) = 0, - // then it increses until f(1/b) = 1 / b, maximum on (0, 1), - // then it decreses to f(1) = 2 - b - // - // Let g(x) = x - f(x) = b*x^2 - x. - // On (0, 1/b), g(x) < 0 <=> f(x) > x - // On (1/b, 1], g(x) > 0 <=> f(x) < x - // - // For half-width iterations, b_hw is used instead of b. 
- #[allow(clippy::reversed_empty_ranges)] - for _ in 0..NUMBER_OF_HALF_ITERATIONS { - // corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp - // of corr_UQ1_hw. - // "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1). - // On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided - // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is - // expected to be strictly positive because b_UQ1_hw has its highest bit set - // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1). - let corr_uq1_hw: u16 = - 0.wrapping_sub((x_uq0_hw as u32).wrapping_mul(b_uq1_hw.cast()) >> hw) as u16; - - // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally - // obtaining an UQ1.(HW-1) number and proving its highest bit could be - // considered to be 0 to be able to represent it in UQ0.HW. - // From the above analysis of f(x), if corr_UQ1_hw would be represented - // without any intermediate loss of precision (that is, in twice_rep_t) - // x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly - // less otherwise. On the other hand, to obtain [1.]000..., one have to pass - // 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due - // to 1.0 being not representable as UQ0.HW). - // The fact corr_UQ1_hw was virtually round up (due to result of - // multiplication being **first** truncated, then negated - to improve - // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw. - x_uq0_hw = ((x_uq0_hw as u32).wrapping_mul(corr_uq1_hw as u32) >> (hw - 1)) as u16; - // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t - // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after - // any number of iterations, so just subtract 2 from the reciprocal - // approximation after last iteration. 
- - // In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW: - // corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1 - // = 1 - e_n * b_hw + 2*eps1 - // x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2 - // = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2 - // = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2 - // e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2 - // = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw - // \------ >0 -------/ \-- >0 ---/ - // abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U) - } - // For initial half-width iterations, U = 2^-HW - // Let abs(e_n) <= u_n * U, - // then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U) - // u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2) - - // Account for possible overflow (see above). For an overflow to occur for the - // first time, for "ideal" corr_UQ1_hw (that is, without intermediate - // truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum - // value representable in UQ0.HW or less by 1. This means that 1/b_hw have to - // be not below that value (see g(x) above), so it is safe to decrement just - // once after the final iteration. On the other hand, an effective value of - // divisor changes after this point (from b_hw to b), so adjust here. - x_uq0_hw.wrapping_sub(1_u16) - }; - - // Error estimations for full-precision iterations are calculated just - // as above, but with U := 2^-W and taking extra decrementing into account. - // We need at least one such iteration. - - // Simulating operations on a twice_rep_t to perform a single final full-width - // iteration. Using ad-hoc multiplication implementations to take advantage - // of particular structure of operands. 
- - let blo: u32 = (CastInto::::cast(b_uq1)) & lo_mask; - // x_UQ0 = x_UQ0_hw * 2^HW - 1 - // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1 - // - // <--- higher half ---><--- lower half ---> - // [x_UQ0_hw * b_UQ1_hw] - // + [ x_UQ0_hw * blo ] - // - [ b_UQ1 ] - // = [ result ][.... discarded ...] - let corr_uq1 = negate_u32( - (x_uq0_hw as u32) * (b_uq1_hw as u32) + (((x_uq0_hw as u32) * (blo)) >> hw) - 1, - ); // account for *possible* carry - let lo_corr = corr_uq1 & lo_mask; - let hi_corr = corr_uq1 >> hw; - // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1 - let mut x_uq0: ::Int = ((((x_uq0_hw as u32) * hi_corr) << 1) - .wrapping_add(((x_uq0_hw as u32) * lo_corr) >> (hw - 1)) - .wrapping_sub(2)) - .cast(); // 1 to account for the highest bit of corr_UQ1 can be 1 - // 1 to account for possible carry - // Just like the case of half-width iterations but with possibility - // of overflowing by one extra Ulp of x_UQ0. - x_uq0 -= one; - // ... 
and then traditional fixup by 2 should work - - // On error estimation: - // abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW - // + (2^-HW + 2^-W)) - // abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW - - // Then like for the half-width iterations: - // With 0 <= eps1, eps2 < 2^-W - // E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b - // abs(E_N) <= 2^-W * [ 4 * abs(E_{N-1}) + max(2 * abs(E_{N-1})^2 * 2^W + 4, 8)) ] - // abs(E_N) <= 2^-W * [ 4 * (u_{N-1} + 3.01) * 2^-HW + max(4 + 2 * (u_{N-1} + 3.01)^2, 8) ] - x_uq0 - } else { - // C is (3/4 + 1/sqrt(2)) - 1 truncated to 32 fractional bits as UQ0.n - let c: ::Int = (0x7504F333 << (F::BITS - 32)).cast(); - let x_uq0: ::Int = c.wrapping_sub(b_uq1); - // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-32 - x_uq0 - }; - - let mut x_uq0 = if USE_NATIVE_FULL_ITERATIONS { - for _ in 0..NUMBER_OF_FULL_ITERATIONS { - let corr_uq1: u32 = 0.wrapping_sub( - ((CastInto::::cast(x_uq0) as u64) * (CastInto::::cast(b_uq1) as u64)) - >> F::BITS, - ) as u32; - x_uq0 = ((((CastInto::::cast(x_uq0) as u64) * (corr_uq1 as u64)) >> (F::BITS - 1)) - as u32) - .cast(); - } - x_uq0 - } else { - // not using native full iterations - x_uq0 - }; - - // Finally, account for possible overflow, as explained above. 
- x_uq0 = x_uq0.wrapping_sub(2.cast()); - - // u_n for different precisions (with N-1 half-width iterations): - // W0 is the precision of C - // u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW - - // Estimated with bc: - // define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; } - // define half2(un) { return 2.0 * un / 2.0^hw + 2.0; } - // define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; } - // define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; } - - // | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1) - // u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797 - // u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440 - // u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317 - // u_3 | < 7.31 | | < 7.31 | < 27054456580 - // u_4 | | | | < 80.4 - // Final (U_N) | same as u_3 | < 72 | < 218 | < 13920 - - // Add 2 to U_N due to final decrement. - - let reciprocal_precision: ::Int = 10.cast(); - - // Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W - let x_uq0 = x_uq0 - reciprocal_precision; - // Now 1/b - (2*P) * 2^-W < x < 1/b - // FIXME Is x_UQ0 still >= 0.5? - - let mut quotient: ::Int = x_uq0.widen_mul(a_significand << 1).hi(); - // Now, a/b - 4*P * 2^-W < q < a/b for q= in UQ1.(SB+1+W). - - // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1), - // adjust it to be in [1.0, 2.0) as UQ1.SB. - let (mut residual, written_exponent) = if quotient < (implicit_bit << 1) { - // Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB, - // effectively doubling its value as well as its error estimation. - let residual_lo = (a_significand << (significand_bits + 1)).wrapping_sub( - (CastInto::::cast(quotient).wrapping_mul(CastInto::::cast(b_significand))) - .cast(), - ); - a_significand <<= 1; - (residual_lo, written_exponent.wrapping_sub(1)) - } else { - // Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it - // to UQ1.SB by right shifting by 1. Least significant bit is omitted. 
- quotient >>= 1; - let residual_lo = (a_significand << significand_bits).wrapping_sub( - (CastInto::::cast(quotient).wrapping_mul(CastInto::::cast(b_significand))) - .cast(), - ); - (residual_lo, written_exponent) - }; - - //drop mutability - let quotient = quotient; - - // NB: residualLo is calculated above for the normal result case. - // It is re-computed on denormal path that is expected to be not so - // performance-sensitive. - - // Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB - // Each NextAfter() increments the floating point value by at least 2^-SB - // (more, if exponent was incremented). - // Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint): - // q - // | | * | | | | | - // <---> 2^t - // | | | | | * | | - // q - // To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB. - // (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB - // (8*P) * 2^-W < 0.5 * 2^-SB - // P < 2^(W-4-SB) - // Generally, for at most R NextAfter() to be enough, - // P < (2*R - 1) * 2^(W-4-SB) - // For f32 (0+3): 10 < 32 (OK) - // For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required - // For f64: 220 < 256 (OK) - // For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required) - - // If we have overflowed the exponent, return infinity - if written_exponent >= max_exponent as i32 { - return F::from_repr(inf_rep | quotient_sign); - } - - // Now, quotient <= the correctly-rounded result - // and may need taking NextAfter() up to 3 times (see error estimates above) - // r = a - b * q - let abs_result = if written_exponent > 0 { - let mut ret = quotient & significand_mask; - ret |= ((written_exponent as u32) << significand_bits).cast(); - residual <<= 1; - ret - } else { - if (significand_bits as i32 + written_exponent) < 0 { - return F::from_repr(quotient_sign); - } - let ret = quotient.wrapping_shr(negate_u32(CastInto::::cast(written_exponent)) + 1); - residual = (CastInto::::cast( - 
a_significand.wrapping_shl( - significand_bits.wrapping_add(CastInto::::cast(written_exponent)), - ), - ) - .wrapping_sub( - (CastInto::::cast(ret).wrapping_mul(CastInto::::cast(b_significand))) << 1, - )) - .cast(); - ret - }; - // Round - let abs_result = { - residual += abs_result & one; // tie to even - // The above line conditionally turns the below LT comparison into LTE - - if residual > b_significand { - abs_result + one - } else { - abs_result - } - }; - F::from_repr(abs_result | quotient_sign) -} - -fn div64(a: F, b: F) -> F +fn div(a: F, b: F) -> F where - F::Int: CastInto, F::Int: CastInto, - F::Int: CastInto>, F::Int: From>, F::Int: From, - F::Int: CastInto, - F::Int: CastInto, F::Int: HInt + DInt, + ::D: ops::Shr::D>, + F::Int: From, u16: CastInto, i32: CastInto, - i64: CastInto, u32: CastInto, - u64: CastInto, - u64: CastInto>, + u128: CastInto>, { - const NUMBER_OF_HALF_ITERATIONS: usize = 3; - const NUMBER_OF_FULL_ITERATIONS: usize = 1; - const USE_NATIVE_FULL_ITERATIONS: bool = false; - let one = F::Int::ONE; let zero = F::Int::ZERO; + let one_hw = HalfRep::::ONE; + let zero_hw = HalfRep::::ZERO; let hw = F::BITS / 2; let lo_mask = F::Int::MAX >> hw; let significand_bits = F::SIGNIFICAND_BITS; - let max_exponent = F::EXPONENT_MAX; + // Saturated exponent, representing infinity + let exponent_sat: F::Int = F::EXPONENT_MAX.cast(); let exponent_bias = F::EXPONENT_BIAS; - let implicit_bit = F::IMPLICIT_BIT; let significand_mask = F::SIGNIFICAND_MASK; - let sign_bit = F::SIGN_MASK as F::Int; + let sign_bit = F::SIGN_MASK; let abs_mask = sign_bit - one; let exponent_mask = F::EXPONENT_MASK; let inf_rep = exponent_mask; let quiet_bit = implicit_bit >> 1; let qnan_rep = exponent_mask | quiet_bit; + let (mut half_iterations, full_iterations) = get_iterations::(); + let recip_precision = reciprocal_precision::(); - #[inline(always)] - fn negate_u64(a: u64) -> u64 { - (::wrapping_neg(a as i64)) as u64 + if F::BITS == 128 { + // FIXME(tgross35): f128 seems to 
require one more half iteration than expected + half_iterations += 1; } let a_rep = a.repr(); let b_rep = b.repr(); - let a_exponent = (a_rep >> significand_bits) & max_exponent.cast(); - let b_exponent = (b_rep >> significand_bits) & max_exponent.cast(); + // Exponent numeric representation, not accounting for bias + let a_exponent = (a_rep >> significand_bits) & exponent_sat; + let b_exponent = (b_rep >> significand_bits) & exponent_sat; let quotient_sign = (a_rep ^ b_rep) & sign_bit; let mut a_significand = a_rep & significand_mask; let mut b_significand = b_rep & significand_mask; - let mut scale = 0; + + // The exponent of our final result in its encoded form + let mut res_exponent: i32 = + i32::cast_from(a_exponent) - i32::cast_from(b_exponent) + (exponent_bias as i32); // Detect if a or b is zero, denormal, infinity, or NaN. - if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() - || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + if a_exponent.wrapping_sub(one) >= (exponent_sat - one) + || b_exponent.wrapping_sub(one) >= (exponent_sat - one) { let a_abs = a_rep & abs_mask; let b_abs = b_rep & abs_mask; @@ -521,6 +152,7 @@ where if a_abs > inf_rep { return F::from_repr(a_rep | quiet_bit); } + // anything / NaN = qNaN if b_abs > inf_rep { return F::from_repr(b_rep | quiet_bit); @@ -556,34 +188,31 @@ where return F::from_repr(inf_rep | quotient_sign); } - // one or both of a or b is denormal, the other (if applicable) is a - // normal number. Renormalize one or both of a and b, and set scale to - // include the necessary exponent adjustment. + // a is denormal. Renormalize it and set the scale to include the necessary exponent + // adjustment. if a_abs < implicit_bit { let (exponent, significand) = F::normalize(a_significand); - scale += exponent; + res_exponent += exponent; a_significand = significand; } + // b is denormal. Renormalize it and set the scale to include the necessary exponent + // adjustment. 
if b_abs < implicit_bit { let (exponent, significand) = F::normalize(b_significand); - scale -= exponent; + res_exponent -= exponent; b_significand = significand; } } - // Set the implicit significand bit. If we fell through from the + // Set the implicit significand bit. If we fell through from the // denormal path it was already set by normalize( ), but setting it twice // won't hurt anything. a_significand |= implicit_bit; b_significand |= implicit_bit; - let written_exponent: i64 = CastInto::::cast( - a_exponent - .wrapping_sub(b_exponent) - .wrapping_add(scale.cast()), - ) - .wrapping_add(exponent_bias as u64) as i64; + // Transform to a fixed-point representation by shifting the significand to the high bits. We + // know this is in the range [1.0, 2.0) since the implicit bit is set to 1 above. let b_uq1 = b_significand << (F::BITS - significand_bits - 1); // Align the significand of b as a UQ1.(n-1) fixed-point number in the range @@ -593,7 +222,7 @@ where // abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289..., // which is about 4.5 bits. // The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571... - + // // Then, refine the reciprocal estimate using a quadratically converging // Newton-Raphson iteration: // x_{n+1} = x_n * (2 - x_n * b) @@ -613,123 +242,116 @@ where // abs(E_n) <= abs(e_n) + (1/b_hw - 1/b) // = abs(e_n) + (b - b_hw) / (b*b_hw) // <= abs(e_n) + 2 * 2^-HW - + // // rep_t-sized iterations may be slower than the corresponding half-width // variant depending on the handware and whether single/double/quad precision // is selected. + // // NB: Using half-width iterations increases computation errors due to // rounding, so error estimations have to be computed taking the selected // mode into account! 
- - let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 { + let mut x_uq0 = if half_iterations > 0 { // Starting with (n-1) half-width iterations - let b_uq1_hw: HalfRep = CastInto::>::cast( - CastInto::::cast(b_significand) >> (significand_bits + 1 - hw), - ); + let b_uq1_hw: HalfRep = b_uq1.hi(); // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW // with W0 being either 16 or 32 and W0 <= HW. // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which // b/2 is subtracted to obtain x0) wrapped to [0, 1) range. + let c_hw = c_hw::(); - // HW is at least 32. Shifting into the highest bits if needed. - let c_hw = (CastInto::>::cast(0x7504F333_u64)).wrapping_shl(hw.wrapping_sub(32)); + // Check that the top bit is set, i.e. value is within `[1, 2)`. + debug_assert!(b_uq1_hw & one_hw << (HalfRep::::BITS - 1) > zero_hw); // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572, // so x0 fits to UQ0.HW without wrapping. - let x_uq0_hw: HalfRep = { - let mut x_uq0_hw: HalfRep = - c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */); - // dbg!(x_uq0_hw); - // An e_0 error is comprised of errors due to - // * x0 being an inherently imprecise first approximation of 1/b_hw - // * C_hw being some (irrational) number **truncated** to W0 bits - // Please note that e_0 is calculated against the infinitely precise - // reciprocal of b_hw (that is, **truncated** version of b). - // - // e_0 <= 3/4 - 1/sqrt(2) + 2^-W0 - - // By construction, 1 <= b < 2 - // f(x) = x * (2 - b*x) = 2*x - b*x^2 - // f'(x) = 2 * (1 - b*x) + let mut x_uq0_hw: HalfRep = + c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */); + + // An e_0 error is comprised of errors due to + // * x0 being an inherently imprecise first approximation of 1/b_hw + // * C_hw being some (irrational) number **truncated** to W0 bits + // Please note that e_0 is calculated against the infinitely precise + // reciprocal of b_hw (that is, **truncated** version of b). 
+ // + // e_0 <= 3/4 - 1/sqrt(2) + 2^-W0 + // + // By construction, 1 <= b < 2 + // f(x) = x * (2 - b*x) = 2*x - b*x^2 + // f'(x) = 2 * (1 - b*x) + // + // On the [0, 1] interval, f(0) = 0, + // then it increses until f(1/b) = 1 / b, maximum on (0, 1), + // then it decreses to f(1) = 2 - b + // + // Let g(x) = x - f(x) = b*x^2 - x. + // On (0, 1/b), g(x) < 0 <=> f(x) > x + // On (1/b, 1], g(x) > 0 <=> f(x) < x + // + // For half-width iterations, b_hw is used instead of b. + for _ in 0..half_iterations { + // corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp + // of corr_UQ1_hw. + // "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1). + // On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided + // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is + // expected to be strictly positive because b_UQ1_hw has its highest bit set + // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1). // - // On the [0, 1] interval, f(0) = 0, - // then it increses until f(1/b) = 1 / b, maximum on (0, 1), - // then it decreses to f(1) = 2 - b + // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally + // obtaining an UQ1.(HW-1) number and proving its highest bit could be + // considered to be 0 to be able to represent it in UQ0.HW. + // From the above analysis of f(x), if corr_UQ1_hw would be represented + // without any intermediate loss of precision (that is, in twice_rep_t) + // x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly + // less otherwise. On the other hand, to obtain [1.]000..., one have to pass + // 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due + // to 1.0 being not representable as UQ0.HW). + // The fact corr_UQ1_hw was virtually round up (due to result of + // multiplication being **first** truncated, then negated - to improve + // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw. 
// - // Let g(x) = x - f(x) = b*x^2 - x. - // On (0, 1/b), g(x) < 0 <=> f(x) > x - // On (1/b, 1], g(x) > 0 <=> f(x) < x + // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t + // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after + // any number of iterations, so just subtract 2 from the reciprocal + // approximation after last iteration. // - // For half-width iterations, b_hw is used instead of b. - for _ in 0..NUMBER_OF_HALF_ITERATIONS { - // corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp - // of corr_UQ1_hw. - // "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1). - // On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided - // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is - // expected to be strictly positive because b_UQ1_hw has its highest bit set - // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1). - let corr_uq1_hw: HalfRep = CastInto::>::cast(zero.wrapping_sub( - ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(b_uq1_hw))) >> hw, - )); - // dbg!(corr_uq1_hw); - - // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally - // obtaining an UQ1.(HW-1) number and proving its highest bit could be - // considered to be 0 to be able to represent it in UQ0.HW. - // From the above analysis of f(x), if corr_UQ1_hw would be represented - // without any intermediate loss of precision (that is, in twice_rep_t) - // x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly - // less otherwise. On the other hand, to obtain [1.]000..., one have to pass - // 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due - // to 1.0 being not representable as UQ0.HW). - // The fact corr_UQ1_hw was virtually round up (due to result of - // multiplication being **first** truncated, then negated - to improve - // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw. 
- x_uq0_hw = ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(corr_uq1_hw)) - >> (hw - 1)) - .cast(); - // dbg!(x_uq0_hw); - // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t - // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after - // any number of iterations, so just subtract 2 from the reciprocal - // approximation after last iteration. - - // In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW: - // corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1 - // = 1 - e_n * b_hw + 2*eps1 - // x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2 - // = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2 - // = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2 - // e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2 - // = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw - // \------ >0 -------/ \-- >0 ---/ - // abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U) - } - // For initial half-width iterations, U = 2^-HW - // Let abs(e_n) <= u_n * U, - // then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U) - // u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2) - - // Account for possible overflow (see above). For an overflow to occur for the - // first time, for "ideal" corr_UQ1_hw (that is, without intermediate - // truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum - // value representable in UQ0.HW or less by 1. This means that 1/b_hw have to - // be not below that value (see g(x) above), so it is safe to decrement just - // once after the final iteration. On the other hand, an effective value of - // divisor changes after this point (from b_hw to b), so adjust here. 
- x_uq0_hw.wrapping_sub(HalfRep::::ONE) - }; + // In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW: + // corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1 + // = 1 - e_n * b_hw + 2*eps1 + // x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2 + // = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2 + // = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2 + // e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2 + // = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw + // \------ >0 -------/ \-- >0 ---/ + // abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U) + x_uq0_hw = next_guess(x_uq0_hw, b_uq1_hw); + } + + // For initial half-width iterations, U = 2^-HW + // Let abs(e_n) <= u_n * U, + // then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U) + // u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2) + // + // Account for possible overflow (see above). For an overflow to occur for the + // first time, for "ideal" corr_UQ1_hw (that is, without intermediate + // truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum + // value representable in UQ0.HW or less by 1. This means that 1/b_hw have to + // be not below that value (see g(x) above), so it is safe to decrement just + // once after the final iteration. On the other hand, an effective value of + // divisor changes after this point (from b_hw to b), so adjust here. + x_uq0_hw = x_uq0_hw.wrapping_sub(one_hw); // Error estimations for full-precision iterations are calculated just // as above, but with U := 2^-W and taking extra decrementing into account. // We need at least one such iteration. - + // // Simulating operations on a twice_rep_t to perform a single final full-width // iteration. Using ad-hoc multiplication implementations to take advantage // of particular structure of operands. 
let blo: F::Int = b_uq1 & lo_mask; + // x_UQ0 = x_UQ0_hw * 2^HW - 1 // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1 // @@ -742,16 +364,19 @@ where + ((F::Int::from(x_uq0_hw) * blo) >> hw)) .wrapping_sub(one) .wrapping_neg(); // account for *possible* carry + let lo_corr: F::Int = corr_uq1 & lo_mask; let hi_corr: F::Int = corr_uq1 >> hw; + // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1 let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1) .wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1)) + // 1 to account for the highest bit of corr_UQ1 can be 1 + // 1 to account for possible carry + // Just like the case of half-width iterations but with possibility + // of overflowing by one extra Ulp of x_UQ0. .wrapping_sub(F::Int::from(2u8)); - // 1 to account for the highest bit of corr_UQ1 can be 1 - // 1 to account for possible carry - // Just like the case of half-width iterations but with possibility - // of overflowing by one extra Ulp of x_UQ0. + x_uq0 -= one; // ... 
and then traditional fixup by 2 should work @@ -759,7 +384,7 @@ where // abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW // + (2^-HW + 2^-W)) // abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW - + // // Then like for the half-width iterations: // With 0 <= eps1, eps2 < 2^-W // E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b @@ -768,89 +393,54 @@ where x_uq0 } else { // C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n - let c: F::Int = (0x7504F333 << (F::BITS - 32)).cast(); - let x_uq0: F::Int = c.wrapping_sub(b_uq1); - // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64 - x_uq0 - }; + let c: F::Int = F::Int::from(0x7504F333u32) << (F::BITS - 32); + let mut x_uq0: F::Int = c.wrapping_sub(b_uq1); - let mut x_uq0 = if USE_NATIVE_FULL_ITERATIONS { - for _ in 0..NUMBER_OF_FULL_ITERATIONS { - let corr_uq1: u64 = 0.wrapping_sub( - (CastInto::::cast(x_uq0) * (CastInto::::cast(b_uq1))) >> F::BITS, - ); - x_uq0 = ((((CastInto::::cast(x_uq0) as u128) * (corr_uq1 as u128)) - >> (F::BITS - 1)) as u64) - .cast(); + // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64 + // x_uq0 + for _ in 0..full_iterations { + x_uq0 = next_guess(x_uq0, b_uq1); } - x_uq0 - } else { - // not using native full iterations + x_uq0 }; // Finally, account for possible overflow, as explained above. 
x_uq0 = x_uq0.wrapping_sub(2.cast()); - // u_n for different precisions (with N-1 half-width iterations): - // W0 is the precision of C - // u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW - - // Estimated with bc: - // define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; } - // define half2(un) { return 2.0 * un / 2.0^hw + 2.0; } - // define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; } - // define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; } - - // | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1) - // u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797 - // u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440 - // u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317 - // u_3 | < 7.31 | | < 7.31 | < 27054456580 - // u_4 | | | | < 80.4 - // Final (U_N) | same as u_3 | < 72 | < 218 | < 13920 - - // Add 2 to U_N due to final decrement. - - let reciprocal_precision: ::Int = 220.cast(); - // Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W - let x_uq0 = x_uq0 - reciprocal_precision; + x_uq0 -= recip_precision.cast(); + // Now 1/b - (2*P) * 2^-W < x < 1/b // FIXME Is x_UQ0 still >= 0.5? - let mut quotient: F::Int = x_uq0.widen_mul(a_significand << 1).hi(); + let mut quotient_uq1: F::Int = x_uq0.widen_mul(a_significand << 1).hi(); // Now, a/b - 4*P * 2^-W < q < a/b for q= in UQ1.(SB+1+W). // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1), // adjust it to be in [1.0, 2.0) as UQ1.SB. - let (mut residual, written_exponent) = if quotient < (implicit_bit << 1) { + let mut residual_lo = if quotient_uq1 < (implicit_bit << 1) { // Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB, // effectively doubling its value as well as its error estimation. 
- let residual_lo = (a_significand << (significand_bits + 1)).wrapping_sub( - (CastInto::::cast(quotient).wrapping_mul(CastInto::::cast(b_significand))) - .cast(), - ); + let residual_lo = (a_significand << (significand_bits + 1)) + .wrapping_sub(quotient_uq1.wrapping_mul(b_significand)); + res_exponent -= 1; a_significand <<= 1; - (residual_lo, written_exponent.wrapping_sub(1)) + residual_lo } else { // Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it // to UQ1.SB by right shifting by 1. Least significant bit is omitted. - quotient >>= 1; - let residual_lo = (a_significand << significand_bits).wrapping_sub( - (CastInto::::cast(quotient).wrapping_mul(CastInto::::cast(b_significand))) - .cast(), - ); - (residual_lo, written_exponent) + quotient_uq1 >>= 1; + (a_significand << significand_bits).wrapping_sub(quotient_uq1.wrapping_mul(b_significand)) }; - //drop mutability - let quotient = quotient; + // drop mutability + let quotient = quotient_uq1; // NB: residualLo is calculated above for the normal result case. // It is re-computed on denormal path that is expected to be not so // performance-sensitive. - + // // Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB // Each NextAfter() increments the floating point value by at least 2^-SB // (more, if exponent was incremented). 
@@ -870,60 +460,161 @@ where // For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required // For f64: 220 < 256 (OK) // For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required) - + // // If we have overflowed the exponent, return infinity - if written_exponent >= max_exponent as i64 { + if res_exponent >= i32::cast_from(exponent_sat) { return F::from_repr(inf_rep | quotient_sign); } // Now, quotient <= the correctly-rounded result // and may need taking NextAfter() up to 3 times (see error estimates above) // r = a - b * q - let abs_result = if written_exponent > 0 { + let mut abs_result = if res_exponent > 0 { let mut ret = quotient & significand_mask; - ret |= written_exponent.cast() << significand_bits; - residual <<= 1; + ret |= F::Int::from(res_exponent as u32) << significand_bits; + residual_lo <<= 1; ret } else { - if (significand_bits as i64 + written_exponent) < 0 { + if ((significand_bits as i32) + res_exponent) < 0 { return F::from_repr(quotient_sign); } - let ret = - quotient.wrapping_shr((negate_u64(CastInto::::cast(written_exponent)) + 1) as u32); - residual = (CastInto::::cast( - a_significand.wrapping_shl( - significand_bits.wrapping_add(CastInto::::cast(written_exponent)), - ), - ) - .wrapping_sub( - (CastInto::::cast(ret).wrapping_mul(CastInto::::cast(b_significand))) << 1, - )) - .cast(); + + let ret = quotient.wrapping_shr(u32::cast_from(res_exponent.wrapping_neg()) + 1); + residual_lo = a_significand + .wrapping_shl(significand_bits.wrapping_add(CastInto::::cast(res_exponent))) + .wrapping_sub(ret.wrapping_mul(b_significand) << 1); ret }; - // Round - let abs_result = { - residual += abs_result & one; // tie to even - // conditionally turns the below LT comparison into LTE - if residual > b_significand { - abs_result + one - } else { - abs_result - } - }; + + residual_lo += abs_result & one; // tie to even + // conditionally turns the below LT comparison into LTE + abs_result += u8::from(residual_lo > b_significand).into(); 
+ + if F::BITS == 128 || (F::BITS == 32 && half_iterations > 0) { + // Do not round Infinity to NaN + abs_result += + u8::from(abs_result < inf_rep && residual_lo > (2 + 1).cast() * b_significand).into(); + } + + if F::BITS == 128 { + abs_result += + u8::from(abs_result < inf_rep && residual_lo > (4 + 1).cast() * b_significand).into(); + } + F::from_repr(abs_result | quotient_sign) } +/// Calculate the number of iterations required for a float type's precision. +/// +/// This returns `(h, f)` where `h` is the number of iterations to be done using integers at half +/// the float's bit width, and `f` is the number of iterations done using integers of the float's +/// full width. This is further explained in the module documentation. +/// +/// # Requirements +/// +/// The initial estimate should have at least 8 bits of precision. If this is not true, results +/// will be inaccurate. +const fn get_iterations() -> (usize, usize) { + // Precision doubles with each iteration. Assume we start with 8 bits of precision. + let total_iterations = F::BITS.ilog2() as usize - 2; + + if 2 * size_of::() <= size_of::<*const ()>() { + // If widening multiplication will be efficient (uses word-sized integers), there is no + // reason to use half-sized iterations. + (0, total_iterations) + } else { + // Otherwise, do as many iterations as possible at half width. + (total_iterations - 1, 1) + } +} + +/// `u_n` for different precisions (with N-1 half-width iterations). 
+/// +/// W0 is the precision of C +/// u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW +/// +/// Estimated with bc: +/// +/// ```text +/// define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; } +/// define half2(un) { return 2.0 * un / 2.0^hw + 2.0; } +/// define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; } +/// define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; } +/// +/// | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1) +/// u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797 +/// u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440 +/// u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317 +/// u_3 | < 7.31 | | < 7.31 | < 27054456580 +/// u_4 | | | | < 80.4 +/// Final (U_N) | same as u_3 | < 72 | < 218 | < 13920 +/// ```` +/// +/// Add 2 to `U_N` due to final decrement. +const fn reciprocal_precision() -> u16 { + let (half_iterations, full_iterations) = get_iterations::(); + + if full_iterations < 1 { + panic!("Must have at least one full iteration"); + } + + // FIXME(tgross35): calculate this programmatically + if F::BITS == 32 && half_iterations == 2 && full_iterations == 1 { + 74u16 + } else if F::BITS == 32 && half_iterations == 0 && full_iterations == 3 { + 10 + } else if F::BITS == 64 && half_iterations == 3 && full_iterations == 1 { + 220 + } else if F::BITS == 128 && half_iterations == 4 && full_iterations == 1 { + 13922 + } else { + panic!("Invalid number of iterations") + } +} + +/// The value of `C` adjusted to half width. +/// +/// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW with W0 being either +/// 16 or 32 and W0 <= HW. That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from +/// which b/2 is subtracted to obtain x0) wrapped to [0, 1) range. 
+fn c_hw() -> HalfRep +where + F::Int: DInt, + u128: CastInto>, +{ + const C_U128: u128 = 0x7504f333f9de6108b2fb1366eaa6a542; + const { C_U128 >> (u128::BITS - >::BITS) }.cast() +} + +/// Perform one iteration at any width to approach `1/b`, given previous guess `x`. Returns +/// the next `x` as a UQ0 number. +/// +/// This is the `x_{n+1} = 2*x_n - b*x_n^2` algorithm, implemented as `x_n * (2 - b*x_n)`. It +/// uses widening multiplication to calculate the result with necessary precision. +fn next_guess(x_uq0: I, b_uq1: I) -> I +where + I: Int + HInt, + ::D: ops::Shr::D>, +{ + // `corr = 2 - b*x_n` + // + // This looks like `0 - b*x_n`. However, this works - in `UQ1`, `0.0 - x = 2.0 - x`. + let corr_uq1: I = I::ZERO.wrapping_sub(x_uq0.widen_mul(b_uq1).hi()); + + // `x_n * corr = x_n * (2 - b*x_n)` + (x_uq0.widen_mul(corr_uq1) >> (I::BITS - 1)).lo() +} + intrinsics! { #[avr_skip] #[arm_aeabi_alias = __aeabi_fdiv] pub extern "C" fn __divsf3(a: f32, b: f32) -> f32 { - div32(a, b) + div(a, b) } #[avr_skip] #[arm_aeabi_alias = __aeabi_ddiv] pub extern "C" fn __divdf3(a: f64, b: f64) -> f64 { - div64(a, b) + div(a, b) } } diff --git a/testcrate/tests/div_rem.rs b/testcrate/tests/div_rem.rs index 418e9c189..2de61c707 100644 --- a/testcrate/tests/div_rem.rs +++ b/testcrate/tests/div_rem.rs @@ -115,7 +115,13 @@ macro_rules! float { fuzz_float_2(N, |x: $f, y: $f| { let quo0: $f = apfloat_fallback!($f, $apfloat_ty, $sys_available, Div::div, x, y); let quo1: $f = $fn(x, y); - #[cfg(not(target_arch = "arm"))] + + // ARM SIMD instructions always flush subnormals to zero + if cfg!(target_arch = "arm") && + ((Float::is_subnormal(quo0)) || Float::is_subnormal(quo1)) { + return; + } + if !Float::eq_repr(quo0, quo1) { panic!( "{}({:?}, {:?}): std: {:?}, builtins: {:?}", @@ -126,21 +132,6 @@ macro_rules! 
float { quo1 ); } - - // ARM SIMD instructions always flush subnormals to zero - #[cfg(target_arch = "arm")] - if !(Float::is_subnormal(quo0) || Float::is_subnormal(quo1)) { - if !Float::eq_repr(quo0, quo1) { - panic!( - "{}({:?}, {:?}): std: {:?}, builtins: {:?}", - stringify!($fn), - x, - y, - quo0, - quo1 - ); - } - } }); } )* From 910e92fb60ef62ade0a61266ade5fc65c31bfdaa Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 19 Aug 2024 15:46:05 -0500 Subject: [PATCH 0836/1459] Add `f128` division Use the new generic division algorithm to expose `__divtf3` and `__divkf3`. --- README.md | 2 +- build.rs | 1 - examples/intrinsics.rs | 5 +++++ src/float/div.rs | 17 +++++++++++++++++ testcrate/tests/div_rem.rs | 16 ++++++++++++++++ 5 files changed, 39 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 46983a281..06137f3c7 100644 --- a/README.md +++ b/README.md @@ -222,7 +222,7 @@ of being added to Rust. - [x] addtf3.c - [x] comparetf2.c -- [ ] divtf3.c +- [x] divtf3.c - [x] extenddftf2.c - [x] extendhfsf2.c - [x] extendhftf2.c diff --git a/build.rs b/build.rs index 5ccff76e7..3b2805f83 100644 --- a/build.rs +++ b/build.rs @@ -526,7 +526,6 @@ mod c { ("__floatsitf", "floatsitf.c"), ("__floatunditf", "floatunditf.c"), ("__floatunsitf", "floatunsitf.c"), - ("__divtf3", "divtf3.c"), ("__powitf2", "powitf2.c"), ("__fe_getround", "fp_mode.c"), ("__fe_raise_inexact", "fp_mode.c"), diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 8bb707673..6dcd3820f 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -256,6 +256,10 @@ mod intrinsics { a * b } + pub fn divtf(a: f128, b: f128) -> f128 { + a / b + } + pub fn subtf(a: f128, b: f128) -> f128 { a - b } @@ -440,6 +444,7 @@ fn run() { bb(aeabi_uldivmod(bb(2), bb(3))); bb(ashlti3(bb(2), bb(2))); bb(ashrti3(bb(2), bb(2))); + bb(divtf(bb(2.), bb(2.))); bb(divti3(bb(2), bb(2))); bb(eqtf(bb(2.), bb(2.))); bb(extendhfdf(bb(2.))); diff --git a/src/float/div.rs b/src/float/div.rs index 
4aec3418f..f085455fa 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -617,4 +617,21 @@ intrinsics! { pub extern "C" fn __divdf3(a: f64, b: f64) -> f64 { div(a, b) } + + #[avr_skip] + #[ppc_alias = __divkf3] + #[cfg(not(feature = "no-f16-f128"))] + pub extern "C" fn __divtf3(a: f128, b: f128) -> f128 { + div(a, b) + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __divsf3vfp(a: f32, b: f32) -> f32 { + a / b + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __divdf3vfp(a: f64, b: f64) -> f64 { + a / b + } } diff --git a/testcrate/tests/div_rem.rs b/testcrate/tests/div_rem.rs index 2de61c707..ac87eb630 100644 --- a/testcrate/tests/div_rem.rs +++ b/testcrate/tests/div_rem.rs @@ -1,3 +1,4 @@ +#![feature(f128)] #![allow(unused_macros)] use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4}; @@ -146,4 +147,19 @@ mod float_div { f32, __divsf3, Single, all(); f64, __divdf3, Double, all(); } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + float! { + f128, __divtf3, Quad, + // FIXME(llvm): there is a bug in LLVM rt. + // See . + not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux"))); + } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + float! 
{ + f128, __divkf3, Quad, not(feature = "no-sys-f128"); + } } From 5c153cf07ea8b790eeff732381c60fa306bc91fa Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 19 Aug 2024 15:46:40 -0500 Subject: [PATCH 0837/1459] Add benchmarks for `f128` division --- testcrate/benches/float_div.rs | 29 +++++++++++++++++++++++++++-- testcrate/src/bench.rs | 7 ++++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/testcrate/benches/float_div.rs b/testcrate/benches/float_div.rs index 6ba439b04..6a039a82a 100644 --- a/testcrate/benches/float_div.rs +++ b/testcrate/benches/float_div.rs @@ -1,5 +1,7 @@ +#![cfg_attr(f128_enabled, feature(f128))] + use compiler_builtins::float::div; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{criterion_main, Criterion}; use testcrate::float_bench; float_bench! { @@ -64,5 +66,28 @@ float_bench! { ], } -criterion_group!(float_div, div_f32, div_f64); +#[cfg(f128_enabled)] +float_bench! { + name: div_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: div::__divtf3, + crate_fn_ppc: div::__divkf3, + sys_fn: __divtf3, + sys_fn_ppc: __divkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +pub fn float_div() { + let mut criterion = Criterion::default().configure_from_args(); + + div_f32(&mut criterion); + div_f64(&mut criterion); + + #[cfg(f128_enabled)] + { + div_f128(&mut criterion); + } +} + criterion_main!(float_div); diff --git a/testcrate/src/bench.rs b/testcrate/src/bench.rs index 1d571a6cf..f831b5a66 100644 --- a/testcrate/src/bench.rs +++ b/testcrate/src/bench.rs @@ -30,13 +30,14 @@ pub fn skip_sys_checks(test_name: &str) -> bool { // FIXME(f16_f128): system symbols have incorrect results // - const X86_NO_SSE_SKIPPED: &[&str] = - &["add_f128", "sub_f128", "mul_f128", "powi_f32", "powi_f64"]; + const X86_NO_SSE_SKIPPED: &[&str] = &[ + "add_f128", "sub_f128", "mul_f128", "div_f128", "powi_f32", "powi_f64", + ]; // FIXME(f16_f128): Wide multiply carry bug in `compiler-rt`, re-enable 
when nightly no longer // uses `compiler-rt` version. // - const AARCH64_SKIPPED: &[&str] = &["mul_f128"]; + const AARCH64_SKIPPED: &[&str] = &["mul_f128", "div_f128"]; // FIXME(llvm): system symbols have incorrect results on Windows // From 5bf19f185c1b07790c1f41022842e3e25365952f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 24 Sep 2024 16:40:44 +0000 Subject: [PATCH 0838/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1aa6e5668..fa89c152e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.126" +version = "0.1.127" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From b9a1617e00cca32e8ce9d5e16ad0cf9908d07642 Mon Sep 17 00:00:00 2001 From: Artyom Tetyukhin <51746822+arttet@users.noreply.github.com> Date: Wed, 25 Sep 2024 18:21:36 +0400 Subject: [PATCH 0839/1459] Bump cc dependency --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index fa89c152e..86cddd24d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,7 @@ test = false core = { version = "1.0.0", optional = true, package = 'rustc-std-workspace-core' } [build-dependencies] -cc = { optional = true, version = "1.0" } +cc = { optional = true, version = "1.1" } [dev-dependencies] panic-handler = { path = 'crates/panic-handler' } From 543868b9a497476c562c13ae2f34bd3aa08b0371 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 25 Sep 2024 14:37:03 +0000 Subject: [PATCH 0840/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 86cddd24d..7fd3b2230 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] 
authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.127" +version = "0.1.128" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 7c4138c8a4f1478f2f1d208cd091b2f0938b2780 Mon Sep 17 00:00:00 2001 From: Davide Mor Date: Thu, 26 Sep 2024 15:17:47 +0200 Subject: [PATCH 0841/1459] Fixed `__divtf3` having wrong cfg for f128 --- src/float/div.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/float/div.rs b/src/float/div.rs index f085455fa..f125771a0 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -620,7 +620,7 @@ intrinsics! { #[avr_skip] #[ppc_alias = __divkf3] - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] pub extern "C" fn __divtf3(a: f128, b: f128) -> f128 { div(a, b) } From 1191189a5209410003ca9343faef0cb8dc3ace5c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 27 Sep 2024 07:00:19 -0400 Subject: [PATCH 0842/1459] Revert "Bump cc dependency" --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7fd3b2230..75cebda89 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,7 @@ test = false core = { version = "1.0.0", optional = true, package = 'rustc-std-workspace-core' } [build-dependencies] -cc = { optional = true, version = "1.1" } +cc = { optional = true, version = "1.0" } [dev-dependencies] panic-handler = { path = 'crates/panic-handler' } From e2758ad1f134606052ee8935fb4c9dbd57976ffb Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 27 Sep 2024 11:08:02 +0000 Subject: [PATCH 0843/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 75cebda89..670bcd74a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.128" +version = "0.1.129" license = 
"MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 0fc4cdd026d0a87aa75462a6f8927f0be60cc4f3 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 27 Sep 2024 00:01:01 -0400 Subject: [PATCH 0844/1459] Remove unneeded features A few of the features that we enable have been stabilized, others may have been needed at some point but are no longer required. Clean this up. --- examples/intrinsics.rs | 2 -- src/lib.rs | 6 ------ testcrate/tests/aeabi_memclr.rs | 1 - testcrate/tests/aeabi_memcpy.rs | 1 - testcrate/tests/aeabi_memset.rs | 1 - 5 files changed, 11 deletions(-) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 6dcd3820f..06d772330 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -4,12 +4,10 @@ // to link due to the missing intrinsic (symbol). #![allow(unused_features)] -#![allow(stable_features)] // bench_black_box feature is stable, leaving for backcompat #![allow(internal_features)] #![cfg_attr(thumb, no_main)] #![deny(dead_code)] #![feature(allocator_api)] -#![feature(bench_black_box)] #![feature(f128)] #![feature(f16)] #![feature(lang_items)] diff --git a/src/lib.rs b/src/lib.rs index b85f789fd..dea30a3c6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,18 +1,12 @@ #![cfg_attr(feature = "compiler-builtins", compiler_builtins)] -#![cfg_attr(not(feature = "no-asm"), feature(asm))] #![feature(abi_unadjusted)] #![feature(asm_experimental_arch)] -#![cfg_attr(not(feature = "no-asm"), feature(global_asm))] #![feature(cfg_target_has_atomic)] #![feature(compiler_builtins)] -#![feature(core_ffi_c)] #![feature(core_intrinsics)] -#![feature(inline_const)] -#![feature(lang_items)] #![feature(linkage)] #![feature(naked_functions)] #![feature(repr_simd)] -#![feature(c_unwind)] #![cfg_attr(f16_enabled, feature(f16))] #![cfg_attr(f128_enabled, feature(f128))] #![no_builtins] diff --git a/testcrate/tests/aeabi_memclr.rs b/testcrate/tests/aeabi_memclr.rs index 595076939..bfd15a391 100644 
--- a/testcrate/tests/aeabi_memclr.rs +++ b/testcrate/tests/aeabi_memclr.rs @@ -5,7 +5,6 @@ feature = "mem" ))] #![feature(compiler_builtins_lib)] -#![feature(lang_items)] #![no_std] extern crate compiler_builtins; diff --git a/testcrate/tests/aeabi_memcpy.rs b/testcrate/tests/aeabi_memcpy.rs index 2d72dfbba..c892c5aba 100644 --- a/testcrate/tests/aeabi_memcpy.rs +++ b/testcrate/tests/aeabi_memcpy.rs @@ -5,7 +5,6 @@ feature = "mem" ))] #![feature(compiler_builtins_lib)] -#![feature(lang_items)] #![no_std] extern crate compiler_builtins; diff --git a/testcrate/tests/aeabi_memset.rs b/testcrate/tests/aeabi_memset.rs index f03729bed..34ab3acc7 100644 --- a/testcrate/tests/aeabi_memset.rs +++ b/testcrate/tests/aeabi_memset.rs @@ -5,7 +5,6 @@ feature = "mem" ))] #![feature(compiler_builtins_lib)] -#![feature(lang_items)] #![no_std] extern crate compiler_builtins; From 5d5e2a2d778c46dc737764a62d9b2f42a4e8249e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 19 Aug 2024 17:31:23 -0400 Subject: [PATCH 0845/1459] Move `float_pow` tests to their own file --- testcrate/tests/float_pow.rs | 54 ++++++++++++++++++++++++++++++++++ testcrate/tests/misc.rs | 56 ------------------------------------ 2 files changed, 54 insertions(+), 56 deletions(-) create mode 100644 testcrate/tests/float_pow.rs diff --git a/testcrate/tests/float_pow.rs b/testcrate/tests/float_pow.rs new file mode 100644 index 000000000..761a6611d --- /dev/null +++ b/testcrate/tests/float_pow.rs @@ -0,0 +1,54 @@ +#![allow(unused_macros)] +#![cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] + +use testcrate::*; + +// This is approximate because of issues related to +// https://github.com/rust-lang/rust/issues/73920. +// TODO how do we resolve this indeterminacy? +macro_rules! 
pow { + ($($f:ty, $tolerance:expr, $fn:ident);*;) => { + $( + #[test] + fn $fn() { + use compiler_builtins::float::pow::$fn; + use compiler_builtins::float::Float; + fuzz_float_2(N, |x: $f, y: $f| { + if !(Float::is_subnormal(x) || Float::is_subnormal(y) || x.is_nan()) { + let n = y.to_bits() & !<$f as Float>::SIGNIFICAND_MASK; + let n = (n as <$f as Float>::SignedInt) >> <$f as Float>::SIGNIFICAND_BITS; + let n = n as i32; + let tmp0: $f = x.powi(n); + let tmp1: $f = $fn(x, n); + let (a, b) = if tmp0 < tmp1 { + (tmp0, tmp1) + } else { + (tmp1, tmp0) + }; + + let good = if a == b { + // handles infinity equality + true + } else if a < $tolerance { + b < $tolerance + } else { + let quo = b / a; + (quo < (1. + $tolerance)) && (quo > (1. - $tolerance)) + }; + + assert!( + good, + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn), x, n, tmp0, tmp1 + ); + } + }); + } + )* + }; +} + +pow! { + f32, 1e-4, __powisf2; + f64, 1e-12, __powidf2; +} diff --git a/testcrate/tests/misc.rs b/testcrate/tests/misc.rs index f9431915b..f5ac2ab7d 100644 --- a/testcrate/tests/misc.rs +++ b/testcrate/tests/misc.rs @@ -207,59 +207,3 @@ fn bswap() { ); } } - -// This is approximate because of issues related to -// https://github.com/rust-lang/rust/issues/73920. -// TODO how do we resolve this indeterminacy? -macro_rules! 
pow { - ($($f:ty, $tolerance:expr, $fn:ident);*;) => { - $( - #[test] - fn $fn() { - use compiler_builtins::float::pow::$fn; - use compiler_builtins::float::Float; - fuzz_float_2(N, |x: $f, y: $f| { - if !(Float::is_subnormal(x) || Float::is_subnormal(y) || x.is_nan()) { - let n = y.to_bits() & !<$f as Float>::SIGNIFICAND_MASK; - let n = (n as <$f as Float>::SignedInt) >> <$f as Float>::SIGNIFICAND_BITS; - let n = n as i32; - let tmp0: $f = x.powi(n); - let tmp1: $f = $fn(x, n); - let (a, b) = if tmp0 < tmp1 { - (tmp0, tmp1) - } else { - (tmp1, tmp0) - }; - let good = { - if a == b { - // handles infinity equality - true - } else if a < $tolerance { - b < $tolerance - } else { - let quo = b / a; - (quo < (1. + $tolerance)) && (quo > (1. - $tolerance)) - } - }; - if !good { - panic!( - "{}({}, {}): std: {}, builtins: {}", - stringify!($fn), x, n, tmp0, tmp1 - ); - } - } - }); - } - )* - }; -} - -#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] -mod float_pow { - use super::*; - - pow! { - f32, 1e-4, __powisf2; - f64, 1e-12, __powidf2; - } -} From 79d817d7b045bb2c98063a0970f680bf9ae5d96e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 19 Aug 2024 17:32:42 -0400 Subject: [PATCH 0846/1459] Add support for `f128` integer exponentiation Create the symbol `__powitf2`. --- README.md | 2 +- build.rs | 1 - src/float/pow.rs | 9 +++++++++ testcrate/tests/float_pow.rs | 24 +++++++++++++++++++++--- 4 files changed, 31 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 06137f3c7..985020f5a 100644 --- a/README.md +++ b/README.md @@ -240,7 +240,7 @@ of being added to Rust. 
- [ ] floatunsitf.c - [ ] floatuntitf.c - [x] multf3.c -- [ ] powitf2.c +- [x] powitf2.c - [x] subtf3.c - [x] truncdfhf2.c - [x] truncsfhf2.c diff --git a/build.rs b/build.rs index 3b2805f83..df98688d1 100644 --- a/build.rs +++ b/build.rs @@ -526,7 +526,6 @@ mod c { ("__floatsitf", "floatsitf.c"), ("__floatunditf", "floatunditf.c"), ("__floatunsitf", "floatunsitf.c"), - ("__powitf2", "powitf2.c"), ("__fe_getround", "fp_mode.c"), ("__fe_raise_inexact", "fp_mode.c"), ]); diff --git a/src/float/pow.rs b/src/float/pow.rs index 3103fe6f6..dac768f7b 100644 --- a/src/float/pow.rs +++ b/src/float/pow.rs @@ -35,4 +35,13 @@ intrinsics! { pub extern "C" fn __powidf2(a: f64, b: i32) -> f64 { pow(a, b) } + + #[avr_skip] + #[ppc_alias = __powikf2] + #[cfg(f128_enabled)] + // FIXME(f16_f128): MSVC cannot build these until `__divtf3` is available in nightly. + #[cfg(not(target_env = "msvc"))] + pub extern "C" fn __powitf2(a: f128, b: i32) -> f128 { + pow(a, b) + } } diff --git a/testcrate/tests/float_pow.rs b/testcrate/tests/float_pow.rs index 761a6611d..d85ee99df 100644 --- a/testcrate/tests/float_pow.rs +++ b/testcrate/tests/float_pow.rs @@ -1,4 +1,5 @@ #![allow(unused_macros)] +#![cfg_attr(f128_enabled, feature(f128))] #![cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] use testcrate::*; @@ -7,9 +8,12 @@ use testcrate::*; // https://github.com/rust-lang/rust/issues/73920. // TODO how do we resolve this indeterminacy? macro_rules! pow { - ($($f:ty, $tolerance:expr, $fn:ident);*;) => { + ($($f:ty, $tolerance:expr, $fn:ident, $sys_available:meta);*;) => { $( #[test] + // FIXME(apfloat): We skip tests if system symbols aren't available rather + // than providing a fallback, since `rustc_apfloat` does not provide `pow`. + #[cfg($sys_available)] fn $fn() { use compiler_builtins::float::pow::$fn; use compiler_builtins::float::Float; @@ -49,6 +53,20 @@ macro_rules! pow { } pow! 
{ - f32, 1e-4, __powisf2; - f64, 1e-12, __powidf2; + f32, 1e-4, __powisf2, all(); + f64, 1e-12, __powidf2, all(); +} + +#[cfg(f128_enabled)] +// FIXME(f16_f128): MSVC cannot build these until `__divtf3` is available in nightly. +#[cfg(not(target_env = "msvc"))] +#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] +pow! { + f128, 1e-36, __powitf2, not(feature = "no-sys-f128"); +} + +#[cfg(f128_enabled)] +#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] +pow! { + f128, 1e-36, __powikf2, not(feature = "no-sys-f128"); } From 31c4a8eb4dcf36c61cb23fe1bc0f22198d8b6840 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 19 Aug 2024 17:37:41 -0400 Subject: [PATCH 0847/1459] Add a benchmark for `__powitf2` --- testcrate/benches/float_pow.rs | 31 ++++++++++++++++++++++++++++--- testcrate/src/bench.rs | 2 ++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/testcrate/benches/float_pow.rs b/testcrate/benches/float_pow.rs index 252f74012..46da3f25c 100644 --- a/testcrate/benches/float_pow.rs +++ b/testcrate/benches/float_pow.rs @@ -1,5 +1,7 @@ +#![cfg_attr(f128_enabled, feature(f128))] + use compiler_builtins::float::pow; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{criterion_main, Criterion}; use testcrate::float_bench; float_bench! { @@ -20,5 +22,28 @@ float_bench! { asm: [], } -criterion_group!(float_add, powi_f32, powi_f64); -criterion_main!(float_add); +// FIXME(f16_f128): can be changed to only `f128_enabled` once `__multf3` and `__divtf3` are +// distributed by nightly. +#[cfg(all(f128_enabled, not(feature = "no-sys-f128")))] +float_bench! 
{ + name: powi_f128, + sig: (a: f128, b: i32) -> f128, + crate_fn: pow::__powitf2, + crate_fn_ppc: pow::__powikf2, + sys_fn: __powitf2, + sys_fn_ppc: __powikf2, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +pub fn float_pow() { + let mut criterion = Criterion::default().configure_from_args(); + + powi_f32(&mut criterion); + powi_f64(&mut criterion); + + #[cfg(all(f128_enabled, not(feature = "no-sys-f128")))] + powi_f128(&mut criterion); +} + +criterion_main!(float_pow); diff --git a/testcrate/src/bench.rs b/testcrate/src/bench.rs index f831b5a66..798be6579 100644 --- a/testcrate/src/bench.rs +++ b/testcrate/src/bench.rs @@ -360,3 +360,5 @@ impl_testio!(int i16, i32, i64, i128); impl_testio!(int u16, u32, u64, u128); impl_testio!((float, int)(f32, i32)); impl_testio!((float, int)(f64, i32)); +#[cfg(f128_enabled)] +impl_testio!((float, int)(f128, i32)); From 1150e9a5e86dd1f39d8b5134b3ff5539653f1555 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 28 Sep 2024 15:13:17 +0000 Subject: [PATCH 0848/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 670bcd74a..e84c5545e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.129" +version = "0.1.130" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 584994774ee2ceff16d835c5fa1337a98d3f7bc9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 28 Sep 2024 17:57:06 -0400 Subject: [PATCH 0849/1459] Resolve FIXMEs related to `f16` assembly We have a couple FIXMEs from before aarch64 `f16` assembly support existed. We have this available now, so resolve the notes here. 
--- testcrate/benches/float_extend.rs | 7 ++----- testcrate/benches/float_trunc.rs | 14 +++++--------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/testcrate/benches/float_extend.rs b/testcrate/benches/float_extend.rs index a9563741a..a0cdaf48a 100644 --- a/testcrate/benches/float_extend.rs +++ b/testcrate/benches/float_extend.rs @@ -15,11 +15,10 @@ float_bench! { sys_available: not(feature = "no-sys-f16"), asm: [ #[cfg(target_arch = "aarch64")] { - // FIXME(f16_f128): remove `to_bits()` after f16 asm support (rust-lang/rust/#116909) let ret: f32; asm!( "fcvt {ret:s}, {a:h}", - a = in(vreg) a.to_bits(), + a = in(vreg) a, ret = lateout(vreg) ret, options(nomem, nostack, pure), ); @@ -96,9 +95,7 @@ pub fn float_extend() { extend_f16_f32(&mut criterion); #[cfg(f128_enabled)] - { - extend_f16_f128(&mut criterion); - } + extend_f16_f128(&mut criterion); } extend_f32_f64(&mut criterion); diff --git a/testcrate/benches/float_trunc.rs b/testcrate/benches/float_trunc.rs index 8d874e4b2..de9b5bf8c 100644 --- a/testcrate/benches/float_trunc.rs +++ b/testcrate/benches/float_trunc.rs @@ -14,8 +14,7 @@ float_bench! { sys_available: not(feature = "no-sys-f16"), asm: [ #[cfg(target_arch = "aarch64")] { - // FIXME(f16_f128): remove `from_bits()` after f16 asm support (rust-lang/rust/#116909) - let ret: u16; + let ret: f16; asm!( "fcvt {ret:h}, {a:s}", a = in(vreg) a, @@ -23,7 +22,7 @@ float_bench! { options(nomem, nostack, pure), ); - f16::from_bits(ret) + ret }; ], } @@ -37,8 +36,7 @@ float_bench! { sys_available: not(feature = "no-sys-f16"), asm: [ #[cfg(target_arch = "aarch64")] { - // FIXME(f16_f128): remove `from_bits()` after f16 asm support (rust-lang/rust/#116909) - let ret: u16; + let ret: f16; asm!( "fcvt {ret:h}, {a:d}", a = in(vreg) a, @@ -46,7 +44,7 @@ float_bench! 
{ options(nomem, nostack, pure), ); - f16::from_bits(ret) + ret }; ], } @@ -138,9 +136,7 @@ pub fn float_trunc() { // FIXME(#655): `f16` tests disabled until we can bootstrap symbols #[cfg(f16_enabled)] #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] - { - trunc_f128_f16(&mut criterion); - } + trunc_f128_f16(&mut criterion); trunc_f128_f32(&mut criterion); trunc_f128_f64(&mut criterion); From 03f09169d8f7ad84601dcf4c17e009be53ac94e8 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 28 Sep 2024 18:11:38 -0400 Subject: [PATCH 0850/1459] Ungate tests that were skipped due to a broken implementation The upstream issue [1] has been resolved so we can enable these tests again. [1]: https://github.com/rust-lang/compiler-builtins/issues/616 --- testcrate/src/bench.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/testcrate/src/bench.rs b/testcrate/src/bench.rs index 798be6579..f5da1f3ae 100644 --- a/testcrate/src/bench.rs +++ b/testcrate/src/bench.rs @@ -82,12 +82,9 @@ pub fn skip_sys_checks(test_name: &str) -> bool { /// Still run benchmarks/tests but don't check correctness between compiler-builtins and /// assembly functions -pub fn skip_asm_checks(test_name: &str) -> bool { - // FIXME(f16_f128): rounding error - // - const SKIPPED: &[&str] = &["mul_f32", "mul_f64"]; - - SKIPPED.contains(&test_name) +pub fn skip_asm_checks(_test_name: &str) -> bool { + // Nothing to skip at this time + false } /// Create a comparison of the system symbol, compiler_builtins, and optionally handwritten From 4e211124be8ab8291ea390af4b658139d6a57303 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 28 Sep 2024 18:19:52 -0400 Subject: [PATCH 0851/1459] Revert "Temporarily `use define_rust_probestack;`" has been resolved. Remove the workaround that was introduced to suppress it. This reverts commit 254edbcad4cfd6a8af32e3297c1037d7984c3c49. 
--- src/probestack.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/probestack.rs b/src/probestack.rs index 46caf1676..0c30384db 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -120,10 +120,6 @@ macro_rules! define_rust_probestack { }; } -// FIXME(rust-lang/rust#126984): Remove allow once lint is fixed -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use define_rust_probestack; - // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, // ensuring that if any pages are unmapped we'll make a page fault. // From b1e6c1de2fa1eda8772893c7293be1bb0d8bd6b8 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 28 Sep 2024 19:53:09 -0400 Subject: [PATCH 0852/1459] Fix some warnings from shellcheck --- ci/run-docker.sh | 14 ++++++++------ ci/run.sh | 31 +++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/ci/run-docker.sh b/ci/run-docker.sh index 50ae9dc83..215ad71a3 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -14,7 +14,7 @@ run() { # will be owned by root mkdir -p target - if [ $(uname -s) = "Linux" ] && [ -z "${DOCKER_BASE_IMAGE:-}" ]; then + if [ "$(uname -s)" = "Linux" ] && [ -z "${DOCKER_BASE_IMAGE:-}" ]; then # Share the host rustc and target. Do this only on Linux and if the image # isn't overridden run_args=( @@ -43,19 +43,21 @@ run() { if [ "${GITHUB_ACTIONS:-}" = "true" ]; then # Enable Docker image caching on GHA - + build_cmd=("buildx" "build") build_args=( "--cache-from" "type=local,src=/tmp/.buildx-cache" "--cache-to" "type=local,dest=/tmp/.buildx-cache-new" - "${build_args[@]:-}" + # This is the beautiful bash syntax for expanding an array but neither + # raising an error nor returning an empty string if the array is empty. 
+ "${build_args[@]:+"${build_args[@]}"}" "--load" ) fi - docker ${build_cmd[@]:-build} \ + docker "${build_cmd[@]:-build}" \ -t "builtins-$target" \ - ${build_args[@]:-} \ + "${build_args[@]:-}" \ "ci/docker/$target" docker run \ --rm \ @@ -64,7 +66,7 @@ run() { -e "CARGO_TARGET_DIR=/builtins-target" \ -v "$(pwd):/checkout:ro" \ -w /checkout \ - ${run_args[@]:-} \ + "${run_args[@]:-}" \ --init \ "builtins-$target" \ sh -c "$run_cmd" diff --git a/ci/run.sh b/ci/run.sh index dcbe1caf4..057cdb083 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -38,17 +38,24 @@ fi if [ "${TEST_VERBATIM:-}" = "1" ]; then verb_path=$(cmd.exe //C echo \\\\?\\%cd%\\testcrate\\target2) - cargo build --manifest-path testcrate/Cargo.toml --target $target --target-dir $verb_path --features c + cargo build --manifest-path testcrate/Cargo.toml \ + --target "$target" --target-dir "$verb_path" --features c fi -if [ -d /builtins-target ]; then - rlib_paths=/builtins-target/"${target}"/debug/deps/libcompiler_builtins-*.rlib -else - rlib_paths=target/"${target}"/debug/deps/libcompiler_builtins-*.rlib -fi +declare -a rlib_paths + +# Set the `rlib_paths` global array to a list of all compiler-builtins rlibs +update_rlib_paths() { + if [ -d /builtins-target ]; then + rlib_paths=( /builtins-target/"${target}"/debug/deps/libcompiler_builtins-*.rlib ) + else + rlib_paths=( target/"${target}"/debug/deps/libcompiler_builtins-*.rlib ) + fi +} # Remove any existing artifacts from previous tests that don't set #![compiler_builtins] -rm -f $rlib_paths +update_rlib_paths +rm -f "${rlib_paths[@]}" cargo build --target "$target" cargo build --target "$target" --release @@ -76,6 +83,7 @@ NM=$(find "$(rustc --print sysroot)" \( -name llvm-nm -o -name llvm-nm.exe \) ) if [ "$NM" = "" ]; then NM="${PREFIX}nm" fi + # i686-pc-windows-gnu tools have a dependency on some DLLs, so run it with # rustup run to ensure that those are in PATH. 
TOOLCHAIN="$(rustup show active-toolchain | sed 's/ (default)//')" @@ -84,11 +92,13 @@ if [[ "$TOOLCHAIN" == *i686-pc-windows-gnu ]]; then fi # Look out for duplicated symbols when we include the compiler-rt (C) implementation -for rlib in $rlib_paths; do +update_rlib_paths +for rlib in "${rlib_paths[@]}"; do set +x echo "================================================================" echo "checking $rlib for duplicate symbols" echo "================================================================" + set -x duplicates_found=0 @@ -108,7 +118,7 @@ for rlib in $rlib_paths; do fi done -rm -f $rlib_paths +rm -f "${rlib_paths[@]}" build_intrinsics() { cargo build --target "$target" -v --example intrinsics "$@" @@ -128,7 +138,8 @@ CARGO_PROFILE_RELEASE_LTO=true \ cargo build --target "$target" --example intrinsics --release # Ensure no references to any symbols from core -for rlib in $(echo $rlib_paths); do +update_rlib_paths +for rlib in "${rlib_paths[@]}"; do set +x echo "================================================================" echo "checking $rlib for references to core" From 3a3745b48cf3bad74ca37e6b644194a22aecd3e8 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 2 Oct 2024 16:23:46 -0400 Subject: [PATCH 0853/1459] Add riscv64gc to CI There is a proposal to promote `riscv64gc-unknown-linux-gnu` to tier 1 [1]. We do not currently test RISC-V in CI; add it here. 
[1]: https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler/topic/Imminent.20RFC.20PR.3A.20riscv64gc-unknown-linux-gnu.20to.20Tier-1 --- .github/workflows/main.yml | 3 +++ ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile | 13 +++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fddb5973e..001acd2cc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -61,6 +61,9 @@ jobs: - target: powerpc64le-unknown-linux-gnu os: ubuntu-latest rust: nightly + - target: riscv64gc-unknown-linux-gnu + os: ubuntu-latest + rust: nightly - target: thumbv6m-none-eabi os: ubuntu-latest rust: nightly diff --git a/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile b/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000..4d4a194fd --- /dev/null +++ b/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile @@ -0,0 +1,13 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev qemu-user-static ca-certificates \ + gcc-riscv64-linux-gnu libc6-dev-riscv64-cross \ + qemu-system-riscv64 + +ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER=riscv64-linux-gnu-gcc \ + CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64-static \ + QEMU_LD_PREFIX=/usr/riscv64-linux-gnu \ + RUST_TEST_THREADS=1 From 64282a7c68062c690a21ab403cd5a9fe9a7cd532 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Thu, 30 May 2024 16:44:39 -0400 Subject: [PATCH 0854/1459] Build with `-Werror=implicit-function-declaration` To prevent fail-fast in situations like https://github.com/rust-lang/rust/issues/125619, where an upstream source compiles but creates a link error way downstream. 
--- build.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build.rs b/build.rs index df98688d1..e5503e88f 100644 --- a/build.rs +++ b/build.rs @@ -327,6 +327,8 @@ mod c { // in https://github.com/rust-lang/compiler-rt/blob/c8fbcb3/cmake/config-ix.cmake#L19. cfg.flag_if_supported("-fomit-frame-pointer"); cfg.define("VISIBILITY_HIDDEN", None); + // Avoid implicitly creating references to undefined functions + cfg.flag("-Werror=implicit-function-declaration"); } // int_util.c tries to include stdlib.h if `_WIN32` is defined, From 4eecc5e0dd8916e49f207eef3a2cffb6724aa127 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 3 Oct 2024 15:37:33 +0000 Subject: [PATCH 0855/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e84c5545e..835a47649 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.130" +version = "0.1.131" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From a8645c7de72e66211ebf5aa783f9375a02f4f5b8 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 2 Oct 2024 13:53:56 -0400 Subject: [PATCH 0856/1459] Upgrade CI LLVM version to 19.1 19.1 is the latest stable release from 2024-09-17. This will match what is currently being used in rust-lang/rust. 
--- .github/workflows/main.yml | 2 +- README.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 001acd2cc..ec5c059ba 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -4,7 +4,7 @@ on: [push, pull_request] env: RUSTDOCFLAGS: -Dwarnings RUSTFLAGS: -Dwarnings - RUST_LLVM_VERSION: 18.0-2024-02-13 + RUST_LLVM_VERSION: 19.1-2024-09-17 RUST_COMPILER_RT_ROOT: ./compiler-rt jobs: diff --git a/README.md b/README.md index 985020f5a..f792d1883 100644 --- a/README.md +++ b/README.md @@ -89,8 +89,8 @@ to test against, located in a directory called `compiler-rt`. This can be obtained with the following: ```sh -curl -L -o rustc-llvm-18.0.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/18.0-2024-02-13.tar.gz -tar xzf rustc-llvm-18.0.tar.gz --strip-components 1 llvm-project-rustc-18.0-2024-02-13/compiler-rt +curl -L -o rustc-llvm-19.1.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/19.1-2024-09-17.tar.gz +tar xzf rustc-llvm-19.1.tar.gz --strip-components 1 llvm-project-rustc-19.1-2024-09-17/compiler-rt ``` Local targets may also be tested with `./ci/run.sh [target]`. From 749c12f5914b1abb8db2b9ccb976e8a9cfd5949c Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Fri, 4 Oct 2024 12:33:16 -0400 Subject: [PATCH 0857/1459] Allow implicit function decl on A64 In testing, we found that `-Werror=implicit-function-declaration` is unusable with the current LLVM version. --- build.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/build.rs b/build.rs index e5503e88f..2863c979f 100644 --- a/build.rs +++ b/build.rs @@ -327,8 +327,16 @@ mod c { // in https://github.com/rust-lang/compiler-rt/blob/c8fbcb3/cmake/config-ix.cmake#L19. 
cfg.flag_if_supported("-fomit-frame-pointer"); cfg.define("VISIBILITY_HIDDEN", None); - // Avoid implicitly creating references to undefined functions - cfg.flag("-Werror=implicit-function-declaration"); + + if let "aarch64" | "arm64ec" = target.arch.as_str() { + // FIXME(llvm20): Older GCCs on A64 fail to build with + // -Werror=implicit-function-declaration due to a compiler-rt bug. + // With a newer LLVM we should be able to enable the flag everywhere. + // https://github.com/llvm/llvm-project/commit/8aa9d6206ce55bdaaf422839c351fbd63f033b89 + } else { + // Avoid implicitly creating references to undefined functions + cfg.flag("-Werror=implicit-function-declaration"); + } } // int_util.c tries to include stdlib.h if `_WIN32` is defined, From 0b6dead6527ae367b624f3622958cd1644f0d1bd Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 4 Oct 2024 17:17:03 +0000 Subject: [PATCH 0858/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 835a47649..d0adf7444 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.131" +version = "0.1.132" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 986fae25140187045dc42c1d17e639bad2d11bcf Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 5 Oct 2024 13:57:10 -0500 Subject: [PATCH 0859/1459] Move `musl-reference-tests` to a new `libm-test` crate There isn't any reason for this feature to be exposed or part of the build script. Move it to a separate crate. We will also want more tests that require some support functions; this will create a place for them. 
--- libm/CONTRIBUTING.md | 4 +- libm/Cargo.toml | 11 +- libm/build.rs | 452 +-------------------- libm/ci/run.sh | 14 +- libm/crates/libm-test/Cargo.toml | 18 + libm/crates/libm-test/build.rs | 456 ++++++++++++++++++++++ libm/crates/libm-test/src/lib.rs | 1 + libm/crates/libm-test/tests/musl_biteq.rs | 4 + libm/src/lib.rs | 5 - 9 files changed, 494 insertions(+), 471 deletions(-) create mode 100644 libm/crates/libm-test/Cargo.toml create mode 100644 libm/crates/libm-test/build.rs create mode 100644 libm/crates/libm-test/src/lib.rs create mode 100644 libm/crates/libm-test/tests/musl_biteq.rs diff --git a/libm/CONTRIBUTING.md b/libm/CONTRIBUTING.md index 59c37a6f9..c15c45a43 100644 --- a/libm/CONTRIBUTING.md +++ b/libm/CONTRIBUTING.md @@ -7,7 +7,7 @@ in `src/lib.rs`. - Write some simple tests in your module (using `#[test]`) - Run `cargo test` to make sure it works -- Run `cargo test --features musl-reference-tests` to compare your +- Run `cargo test --features libm-test/musl-reference-tests` to compare your implementation against musl's - Send us a pull request! Make sure to run `cargo fmt` on your code before sending the PR. Also include "closes #42" in the PR description to close the @@ -88,7 +88,7 @@ If you'd like to run tests with randomized inputs that get compared against musl itself, you'll need to be on a Linux system and then you can execute: ``` -cargo test --features musl-reference-tests +cargo test --features libm-test/musl-reference-tests ``` Note that you may need to pass `--release` to Cargo if there are errors related diff --git a/libm/Cargo.toml b/libm/Cargo.toml index c2388083b..fc2db0c20 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -19,10 +19,6 @@ default = [] # that it should activate any useful Nightly things accordingly. unstable = [] -# Generate tests which are random inputs and the outputs are calculated with -# musl libc. -musl-reference-tests = ['rand'] - # Used to prevent using any intrinsics or arch-specific code. 
force-soft-floats = [] @@ -30,13 +26,16 @@ force-soft-floats = [] members = [ "crates/compiler-builtins-smoke-test", "crates/libm-bench", + "crates/libm-test", +] +default-members = [ + ".", + "crates/libm-test", ] [dev-dependencies] no-panic = "0.1.8" -[build-dependencies] -rand = { version = "0.6.5", optional = true } # This is needed for no-panic to correctly detect the lack of panics [profile.release] diff --git a/libm/build.rs b/libm/build.rs index c9ae23260..653ccf799 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -5,10 +5,8 @@ fn main() { println!("cargo::rustc-check-cfg=cfg(assert_no_panic)"); println!("cargo::rustc-check-cfg=cfg(feature, values(\"unstable\"))"); - #[cfg(feature = "musl-reference-tests")] - musl_reference_tests::generate(); - println!("cargo::rustc-check-cfg=cfg(feature, values(\"checked\"))"); + #[allow(unexpected_cfgs)] if !cfg!(feature = "checked") { let lvl = env::var("OPT_LEVEL").unwrap(); @@ -17,451 +15,3 @@ fn main() { } } } - -#[cfg(feature = "musl-reference-tests")] -mod musl_reference_tests { - use rand::seq::SliceRandom; - use rand::Rng; - use std::env; - use std::fs; - use std::process::Command; - - // Number of tests to generate for each function - const NTESTS: usize = 500; - - // These files are all internal functions or otherwise miscellaneous, not - // defining a function we want to test. 
- const IGNORED_FILES: &[&str] = &[ - "fenv.rs", - // These are giving slightly different results compared to musl - "lgamma.rs", - "lgammaf.rs", - "tgamma.rs", - "j0.rs", - "j0f.rs", - "jn.rs", - "jnf.rs", - "j1.rs", - "j1f.rs", - ]; - - struct Function { - name: String, - args: Vec, - ret: Vec, - tests: Vec, - } - - enum Ty { - F32, - F64, - I32, - Bool, - } - - struct Test { - inputs: Vec, - outputs: Vec, - } - - pub fn generate() { - // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 - let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); - if target_arch == "powerpc64" { - return; - } - - let files = fs::read_dir("src/math") - .unwrap() - .map(|f| f.unwrap().path()) - .collect::>(); - - let mut math = Vec::new(); - for file in files { - if IGNORED_FILES.iter().any(|f| file.ends_with(f)) { - continue; - } - - println!("generating musl reference tests in {:?}", file); - - let contents = fs::read_to_string(file).unwrap(); - let mut functions = contents.lines().filter(|f| f.starts_with("pub fn")); - while let Some(function_to_test) = functions.next() { - math.push(parse(function_to_test)); - } - } - - // Generate a bunch of random inputs for each function. This will - // attempt to generate a good set of uniform test cases for exercising - // all the various functionality. - generate_random_tests(&mut math, &mut rand::thread_rng()); - - // After we have all our inputs, use the x86_64-unknown-linux-musl - // target to generate the expected output. - generate_test_outputs(&mut math); - //panic!("Boo"); - // ... and now that we have both inputs and expected outputs, do a bunch - // of codegen to create the unit tests which we'll actually execute. 
- generate_unit_tests(&math); - } - - /// A "poor man's" parser for the signature of a function - fn parse(s: &str) -> Function { - let s = eat(s, "pub fn "); - let pos = s.find('(').unwrap(); - let name = &s[..pos]; - let s = &s[pos + 1..]; - let end = s.find(')').unwrap(); - let args = s[..end] - .split(',') - .map(|arg| { - let colon = arg.find(':').unwrap(); - parse_ty(arg[colon + 1..].trim()) - }) - .collect::>(); - let tail = &s[end + 1..]; - let tail = eat(tail, " -> "); - let ret = parse_retty(tail.replace("{", "").trim()); - - return Function { - name: name.to_string(), - args, - ret, - tests: Vec::new(), - }; - - fn parse_ty(s: &str) -> Ty { - match s { - "f32" => Ty::F32, - "f64" => Ty::F64, - "i32" => Ty::I32, - "bool" => Ty::Bool, - other => panic!("unknown type `{}`", other), - } - } - - fn parse_retty(s: &str) -> Vec { - match s { - "(f32, f32)" => vec![Ty::F32, Ty::F32], - "(f32, i32)" => vec![Ty::F32, Ty::I32], - "(f64, f64)" => vec![Ty::F64, Ty::F64], - "(f64, i32)" => vec![Ty::F64, Ty::I32], - other => vec![parse_ty(other)], - } - } - - fn eat<'a>(s: &'a str, prefix: &str) -> &'a str { - if s.starts_with(prefix) { - &s[prefix.len()..] - } else { - panic!("{:?} didn't start with {:?}", s, prefix) - } - } - } - - fn generate_random_tests(functions: &mut [Function], rng: &mut R) { - for function in functions { - for _ in 0..NTESTS { - function.tests.push(generate_test(function, rng)); - } - } - - fn generate_test(function: &Function, rng: &mut R) -> Test { - let mut inputs = function - .args - .iter() - .map(|ty| ty.gen_i64(rng)) - .collect::>(); - - // First argument to this function appears to be a number of - // iterations, so passing in massive random numbers causes it to - // take forever to execute, so make sure we're not running random - // math code until the heat death of the universe. 
- if function.name == "jn" || function.name == "jnf" { - inputs[0] &= 0xffff; - } - - Test { - inputs, - // zero output for now since we'll generate it later - outputs: vec![], - } - } - } - - impl Ty { - fn gen_i64(&self, r: &mut R) -> i64 { - use std::f32; - use std::f64; - - return match self { - Ty::F32 => { - if r.gen_range(0, 20) < 1 { - let i = *[f32::NAN, f32::INFINITY, f32::NEG_INFINITY] - .choose(r) - .unwrap(); - i.to_bits().into() - } else { - r.gen::().to_bits().into() - } - } - Ty::F64 => { - if r.gen_range(0, 20) < 1 { - let i = *[f64::NAN, f64::INFINITY, f64::NEG_INFINITY] - .choose(r) - .unwrap(); - i.to_bits() as i64 - } else { - r.gen::().to_bits() as i64 - } - } - Ty::I32 => { - if r.gen_range(0, 10) < 1 { - let i = *[i32::max_value(), 0, i32::min_value()].choose(r).unwrap(); - i.into() - } else { - r.gen::().into() - } - } - Ty::Bool => r.gen::() as i64, - }; - } - - fn libc_ty(&self) -> &'static str { - match self { - Ty::F32 => "f32", - Ty::F64 => "f64", - Ty::I32 => "i32", - Ty::Bool => "i32", - } - } - - fn libc_pty(&self) -> &'static str { - match self { - Ty::F32 => "*mut f32", - Ty::F64 => "*mut f64", - Ty::I32 => "*mut i32", - Ty::Bool => "*mut i32", - } - } - - fn default(&self) -> &'static str { - match self { - Ty::F32 => "0_f32", - Ty::F64 => "0_f64", - Ty::I32 => "0_i32", - Ty::Bool => "false", - } - } - - fn to_i64(&self) -> &'static str { - match self { - Ty::F32 => ".to_bits() as i64", - Ty::F64 => ".to_bits() as i64", - Ty::I32 => " as i64", - Ty::Bool => " as i64", - } - } - } - - fn generate_test_outputs(functions: &mut [Function]) { - let mut src = String::new(); - let dst = std::env::var("OUT_DIR").unwrap(); - - // Generate a program which will run all tests with all inputs in - // `functions`. This program will write all outputs to stdout (in a - // binary format). 
- src.push_str("use std::io::Write;"); - src.push_str("fn main() {"); - src.push_str("let mut result = Vec::new();"); - for function in functions.iter_mut() { - src.push_str("unsafe {"); - src.push_str("extern { fn "); - src.push_str(&function.name); - src.push_str("("); - - let (ret, retptr) = match function.name.as_str() { - "sincos" | "sincosf" => (None, &function.ret[..]), - _ => (Some(&function.ret[0]), &function.ret[1..]), - }; - for (i, arg) in function.args.iter().enumerate() { - src.push_str(&format!("arg{}: {},", i, arg.libc_ty())); - } - for (i, ret) in retptr.iter().enumerate() { - src.push_str(&format!("argret{}: {},", i, ret.libc_pty())); - } - src.push_str(")"); - if let Some(ty) = ret { - src.push_str(" -> "); - src.push_str(ty.libc_ty()); - } - src.push_str("; }"); - - src.push_str(&format!("static TESTS: &[[i64; {}]]", function.args.len())); - src.push_str(" = &["); - for test in function.tests.iter() { - src.push_str("["); - for val in test.inputs.iter() { - src.push_str(&val.to_string()); - src.push_str(","); - } - src.push_str("],"); - } - src.push_str("];"); - - src.push_str("for test in TESTS {"); - for (i, arg) in retptr.iter().enumerate() { - src.push_str(&format!("let mut argret{} = {};", i, arg.default())); - } - src.push_str("let output = "); - src.push_str(&function.name); - src.push_str("("); - for (i, arg) in function.args.iter().enumerate() { - src.push_str(&match arg { - Ty::F32 => format!("f32::from_bits(test[{}] as u32)", i), - Ty::F64 => format!("f64::from_bits(test[{}] as u64)", i), - Ty::I32 => format!("test[{}] as i32", i), - Ty::Bool => format!("test[{}] as i32", i), - }); - src.push_str(","); - } - for (i, _) in retptr.iter().enumerate() { - src.push_str(&format!("&mut argret{},", i)); - } - src.push_str(");"); - if let Some(ty) = &ret { - src.push_str(&format!("let output = output{};", ty.to_i64())); - src.push_str("result.extend_from_slice(&output.to_le_bytes());"); - } - - for (i, ret) in retptr.iter().enumerate() { - 
src.push_str(&format!( - "result.extend_from_slice(&(argret{}{}).to_le_bytes());", - i, - ret.to_i64(), - )); - } - src.push_str("}"); - - src.push_str("}"); - } - - src.push_str("std::io::stdout().write_all(&result).unwrap();"); - - src.push_str("}"); - - let path = format!("{}/gen.rs", dst); - fs::write(&path, src).unwrap(); - - // Make it somewhat pretty if something goes wrong - drop(Command::new("rustfmt").arg(&path).status()); - - // Compile and execute this tests for the musl target, assuming we're an - // x86_64 host effectively. - let status = Command::new("rustc") - .current_dir(&dst) - .arg(&path) - .arg("--target=x86_64-unknown-linux-musl") - .status() - .unwrap(); - assert!(status.success()); - let output = Command::new("./gen").current_dir(&dst).output().unwrap(); - assert!(output.status.success()); - assert!(output.stderr.is_empty()); - - // Map all the output bytes back to an `i64` and then shove it all into - // the expected results. - let mut results = output.stdout.chunks_exact(8).map(|buf| { - let mut exact = [0; 8]; - exact.copy_from_slice(buf); - i64::from_le_bytes(exact) - }); - - for f in functions.iter_mut() { - for test in f.tests.iter_mut() { - test.outputs = (0..f.ret.len()).map(|_| results.next().unwrap()).collect(); - } - } - assert!(results.next().is_none()); - } - - /// Codegens a file which has a ton of `#[test]` annotations for all the - /// tests that we generated above. 
- fn generate_unit_tests(functions: &[Function]) { - let mut src = String::new(); - let dst = std::env::var("OUT_DIR").unwrap(); - - for function in functions { - src.push_str("#[test]"); - src.push_str("fn "); - src.push_str(&function.name); - src.push_str("_matches_musl() {"); - src.push_str(&format!( - "static TESTS: &[([i64; {}], [i64; {}])]", - function.args.len(), - function.ret.len(), - )); - src.push_str(" = &["); - for test in function.tests.iter() { - src.push_str("(["); - for val in test.inputs.iter() { - src.push_str(&val.to_string()); - src.push_str(","); - } - src.push_str("],"); - src.push_str("["); - for val in test.outputs.iter() { - src.push_str(&val.to_string()); - src.push_str(","); - } - src.push_str("],"); - src.push_str("),"); - } - src.push_str("];"); - - src.push_str("for (test, expected) in TESTS {"); - src.push_str("let output = "); - src.push_str(&function.name); - src.push_str("("); - for (i, arg) in function.args.iter().enumerate() { - src.push_str(&match arg { - Ty::F32 => format!("f32::from_bits(test[{}] as u32)", i), - Ty::F64 => format!("f64::from_bits(test[{}] as u64)", i), - Ty::I32 => format!("test[{}] as i32", i), - Ty::Bool => format!("test[{}] as i32", i), - }); - src.push_str(","); - } - src.push_str(");"); - - for (i, ret) in function.ret.iter().enumerate() { - let get = if function.ret.len() == 1 { - String::new() - } else { - format!(".{}", i) - }; - src.push_str(&(match ret { - Ty::F32 => format!("if _eqf(output{}, f32::from_bits(expected[{}] as u32)).is_ok() {{ continue }}", get, i), - Ty::F64 => format!("if _eq(output{}, f64::from_bits(expected[{}] as u64)).is_ok() {{ continue }}", get, i), - Ty::I32 => format!("if output{} as i64 == expected[{}] {{ continue }}", get, i), - Ty::Bool => unreachable!(), - })); - } - - src.push_str( - r#" - panic!("INPUT: {:?} EXPECTED: {:?} ACTUAL {:?}", test, expected, output); - "#, - ); - src.push_str("}"); - - src.push_str("}"); - } - - let path = format!("{}/musl-tests.rs", dst); - 
fs::write(&path, src).unwrap(); - - // Try to make it somewhat pretty - drop(Command::new("rustfmt").arg(&path).status()); - } -} diff --git a/libm/ci/run.sh b/libm/ci/run.sh index d0cd42a8d..2a1ac52b1 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -3,19 +3,19 @@ set -ex TARGET=$1 -CMD="cargo test --all --target $TARGET" +cmd="cargo test --all --target $TARGET" # Needed for no-panic to correct detect a lack of panics export RUSTFLAGS="$RUSTFLAGS -Ccodegen-units=1" # stable by default -$CMD -$CMD --release +$cmd +$cmd --release # unstable with a feature -$CMD --features 'unstable' -$CMD --release --features 'unstable' +$cmd --features 'unstable' +$cmd --release --features 'unstable' # also run the reference tests -$CMD --features 'unstable musl-reference-tests' -$CMD --release --features 'unstable musl-reference-tests' +$cmd --features 'unstable libm-test/musl-reference-tests' +$cmd --release --features 'unstable libm-test/musl-reference-tests' diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml new file mode 100644 index 000000000..03e55b1d9 --- /dev/null +++ b/libm/crates/libm-test/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "libm-test" +version = "0.1.0" +edition = "2021" +publish = false + +[features] +default = [] + +# Generate tests which are random inputs and the outputs are calculated with +# musl libc. +musl-reference-tests = ["rand"] + +[dependencies] +libm = { path = "../.." 
} + +[build-dependencies] +rand = { version = "0.6.5", optional = true } diff --git a/libm/crates/libm-test/build.rs b/libm/crates/libm-test/build.rs new file mode 100644 index 000000000..fc8f305d6 --- /dev/null +++ b/libm/crates/libm-test/build.rs @@ -0,0 +1,456 @@ +fn main() { + #[cfg(feature = "musl-reference-tests")] + musl_reference_tests::generate(); +} + +#[cfg(feature = "musl-reference-tests")] +mod musl_reference_tests { + use rand::seq::SliceRandom; + use rand::Rng; + use std::env; + use std::fs; + use std::path::PathBuf; + use std::process::Command; + + // Number of tests to generate for each function + const NTESTS: usize = 500; + + // These files are all internal functions or otherwise miscellaneous, not + // defining a function we want to test. + const IGNORED_FILES: &[&str] = &[ + "fenv.rs", + // These are giving slightly different results compared to musl + "lgamma.rs", + "lgammaf.rs", + "tgamma.rs", + "j0.rs", + "j0f.rs", + "jn.rs", + "jnf.rs", + "j1.rs", + "j1f.rs", + ]; + + struct Function { + name: String, + args: Vec, + ret: Vec, + tests: Vec, + } + + enum Ty { + F32, + F64, + I32, + Bool, + } + + struct Test { + inputs: Vec, + outputs: Vec, + } + + pub fn generate() { + // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 + let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); + let libm_test = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + let math_src = libm_test.join("../../src/math"); + + if target_arch == "powerpc64" { + return; + } + + let files = fs::read_dir(math_src) + .unwrap() + .map(|f| f.unwrap().path()) + .collect::>(); + + let mut math = Vec::new(); + for file in files { + if IGNORED_FILES.iter().any(|f| file.ends_with(f)) { + continue; + } + + println!("generating musl reference tests in {:?}", file); + + let contents = fs::read_to_string(file).unwrap(); + let mut functions = contents.lines().filter(|f| f.starts_with("pub fn")); + while let Some(function_to_test) = 
functions.next() { + math.push(parse(function_to_test)); + } + } + + // Generate a bunch of random inputs for each function. This will + // attempt to generate a good set of uniform test cases for exercising + // all the various functionality. + generate_random_tests(&mut math, &mut rand::thread_rng()); + + // After we have all our inputs, use the x86_64-unknown-linux-musl + // target to generate the expected output. + generate_test_outputs(&mut math); + //panic!("Boo"); + // ... and now that we have both inputs and expected outputs, do a bunch + // of codegen to create the unit tests which we'll actually execute. + generate_unit_tests(&math); + } + + /// A "poor man's" parser for the signature of a function + fn parse(s: &str) -> Function { + let s = eat(s, "pub fn "); + let pos = s.find('(').unwrap(); + let name = &s[..pos]; + let s = &s[pos + 1..]; + let end = s.find(')').unwrap(); + let args = s[..end] + .split(',') + .map(|arg| { + let colon = arg.find(':').unwrap(); + parse_ty(arg[colon + 1..].trim()) + }) + .collect::>(); + let tail = &s[end + 1..]; + let tail = eat(tail, " -> "); + let ret = parse_retty(tail.replace("{", "").trim()); + + return Function { + name: name.to_string(), + args, + ret, + tests: Vec::new(), + }; + + fn parse_ty(s: &str) -> Ty { + match s { + "f32" => Ty::F32, + "f64" => Ty::F64, + "i32" => Ty::I32, + "bool" => Ty::Bool, + other => panic!("unknown type `{}`", other), + } + } + + fn parse_retty(s: &str) -> Vec { + match s { + "(f32, f32)" => vec![Ty::F32, Ty::F32], + "(f32, i32)" => vec![Ty::F32, Ty::I32], + "(f64, f64)" => vec![Ty::F64, Ty::F64], + "(f64, i32)" => vec![Ty::F64, Ty::I32], + other => vec![parse_ty(other)], + } + } + + fn eat<'a>(s: &'a str, prefix: &str) -> &'a str { + if s.starts_with(prefix) { + &s[prefix.len()..] 
+ } else { + panic!("{:?} didn't start with {:?}", s, prefix) + } + } + } + + fn generate_random_tests(functions: &mut [Function], rng: &mut R) { + for function in functions { + for _ in 0..NTESTS { + function.tests.push(generate_test(function, rng)); + } + } + + fn generate_test(function: &Function, rng: &mut R) -> Test { + let mut inputs = function + .args + .iter() + .map(|ty| ty.gen_i64(rng)) + .collect::>(); + + // First argument to this function appears to be a number of + // iterations, so passing in massive random numbers causes it to + // take forever to execute, so make sure we're not running random + // math code until the heat death of the universe. + if function.name == "jn" || function.name == "jnf" { + inputs[0] &= 0xffff; + } + + Test { + inputs, + // zero output for now since we'll generate it later + outputs: vec![], + } + } + } + + impl Ty { + fn gen_i64(&self, r: &mut R) -> i64 { + use std::f32; + use std::f64; + + return match self { + Ty::F32 => { + if r.gen_range(0, 20) < 1 { + let i = *[f32::NAN, f32::INFINITY, f32::NEG_INFINITY] + .choose(r) + .unwrap(); + i.to_bits().into() + } else { + r.gen::().to_bits().into() + } + } + Ty::F64 => { + if r.gen_range(0, 20) < 1 { + let i = *[f64::NAN, f64::INFINITY, f64::NEG_INFINITY] + .choose(r) + .unwrap(); + i.to_bits() as i64 + } else { + r.gen::().to_bits() as i64 + } + } + Ty::I32 => { + if r.gen_range(0, 10) < 1 { + let i = *[i32::max_value(), 0, i32::min_value()].choose(r).unwrap(); + i.into() + } else { + r.gen::().into() + } + } + Ty::Bool => r.gen::() as i64, + }; + } + + fn libc_ty(&self) -> &'static str { + match self { + Ty::F32 => "f32", + Ty::F64 => "f64", + Ty::I32 => "i32", + Ty::Bool => "i32", + } + } + + fn libc_pty(&self) -> &'static str { + match self { + Ty::F32 => "*mut f32", + Ty::F64 => "*mut f64", + Ty::I32 => "*mut i32", + Ty::Bool => "*mut i32", + } + } + + fn default(&self) -> &'static str { + match self { + Ty::F32 => "0_f32", + Ty::F64 => "0_f64", + Ty::I32 => "0_i32", + 
Ty::Bool => "false", + } + } + + fn to_i64(&self) -> &'static str { + match self { + Ty::F32 => ".to_bits() as i64", + Ty::F64 => ".to_bits() as i64", + Ty::I32 => " as i64", + Ty::Bool => " as i64", + } + } + } + + fn generate_test_outputs(functions: &mut [Function]) { + let mut src = String::new(); + let dst = std::env::var("OUT_DIR").unwrap(); + + // Generate a program which will run all tests with all inputs in + // `functions`. This program will write all outputs to stdout (in a + // binary format). + src.push_str("use std::io::Write;"); + src.push_str("fn main() {"); + src.push_str("let mut result = Vec::new();"); + for function in functions.iter_mut() { + src.push_str("unsafe {"); + src.push_str("extern { fn "); + src.push_str(&function.name); + src.push_str("("); + + let (ret, retptr) = match function.name.as_str() { + "sincos" | "sincosf" => (None, &function.ret[..]), + _ => (Some(&function.ret[0]), &function.ret[1..]), + }; + for (i, arg) in function.args.iter().enumerate() { + src.push_str(&format!("arg{}: {},", i, arg.libc_ty())); + } + for (i, ret) in retptr.iter().enumerate() { + src.push_str(&format!("argret{}: {},", i, ret.libc_pty())); + } + src.push_str(")"); + if let Some(ty) = ret { + src.push_str(" -> "); + src.push_str(ty.libc_ty()); + } + src.push_str("; }"); + + src.push_str(&format!("static TESTS: &[[i64; {}]]", function.args.len())); + src.push_str(" = &["); + for test in function.tests.iter() { + src.push_str("["); + for val in test.inputs.iter() { + src.push_str(&val.to_string()); + src.push_str(","); + } + src.push_str("],"); + } + src.push_str("];"); + + src.push_str("for test in TESTS {"); + for (i, arg) in retptr.iter().enumerate() { + src.push_str(&format!("let mut argret{} = {};", i, arg.default())); + } + src.push_str("let output = "); + src.push_str(&function.name); + src.push_str("("); + for (i, arg) in function.args.iter().enumerate() { + src.push_str(&match arg { + Ty::F32 => format!("f32::from_bits(test[{}] as u32)", i), + 
Ty::F64 => format!("f64::from_bits(test[{}] as u64)", i), + Ty::I32 => format!("test[{}] as i32", i), + Ty::Bool => format!("test[{}] as i32", i), + }); + src.push_str(","); + } + for (i, _) in retptr.iter().enumerate() { + src.push_str(&format!("&mut argret{},", i)); + } + src.push_str(");"); + if let Some(ty) = &ret { + src.push_str(&format!("let output = output{};", ty.to_i64())); + src.push_str("result.extend_from_slice(&output.to_le_bytes());"); + } + + for (i, ret) in retptr.iter().enumerate() { + src.push_str(&format!( + "result.extend_from_slice(&(argret{}{}).to_le_bytes());", + i, + ret.to_i64(), + )); + } + src.push_str("}"); + + src.push_str("}"); + } + + src.push_str("std::io::stdout().write_all(&result).unwrap();"); + + src.push_str("}"); + + let path = format!("{}/gen.rs", dst); + fs::write(&path, src).unwrap(); + + // Make it somewhat pretty if something goes wrong + drop(Command::new("rustfmt").arg(&path).status()); + + // Compile and execute this tests for the musl target, assuming we're an + // x86_64 host effectively. + let status = Command::new("rustc") + .current_dir(&dst) + .arg(&path) + .arg("--target=x86_64-unknown-linux-musl") + .status() + .unwrap(); + assert!(status.success()); + let output = Command::new("./gen").current_dir(&dst).output().unwrap(); + assert!(output.status.success()); + assert!(output.stderr.is_empty()); + + // Map all the output bytes back to an `i64` and then shove it all into + // the expected results. + let mut results = output.stdout.chunks_exact(8).map(|buf| { + let mut exact = [0; 8]; + exact.copy_from_slice(buf); + i64::from_le_bytes(exact) + }); + + for f in functions.iter_mut() { + for test in f.tests.iter_mut() { + test.outputs = (0..f.ret.len()).map(|_| results.next().unwrap()).collect(); + } + } + assert!(results.next().is_none()); + } + + /// Codegens a file which has a ton of `#[test]` annotations for all the + /// tests that we generated above. 
+ fn generate_unit_tests(functions: &[Function]) { + let mut src = String::new(); + let dst = std::env::var("OUT_DIR").unwrap(); + + for function in functions { + src.push_str("#[test]"); + src.push_str("fn "); + src.push_str(&function.name); + src.push_str("_matches_musl() {"); + src.push_str(&format!( + "static TESTS: &[([i64; {}], [i64; {}])]", + function.args.len(), + function.ret.len(), + )); + src.push_str(" = &["); + for test in function.tests.iter() { + src.push_str("(["); + for val in test.inputs.iter() { + src.push_str(&val.to_string()); + src.push_str(","); + } + src.push_str("],"); + src.push_str("["); + for val in test.outputs.iter() { + src.push_str(&val.to_string()); + src.push_str(","); + } + src.push_str("],"); + src.push_str("),"); + } + src.push_str("];"); + + src.push_str("for (test, expected) in TESTS {"); + src.push_str("let output = libm::"); + src.push_str(&function.name); + src.push_str("("); + for (i, arg) in function.args.iter().enumerate() { + src.push_str(&match arg { + Ty::F32 => format!("f32::from_bits(test[{}] as u32)", i), + Ty::F64 => format!("f64::from_bits(test[{}] as u64)", i), + Ty::I32 => format!("test[{}] as i32", i), + Ty::Bool => format!("test[{}] as i32", i), + }); + src.push_str(","); + } + src.push_str(");"); + + for (i, ret) in function.ret.iter().enumerate() { + let get = if function.ret.len() == 1 { + String::new() + } else { + format!(".{}", i) + }; + src.push_str(&(match ret { + Ty::F32 => format!("if libm::_eqf(output{}, f32::from_bits(expected[{}] as u32)).is_ok() {{ continue }}", get, i), + Ty::F64 => format!("if libm::_eq(output{}, f64::from_bits(expected[{}] as u64)).is_ok() {{ continue }}", get, i), + Ty::I32 => format!("if output{} as i64 == expected[{}] {{ continue }}", get, i), + Ty::Bool => unreachable!(), + })); + } + + src.push_str( + r#" + panic!("INPUT: {:?} EXPECTED: {:?} ACTUAL {:?}", test, expected, output); + "#, + ); + src.push_str("}"); + + src.push_str("}"); + } + + let path = 
format!("{}/musl-tests.rs", dst); + fs::write(&path, src).unwrap(); + + // Try to make it somewhat pretty + drop(Command::new("rustfmt").arg(&path).status()); + } +} diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/libm/crates/libm-test/src/lib.rs @@ -0,0 +1 @@ + diff --git a/libm/crates/libm-test/tests/musl_biteq.rs b/libm/crates/libm-test/tests/musl_biteq.rs new file mode 100644 index 000000000..46d4f3563 --- /dev/null +++ b/libm/crates/libm-test/tests/musl_biteq.rs @@ -0,0 +1,4 @@ +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[cfg(all(test, feature = "musl-reference-tests"))] +include!(concat!(env!("OUT_DIR"), "/musl-tests.rs")); diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 1f23ef8a8..23885ecf8 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -53,8 +53,3 @@ pub fn _eq(a: f64, b: f64) -> Result<(), u64> { } } } - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(all(test, feature = "musl-reference-tests"))] -include!(concat!(env!("OUT_DIR"), "/musl-tests.rs")); From 727e350f45ae71c703d1f666d011ff3fa983824a Mon Sep 17 00:00:00 2001 From: "Enzo \"raskyld\" Nocera" Date: Sun, 6 Oct 2024 03:45:06 +0200 Subject: [PATCH 0860/1459] fix(int): avoid infinite recursion on left shift Please, see this discussion for the full context: https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler/topic/.5Bwasm32.5D.20Infinite.20recursion.20.60compiler-builtins.60.20.60__multi3.60 Signed-off-by: Enzo "raskyld" Nocera We determined that some recursion problems on SPARC and WASM were due to infinite recursion. This was introduced at 9c6fcb56e8 ("Split Int into Int and MinInt") when moving the implementation of `widen_hi` from something on each `impl` block to a default on the trait.
The reasoning is not fully understood, but undoing this portion of the change seems to resolve the issue. [ add the above context - Trevor ] Signed-off-by: Trevor Gross --- src/int/big.rs | 8 ++++++++ src/int/mod.rs | 7 ++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/int/big.rs b/src/int/big.rs index e565da897..0ef3caaed 100644 --- a/src/int/big.rs +++ b/src/int/big.rs @@ -222,6 +222,10 @@ impl HInt for u128 { fn widen_mul(self, rhs: Self) -> Self::D { self.zero_widen_mul(rhs) } + + fn widen_hi(self) -> Self::D { + self.widen() << <Self as MinInt>::BITS + } } impl HInt for i128 { @@ -247,6 +251,10 @@ impl HInt for i128 { fn widen_mul(self, rhs: Self) -> Self::D { unimplemented!("signed i128 widening multiply is not used") } + + fn widen_hi(self) -> Self::D { + self.widen() << <Self as MinInt>::BITS + } } impl DInt for u256 { diff --git a/src/int/mod.rs b/src/int/mod.rs index 5f56c6b6e..c7ca45e71 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -319,9 +319,7 @@ pub(crate) trait HInt: Int { /// around problems with associated type bounds (such as `Int<Othersign: DInt>`) being unstable fn zero_widen(self) -> Self::D; /// Widens the integer to have double bit width and shifts the integer into the higher bits - fn widen_hi(self) -> Self::D { - self.widen() << <Self as MinInt>::BITS - } + fn widen_hi(self) -> Self::D; /// Widening multiplication with zero widening. This cannot overflow. fn zero_widen_mul(self, rhs: Self) -> Self::D; /// Widening multiplication. This cannot overflow. @@ -364,6 +362,9 @@ macro_rules! 
impl_h_int { fn widen_mul(self, rhs: Self) -> Self::D { self.widen().wrapping_mul(rhs.widen()) } + fn widen_hi(self) -> Self::D { + (self as $X) << <Self as MinInt>::BITS + } } )* }; From 87077a93dab529569fd7a8685148bb9851142aef Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 5 Oct 2024 20:55:47 -0500 Subject: [PATCH 0861/1459] Add a note about avoiding default implementations in some places Link: https://github.com/rust-lang/compiler-builtins/pull/707 --- src/int/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/int/mod.rs b/src/int/mod.rs index c7ca45e71..e6f31c530 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -313,6 +313,10 @@ pub(crate) trait HInt: Int { /// Integer that is double the bit width of the integer this trait is implemented for type D: DInt<H = Self> + MinInt; + // NB: some of the below methods could have default implementations (e.g. `widen_hi`), but for + // unknown reasons this can cause infinite recursion when optimizations are disabled. See + // <https://github.com/rust-lang/compiler-builtins/pull/707> for context. + /// Widens (using default extension) the integer to have double bit width fn widen(self) -> Self::D; /// Widens (zero extension only) the integer to have double bit width.
This is needed to get From ad1dbeb336557ce919943695d4cbae3b51de2ea9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 6 Oct 2024 02:08:00 +0000 Subject: [PATCH 0862/1459] chore: release --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index d0adf7444..34e1dcdfd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.132" +version = "0.1.133" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From bc921f848c43b041ea7ed3234ed08e5cee10c55a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 5 Oct 2024 21:42:28 -0500 Subject: [PATCH 0863/1459] Rename the `musl-reference-tests` feature to `musl-bitwise-tests` The plan is to add more test related features that could be considered "reference tests". Rename the feature here to avoid future confusion. --- libm/CONTRIBUTING.md | 4 ++-- libm/ci/run.sh | 4 ++-- libm/crates/libm-test/Cargo.toml | 2 +- libm/crates/libm-test/build.rs | 4 ++-- libm/crates/libm-test/tests/musl_biteq.rs | 4 +++- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/libm/CONTRIBUTING.md b/libm/CONTRIBUTING.md index c15c45a43..1b5235db9 100644 --- a/libm/CONTRIBUTING.md +++ b/libm/CONTRIBUTING.md @@ -7,7 +7,7 @@ in `src/lib.rs`. - Write some simple tests in your module (using `#[test]`) - Run `cargo test` to make sure it works -- Run `cargo test --features libm-test/musl-reference-tests` to compare your +- Run `cargo test --features libm-test/musl-bitwise-tests` to compare your implementation against musl's - Send us a pull request! Make sure to run `cargo fmt` on your code before sending the PR. 
Also include "closes #42" in the PR description to close the @@ -88,7 +88,7 @@ If you'd like to run tests with randomized inputs that get compared against musl itself, you'll need to be on a Linux system and then you can execute: ``` -cargo test --features libm-test/musl-reference-tests +cargo test --features libm-test/musl-bitwise-tests ``` Note that you may need to pass `--release` to Cargo if there are errors related diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 2a1ac52b1..b5d6e45f7 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -17,5 +17,5 @@ $cmd --features 'unstable' $cmd --release --features 'unstable' # also run the reference tests -$cmd --features 'unstable libm-test/musl-reference-tests' -$cmd --release --features 'unstable libm-test/musl-reference-tests' +$cmd --features 'unstable libm-test/musl-bitwise-tests' +$cmd --release --features 'unstable libm-test/musl-bitwise-tests' diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 03e55b1d9..d04452376 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -9,7 +9,7 @@ default = [] # Generate tests which are random inputs and the outputs are calculated with # musl libc. -musl-reference-tests = ["rand"] +musl-bitwise-tests = ["rand"] [dependencies] libm = { path = "../.." 
} diff --git a/libm/crates/libm-test/build.rs b/libm/crates/libm-test/build.rs index fc8f305d6..09eb38410 100644 --- a/libm/crates/libm-test/build.rs +++ b/libm/crates/libm-test/build.rs @@ -1,9 +1,9 @@ fn main() { - #[cfg(feature = "musl-reference-tests")] + #[cfg(feature = "musl-bitwise-tests")] musl_reference_tests::generate(); } -#[cfg(feature = "musl-reference-tests")] +#[cfg(feature = "musl-bitwise-tests")] mod musl_reference_tests { use rand::seq::SliceRandom; use rand::Rng; diff --git a/libm/crates/libm-test/tests/musl_biteq.rs b/libm/crates/libm-test/tests/musl_biteq.rs index 46d4f3563..1a6b71817 100644 --- a/libm/crates/libm-test/tests/musl_biteq.rs +++ b/libm/crates/libm-test/tests/musl_biteq.rs @@ -1,4 +1,6 @@ +//! compare + // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 #[cfg(not(target_arch = "powerpc64"))] -#[cfg(all(test, feature = "musl-reference-tests"))] +#[cfg(all(test, feature = "musl-bitwise-tests"))] include!(concat!(env!("OUT_DIR"), "/musl-tests.rs")); From 6ac5248901be7048905f86f87b64d87fac41e02c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 5 Oct 2024 22:24:14 -0500 Subject: [PATCH 0864/1459] Upgrade all dependencies None of these affect the distributed library. 
--- libm/Cargo.toml | 2 +- libm/crates/libm-bench/Cargo.toml | 4 ++-- libm/crates/libm-test/Cargo.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index fc2db0c20..712baee79 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -34,7 +34,7 @@ default-members = [ ] [dev-dependencies] -no-panic = "0.1.8" +no-panic = "0.1.30" # This is needed for no-panic to correctly detect the lack of panics diff --git a/libm/crates/libm-bench/Cargo.toml b/libm/crates/libm-bench/Cargo.toml index b09db339b..ac9cb83a6 100644 --- a/libm/crates/libm-bench/Cargo.toml +++ b/libm/crates/libm-bench/Cargo.toml @@ -7,8 +7,8 @@ license = "MIT OR Apache-2.0" [dependencies] libm = { path = "../..", default-features = false } -rand = "0.6.5" -paste = "0.1.5" +rand = "0.8.5" +paste = "1.0.15" [features] default = [] diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index d04452376..7c193d3bb 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -15,4 +15,4 @@ musl-bitwise-tests = ["rand"] libm = { path = "../.." 
} [build-dependencies] -rand = { version = "0.6.5", optional = true } +rand = { version = "0.8.5", optional = true } From af57f436d1fb06af3eb96c00e058d9112732a068 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 5 Oct 2024 22:28:38 -0500 Subject: [PATCH 0865/1459] Do library updates necessary with dependency upgrades --- libm/crates/libm-test/build.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libm/crates/libm-test/build.rs b/libm/crates/libm-test/build.rs index 09eb38410..c2c4b0bd2 100644 --- a/libm/crates/libm-test/build.rs +++ b/libm/crates/libm-test/build.rs @@ -185,7 +185,7 @@ mod musl_reference_tests { return match self { Ty::F32 => { - if r.gen_range(0, 20) < 1 { + if r.gen_range(0..20) < 1 { let i = *[f32::NAN, f32::INFINITY, f32::NEG_INFINITY] .choose(r) .unwrap(); @@ -195,7 +195,7 @@ mod musl_reference_tests { } } Ty::F64 => { - if r.gen_range(0, 20) < 1 { + if r.gen_range(0..20) < 1 { let i = *[f64::NAN, f64::INFINITY, f64::NEG_INFINITY] .choose(r) .unwrap(); @@ -205,7 +205,7 @@ mod musl_reference_tests { } } Ty::I32 => { - if r.gen_range(0, 10) < 1 { + if r.gen_range(0..10) < 1 { let i = *[i32::max_value(), 0, i32::min_value()].choose(r).unwrap(); i.into() } else { From 01d762bfc3ff51950ea66f9896a16ba01abc4300 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 6 Oct 2024 00:07:55 -0500 Subject: [PATCH 0866/1459] Update Ubuntu images to 24.04 We don't have any specific reason to stay on 18.04, so upgrade to the latest LTS version. 
--- libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile | 3 ++- libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile | 3 ++- libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile | 3 ++- libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile | 3 ++- libm/ci/docker/i686-unknown-linux-gnu/Dockerfile | 3 ++- libm/ci/docker/mips-unknown-linux-gnu/Dockerfile | 2 +- libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile | 3 ++- libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile | 3 ++- libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile | 2 +- libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile | 2 +- libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile | 2 +- libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile | 2 +- libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile | 3 ++- 13 files changed, 21 insertions(+), 13 deletions(-) diff --git a/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index 9e2559f4a..b6b23d865 100644 --- a/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +FROM ubuntu:24.04 + RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ diff --git a/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile index afab874bc..ff340fba4 100644 --- a/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile +++ b/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +FROM ubuntu:24.04 + RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ diff --git a/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile index 3ed3602b0..15cd6eee9 100644 --- a/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile +++ b/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile @@ -1,4 +1,5 @@ -FROM 
ubuntu:18.04 +FROM ubuntu:24.04 + RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ diff --git a/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile index 6617af155..3090a83f8 100644 --- a/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +++ b/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +FROM ubuntu:24.04 + RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ diff --git a/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile b/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile index 5783e28e1..3b0bfc0d3 100644 --- a/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +FROM ubuntu:24.04 + RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc-multilib libc6-dev ca-certificates diff --git a/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile b/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile index f47e8f522..2850e775c 100644 --- a/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:18.04 +FROM ubuntu:24.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile index 8fa77c7bd..ec23159c9 100644 --- a/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile +++ b/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +FROM ubuntu:24.04 + RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates \ diff --git a/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile index c6611d9ac..0ff1fba9c 100644 --- 
a/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile +++ b/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +FROM ubuntu:24.04 + RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates \ diff --git a/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile b/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile index 0bc695624..dfb9d0c87 100644 --- a/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:18.04 +FROM ubuntu:24.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile index 2d39fef61..532f2cb05 100644 --- a/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:18.04 +FROM ubuntu:24.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile index 653cd3511..3ce687873 100644 --- a/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:18.04 +FROM ubuntu:24.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile index 63ea9af9d..90212fe45 100644 --- a/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:18.04 +FROM ubuntu:24.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index 98000f4eb..15723ab57 
100644 --- a/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,5 @@ -FROM ubuntu:18.04 +FROM ubuntu:24.04 + RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates From 32709cb0c968eb64ceecd9387e149a539cf21be0 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 6 Oct 2024 00:12:58 -0500 Subject: [PATCH 0867/1459] Set target-specific `AR` and `CC` arguments The Rust `cc` crate reads these, so make sure they are set for when we start making use of `cc`. --- libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile | 6 +++++- libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile | 6 +++++- libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile | 6 +++++- libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile | 6 +++++- libm/ci/docker/mips-unknown-linux-gnu/Dockerfile | 5 ++++- libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile | 7 +++++-- libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile | 7 +++++-- libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile | 5 ++++- libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile | 5 ++++- libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile | 6 ++++-- libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile | 5 ++++- 11 files changed, 50 insertions(+), 14 deletions(-) diff --git a/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index b6b23d865..a7b23cb9e 100644 --- a/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -5,7 +5,11 @@ RUN apt-get update && \ gcc libc6-dev ca-certificates \ gcc-aarch64-linux-gnu libc6-dev-arm64-cross \ qemu-user-static -ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ + +ENV TOOLCHAIN_PREFIX=aarch64-linux-gnu- +ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ 
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-aarch64-static \ + AR_aarch64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_aarch64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/aarch64-linux-gnu \ RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile index ff340fba4..e070a7d93 100644 --- a/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile +++ b/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile @@ -4,7 +4,11 @@ RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ gcc-arm-linux-gnueabi libc6-dev-armel-cross qemu-user-static -ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc \ + +ENV TOOLCHAIN_PREFIX=arm-linux-gnueabi- +ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_RUNNER=qemu-arm-static \ + AR_arm_unknown_linux_gnueabi="$TOOLCHAIN_PREFIX"ar \ + CC_arm_unknown_linux_gnueabi="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/arm-linux-gnueabi \ RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile index 15cd6eee9..29f1e04a9 100644 --- a/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile +++ b/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile @@ -4,7 +4,11 @@ RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ gcc-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-user-static -ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ + +ENV TOOLCHAIN_PREFIX=arm-linux-gnueabihf- +ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER=qemu-arm-static \ + AR_arm_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"ar \ + CC_arm_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf \ 
RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile index 3090a83f8..0a30801b4 100644 --- a/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +++ b/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile @@ -4,7 +4,11 @@ RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ gcc-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-user-static -ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ + +ENV TOOLCHAIN_PREFIX=arm-linux-gnueabihf- +ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER=qemu-arm-static \ + AR_armv7_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"ar \ + CC_armv7_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf \ RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile b/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile index 2850e775c..298208c92 100644 --- a/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile @@ -6,7 +6,10 @@ RUN apt-get update && \ gcc-mips-linux-gnu libc6-dev-mips-cross \ binfmt-support qemu-user-static qemu-system-mips -ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \ +ENV TOOLCHAIN_PREFIX=mips-linux-gnu- +ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER=qemu-mips-static \ + AR_mips_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_mips_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/mips-linux-gnu \ RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile index ec23159c9..101b3853e 100644 --- a/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile +++ 
b/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile @@ -9,8 +9,11 @@ RUN apt-get update && \ libc6-dev-mips64-cross \ qemu-user-static \ qemu-system-mips -ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \ + +ENV TOOLCHAIN_PREFIX=mips64-linux-gnuabi64- +ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER=qemu-mips64-static \ - CC_mips64_unknown_linux_gnuabi64=mips64-linux-gnuabi64-gcc \ + AR_mips64_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"ar \ + CC_mips64_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/mips64-linux-gnuabi64 \ RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile index 0ff1fba9c..0eb14f9ac 100644 --- a/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile +++ b/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile @@ -8,8 +8,11 @@ RUN apt-get update && \ libc6-dev \ libc6-dev-mips64el-cross \ qemu-user-static -ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc \ + +ENV TOOLCHAIN_PREFIX=mips64el-linux-gnuabi64- +ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER=qemu-mips64el-static \ - CC_mips64el_unknown_linux_gnuabi64=mips64el-linux-gnuabi64-gcc \ + AR_mips64el_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"ar \ + CC_mips64el_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/mips64el-linux-gnuabi64 \ RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile b/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile index dfb9d0c87..1b9817cfe 100644 --- a/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile @@ -6,7 +6,10 @@ RUN apt-get update && \ gcc-mipsel-linux-gnu libc6-dev-mipsel-cross \ binfmt-support 
qemu-user-static -ENV CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_GNU_LINKER=mipsel-linux-gnu-gcc \ +ENV TOOLCHAIN_PREFIX=mipsel-linux-gnu- +ENV CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_GNU_RUNNER=qemu-mipsel-static \ + AR_mipsel_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_mipsel_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/mipsel-linux-gnu \ RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile index 532f2cb05..1ea2e30a2 100644 --- a/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile @@ -6,7 +6,10 @@ RUN apt-get update && \ gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \ qemu-system-ppc -ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \ +ENV TOOLCHAIN_PREFIX=powerpc-linux-gnu- +ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc-static \ + AR_powerpc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_powerpc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/powerpc-linux-gnu \ RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile index 3ce687873..373814bca 100644 --- a/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile @@ -6,8 +6,10 @@ RUN apt-get update && \ gcc-powerpc64-linux-gnu libc6-dev-ppc64-cross \ binfmt-support qemu-user-static qemu-system-ppc -ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc \ +ENV TOOLCHAIN_PREFIX=powerpc64-linux-gnu- +ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64-static \ - CC_powerpc64_unknown_linux_gnu=powerpc64-linux-gnu-gcc \ + 
AR_powerpc64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_powerpc64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/powerpc64-linux-gnu \ RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile index 90212fe45..403bb1d95 100644 --- a/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile @@ -6,8 +6,11 @@ RUN apt-get update && \ gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \ qemu-system-ppc -ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc \ +ENV TOOLCHAIN_PREFIX=powerpc64le-linux-gnu- +ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64le-static \ + AR_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ QEMU_CPU=POWER8 \ QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu \ RUST_TEST_THREADS=1 From 6ace4510e1d8c1bdee262c6bb3c3e98220a17fb7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 6 Oct 2024 13:44:25 -0500 Subject: [PATCH 0868/1459] Fix shellcheck warnings in scripts --- libm/ci/run-docker.sh | 24 ++++++++++++++---------- libm/ci/run.sh | 9 +++++---- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/libm/ci/run-docker.sh b/libm/ci/run-docker.sh index 8d323634a..9191a17e2 100755 --- a/libm/ci/run-docker.sh +++ b/libm/ci/run-docker.sh @@ -1,36 +1,40 @@ +#!/bin/bash + # Small script to run tests for a target (or all targets) inside all the # respective docker images. 
-set -ex +set -euxo pipefail run() { local target=$1 - echo $target + echo "testing target: $target" # This directory needs to exist before calling docker, otherwise docker will create it but it # will be owned by root mkdir -p target - docker build -t $target ci/docker/$target + docker build -t "$target" "ci/docker/$target" docker run \ --rm \ - --user $(id -u):$(id -g) \ + --user "$(id -u):$(id -g)" \ -e RUSTFLAGS \ -e CARGO_HOME=/cargo \ -e CARGO_TARGET_DIR=/target \ - -v "${HOME}/.cargo":/cargo \ - -v `pwd`/target:/target \ - -v `pwd`:/checkout:ro \ - -v `rustc --print sysroot`:/rust:ro \ + -v "${HOME}/.cargo:/cargo" \ + -v "$(pwd)/target:/target" \ + -v "$(pwd):/checkout:ro" \ + -v "$(rustc --print sysroot):/rust:ro" \ --init \ -w /checkout \ - $target \ + "$target" \ sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/run.sh $target" } if [ -z "$1" ]; then - for d in `ls ci/docker/`; do + echo "running tests for all targets" + + for d in ci/docker/*; do run $d done else diff --git a/libm/ci/run.sh b/libm/ci/run.sh index b5d6e45f7..1b016cc4f 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -1,9 +1,10 @@ -#!/usr/bin/env sh +#!/bin/sh -set -ex -TARGET=$1 +set -eux -cmd="cargo test --all --target $TARGET" +target="$1" + +cmd="cargo test --all --target $target" # Needed for no-panic to correct detect a lack of panics export RUSTFLAGS="$RUSTFLAGS -Ccodegen-units=1" From 06058d39c701af0def25d63f75d91c7ed2ee40be Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 7 Oct 2024 00:18:41 -0500 Subject: [PATCH 0869/1459] Set edition to 2021 for all crates --- libm/Cargo.toml | 2 +- libm/crates/compiler-builtins-smoke-test/Cargo.toml | 1 + libm/crates/compiler-builtins-smoke-test/src/lib.rs | 2 ++ libm/crates/libm-bench/Cargo.toml | 2 +- 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 712baee79..181000f34 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -9,7 +9,7 @@ name = "libm" readme = "README.md" repository = 
"https://github.com/rust-lang/libm" version = "0.2.8" -edition = "2018" +edition = "2021" exclude = ["/ci/", "/.github/workflows/"] [features] diff --git a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml index 481d386a4..4bc62304a 100644 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -2,6 +2,7 @@ name = "cb" version = "0.1.0" authors = ["Jorge Aparicio "] +edition = "2021" [lib] test = false diff --git a/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/libm/crates/compiler-builtins-smoke-test/src/lib.rs index ab744c45b..e65cb8da3 100644 --- a/libm/crates/compiler-builtins-smoke-test/src/lib.rs +++ b/libm/crates/compiler-builtins-smoke-test/src/lib.rs @@ -2,6 +2,8 @@ //! //! This is used to test that we can source import `libm` into the compiler-builtins crate. +#![feature(core_intrinsics)] +#![allow(internal_features)] #![allow(dead_code)] #![no_std] diff --git a/libm/crates/libm-bench/Cargo.toml b/libm/crates/libm-bench/Cargo.toml index ac9cb83a6..282752c61 100644 --- a/libm/crates/libm-bench/Cargo.toml +++ b/libm/crates/libm-bench/Cargo.toml @@ -2,7 +2,7 @@ name = "libm-bench" version = "0.1.0" authors = ["Gonzalo Brito Gadeschi "] -edition = "2018" +edition = "2021" license = "MIT OR Apache-2.0" [dependencies] From 5bce2d90eee35808b6aa395afc5af4ff859a3c9b Mon Sep 17 00:00:00 2001 From: Ben Kimock Date: Wed, 16 Oct 2024 22:12:38 -0400 Subject: [PATCH 0870/1459] Use wrapping pointer arithmetic in mem/impls.rs Add a comment (and fix a typo) --- src/mem/impls.rs | 105 +++++++++++++++++++++++++++-------------------- 1 file changed, 61 insertions(+), 44 deletions(-) diff --git a/src/mem/impls.rs b/src/mem/impls.rs index 23c9d8d32..c602a67db 100644 --- a/src/mem/impls.rs +++ b/src/mem/impls.rs @@ -1,3 +1,20 @@ +// In C and Rust it is UB to read or write to usize::MAX because if an allocation extends to the +// last byte of address space 
(there must be an allocation to do the read or write), in C computing +// its one-past-the-end pointer would be equal to NULL and in Rust computing the address of a +// trailing ZST member with a safe place projection would wrap (place projection address computation +// is non-wrapping). +// +// However, some embedded systems have special memory at usize::MAX, and need to access that +// memory. If they do that with the intrinsics provided by compiler-builtins (such as memcpy!), the +// ptr::add in these loops will wrap. And if compiler-builtins is compiled with cfg(ub_checks), +// this will fail a UB check at runtime. +// +// Since this scenario is UB, we are within our rights to hit this check and halt execution... +// But we are also within our rights to try to make it work. +// We use wrapping_add/wrapping_sub for pointer arithmetic in this module in an attempt to support +// this use. Of course this is not a guarantee that such use will work, it just means that this +// crate doing wrapping pointer arithmetic with a method that must not wrap won't be the problem if +// something does go wrong at runtime. use core::intrinsics::likely; const WORD_SIZE: usize = core::mem::size_of::<usize>(); @@ -9,7 +26,7 @@ const WORD_MASK: usize = WORD_SIZE - 1; // word-wise copy. // * The word-wise copy logic needs to perform some checks so it has some small overhead. // ensures that even on 32-bit platforms we have copied at least 8 bytes through -// word-wise copy so the saving of word-wise copy outweights the fixed overhead. +// word-wise copy so the saving of word-wise copy outweighs the fixed overhead.
const WORD_COPY_THRESHOLD: usize = if 2 * WORD_SIZE > 16 { 2 * WORD_SIZE } else { @@ -28,11 +45,11 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize { pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) { #[inline(always)] unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) { - let dest_end = dest.add(n); + let dest_end = dest.wrapping_add(n); while dest < dest_end { *dest = *src; - dest = dest.add(1); - src = src.add(1); + dest = dest.wrapping_add(1); + src = src.wrapping_add(1); } } @@ -40,12 +57,12 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) { let mut dest_usize = dest as *mut usize; let mut src_usize = src as *mut usize; - let dest_end = dest.add(n) as *mut usize; + let dest_end = dest.wrapping_add(n) as *mut usize; while dest_usize < dest_end { *dest_usize = *src_usize; - dest_usize = dest_usize.add(1); - src_usize = src_usize.add(1); + dest_usize = dest_usize.wrapping_add(1); + src_usize = src_usize.wrapping_add(1); } } @@ -53,7 +70,7 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) #[inline(always)] unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { let mut dest_usize = dest as *mut usize; - let dest_end = dest.add(n) as *mut usize; + let dest_end = dest.wrapping_add(n) as *mut usize; // Calculate the misalignment offset and shift needed to reassemble value. 
let offset = src as usize & WORD_MASK; @@ -70,7 +87,7 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) let mut prev_word = core::ptr::read_volatile(src_aligned); while dest_usize < dest_end { - src_aligned = src_aligned.add(1); + src_aligned = src_aligned.wrapping_add(1); let cur_word = *src_aligned; #[cfg(target_endian = "little")] let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift); @@ -79,7 +96,7 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) prev_word = cur_word; *dest_usize = resembled; - dest_usize = dest_usize.add(1); + dest_usize = dest_usize.wrapping_add(1); } } @@ -88,12 +105,12 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { let mut dest_usize = dest as *mut usize; let mut src_usize = src as *mut usize; - let dest_end = dest.add(n) as *mut usize; + let dest_end = dest.wrapping_add(n) as *mut usize; while dest_usize < dest_end { *dest_usize = read_usize_unaligned(src_usize); - dest_usize = dest_usize.add(1); - src_usize = src_usize.add(1); + dest_usize = dest_usize.wrapping_add(1); + src_usize = src_usize.wrapping_add(1); } } @@ -102,8 +119,8 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) // Because of n >= 2 * WORD_SIZE, dst_misalignment < n let dest_misalignment = (dest as usize).wrapping_neg() & WORD_MASK; copy_forward_bytes(dest, src, dest_misalignment); - dest = dest.add(dest_misalignment); - src = src.add(dest_misalignment); + dest = dest.wrapping_add(dest_misalignment); + src = src.wrapping_add(dest_misalignment); n -= dest_misalignment; let n_words = n & !WORD_MASK; @@ -113,8 +130,8 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) } else { copy_forward_misaligned_words(dest, src, n_words); } - dest = dest.add(n_words); - src = src.add(n_words); + dest = 
dest.wrapping_add(n_words); + src = src.wrapping_add(n_words); n -= n_words; } copy_forward_bytes(dest, src, n); @@ -126,10 +143,10 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { // as their inputs instead of pointers to the start! #[inline(always)] unsafe fn copy_backward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) { - let dest_start = dest.sub(n); + let dest_start = dest.wrapping_sub(n); while dest_start < dest { - dest = dest.sub(1); - src = src.sub(1); + dest = dest.wrapping_sub(1); + src = src.wrapping_sub(1); *dest = *src; } } @@ -138,11 +155,11 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { unsafe fn copy_backward_aligned_words(dest: *mut u8, src: *const u8, n: usize) { let mut dest_usize = dest as *mut usize; let mut src_usize = src as *mut usize; - let dest_start = dest.sub(n) as *mut usize; + let dest_start = dest.wrapping_sub(n) as *mut usize; while dest_start < dest_usize { - dest_usize = dest_usize.sub(1); - src_usize = src_usize.sub(1); + dest_usize = dest_usize.wrapping_sub(1); + src_usize = src_usize.wrapping_sub(1); *dest_usize = *src_usize; } } @@ -151,7 +168,7 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { #[inline(always)] unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { let mut dest_usize = dest as *mut usize; - let dest_start = dest.sub(n) as *mut usize; + let dest_start = dest.wrapping_sub(n) as *mut usize; // Calculate the misalignment offset and shift needed to reassemble value. 
let offset = src as usize & WORD_MASK; @@ -168,7 +185,7 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { let mut prev_word = core::ptr::read_volatile(src_aligned); while dest_start < dest_usize { - src_aligned = src_aligned.sub(1); + src_aligned = src_aligned.wrapping_sub(1); let cur_word = *src_aligned; #[cfg(target_endian = "little")] let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift; @@ -176,7 +193,7 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift; prev_word = cur_word; - dest_usize = dest_usize.sub(1); + dest_usize = dest_usize.wrapping_sub(1); *dest_usize = resembled; } } @@ -186,25 +203,25 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { let mut dest_usize = dest as *mut usize; let mut src_usize = src as *mut usize; - let dest_start = dest.sub(n) as *mut usize; + let dest_start = dest.wrapping_sub(n) as *mut usize; while dest_start < dest_usize { - dest_usize = dest_usize.sub(1); - src_usize = src_usize.sub(1); + dest_usize = dest_usize.wrapping_sub(1); + src_usize = src_usize.wrapping_sub(1); *dest_usize = read_usize_unaligned(src_usize); } } - let mut dest = dest.add(n); - let mut src = src.add(n); + let mut dest = dest.wrapping_add(n); + let mut src = src.wrapping_add(n); if n >= WORD_COPY_THRESHOLD { // Align dest // Because of n >= 2 * WORD_SIZE, dst_misalignment < n let dest_misalignment = dest as usize & WORD_MASK; copy_backward_bytes(dest, src, dest_misalignment); - dest = dest.sub(dest_misalignment); - src = src.sub(dest_misalignment); + dest = dest.wrapping_sub(dest_misalignment); + src = src.wrapping_sub(dest_misalignment); n -= dest_misalignment; let n_words = n & !WORD_MASK; @@ -214,8 +231,8 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { } else { 
copy_backward_misaligned_words(dest, src, n_words); } - dest = dest.sub(n_words); - src = src.sub(n_words); + dest = dest.wrapping_sub(n_words); + src = src.wrapping_sub(n_words); n -= n_words; } copy_backward_bytes(dest, src, n); @@ -225,10 +242,10 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) { #[inline(always)] pub unsafe fn set_bytes_bytes(mut s: *mut u8, c: u8, n: usize) { - let end = s.add(n); + let end = s.wrapping_add(n); while s < end { *s = c; - s = s.add(1); + s = s.wrapping_add(1); } } @@ -242,11 +259,11 @@ pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) { } let mut s_usize = s as *mut usize; - let end = s.add(n) as *mut usize; + let end = s.wrapping_add(n) as *mut usize; while s_usize < end { *s_usize = broadcast; - s_usize = s_usize.add(1); + s_usize = s_usize.wrapping_add(1); } } @@ -255,12 +272,12 @@ pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) { // Because of n >= 2 * WORD_SIZE, dst_misalignment < n let misalignment = (s as usize).wrapping_neg() & WORD_MASK; set_bytes_bytes(s, c, misalignment); - s = s.add(misalignment); + s = s.wrapping_add(misalignment); n -= misalignment; let n_words = n & !WORD_MASK; set_bytes_words(s, c, n_words); - s = s.add(n_words); + s = s.wrapping_add(n_words); n -= n_words; } set_bytes_bytes(s, c, n); @@ -270,8 +287,8 @@ pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) { pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 { let mut i = 0; while i < n { - let a = *s1.add(i); - let b = *s2.add(i); + let a = *s1.wrapping_add(i); + let b = *s2.wrapping_add(i); if a != b { return a as i32 - b as i32; } @@ -285,7 +302,7 @@ pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize { let mut n = 0; while *s != 0 { n += 1; - s = s.add(1); + s = s.wrapping_add(1); } n } From 15b49420fef60428d3f1ab4df7f3a2ef7c0805a3 Mon Sep 17 00:00:00 2001 From: 
"github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 17 Oct 2024 23:55:25 +0000 Subject: [PATCH 0871/1459] chore: release v0.1.134 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 34e1dcdfd..7b853dedd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.133" +version = "0.1.134" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 3dbc1eeea4f46996d59638b1751b162b70c21acd Mon Sep 17 00:00:00 2001 From: Niklas Sombert Date: Mon, 21 Oct 2024 15:35:31 +0200 Subject: [PATCH 0872/1459] Re-enable math module on i686-unknown-uefi In 9ba77d1583e6de5ab9cf7c9b82827ba8fcb9062f, this was disabled for x86 without sse2. It should be fine to re-enable it for UEFI, as explained at . --- src/lib.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index dea30a3c6..ffcd3586c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -44,10 +44,16 @@ pub mod int; // Disable for any of the following: // - x86 without sse2 due to ABI issues // - +// - but exclude UEFI since it is a soft-float target +// - // - All unix targets (linux, macos, freebsd, android, etc) // - wasm with known target_os #[cfg(not(any( - all(target_arch = "x86", not(target_feature = "sse2")), + all( + target_arch = "x86", + not(target_feature = "sse2"), + not(target_os = "uefi"), + ), unix, all(target_family = "wasm", not(target_os = "unknown")) )))] From ec63ffbd9831f25fcdc9935198a3eb2972553ab6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 23 Oct 2024 09:57:03 +0000 Subject: [PATCH 0873/1459] chore: release v0.1.135 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7b853dedd..ab966b84e 100644 --- 
a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.134" +version = "0.1.135" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From ff646778014ea1df37e9eafeac8ab6a046047a1e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 25 Oct 2024 14:18:53 -0500 Subject: [PATCH 0874/1459] Rename `Float::repr` and `Float::from_repr` `to_bits` and `from_bits` are builtin methods on float types. Rename `repr` to `to_bits` and `from_repr` to `from_bits` so this is consistent with usage that doesn't go through the trait. --- src/float/add.rs | 20 ++++++++++---------- src/float/cmp.rs | 12 ++++++------ src/float/conv.rs | 6 +++--- src/float/div.rs | 26 +++++++++++++------------- src/float/extend.rs | 6 +++--- src/float/mod.rs | 28 ++++++++++++++-------------- src/float/mul.rs | 26 +++++++++++++------------- src/float/sub.rs | 6 +++--- src/float/trunc.rs | 9 ++++----- testcrate/src/lib.rs | 4 ++-- 10 files changed, 71 insertions(+), 72 deletions(-) diff --git a/src/float/add.rs b/src/float/add.rs index bceef7b0e..ecb96264a 100644 --- a/src/float/add.rs +++ b/src/float/add.rs @@ -25,8 +25,8 @@ where let quiet_bit = implicit_bit >> 1; let qnan_rep = exponent_mask | quiet_bit; - let mut a_rep = a.repr(); - let mut b_rep = b.repr(); + let mut a_rep = a.to_bits(); + let mut b_rep = b.to_bits(); let a_abs = a_rep & abs_mask; let b_abs = b_rep & abs_mask; @@ -34,17 +34,17 @@ where if a_abs.wrapping_sub(one) >= inf_rep - one || b_abs.wrapping_sub(one) >= inf_rep - one { // NaN + anything = qNaN if a_abs > inf_rep { - return F::from_repr(a_abs | quiet_bit); + return F::from_bits(a_abs | quiet_bit); } // anything + NaN = qNaN if b_abs > inf_rep { - return F::from_repr(b_abs | quiet_bit); + return F::from_bits(b_abs | quiet_bit); } if a_abs == inf_rep { // +/-infinity + -/+infinity = qNaN - if (a.repr() ^ b.repr()) == sign_bit { - return 
F::from_repr(qnan_rep); + if (a.to_bits() ^ b.to_bits()) == sign_bit { + return F::from_bits(qnan_rep); } else { // +/-infinity + anything remaining = +/- infinity return a; @@ -60,7 +60,7 @@ where if a_abs == MinInt::ZERO { // but we need to get the sign right for zero + zero if b_abs == MinInt::ZERO { - return F::from_repr(a.repr() & b.repr()); + return F::from_bits(a.to_bits() & b.to_bits()); } else { return b; } @@ -126,7 +126,7 @@ where a_significand = a_significand.wrapping_sub(b_significand); // If a == -b, return +zero. if a_significand == MinInt::ZERO { - return F::from_repr(MinInt::ZERO); + return F::from_bits(MinInt::ZERO); } // If partial cancellation occured, we need to left-shift the result @@ -152,7 +152,7 @@ where // If we have overflowed the type, return +/- infinity: if a_exponent >= max_exponent as i32 { - return F::from_repr(inf_rep | result_sign); + return F::from_bits(inf_rep | result_sign); } if a_exponent <= 0 { @@ -185,7 +185,7 @@ where result += result & one; } - F::from_repr(result) + F::from_bits(result) } intrinsics! { diff --git a/src/float/cmp.rs b/src/float/cmp.rs index bb7d4b498..8b97a0b5c 100644 --- a/src/float/cmp.rs +++ b/src/float/cmp.rs @@ -41,8 +41,8 @@ fn cmp(a: F, b: F) -> Result { let exponent_mask = F::EXPONENT_MASK; let inf_rep = exponent_mask; - let a_rep = a.repr(); - let b_rep = b.repr(); + let a_rep = a.to_bits(); + let b_rep = b.to_bits(); let a_abs = a_rep & abs_mask; let b_abs = b_rep & abs_mask; @@ -56,8 +56,8 @@ fn cmp(a: F, b: F) -> Result { return Result::Equal; } - let a_srep = a.signed_repr(); - let b_srep = b.signed_repr(); + let a_srep = a.to_bits_signed(); + let b_srep = b.to_bits_signed(); // If at least one of a and b is positive, we get the same result comparing // a and b as signed integers as we would with a fp_ting-point compare. 
@@ -90,8 +90,8 @@ fn unord(a: F, b: F) -> bool { let exponent_mask = F::EXPONENT_MASK; let inf_rep = exponent_mask; - let a_rep = a.repr(); - let b_rep = b.repr(); + let a_rep = a.to_bits(); + let b_rep = b.to_bits(); let a_abs = a_rep & abs_mask; let b_abs = b_rep & abs_mask; diff --git a/src/float/conv.rs b/src/float/conv.rs index d275f982b..e86fee6dc 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -158,7 +158,7 @@ where F::Int: CastInto, u32: CastFrom, { - float_to_int_inner::(f.repr(), |i: U| i, || U::MAX) + float_to_int_inner::(f.to_bits(), |i: U| i, || U::MAX) } /// Generic float to signed int conversions. @@ -172,7 +172,7 @@ where u32: CastFrom, { float_to_int_inner::( - f.repr() & !F::SIGN_MASK, + f.to_bits() & !F::SIGN_MASK, |i: I| if f.is_sign_negative() { -i } else { i }, || if f.is_sign_negative() { I::MIN } else { I::MAX }, ) @@ -203,7 +203,7 @@ where let int_max_exp = F::EXPONENT_BIAS + I::MAX.ilog2() + 1; let foobar = F::EXPONENT_BIAS + I::UnsignedInt::BITS - 1; - if fbits < F::ONE.repr() { + if fbits < F::ONE.to_bits() { // < 0 gets rounded to 0 I::ZERO } else if fbits < F::Int::cast_from(int_max_exp) << F::SIGNIFICAND_BITS { diff --git a/src/float/div.rs b/src/float/div.rs index f125771a0..4b3f97c35 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -126,8 +126,8 @@ where half_iterations += 1; } - let a_rep = a.repr(); - let b_rep = b.repr(); + let a_rep = a.to_bits(); + let b_rep = b.to_bits(); // Exponent numeric representationm not accounting for bias let a_exponent = (a_rep >> significand_bits) & exponent_sat; @@ -150,42 +150,42 @@ where // NaN / anything = qNaN if a_abs > inf_rep { - return F::from_repr(a_rep | quiet_bit); + return F::from_bits(a_rep | quiet_bit); } // anything / NaN = qNaN if b_abs > inf_rep { - return F::from_repr(b_rep | quiet_bit); + return F::from_bits(b_rep | quiet_bit); } if a_abs == inf_rep { if b_abs == inf_rep { // infinity / infinity = NaN - return F::from_repr(qnan_rep); + return 
F::from_bits(qnan_rep); } else { // infinity / anything else = +/- infinity - return F::from_repr(a_abs | quotient_sign); + return F::from_bits(a_abs | quotient_sign); } } // anything else / infinity = +/- 0 if b_abs == inf_rep { - return F::from_repr(quotient_sign); + return F::from_bits(quotient_sign); } if a_abs == zero { if b_abs == zero { // zero / zero = NaN - return F::from_repr(qnan_rep); + return F::from_bits(qnan_rep); } else { // zero / anything else = +/- zero - return F::from_repr(quotient_sign); + return F::from_bits(quotient_sign); } } // anything else / zero = +/- infinity if b_abs == zero { - return F::from_repr(inf_rep | quotient_sign); + return F::from_bits(inf_rep | quotient_sign); } // a is denormal. Renormalize it and set the scale to include the necessary exponent @@ -463,7 +463,7 @@ where // // If we have overflowed the exponent, return infinity if res_exponent >= i32::cast_from(exponent_sat) { - return F::from_repr(inf_rep | quotient_sign); + return F::from_bits(inf_rep | quotient_sign); } // Now, quotient <= the correctly-rounded result @@ -476,7 +476,7 @@ where ret } else { if ((significand_bits as i32) + res_exponent) < 0 { - return F::from_repr(quotient_sign); + return F::from_bits(quotient_sign); } let ret = quotient.wrapping_shr(u32::cast_from(res_exponent.wrapping_neg()) + 1); @@ -501,7 +501,7 @@ where u8::from(abs_result < inf_rep && residual_lo > (4 + 1).cast() * b_significand).into(); } - F::from_repr(abs_result | quotient_sign) + F::from_bits(abs_result | quotient_sign) } /// Calculate the number of iterations required for a float type's precision. 
diff --git a/src/float/extend.rs b/src/float/extend.rs index 997475c8e..2ec79070c 100644 --- a/src/float/extend.rs +++ b/src/float/extend.rs @@ -32,7 +32,7 @@ where let sign_bits_delta = dst_sign_bits - src_sign_bits; let exp_bias_delta = dst_exp_bias - src_exp_bias; - let a_abs = a.repr() & src_abs_mask; + let a_abs = a.to_bits() & src_abs_mask; let mut abs_result = R::Int::ZERO; if a_abs.wrapping_sub(src_min_normal) < src_infinity.wrapping_sub(src_min_normal) { @@ -65,8 +65,8 @@ where abs_result = (abs_result ^ dst_min_normal) | (bias_dst.wrapping_shl(dst_sign_bits)); } - let sign_result: R::Int = (a.repr() & src_sign_mask).cast(); - R::from_repr(abs_result | (sign_result.wrapping_shl(dst_bits - src_bits))) + let sign_result: R::Int = (a.to_bits() & src_sign_mask).cast(); + R::from_bits(abs_result | (sign_result.wrapping_shl(dst_bits - src_bits))) } intrinsics! { diff --git a/src/float/mod.rs b/src/float/mod.rs index 704bba0c0..5eedf544f 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -70,10 +70,10 @@ pub(crate) trait Float: const EXPONENT_MASK: Self::Int; /// Returns `self` transmuted to `Self::Int` - fn repr(self) -> Self::Int; + fn to_bits(self) -> Self::Int; /// Returns `self` transmuted to `Self::SignedInt` - fn signed_repr(self) -> Self::SignedInt; + fn to_bits_signed(self) -> Self::SignedInt; /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be /// represented in multiple different ways. This method returns `true` if two NaNs are @@ -93,10 +93,10 @@ pub(crate) trait Float: fn imp_frac(self) -> Self::Int; /// Returns a `Self::Int` transmuted back to `Self` - fn from_repr(a: Self::Int) -> Self; + fn from_bits(a: Self::Int) -> Self; /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. 
- fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self; + fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self; /// Returns (normalized exponent, normalized significand) fn normalize(significand: Self::Int) -> (i32, Self::Int); @@ -124,10 +124,10 @@ macro_rules! float_impl { const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS; const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK); - fn repr(self) -> Self::Int { + fn to_bits(self) -> Self::Int { self.to_bits() } - fn signed_repr(self) -> Self::SignedInt { + fn to_bits_signed(self) -> Self::SignedInt { self.to_bits() as Self::SignedInt } fn eq_repr(self, rhs: Self) -> bool { @@ -137,8 +137,8 @@ macro_rules! float_impl { // necessary builtin (__unordtf2) to test whether `f128` is NaN. // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin // x is NaN if all the bits of the exponent are set and the significand is non-0 - x.repr() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK - && x.repr() & $ty::SIGNIFICAND_MASK != 0 + x.to_bits() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK + && x.to_bits() & $ty::SIGNIFICAND_MASK != 0 } #[cfg(not(feature = "mangled-names"))] fn is_nan(x: $ty) -> bool { @@ -147,7 +147,7 @@ macro_rules! float_impl { if is_nan(self) && is_nan(rhs) { true } else { - self.repr() == rhs.repr() + self.to_bits() == rhs.to_bits() } } fn is_sign_negative(self) -> bool { @@ -162,12 +162,12 @@ macro_rules! 
float_impl { fn imp_frac(self) -> Self::Int { self.frac() | Self::IMPLICIT_BIT } - fn from_repr(a: Self::Int) -> Self { + fn from_bits(a: Self::Int) -> Self { Self::from_bits(a) } - fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self { - Self::from_repr( - ((sign as Self::Int) << (Self::BITS - 1)) + fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self { + Self::from_bits( + ((negative as Self::Int) << (Self::BITS - 1)) | ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK) | (significand & Self::SIGNIFICAND_MASK), ) @@ -182,7 +182,7 @@ macro_rules! float_impl { ) } fn is_subnormal(self) -> bool { - (self.repr() & Self::EXPONENT_MASK) == Self::Int::ZERO + (self.to_bits() & Self::EXPONENT_MASK) == Self::Int::ZERO } } }; diff --git a/src/float/mul.rs b/src/float/mul.rs index a4c69ea87..77a271d65 100644 --- a/src/float/mul.rs +++ b/src/float/mul.rs @@ -28,8 +28,8 @@ where let qnan_rep = exponent_mask | quiet_bit; let exponent_bits = F::EXPONENT_BITS; - let a_rep = a.repr(); - let b_rep = b.repr(); + let a_rep = a.to_bits(); + let b_rep = b.to_bits(); let a_exponent = (a_rep >> significand_bits) & max_exponent.cast(); let b_exponent = (b_rep >> significand_bits) & max_exponent.cast(); @@ -48,41 +48,41 @@ where // NaN + anything = qNaN if a_abs > inf_rep { - return F::from_repr(a_rep | quiet_bit); + return F::from_bits(a_rep | quiet_bit); } // anything + NaN = qNaN if b_abs > inf_rep { - return F::from_repr(b_rep | quiet_bit); + return F::from_bits(b_rep | quiet_bit); } if a_abs == inf_rep { if b_abs != zero { // infinity * non-zero = +/- infinity - return F::from_repr(a_abs | product_sign); + return F::from_bits(a_abs | product_sign); } else { // infinity * zero = NaN - return F::from_repr(qnan_rep); + return F::from_bits(qnan_rep); } } if b_abs == inf_rep { if a_abs != zero { // infinity * non-zero = +/- infinity - return F::from_repr(b_abs | product_sign); + return F::from_bits(b_abs | product_sign); } 
else { // infinity * zero = NaN - return F::from_repr(qnan_rep); + return F::from_bits(qnan_rep); } } // zero * anything = +/- zero if a_abs == zero { - return F::from_repr(product_sign); + return F::from_bits(product_sign); } // anything * zero = +/- zero if b_abs == zero { - return F::from_repr(product_sign); + return F::from_bits(product_sign); } // one or both of a or b is denormal, the other (if applicable) is a @@ -133,7 +133,7 @@ where // If we have overflowed the type, return +/- infinity. if product_exponent >= max_exponent as i32 { - return F::from_repr(inf_rep | product_sign); + return F::from_bits(inf_rep | product_sign); } if product_exponent <= 0 { @@ -145,7 +145,7 @@ where // simplify the shift logic. let shift = one.wrapping_sub(product_exponent.cast()).cast(); if shift >= bits { - return F::from_repr(product_sign); + return F::from_bits(product_sign); } // Otherwise, shift the significand of the result so that the round @@ -176,7 +176,7 @@ where product_high += product_high & one; } - F::from_repr(product_high) + F::from_bits(product_high) } intrinsics! { diff --git a/src/float/sub.rs b/src/float/sub.rs index 7e8a89458..175b3a165 100644 --- a/src/float/sub.rs +++ b/src/float/sub.rs @@ -4,13 +4,13 @@ intrinsics! { #[avr_skip] #[arm_aeabi_alias = __aeabi_fsub] pub extern "C" fn __subsf3(a: f32, b: f32) -> f32 { - crate::float::add::__addsf3(a, f32::from_repr(b.repr() ^ f32::SIGN_MASK)) + crate::float::add::__addsf3(a, f32::from_bits(b.to_bits() ^ f32::SIGN_MASK)) } #[avr_skip] #[arm_aeabi_alias = __aeabi_dsub] pub extern "C" fn __subdf3(a: f64, b: f64) -> f64 { - crate::float::add::__adddf3(a, f64::from_repr(b.repr() ^ f64::SIGN_MASK)) + crate::float::add::__adddf3(a, f64::from_bits(b.to_bits() ^ f64::SIGN_MASK)) } #[ppc_alias = __subkf3] @@ -21,6 +21,6 @@ intrinsics! 
{ #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] use crate::float::add::__addtf3; - __addtf3(a, f128::from_repr(b.repr() ^ f128::SIGN_MASK)) + __addtf3(a, f128::from_bits(b.to_bits() ^ f128::SIGN_MASK)) } } diff --git a/src/float/trunc.rs b/src/float/trunc.rs index a25b6eabc..6fe44f50b 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -7,7 +7,6 @@ where F::Int: CastInto, u64: CastInto, u32: CastInto, - R::Int: CastInto, u32: CastInto, F::Int: CastInto, @@ -43,8 +42,8 @@ where let sign_bits_delta = F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS; // Break a into a sign and representation of the absolute value. - let a_abs = a.repr() & src_abs_mask; - let sign = a.repr() & src_sign_mask; + let a_abs = a.to_bits() & src_abs_mask; + let sign = a.to_bits() & src_sign_mask; let mut abs_result: R::Int; if a_abs.wrapping_sub(underflow) < a_abs.wrapping_sub(overflow) { @@ -87,7 +86,7 @@ where let a_exp: u32 = (a_abs >> F::SIGNIFICAND_BITS).cast(); let shift = src_exp_bias - dst_exp_bias - a_exp + 1; - let significand = (a.repr() & src_significand_mask) | src_min_normal; + let significand = (a.to_bits() & src_significand_mask) | src_min_normal; // Right shift by the denormalization amount with sticky. if shift > F::SIGNIFICAND_BITS { @@ -114,7 +113,7 @@ where } // Apply the signbit to the absolute value. - R::from_repr(abs_result | sign.wrapping_shr(src_bits - dst_bits).cast()) + R::from_bits(abs_result | sign.wrapping_shr(src_bits - dst_bits).cast()) } intrinsics! 
{ diff --git a/testcrate/src/lib.rs b/testcrate/src/lib.rs index cc9e73938..58419bf1b 100644 --- a/testcrate/src/lib.rs +++ b/testcrate/src/lib.rs @@ -190,7 +190,7 @@ fn fuzz_float_step(rng: &mut Xoshiro128StarStar, f: &mut F) { let tmp = ones.wrapping_shr(r0); (tmp.wrapping_shl(r1) | tmp.wrapping_shr(F::EXPONENT_BITS - r1)) & ones }; - let mut exp = (f.repr() & F::EXPONENT_MASK) >> F::SIGNIFICAND_BITS; + let mut exp = (f.to_bits() & F::EXPONENT_MASK) >> F::SIGNIFICAND_BITS; match (rng32 >> 9) % 4 { 0 => exp |= mask, 1 => exp &= mask, @@ -198,7 +198,7 @@ fn fuzz_float_step(rng: &mut Xoshiro128StarStar, f: &mut F) { } // significand fuzzing - let mut sig = f.repr() & F::SIGNIFICAND_MASK; + let mut sig = f.to_bits() & F::SIGNIFICAND_MASK; fuzz_step(rng, &mut sig); sig &= F::SIGNIFICAND_MASK; From 64f131a58e2b59de202f9008f79c584e9b3adad4 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 25 Oct 2024 14:30:03 -0500 Subject: [PATCH 0875/1459] Add an `abs` function to the `Float` trait There is no in-crate use for this yet, but we will make use of it in `libm`. --- src/float/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/float/mod.rs b/src/float/mod.rs index 5eedf544f..af8398644 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -98,6 +98,11 @@ pub(crate) trait Float: /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. 
fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self; + fn abs(self) -> Self { + let abs_mask = !Self::SIGN_MASK ; + Self::from_bits(self.to_bits() & abs_mask) + } + /// Returns (normalized exponent, normalized significand) fn normalize(significand: Self::Int) -> (i32, Self::Int); From 2df2813257f8ed340f1a5928f3a2ab5321e09deb Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 25 Oct 2024 20:57:19 -0400 Subject: [PATCH 0876/1459] Rename the `musl-bitwise-tests` feature to `test-musl-serialized` We will have more test features in the near future, and it would be nice for them all to have a common `test-` prefix. Reverse the existing feature so this is the case. --- libm/CONTRIBUTING.md | 8 ++++---- libm/ci/run.sh | 4 ++-- libm/crates/libm-test/Cargo.toml | 2 +- libm/crates/libm-test/build.rs | 4 ++-- libm/crates/libm-test/tests/musl_biteq.rs | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/libm/CONTRIBUTING.md b/libm/CONTRIBUTING.md index 1b5235db9..a39623696 100644 --- a/libm/CONTRIBUTING.md +++ b/libm/CONTRIBUTING.md @@ -7,7 +7,7 @@ in `src/lib.rs`. - Write some simple tests in your module (using `#[test]`) - Run `cargo test` to make sure it works -- Run `cargo test --features libm-test/musl-bitwise-tests` to compare your +- Run `cargo test --features libm-test/test-musl-serialized` to compare your implementation against musl's - Send us a pull request! Make sure to run `cargo fmt` on your code before sending the PR. 
Also include "closes #42" in the PR description to close the @@ -80,15 +80,15 @@ let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 12 Normal tests can be executed with: -``` +```sh cargo test ``` If you'd like to run tests with randomized inputs that get compared against musl itself, you'll need to be on a Linux system and then you can execute: -``` -cargo test --features libm-test/musl-bitwise-tests +```sh +cargo test --features libm-test/test-musl-serialized ``` Note that you may need to pass `--release` to Cargo if there are errors related diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 1b016cc4f..505e25891 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -18,5 +18,5 @@ $cmd --features 'unstable' $cmd --release --features 'unstable' # also run the reference tests -$cmd --features 'unstable libm-test/musl-bitwise-tests' -$cmd --release --features 'unstable libm-test/musl-bitwise-tests' +$cmd --features 'unstable libm-test/test-musl-serialized' +$cmd --release --features 'unstable libm-test/test-musl-serialized' diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 7c193d3bb..6367bdca5 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -9,7 +9,7 @@ default = [] # Generate tests which are random inputs and the outputs are calculated with # musl libc. -musl-bitwise-tests = ["rand"] +test-musl-serialized = ["rand"] [dependencies] libm = { path = "../.." 
} diff --git a/libm/crates/libm-test/build.rs b/libm/crates/libm-test/build.rs index c2c4b0bd2..3a49e3c57 100644 --- a/libm/crates/libm-test/build.rs +++ b/libm/crates/libm-test/build.rs @@ -1,9 +1,9 @@ fn main() { - #[cfg(feature = "musl-bitwise-tests")] + #[cfg(feature = "test-musl-serialized")] musl_reference_tests::generate(); } -#[cfg(feature = "musl-bitwise-tests")] +#[cfg(feature = "test-musl-serialized")] mod musl_reference_tests { use rand::seq::SliceRandom; use rand::Rng; diff --git a/libm/crates/libm-test/tests/musl_biteq.rs b/libm/crates/libm-test/tests/musl_biteq.rs index 1a6b71817..f586fd03d 100644 --- a/libm/crates/libm-test/tests/musl_biteq.rs +++ b/libm/crates/libm-test/tests/musl_biteq.rs @@ -2,5 +2,5 @@ // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 #[cfg(not(target_arch = "powerpc64"))] -#[cfg(all(test, feature = "musl-bitwise-tests"))] +#[cfg(all(test, feature = "test-musl-serialized"))] include!(concat!(env!("OUT_DIR"), "/musl-tests.rs")); From 59a2ab7c58541c55f6a7a85ac6b44fd6487c1cc1 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Oct 2024 00:54:26 -0500 Subject: [PATCH 0877/1459] Don't deny warnings in lib.rs Having `#![deny(warnings)]` for the entire crate is a bit of a development annoyance. We already run CI with `RUSTFLAGS=-Dwarnings` so there isn't much of a reason to check this locally. Thus, remove the attribute. Additionally, sort the clippy allows. --- libm/src/lib.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 23885ecf8..04f4ac0f2 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -1,17 +1,16 @@ //! 
libm in pure Rust -#![deny(warnings)] #![no_std] #![cfg_attr(feature = "unstable", allow(internal_features))] #![cfg_attr(feature = "unstable", feature(core_intrinsics))] -#![allow(clippy::unreadable_literal)] -#![allow(clippy::many_single_char_names)] -#![allow(clippy::needless_return)] -#![allow(clippy::int_plus_one)] +#![allow(clippy::assign_op_pattern)] #![allow(clippy::deprecated_cfg_attr)] -#![allow(clippy::mixed_case_hex_literals)] -#![allow(clippy::float_cmp)] #![allow(clippy::eq_op)] -#![allow(clippy::assign_op_pattern)] +#![allow(clippy::float_cmp)] +#![allow(clippy::int_plus_one)] +#![allow(clippy::many_single_char_names)] +#![allow(clippy::mixed_case_hex_literals)] +#![allow(clippy::needless_return)] +#![allow(clippy::unreadable_literal)] mod libm_helper; mod math; From a6b19b29bee6f995800d4c9d7464bb3549a91834 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 24 Oct 2024 18:53:46 -0500 Subject: [PATCH 0878/1459] Update licensing to MIT AND (MIT OR Apache-2.0) Currently both Cargo.toml and the license files indicate that this library may be used under either MIT or Apache-2.0. However, this is not accurate; since portions of this library were derived from musl libc, which is available under the MIT license, the terms of use for this library must also include use under the MIT license. That is, it is not correct that this library may be used under only the Apache-2.0 license. Update the SPDX license identifier to `MIT AND (MIT OR Apache-2.0)` to indicate that use must include the MIT license, but to clarify that contributions are made under `MIT OR Apache-2.0`. This is compatible with the current state of this repository since it has always contained both license files, and the `Cargo.toml` license field has indicated `MIT OR Apache-2.0` since it was added. In accordance with the above, replace the two license files with a combined LICENSE.txt that makes these terms clear and gives attribution to works from which this library is derived.
Fixes: https://github.com/rust-lang/libm/issues/215 Link: https://rust-lang.zulipchat.com/#narrow/channel/335408-foundation/topic/Request.20for.20legal.20team.20input.20on.20crate.20licensing Link: https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT --- libm/Cargo.toml | 2 +- libm/LICENSE-APACHE | 201 ---------------------------------- libm/LICENSE-MIT | 25 ----- libm/LICENSE.txt | 258 ++++++++++++++++++++++++++++++++++++++++++++ libm/README.md | 19 ++-- 5 files changed, 269 insertions(+), 236 deletions(-) delete mode 100644 libm/LICENSE-APACHE delete mode 100644 libm/LICENSE-MIT create mode 100644 libm/LICENSE.txt diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 181000f34..9ae8bd841 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -4,7 +4,7 @@ categories = ["no-std"] description = "libm in pure Rust" documentation = "https://docs.rs/libm" keywords = ["libm", "math"] -license = "MIT OR Apache-2.0" +license = "MIT AND (MIT OR Apache-2.0)" name = "libm" readme = "README.md" repository = "https://github.com/rust-lang/libm" diff --git a/libm/LICENSE-APACHE b/libm/LICENSE-APACHE deleted file mode 100644 index 16fe87b06..000000000 --- a/libm/LICENSE-APACHE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/libm/LICENSE-MIT b/libm/LICENSE-MIT deleted file mode 100644 index 432fbea04..000000000 --- a/libm/LICENSE-MIT +++ /dev/null @@ -1,25 +0,0 @@ -Copyright (c) 2018 Jorge Aparicio - -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
diff --git a/libm/LICENSE.txt b/libm/LICENSE.txt new file mode 100644 index 000000000..2f8e41f14 --- /dev/null +++ b/libm/LICENSE.txt @@ -0,0 +1,258 @@ +rust-lang/libm as a whole is available for use under the MIT license: + +------------------------------------------------------------------------------ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ + +As a contributor, you agree that your code can be used under either the MIT +license or the Apache-2.0 license: + +------------------------------------------------------------------------------ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +------------------------------------------------------------------------------ + +This Rust library contains the following copyrights: + + Copyright (c) 2018 Jorge Aparicio + +Portions of this software are derived from third-party works licensed under +terms compatible with the above MIT license: + +* musl libc https://www.musl-libc.org/. This library contains the following + copyright: + + Copyright © 2005-2020 Rich Felker, et al. + +* The CORE-MATH project https://core-math.gitlabpages.inria.fr/. CORE-MATH + routines are available under the MIT license on a per-file basis. 
+ +The musl libc COPYRIGHT file also includes the following notice relevant to +math portions of the library: + +------------------------------------------------------------------------------ +Much of the math library code (src/math/* and src/complex/*) is +Copyright © 1993,2004 Sun Microsystems or +Copyright © 2003-2011 David Schultz or +Copyright © 2003-2009 Steven G. Kargl or +Copyright © 2003-2009 Bruce D. Evans or +Copyright © 2008 Stephen L. Moshier or +Copyright © 2017-2018 Arm Limited +and labelled as such in comments in the individual source files. All +have been licensed under extremely permissive terms. +------------------------------------------------------------------------------ + +Copyright notices are retained in src/* files where relevant. diff --git a/libm/README.md b/libm/README.md index b864b5df8..24ee3d1c1 100644 --- a/libm/README.md +++ b/libm/README.md @@ -36,16 +36,17 @@ Please check [CONTRIBUTING.md](CONTRIBUTING.md) ## License -Licensed under either of +Usage is licensed under the MIT license ([LICENSE-MIT](LICENSE-MIT) or +http://opensource.org/licenses/MIT). -- Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or - http://www.apache.org/licenses/LICENSE-2.0) -- MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) - -at your option. ### Contribution -Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the -work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any -additional terms or conditions. +Contributions are licensed under both the MIT license and the Apache License, +Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or +http://www.apache.org/licenses/LICENSE-2.0). Unless you explicitly state +otherwise, any contribution intentionally submitted for inclusion in the work +by you, as defined in the Apache-2.0 license, shall be dual licensed as +mentioned, without any additional terms or conditions. 
+ +See `LICENSE.txt` for full details. From 8646087b4aef1210f0d3dbf816def3a79ac3d8db Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 24 Oct 2024 20:13:56 -0500 Subject: [PATCH 0879/1459] Update licensing to MIT AND Apache-2.0 WITH LLVM-exception Currently, Cargo.toml specifies Apache-2.0 OR MIT, but LICENSE.txt describes MIT OR NCSA. compiler-builtins is derived from LLVM's compiler-rt. LICENSE.txt correctly reflects the state of compiler-rt prior to relicensing on 2019-01-19, during which time software was available for use under either MIT or the University of Illinois NCSA license. After relicensing, however, compiler-rt is available for use only under Apache-2.0 with the LLVM exception; this is not reflected anywhere in the repository. Update the SPDX license identifier to `MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)`. Each AND portion covers something specific: * Apache-2.0 WITH LLVM-exception: this covers work that is derived from the LLVM repository after the LLVM relicensing. * MIT: This covers work that is derived from LLVM before the LLVM relicensing (under MIT OR NCSA), as well as the vendored `libm` components. * MIT OR Apache-2.0: This ensures that any contributions to this repository, in addition to meeting the above required licenses, are also released for use under the Rust-standard Apache-2.0 with no LLVM exception. See also the parallel license update in rust-lang/libm [1]. 
Fixes: https://github.com/rust-lang/compiler-builtins/issues/307 Closes: https://github.com/rust-lang/compiler-builtins/pull/511 Link: https://rust-lang.zulipchat.com/#narrow/channel/335408-foundation/topic/Request.20for.20legal.20team.20input.20on.20crate.20licensing Link: https://github.com/rust-lang/libm/pull/317 [1] --- Cargo.toml | 2 +- LICENSE.txt | 317 ++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 249 insertions(+), 70 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ab966b84e..c29fe6afe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ authors = ["Jorge Aparicio "] name = "compiler_builtins" version = "0.1.135" -license = "MIT/Apache-2.0" +license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" homepage = "https://github.com/rust-lang/compiler-builtins" diff --git a/LICENSE.txt b/LICENSE.txt index 92bbe113a..367e3538d 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,91 +1,270 @@ -============================================================================== -compiler-builtins License -============================================================================== +compiler-builtins as a whole is available for use under both the MIT license +and the Apache-2.0 license with the LLVM exception (MIT AND Apache-2.0 WITH +LLVM-exception). -The compiler-builtins crate is dual licensed under both the University of -Illinois "BSD-Like" license and the MIT license. As a user of this code you may -choose to use it under either license. As a contributor, you agree to allow -your code to be used under both. +As a contributor, you agree that your code can be used under either the MIT +license, or the Apache-2.0 license, or the Apache-2.0 license with the LLVM +exception. -Full text of the relevant licenses is included below. 
+Text of the relevant licenses is provided below: -============================================================================== +------------------------------------------------------------------------------ +MIT License -University of Illinois/NCSA -Open Source License +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: -Copyright (c) 2009-2016 by the contributors listed in CREDITS.TXT +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. -All rights reserved. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ -Developed by: + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - LLVM Team + 1. Definitions. - University of Illinois at Urbana-Champaign + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. - http://llvm.org + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
-Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. 
+ "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). -============================================================================== + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. -Copyright (c) 2009-2015 by the contributors listed in CREDITS.TXT + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - -============================================================================== -Copyrights and Licenses for Third Party Software Distributed with LLVM: -============================================================================== -The LLVM software contains code written by third parties. Such software will -have its own individual LICENSE.TXT file in the directory in which it appears. -This file will describe the copyrights, license, and restrictions which apply -to that code. - -The disclaimer of warranty in the University of Illinois Open Source License -applies to all code in the LLVM Distribution, and nothing in any of the -other licenses gives permission to use the names of the LLVM Team or the -University of Illinois to endorse or promote products derived from this + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined Software. +------------------------------------------------------------------------------ + +Portions of this software are derived from third-party works licensed under +terms compatible with the above Apache-2.0 WITH LLVM-exception AND MIT +license: + +* compiler-builtins is derived from LLVM's compiler-rt (https://llvm.org/). 
+ Work derived from compiler-rt prior to 2019-01-19 is usable under the MIT + license, with the following copyright: + + Copyright (c) 2009-2016 by the contributors listed in CREDITS.TXT + + The relevant CREDITS.TXT is located at + https://github.com/llvm/llvm-project/blob/main/compiler-rt/CREDITS.TXT. + +* Work derived from compiler-rt after 2019-01-19 is usable under the + Apache-2.0 license with the LLVM exception. + +* The bundled `math` module is from rust-lang/libm, usable under the MIT + license. See https://github.com/rust-lang/libm for details. +Additionally, some source files may contain comments with specific copyrights +or licenses. From 1055ed3b08eeb7c1730d2eb1231d4ff960519ca4 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Oct 2024 17:57:24 -0500 Subject: [PATCH 0880/1459] Add release-plz for automated releases --- libm/.github/workflows/publish.yml | 29 +++++++++++++++++++ .../compiler-builtins-smoke-test/Cargo.toml | 1 + libm/crates/libm-bench/Cargo.toml | 1 + 3 files changed, 31 insertions(+) create mode 100644 libm/.github/workflows/publish.yml diff --git a/libm/.github/workflows/publish.yml b/libm/.github/workflows/publish.yml new file mode 100644 index 000000000..1241181a0 --- /dev/null +++ b/libm/.github/workflows/publish.yml @@ -0,0 +1,29 @@ +name: Release-plz + +permissions: + pull-requests: write + contents: write + +on: + push: + branches: + - master + +jobs: + release-plz: + name: Release-plz + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Install Rust (rustup) + run: rustup update nightly --no-self-update && rustup default nightly + - name: Publish `libm` as part of builtins, rather than its own crate + run: rm libm/Cargo.toml + - name: Run release-plz + uses: MarcoIeni/release-plz-action@v0.5 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} diff --git 
a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml index 4bc62304a..8d084ee34 100644 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -3,6 +3,7 @@ name = "cb" version = "0.1.0" authors = ["Jorge Aparicio "] edition = "2021" +publish = false [lib] test = false diff --git a/libm/crates/libm-bench/Cargo.toml b/libm/crates/libm-bench/Cargo.toml index 282752c61..ee8c58200 100644 --- a/libm/crates/libm-bench/Cargo.toml +++ b/libm/crates/libm-bench/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" authors = ["Gonzalo Brito Gadeschi "] edition = "2021" license = "MIT OR Apache-2.0" +publish = false [dependencies] libm = { path = "../..", default-features = false } From 7a192682979a431f58a0dd33ee93ba0a37ba13df Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Oct 2024 18:01:56 -0500 Subject: [PATCH 0881/1459] Fix release-plz workflow configuration --- libm/.github/workflows/publish.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/libm/.github/workflows/publish.yml b/libm/.github/workflows/publish.yml index 1241181a0..e715c6187 100644 --- a/libm/.github/workflows/publish.yml +++ b/libm/.github/workflows/publish.yml @@ -20,8 +20,6 @@ jobs: fetch-depth: 0 - name: Install Rust (rustup) run: rustup update nightly --no-self-update && rustup default nightly - - name: Publish `libm` as part of builtins, rather than its own crate - run: rm libm/Cargo.toml - name: Run release-plz uses: MarcoIeni/release-plz-action@v0.5 env: From 9cb4c4e056e9c424b9093efba437b7723c59f1d4 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 26 Oct 2024 23:04:46 +0000 Subject: [PATCH 0882/1459] chore: release v0.2.9 --- libm/CHANGELOG.md | 39 ++++++++++++++++++++++++++++++++++++--- libm/Cargo.toml | 2 +- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md 
index e8e9acf9b..36ed41d6b 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -1,20 +1,50 @@ -# Change Log +# Changelog All notable changes to this project will be documented in this file. -This project adheres to [Semantic Versioning](http://semver.org/). + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to +[Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] -... +## [0.2.9](https://github.com/rust-lang/libm/compare/libm-v0.2.8...libm-v0.2.9) - 2024-10-26 + +### Fixed + +- Update exponent calculations in nextafter to match musl + +### Changed + +- Update licensing to MIT AND (MIT OR Apache-2.0), as this is derivative from + MIT-licensed musl. +- Set edition to 2021 for all crates +- Upgrade all dependencies + +### Other + +- Don't deny warnings in lib.rs +- Rename the `musl-bitwise-tests` feature to `test-musl-serialized` +- Rename the `musl-reference-tests` feature to `musl-bitwise-tests` +- Move `musl-reference-tests` to a new `libm-test` crate +- Add a `force-soft-floats` feature to prevent using any intrinsics or + arch-specific code +- Deny warnings in CI +- Fix `clippy::deprecated_cfg_attr` on compiler_builtins +- Corrected English typos +- Remove unneeded `extern core` in `tgamma` +- Allow internal_features lint when building with "unstable" ## [v0.2.1] - 2019-11-22 ### Fixed + - sincosf ## [v0.2.0] - 2019-10-18 ### Added + - Benchmarks - signum - remainder @@ -23,17 +53,20 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
- nextafterf ### Fixed + - Rounding to negative zero - Overflows in rem_pio2 and remquo - Overflows in fma - sincosf ### Removed + - F32Ext and F64Ext traits ## [v0.1.4] - 2019-06-12 ### Fixed + - Restored compatibility with Rust 1.31.0 ## [v0.1.3] - 2019-05-14 diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 9ae8bd841..f04fc12df 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT AND (MIT OR Apache-2.0)" name = "libm" readme = "README.md" repository = "https://github.com/rust-lang/libm" -version = "0.2.8" +version = "0.2.9" edition = "2021" exclude = ["/ci/", "/.github/workflows/"] From 679f72890817d9840ba1e99297aa787a52433370 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Oct 2024 18:19:18 -0500 Subject: [PATCH 0883/1459] Update the libm submodule --- libm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm b/libm index 300edb325..f4e5b38ae 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 300edb32520b1673e16d2411a0e2e6273959eb46 +Subproject commit f4e5b38aee0e0c592a82ed45b21cd068c9b6c89a From 826dd1d08a7ae480ba7ae7994b384527eafcbac7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 26 Oct 2024 23:29:14 +0000 Subject: [PATCH 0884/1459] chore: release v0.1.136 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c29fe6afe..4a85e5215 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.135" +version = "0.1.136" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 2a2b9611d9f2427a036efc9e48fe5880fbc7d50b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Oct 2024 20:18:50 -0500 Subject: [PATCH 0885/1459] Add a rustfmt.toml file matching rust-lang/rust Duplicate the 
settings from rust-lang/rust to this repository. This is mostly for consistency, but `use_small_heuristics = "Max"` does make a large difference with lookup tables. Also apply the needed CI changes to run nightly rustfmt. --- libm/.github/workflows/main.yml | 2 +- libm/.rustfmt.toml | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 libm/.rustfmt.toml diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 322043d85..f312e1f5e 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -41,7 +41,7 @@ jobs: steps: - uses: actions/checkout@master - name: Install Rust - run: rustup update stable && rustup default stable && rustup component add rustfmt + run: rustup update nightly && rustup default nightly && rustup component add rustfmt - run: cargo fmt -- --check wasm: diff --git a/libm/.rustfmt.toml b/libm/.rustfmt.toml new file mode 100644 index 000000000..c73bb9301 --- /dev/null +++ b/libm/.rustfmt.toml @@ -0,0 +1,5 @@ +# This matches rustc +style_edition = "2024" +use_small_heuristics = "Max" +group_imports = "StdExternalCrate" +imports_granularity = "Module" From 4bb07a6275cc628ef81c65ac971dc6479963322f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Oct 2024 20:22:24 -0500 Subject: [PATCH 0886/1459] Run `cargo fmt` with new settings Apply the changes from the `.rustfmt.toml` file added in the previous commit. 
--- libm/crates/libm-test/build.rs | 43 ++++++++-------------------- libm/src/lib.rs | 15 +++------- libm/src/math/asin.rs | 6 +--- libm/src/math/asinf.rs | 6 +--- libm/src/math/asinh.rs | 6 +--- libm/src/math/asinhf.rs | 6 +--- libm/src/math/atan.rs | 12 ++++---- libm/src/math/atan2.rs | 3 +- libm/src/math/atan2f.rs | 3 +- libm/src/math/atanf.rs | 15 ++-------- libm/src/math/atanh.rs | 6 +--- libm/src/math/atanhf.rs | 6 +--- libm/src/math/ceil.rs | 15 +++------- libm/src/math/ceilf.rs | 3 +- libm/src/math/cosf.rs | 4 +-- libm/src/math/cosh.rs | 4 +-- libm/src/math/coshf.rs | 4 +-- libm/src/math/erf.rs | 12 ++------ libm/src/math/erff.rs | 12 ++------ libm/src/math/exp.rs | 6 +--- libm/src/math/exp10f.rs | 5 ++-- libm/src/math/expf.rs | 6 +--- libm/src/math/expm1f.rs | 6 +--- libm/src/math/fabs.rs | 3 +- libm/src/math/fabsf.rs | 3 +- libm/src/math/floor.rs | 15 +++------- libm/src/math/floorf.rs | 3 +- libm/src/math/fma.rs | 10 ++----- libm/src/math/fmaf.rs | 8 ++---- libm/src/math/fmodf.rs | 3 +- libm/src/math/ilogb.rs | 6 +--- libm/src/math/ilogbf.rs | 6 +--- libm/src/math/jn.rs | 12 ++------ libm/src/math/jnf.rs | 12 ++------ libm/src/math/k_sin.rs | 6 +--- libm/src/math/log1p.rs | 6 +--- libm/src/math/log1pf.rs | 6 +--- libm/src/math/mod.rs | 32 ++++++++------------- libm/src/math/pow.rs | 50 ++++++++------------------------- libm/src/math/powf.rs | 12 ++------ libm/src/math/rem_pio2.rs | 20 +++---------- libm/src/math/rem_pio2_large.rs | 3 +- libm/src/math/rem_pio2f.rs | 4 +-- libm/src/math/remquo.rs | 6 +--- libm/src/math/remquof.rs | 6 +--- libm/src/math/rint.rs | 10 +------ libm/src/math/rintf.rs | 10 +------ libm/src/math/round.rs | 4 +-- libm/src/math/roundf.rs | 4 +-- libm/src/math/sinf.rs | 16 +++-------- libm/src/math/sinhf.rs | 3 +- libm/src/math/sqrt.rs | 3 +- libm/src/math/sqrtf.rs | 10 ++----- libm/src/math/tan.rs | 6 +--- libm/src/math/tanf.rs | 10 ++----- libm/src/math/tanh.rs | 6 +--- libm/src/math/tanhf.rs | 6 +--- 57 files changed, 131 
insertions(+), 393 deletions(-) diff --git a/libm/crates/libm-test/build.rs b/libm/crates/libm-test/build.rs index 3a49e3c57..9653bd830 100644 --- a/libm/crates/libm-test/build.rs +++ b/libm/crates/libm-test/build.rs @@ -5,12 +5,12 @@ fn main() { #[cfg(feature = "test-musl-serialized")] mod musl_reference_tests { - use rand::seq::SliceRandom; - use rand::Rng; - use std::env; - use std::fs; use std::path::PathBuf; use std::process::Command; + use std::{env, fs}; + + use rand::Rng; + use rand::seq::SliceRandom; // Number of tests to generate for each function const NTESTS: usize = 500; @@ -60,10 +60,7 @@ mod musl_reference_tests { return; } - let files = fs::read_dir(math_src) - .unwrap() - .map(|f| f.unwrap().path()) - .collect::>(); + let files = fs::read_dir(math_src).unwrap().map(|f| f.unwrap().path()).collect::>(); let mut math = Vec::new(); for file in files { @@ -112,12 +109,7 @@ mod musl_reference_tests { let tail = eat(tail, " -> "); let ret = parse_retty(tail.replace("{", "").trim()); - return Function { - name: name.to_string(), - args, - ret, - tests: Vec::new(), - }; + return Function { name: name.to_string(), args, ret, tests: Vec::new() }; fn parse_ty(s: &str) -> Ty { match s { @@ -156,11 +148,7 @@ mod musl_reference_tests { } fn generate_test(function: &Function, rng: &mut R) -> Test { - let mut inputs = function - .args - .iter() - .map(|ty| ty.gen_i64(rng)) - .collect::>(); + let mut inputs = function.args.iter().map(|ty| ty.gen_i64(rng)).collect::>(); // First argument to this function appears to be a number of // iterations, so passing in massive random numbers causes it to @@ -180,15 +168,12 @@ mod musl_reference_tests { impl Ty { fn gen_i64(&self, r: &mut R) -> i64 { - use std::f32; - use std::f64; + use std::{f32, f64}; return match self { Ty::F32 => { if r.gen_range(0..20) < 1 { - let i = *[f32::NAN, f32::INFINITY, f32::NEG_INFINITY] - .choose(r) - .unwrap(); + let i = *[f32::NAN, f32::INFINITY, f32::NEG_INFINITY].choose(r).unwrap(); 
i.to_bits().into() } else { r.gen::().to_bits().into() @@ -196,9 +181,7 @@ mod musl_reference_tests { } Ty::F64 => { if r.gen_range(0..20) < 1 { - let i = *[f64::NAN, f64::INFINITY, f64::NEG_INFINITY] - .choose(r) - .unwrap(); + let i = *[f64::NAN, f64::INFINITY, f64::NEG_INFINITY].choose(r).unwrap(); i.to_bits() as i64 } else { r.gen::().to_bits() as i64 @@ -424,11 +407,7 @@ mod musl_reference_tests { src.push_str(");"); for (i, ret) in function.ret.iter().enumerate() { - let get = if function.ret.len() == 1 { - String::new() - } else { - format!(".{}", i) - }; + let get = if function.ret.len() == 1 { String::new() } else { format!(".{}", i) }; src.push_str(&(match ret { Ty::F32 => format!("if libm::_eqf(output{}, f32::from_bits(expected[{}] as u32)).is_ok() {{ continue }}", get, i), Ty::F64 => format!("if libm::_eq(output{}, f64::from_bits(expected[{}] as u64)).is_ok() {{ continue }}", get, i), diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 04f4ac0f2..6d95fa173 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -17,9 +17,10 @@ mod math; use core::{f32, f64}; -pub use self::math::*; pub use libm_helper::*; +pub use self::math::*; + /// Approximate equality with 1 ULP of tolerance #[doc(hidden)] #[inline] @@ -29,11 +30,7 @@ pub fn _eqf(a: f32, b: f32) -> Result<(), u32> { } else { let err = (a.to_bits() as i32).wrapping_sub(b.to_bits() as i32).abs(); - if err <= 1 { - Ok(()) - } else { - Err(err as u32) - } + if err <= 1 { Ok(()) } else { Err(err as u32) } } } @@ -45,10 +42,6 @@ pub fn _eq(a: f64, b: f64) -> Result<(), u64> { } else { let err = (a.to_bits() as i64).wrapping_sub(b.to_bits() as i64).abs(); - if err <= 1 { - Ok(()) - } else { - Err(err as u64) - } + if err <= 1 { Ok(()) } else { Err(err as u64) } } } diff --git a/libm/src/math/asin.rs b/libm/src/math/asin.rs index 3e4b7c56e..12fe08fc7 100644 --- a/libm/src/math/asin.rs +++ b/libm/src/math/asin.rs @@ -111,9 +111,5 @@ pub fn asin(mut x: f64) -> f64 { c = (z - f * f) / (s + f); x = 0.5 * PIO2_HI 
- (2.0 * s * r - (PIO2_LO - 2.0 * c) - (0.5 * PIO2_HI - 2.0 * f)); } - if hx >> 31 != 0 { - -x - } else { - x - } + if hx >> 31 != 0 { -x } else { x } } diff --git a/libm/src/math/asinf.rs b/libm/src/math/asinf.rs index 6ec61b629..2c785abe2 100644 --- a/libm/src/math/asinf.rs +++ b/libm/src/math/asinf.rs @@ -64,9 +64,5 @@ pub fn asinf(mut x: f32) -> f32 { let z = (1. - fabsf(x)) * 0.5; let s = sqrt(z as f64); x = (PIO2 - 2. * (s + s * (r(z) as f64))) as f32; - if (hx >> 31) != 0 { - -x - } else { - x - } + if (hx >> 31) != 0 { -x } else { x } } diff --git a/libm/src/math/asinh.rs b/libm/src/math/asinh.rs index 0abd80c2f..75d3c3ad4 100644 --- a/libm/src/math/asinh.rs +++ b/libm/src/math/asinh.rs @@ -32,9 +32,5 @@ pub fn asinh(mut x: f64) -> f64 { force_eval!(x + x1p120); } - if sign { - -x - } else { - x - } + if sign { -x } else { x } } diff --git a/libm/src/math/asinhf.rs b/libm/src/math/asinhf.rs index 09c77823e..27ed9dd37 100644 --- a/libm/src/math/asinhf.rs +++ b/libm/src/math/asinhf.rs @@ -31,9 +31,5 @@ pub fn asinhf(mut x: f32) -> f32 { force_eval!(x + x1p120); } - if sign { - -x - } else { - x - } + if sign { -x } else { x } } diff --git a/libm/src/math/atan.rs b/libm/src/math/atan.rs index 4259dc71a..4ca5cc91a 100644 --- a/libm/src/math/atan.rs +++ b/libm/src/math/atan.rs @@ -29,9 +29,10 @@ * to produce the hexadecimal values shown. 
*/ -use super::fabs; use core::f64; +use super::fabs; + const ATANHI: [f64; 4] = [ 4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */ 7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */ @@ -128,18 +129,15 @@ pub fn atan(x: f64) -> f64 { let z = i!(ATANHI, id as usize) - (x * (s1 + s2) - i!(ATANLO, id as usize) - x); - if sign != 0 { - -z - } else { - z - } + if sign != 0 { -z } else { z } } #[cfg(test)] mod tests { - use super::atan; use core::f64; + use super::atan; + #[test] fn sanity_check() { for (input, answer) in [ diff --git a/libm/src/math/atan2.rs b/libm/src/math/atan2.rs index fb2ea4eda..b9bf0da93 100644 --- a/libm/src/math/atan2.rs +++ b/libm/src/math/atan2.rs @@ -37,8 +37,7 @@ * to produce the hexadecimal values shown. */ -use super::atan; -use super::fabs; +use super::{atan, fabs}; const PI: f64 = 3.1415926535897931160E+00; /* 0x400921FB, 0x54442D18 */ const PI_LO: f64 = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */ diff --git a/libm/src/math/atan2f.rs b/libm/src/math/atan2f.rs index eae3b002d..fa33f54f6 100644 --- a/libm/src/math/atan2f.rs +++ b/libm/src/math/atan2f.rs @@ -13,8 +13,7 @@ * ==================================================== */ -use super::atanf; -use super::fabsf; +use super::{atanf, fabsf}; const PI: f32 = 3.1415927410e+00; /* 0x40490fdb */ const PI_LO: f32 = -8.7422776573e-08; /* 0xb3bbbd2e */ diff --git a/libm/src/math/atanf.rs b/libm/src/math/atanf.rs index d042b3bc0..eb3d401cd 100644 --- a/libm/src/math/atanf.rs +++ b/libm/src/math/atanf.rs @@ -29,13 +29,8 @@ const ATAN_LO: [f32; 4] = [ 7.5497894159e-08, /* atan(inf)lo 0x33a22168 */ ]; -const A_T: [f32; 5] = [ - 3.3333328366e-01, - -1.9999158382e-01, - 1.4253635705e-01, - -1.0648017377e-01, - 6.1687607318e-02, -]; +const A_T: [f32; 5] = + [3.3333328366e-01, -1.9999158382e-01, 1.4253635705e-01, -1.0648017377e-01, 6.1687607318e-02]; /// Arctangent (f32) /// @@ -104,9 +99,5 @@ pub fn atanf(mut x: f32) -> f32 { } let id = id as usize; let 
z = i!(ATAN_HI, id) - ((x * (s1 + s2) - i!(ATAN_LO, id)) - x); - if sign { - -z - } else { - z - } + if sign { -z } else { z } } diff --git a/libm/src/math/atanh.rs b/libm/src/math/atanh.rs index b984c4ac6..9dc826f56 100644 --- a/libm/src/math/atanh.rs +++ b/libm/src/math/atanh.rs @@ -29,9 +29,5 @@ pub fn atanh(x: f64) -> f64 { y = 0.5 * log1p(2.0 * (y / (1.0 - y))); } - if sign { - -y - } else { - y - } + if sign { -y } else { y } } diff --git a/libm/src/math/atanhf.rs b/libm/src/math/atanhf.rs index a1aa314a5..3545411bb 100644 --- a/libm/src/math/atanhf.rs +++ b/libm/src/math/atanhf.rs @@ -29,9 +29,5 @@ pub fn atanhf(mut x: f32) -> f32 { x = 0.5 * log1pf(2.0 * (x / (1.0 - x))); } - if sign { - -x - } else { - x - } + if sign { -x } else { x } } diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index cde5a19d0..1593fdaff 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -42,28 +42,21 @@ pub fn ceil(x: f64) -> f64 { return x; } // y = int(x) - x, where int(x) is an integer neighbor of x - y = if (u >> 63) != 0 { - x - TOINT + TOINT - x - } else { - x + TOINT - TOINT - x - }; + y = if (u >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x }; // special case because of non-nearest rounding modes if e < 0x3ff { force_eval!(y); return if (u >> 63) != 0 { -0. } else { 1. }; } - if y < 0. { - x + y + 1. - } else { - x + y - } + if y < 0. { x + y + 1. 
} else { x + y } } #[cfg(test)] mod tests { - use super::*; use core::f64::*; + use super::*; + #[test] fn sanity_check() { assert_eq!(ceil(1.1), 2.0); diff --git a/libm/src/math/ceilf.rs b/libm/src/math/ceilf.rs index 7bcc647ca..bf9ba1227 100644 --- a/libm/src/math/ceilf.rs +++ b/libm/src/math/ceilf.rs @@ -44,9 +44,10 @@ pub fn ceilf(x: f32) -> f32 { #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { - use super::*; use core::f32::*; + use super::*; + #[test] fn sanity_check() { assert_eq!(ceilf(1.1), 2.0); diff --git a/libm/src/math/cosf.rs b/libm/src/math/cosf.rs index 424fa42ed..0a01335f7 100644 --- a/libm/src/math/cosf.rs +++ b/libm/src/math/cosf.rs @@ -14,10 +14,10 @@ * ==================================================== */ -use super::{k_cosf, k_sinf, rem_pio2f}; - use core::f64::consts::FRAC_PI_2; +use super::{k_cosf, k_sinf, rem_pio2f}; + /* Small multiples of pi/2 rounded to double precision. */ const C1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ const C2_PIO2: f64 = 2. 
* FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ diff --git a/libm/src/math/cosh.rs b/libm/src/math/cosh.rs index 2fb568ab3..d2e43fd6c 100644 --- a/libm/src/math/cosh.rs +++ b/libm/src/math/cosh.rs @@ -1,6 +1,4 @@ -use super::exp; -use super::expm1; -use super::k_expo2; +use super::{exp, expm1, k_expo2}; /// Hyperbolic cosine (f64) /// diff --git a/libm/src/math/coshf.rs b/libm/src/math/coshf.rs index e7b684587..567a24410 100644 --- a/libm/src/math/coshf.rs +++ b/libm/src/math/coshf.rs @@ -1,6 +1,4 @@ -use super::expf; -use super::expm1f; -use super::k_expo2f; +use super::{expf, expm1f, k_expo2f}; /// Hyperbolic cosine (f64) /// diff --git a/libm/src/math/erf.rs b/libm/src/math/erf.rs index 55569affc..1b634abec 100644 --- a/libm/src/math/erf.rs +++ b/libm/src/math/erf.rs @@ -256,11 +256,7 @@ pub fn erf(x: f64) -> f64 { y = 1.0 - x1p_1022; } - if sign != 0 { - -y - } else { - y - } + if sign != 0 { -y } else { y } } /// Complementary error function (f64) @@ -310,9 +306,5 @@ pub fn erfc(x: f64) -> f64 { } let x1p_1022 = f64::from_bits(0x0010000000000000); - if sign != 0 { - 2.0 - x1p_1022 - } else { - x1p_1022 * x1p_1022 - } + if sign != 0 { 2.0 - x1p_1022 } else { x1p_1022 * x1p_1022 } } diff --git a/libm/src/math/erff.rs b/libm/src/math/erff.rs index 7b25474f6..2e41183bf 100644 --- a/libm/src/math/erff.rs +++ b/libm/src/math/erff.rs @@ -167,11 +167,7 @@ pub fn erff(x: f32) -> f32 { y = 1.0 - x1p_120; } - if sign != 0 { - -y - } else { - y - } + if sign != 0 { -y } else { y } } /// Complementary error function (f32) @@ -222,9 +218,5 @@ pub fn erfcf(x: f32) -> f32 { } let x1p_120 = f32::from_bits(0x03800000); - if sign != 0 { - 2.0 - x1p_120 - } else { - x1p_120 * x1p_120 - } + if sign != 0 { 2.0 - x1p_120 } else { x1p_120 * x1p_120 } } diff --git a/libm/src/math/exp.rs b/libm/src/math/exp.rs index d4994277f..782042b62 100644 --- a/libm/src/math/exp.rs +++ b/libm/src/math/exp.rs @@ -146,9 +146,5 @@ pub fn exp(mut x: f64) -> f64 { xx = x * x; c = x - xx * (P1 + xx * (P2 + xx 
* (P3 + xx * (P4 + xx * P5)))); y = 1. + (x * c / (2. - c) - lo + hi); - if k == 0 { - y - } else { - scalbn(y, k) - } + if k == 0 { y } else { scalbn(y, k) } } diff --git a/libm/src/math/exp10f.rs b/libm/src/math/exp10f.rs index 1279bc6c5..786305481 100644 --- a/libm/src/math/exp10f.rs +++ b/libm/src/math/exp10f.rs @@ -2,9 +2,8 @@ use super::{exp2, exp2f, modff}; const LN10_F32: f32 = 3.32192809488736234787031942948939; const LN10_F64: f64 = 3.32192809488736234787031942948939; -const P10: &[f32] = &[ - 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, -]; +const P10: &[f32] = + &[1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7]; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp10f(x: f32) -> f32 { diff --git a/libm/src/math/expf.rs b/libm/src/math/expf.rs index a53aa90a6..8dc067ab0 100644 --- a/libm/src/math/expf.rs +++ b/libm/src/math/expf.rs @@ -93,9 +93,5 @@ pub fn expf(mut x: f32) -> f32 { let xx = x * x; let c = x - xx * (P1 + xx * P2); let y = 1. + (x * c / (2. - c) - lo + hi); - if k == 0 { - y - } else { - scalbnf(y, k) - } + if k == 0 { y } else { scalbnf(y, k) } } diff --git a/libm/src/math/expm1f.rs b/libm/src/math/expm1f.rs index 3fc2a247b..a862fe255 100644 --- a/libm/src/math/expm1f.rs +++ b/libm/src/math/expm1f.rs @@ -126,9 +126,5 @@ pub fn expm1f(mut x: f32) -> f32 { return y - 1.; } let uf = f32::from_bits(((0x7f - k) << 23) as u32); /* 2^-k */ - if k < 23 { - (x - e + (1. - uf)) * twopk - } else { - (x - (e + uf) + 1.) * twopk - } + if k < 23 { (x - e + (1. - uf)) * twopk } else { (x - (e + uf) + 1.) 
* twopk } } diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs index b2255ad32..3b0628aa6 100644 --- a/libm/src/math/fabs.rs +++ b/libm/src/math/fabs.rs @@ -18,9 +18,10 @@ pub fn fabs(x: f64) -> f64 { #[cfg(test)] mod tests { - use super::*; use core::f64::*; + use super::*; + #[test] fn sanity_check() { assert_eq!(fabs(-1.0), 1.0); diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs index 23f3646dc..f81c8ca44 100644 --- a/libm/src/math/fabsf.rs +++ b/libm/src/math/fabsf.rs @@ -18,9 +18,10 @@ pub fn fabsf(x: f32) -> f32 { #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { - use super::*; use core::f32::*; + use super::*; + #[test] fn sanity_check() { assert_eq!(fabsf(-1.0), 1.0); diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index b7d1a04d2..e8fb21e58 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -41,28 +41,21 @@ pub fn floor(x: f64) -> f64 { return x; } /* y = int(x) - x, where int(x) is an integer neighbor of x */ - let y = if (ui >> 63) != 0 { - x - TOINT + TOINT - x - } else { - x + TOINT - TOINT - x - }; + let y = if (ui >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x }; /* special case because of non-nearest rounding modes */ if e < 0x3ff { force_eval!(y); return if (ui >> 63) != 0 { -1. } else { 0. }; } - if y > 0. { - x + y - 1. - } else { - x + y - } + if y > 0. { x + y - 1. 
} else { x + y } } #[cfg(test)] mod tests { - use super::*; use core::f64::*; + use super::*; + #[test] fn sanity_check() { assert_eq!(floor(1.1), 1.0); diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs index dfdab91a0..f66cab74f 100644 --- a/libm/src/math/floorf.rs +++ b/libm/src/math/floorf.rs @@ -44,9 +44,10 @@ pub fn floorf(x: f32) -> f32 { #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { - use super::*; use core::f32::*; + use super::*; + #[test] fn sanity_check() { assert_eq!(floorf(0.5), 0.0); diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index 940ee2db9..bb2028fa7 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -216,17 +216,11 @@ mod tests { #[test] fn fma_sbb() { - assert_eq!( - fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), - -3991680619069439e277 - ); + assert_eq!(fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), -3991680619069439e277); } #[test] fn fma_underflow() { - assert_eq!( - fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), - 0.0, - ); + assert_eq!(fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), 0.0,); } } diff --git a/libm/src/math/fmaf.rs b/libm/src/math/fmaf.rs index 2848f2aee..10bdaeab3 100644 --- a/libm/src/math/fmaf.rs +++ b/libm/src/math/fmaf.rs @@ -29,7 +29,7 @@ use core::f32; use core::ptr::read_volatile; use super::fenv::{ - feclearexcept, fegetround, feraiseexcept, fetestexcept, FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, + FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, feclearexcept, fegetround, feraiseexcept, fetestexcept, }; /* @@ -91,11 +91,7 @@ pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 { * we need to adjust the low-order bit in the direction of the error. 
*/ let neg = ui >> 63 != 0; - let err = if neg == (z as f64 > xy) { - xy - result + z as f64 - } else { - z as f64 - result + xy - }; + let err = if neg == (z as f64 > xy) { xy - result + z as f64 } else { z as f64 - result + xy }; if neg == (err < 0.0) { ui += 1; } else { diff --git a/libm/src/math/fmodf.rs b/libm/src/math/fmodf.rs index c53dc186a..1d8001384 100644 --- a/libm/src/math/fmodf.rs +++ b/libm/src/math/fmodf.rs @@ -1,5 +1,4 @@ -use core::f32; -use core::u32; +use core::{f32, u32}; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmodf(x: f32, y: f32) -> f32 { diff --git a/libm/src/math/ilogb.rs b/libm/src/math/ilogb.rs index 7d74dcfb6..9d58d0608 100644 --- a/libm/src/math/ilogb.rs +++ b/libm/src/math/ilogb.rs @@ -21,11 +21,7 @@ pub fn ilogb(x: f64) -> i32 { e } else if e == 0x7ff { force_eval!(0.0 / 0.0); - if (i << 12) != 0 { - FP_ILOGBNAN - } else { - i32::max_value() - } + if (i << 12) != 0 { FP_ILOGBNAN } else { i32::max_value() } } else { e - 0x3ff } diff --git a/libm/src/math/ilogbf.rs b/libm/src/math/ilogbf.rs index 0fa58748c..85deb43c8 100644 --- a/libm/src/math/ilogbf.rs +++ b/libm/src/math/ilogbf.rs @@ -21,11 +21,7 @@ pub fn ilogbf(x: f32) -> i32 { e } else if e == 0xff { force_eval!(0.0 / 0.0); - if (i << 9) != 0 { - FP_ILOGBNAN - } else { - i32::max_value() - } + if (i << 9) != 0 { FP_ILOGBNAN } else { i32::max_value() } } else { e - 0x7f } diff --git a/libm/src/math/jn.rs b/libm/src/math/jn.rs index 1be167f84..22ced20c1 100644 --- a/libm/src/math/jn.rs +++ b/libm/src/math/jn.rs @@ -244,11 +244,7 @@ pub fn jn(n: i32, mut x: f64) -> f64 { } } - if sign { - -b - } else { - b - } + if sign { -b } else { b } } pub fn yn(n: i32, x: f64) -> f64 { @@ -335,9 +331,5 @@ pub fn yn(n: i32, x: f64) -> f64 { } } - if sign { - -b - } else { - b - } + if sign { -b } else { b } } diff --git a/libm/src/math/jnf.rs b/libm/src/math/jnf.rs index 360f62e20..9cd0bb37d 100644 --- a/libm/src/math/jnf.rs +++ b/libm/src/math/jnf.rs @@ -188,11 +188,7 
@@ pub fn jnf(n: i32, mut x: f32) -> f32 { } } - if sign { - -b - } else { - b - } + if sign { -b } else { b } } pub fn ynf(n: i32, x: f32) -> f32 { @@ -251,9 +247,5 @@ pub fn ynf(n: i32, x: f32) -> f32 { a = temp; } - if sign { - -b - } else { - b - } + if sign { -b } else { b } } diff --git a/libm/src/math/k_sin.rs b/libm/src/math/k_sin.rs index 9dd96c944..42441455f 100644 --- a/libm/src/math/k_sin.rs +++ b/libm/src/math/k_sin.rs @@ -49,9 +49,5 @@ pub(crate) fn k_sin(x: f64, y: f64, iy: i32) -> f64 { let w = z * z; let r = S2 + z * (S3 + z * S4) + z * w * (S5 + z * S6); let v = z * x; - if iy == 0 { - x + v * (S1 + z * r) - } else { - x - ((z * (0.5 * y - v * r) - y) - v * S1) - } + if iy == 0 { x + v * (S1 + z * r) } else { x - ((z * (0.5 * y - v * r) - y) - v * S1) } } diff --git a/libm/src/math/log1p.rs b/libm/src/math/log1p.rs index 4fd1c73eb..552de549b 100644 --- a/libm/src/math/log1p.rs +++ b/libm/src/math/log1p.rs @@ -117,11 +117,7 @@ pub fn log1p(x: f64) -> f64 { k = (hu >> 20) as i32 - 0x3ff; /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ if k < 54 { - c = if k >= 2 { - 1. - (f64::from_bits(ui) - x) - } else { - x - (f64::from_bits(ui) - 1.) - }; + c = if k >= 2 { 1. - (f64::from_bits(ui) - x) } else { x - (f64::from_bits(ui) - 1.) }; c /= f64::from_bits(ui); } else { c = 0.; diff --git a/libm/src/math/log1pf.rs b/libm/src/math/log1pf.rs index 500e8eeaa..8068128db 100644 --- a/libm/src/math/log1pf.rs +++ b/libm/src/math/log1pf.rs @@ -72,11 +72,7 @@ pub fn log1pf(x: f32) -> f32 { k = (iu >> 23) as i32 - 0x7f; /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ if k < 25 { - c = if k >= 2 { - 1. - (f32::from_bits(ui) - x) - } else { - x - (f32::from_bits(ui) - 1.) - }; + c = if k >= 2 { 1. - (f32::from_bits(ui) - x) } else { x - (f32::from_bits(ui) - 1.) 
}; c /= f32::from_bits(ui); } else { c = 0.; diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index a56532ddd..85c9fc5bf 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -218,15 +218,13 @@ pub use self::cos::cos; pub use self::cosf::cosf; pub use self::cosh::cosh; pub use self::coshf::coshf; -pub use self::erf::erf; -pub use self::erf::erfc; -pub use self::erff::erfcf; -pub use self::erff::erff; +pub use self::erf::{erf, erfc}; +pub use self::erff::{erfcf, erff}; pub use self::exp::exp; -pub use self::exp10::exp10; -pub use self::exp10f::exp10f; pub use self::exp2::exp2; pub use self::exp2f::exp2f; +pub use self::exp10::exp10; +pub use self::exp10f::exp10f; pub use self::expf::expf; pub use self::expm1::expm1; pub use self::expm1f::expm1f; @@ -250,18 +248,12 @@ pub use self::hypot::hypot; pub use self::hypotf::hypotf; pub use self::ilogb::ilogb; pub use self::ilogbf::ilogbf; -pub use self::j0::j0; -pub use self::j0::y0; -pub use self::j0f::j0f; -pub use self::j0f::y0f; -pub use self::j1::j1; -pub use self::j1::y1; -pub use self::j1f::j1f; -pub use self::j1f::y1f; -pub use self::jn::jn; -pub use self::jn::yn; -pub use self::jnf::jnf; -pub use self::jnf::ynf; +pub use self::j0::{j0, y0}; +pub use self::j0f::{j0f, y0f}; +pub use self::j1::{j1, y1}; +pub use self::j1f::{j1f, y1f}; +pub use self::jn::{jn, yn}; +pub use self::jnf::{jnf, ynf}; pub use self::ldexp::ldexp; pub use self::ldexpf::ldexpf; pub use self::lgamma::lgamma; @@ -269,12 +261,12 @@ pub use self::lgamma_r::lgamma_r; pub use self::lgammaf::lgammaf; pub use self::lgammaf_r::lgammaf_r; pub use self::log::log; -pub use self::log10::log10; -pub use self::log10f::log10f; pub use self::log1p::log1p; pub use self::log1pf::log1pf; pub use self::log2::log2; pub use self::log2f::log2f; +pub use self::log10::log10; +pub use self::log10f::log10f; pub use self::logf::logf; pub use self::modf::modf; pub use self::modff::modff; diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 
09d12c185..9b617cadb 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -159,18 +159,10 @@ pub fn pow(x: f64, y: f64) -> f64 { 1.0 } else if ix >= 0x3ff00000 { /* (|x|>1)**+-inf = inf,0 */ - if hy >= 0 { - y - } else { - 0.0 - } + if hy >= 0 { y } else { 0.0 } } else { /* (|x|<1)**+-inf = 0,inf */ - if hy >= 0 { - 0.0 - } else { - -y - } + if hy >= 0 { 0.0 } else { -y } }; } @@ -246,18 +238,10 @@ pub fn pow(x: f64, y: f64) -> f64 { /* over/underflow if x is not close to one */ if ix < 0x3fefffff { - return if hy < 0 { - s * HUGE * HUGE - } else { - s * TINY * TINY - }; + return if hy < 0 { s * HUGE * HUGE } else { s * TINY * TINY }; } if ix > 0x3ff00000 { - return if hy > 0 { - s * HUGE * HUGE - } else { - s * TINY * TINY - }; + return if hy > 0 { s * HUGE * HUGE } else { s * TINY * TINY }; } /* now |1-x| is TINY <= 2**-20, suffice to compute @@ -455,11 +439,7 @@ mod tests { fn pow_test(base: f64, exponent: f64, expected: f64) { let res = pow(base, exponent); assert!( - if expected.is_nan() { - res.is_nan() - } else { - pow(base, exponent) == expected - }, + if expected.is_nan() { res.is_nan() } else { pow(base, exponent) == expected }, "{} ** {} was {} instead of {}", base, exponent, @@ -469,13 +449,11 @@ mod tests { } fn test_sets_as_base(sets: &[&[f64]], exponent: f64, expected: f64) { - sets.iter() - .for_each(|s| s.iter().for_each(|val| pow_test(*val, exponent, expected))); + sets.iter().for_each(|s| s.iter().for_each(|val| pow_test(*val, exponent, expected))); } fn test_sets_as_exponent(base: f64, sets: &[&[f64]], expected: f64) { - sets.iter() - .for_each(|s| s.iter().for_each(|val| pow_test(base, *val, expected))); + sets.iter().for_each(|s| s.iter().for_each(|val| pow_test(base, *val, expected))); } fn test_sets(sets: &[&[f64]], computed: &dyn Fn(f64) -> f64, expected: &dyn Fn(f64) -> f64) { @@ -489,11 +467,7 @@ mod tests { #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] let res = force_eval!(res); assert!( - if exp.is_nan() { - 
res.is_nan() - } else { - exp == res - }, + if exp.is_nan() { res.is_nan() } else { exp == res }, "test for {} was {} instead of {}", val, res, @@ -608,15 +582,15 @@ mod tests { // Factoring -1 out: // (negative anything ^ integer should be (-1 ^ integer) * (positive anything ^ integer)) - (&[POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS]) - .iter() - .for_each(|int_set| { + (&[POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS]).iter().for_each( + |int_set| { int_set.iter().for_each(|int| { test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| { pow(-1.0, *int) * pow(v, *int) }); }) - }); + }, + ); // Negative base (imaginary results): // (-anything except 0 and Infinity ^ non-integer should be NAN) diff --git a/libm/src/math/powf.rs b/libm/src/math/powf.rs index 68d2083bb..d47ab4b3d 100644 --- a/libm/src/math/powf.rs +++ b/libm/src/math/powf.rs @@ -181,19 +181,11 @@ pub fn powf(x: f32, y: f32) -> f32 { /* if |y| > 2**27 */ /* over/underflow if x is not close to one */ if ix < 0x3f7ffff8 { - return if hy < 0 { - sn * HUGE * HUGE - } else { - sn * TINY * TINY - }; + return if hy < 0 { sn * HUGE * HUGE } else { sn * TINY * TINY }; } if ix > 0x3f800007 { - return if hy > 0 { - sn * HUGE * HUGE - } else { - sn * TINY * TINY - }; + return if hy > 0 { sn * HUGE * HUGE } else { sn * TINY * TINY }; } /* now |1-x| is TINY <= 2**-20, suffice to compute diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 644616f2d..6be23a43c 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -197,28 +197,16 @@ mod tests { fn test_near_pi() { let arg = 3.141592025756836; let arg = force_eval!(arg); - assert_eq!( - rem_pio2(arg), - (2, -6.278329573009626e-7, -2.1125998133974653e-23) - ); + assert_eq!(rem_pio2(arg), (2, -6.278329573009626e-7, -2.1125998133974653e-23)); let arg = 3.141592033207416; let arg = force_eval!(arg); - assert_eq!( - rem_pio2(arg), - (2, -6.20382377148128e-7, -2.1125998133974653e-23) - ); + 
assert_eq!(rem_pio2(arg), (2, -6.20382377148128e-7, -2.1125998133974653e-23)); let arg = 3.141592144966125; let arg = force_eval!(arg); - assert_eq!( - rem_pio2(arg), - (2, -5.086236681942706e-7, -2.1125998133974653e-23) - ); + assert_eq!(rem_pio2(arg), (2, -5.086236681942706e-7, -2.1125998133974653e-23)); let arg = 3.141592979431152; let arg = force_eval!(arg); - assert_eq!( - rem_pio2(arg), - (2, 3.2584135866119817e-7, -2.1125998133974653e-23) - ); + assert_eq!(rem_pio2(arg), (2, 3.2584135866119817e-7, -2.1125998133974653e-23)); } #[test] diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index db97a39d4..1dfbba3b1 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -11,8 +11,7 @@ * ==================================================== */ -use super::floor; -use super::scalbn; +use super::{floor, scalbn}; // initial value for jk const INIT_JK: [usize; 4] = [3, 4, 4, 6]; diff --git a/libm/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs index 775f5d750..3c658fe3d 100644 --- a/libm/src/math/rem_pio2f.rs +++ b/libm/src/math/rem_pio2f.rs @@ -14,10 +14,10 @@ * ==================================================== */ -use super::rem_pio2_large; - use core::f64; +use super::rem_pio2_large; + const TOINT: f64 = 1.5 / f64::EPSILON; /// 53 bits of 2/pi diff --git a/libm/src/math/remquo.rs b/libm/src/math/remquo.rs index 0afd1f7f5..4c11e8487 100644 --- a/libm/src/math/remquo.rs +++ b/libm/src/math/remquo.rs @@ -91,11 +91,7 @@ pub fn remquo(mut x: f64, mut y: f64) -> (f64, i32) { } q &= 0x7fffffff; let quo = if sx ^ sy { -(q as i32) } else { q as i32 }; - if sx { - (-x, quo) - } else { - (x, quo) - } + if sx { (-x, quo) } else { (x, quo) } } #[cfg(test)] diff --git a/libm/src/math/remquof.rs b/libm/src/math/remquof.rs index d71bd38e3..b0e85ca66 100644 --- a/libm/src/math/remquof.rs +++ b/libm/src/math/remquof.rs @@ -89,9 +89,5 @@ pub fn remquof(mut x: f32, mut y: f32) -> (f32, i32) { } q &= 0x7fffffff; let quo = 
if sx ^ sy { -(q as i32) } else { q as i32 }; - if sx { - (-x, quo) - } else { - (x, quo) - } + if sx { (-x, quo) } else { (x, quo) } } diff --git a/libm/src/math/rint.rs b/libm/src/math/rint.rs index 8edbe3440..618b26e54 100644 --- a/libm/src/math/rint.rs +++ b/libm/src/math/rint.rs @@ -23,15 +23,7 @@ pub fn rint(x: f64) -> f64 { xminusoneovere + one_over_e }; - if ans == 0.0 { - if is_positive { - 0.0 - } else { - -0.0 - } - } else { - ans - } + if ans == 0.0 { if is_positive { 0.0 } else { -0.0 } } else { ans } } } diff --git a/libm/src/math/rintf.rs b/libm/src/math/rintf.rs index 7a7da618a..0726d83ba 100644 --- a/libm/src/math/rintf.rs +++ b/libm/src/math/rintf.rs @@ -23,15 +23,7 @@ pub fn rintf(x: f32) -> f32 { xminusoneovere + one_over_e }; - if ans == 0.0 { - if is_positive { - 0.0 - } else { - -0.0 - } - } else { - ans - } + if ans == 0.0 { if is_positive { 0.0 } else { -0.0 } } else { ans } } } diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs index 46fabc90f..b81ebaa1d 100644 --- a/libm/src/math/round.rs +++ b/libm/src/math/round.rs @@ -1,7 +1,7 @@ -use super::copysign; -use super::trunc; use core::f64; +use super::{copysign, trunc}; + #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn round(x: f64) -> f64 { trunc(x + copysign(0.5 - 0.25 * f64::EPSILON, x)) diff --git a/libm/src/math/roundf.rs b/libm/src/math/roundf.rs index becdb5620..fb974bbfe 100644 --- a/libm/src/math/roundf.rs +++ b/libm/src/math/roundf.rs @@ -1,7 +1,7 @@ -use super::copysignf; -use super::truncf; use core::f32; +use super::{copysignf, truncf}; + #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn roundf(x: f32) -> f32 { truncf(x + copysignf(0.5 - 0.25 * f32::EPSILON, x)) diff --git a/libm/src/math/sinf.rs b/libm/src/math/sinf.rs index 6e20be2ae..ca1814627 100644 --- a/libm/src/math/sinf.rs +++ b/libm/src/math/sinf.rs @@ -14,10 +14,10 @@ * ==================================================== */ -use super::{k_cosf, k_sinf, rem_pio2f}; - use 
core::f64::consts::FRAC_PI_2; +use super::{k_cosf, k_sinf, rem_pio2f}; + /* Small multiples of pi/2 rounded to double precision. */ const S1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ const S2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ @@ -39,11 +39,7 @@ pub fn sinf(x: f32) -> f32 { if ix < 0x39800000 { /* |x| < 2**-12 */ /* raise inexact if x!=0 and underflow if subnormal */ - force_eval!(if ix < 0x00800000 { - x / x1p120 - } else { - x + x1p120 - }); + force_eval!(if ix < 0x00800000 { x / x1p120 } else { x + x1p120 }); return x; } return k_sinf(x64); @@ -58,11 +54,7 @@ pub fn sinf(x: f32) -> f32 { return k_cosf(x64 - S1_PIO2); } } - return k_sinf(if sign { - -(x64 + S2_PIO2) - } else { - -(x64 - S2_PIO2) - }); + return k_sinf(if sign { -(x64 + S2_PIO2) } else { -(x64 - S2_PIO2) }); } if ix <= 0x40e231d5 { /* |x| ~<= 9*pi/4 */ diff --git a/libm/src/math/sinhf.rs b/libm/src/math/sinhf.rs index 24f863c44..6788642f0 100644 --- a/libm/src/math/sinhf.rs +++ b/libm/src/math/sinhf.rs @@ -1,5 +1,4 @@ -use super::expm1f; -use super::k_expo2f; +use super::{expm1f, k_expo2f}; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sinhf(x: f32) -> f32 { diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 66cb7659c..5862b119b 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -242,9 +242,10 @@ pub fn sqrt(x: f64) -> f64 { #[cfg(test)] mod tests { - use super::*; use core::f64::*; + use super::*; + #[test] fn sanity_check() { assert_eq!(sqrt(100.0), 10.0); diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index 16cbb2f97..f7324c941 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -132,9 +132,10 @@ pub fn sqrtf(x: f32) -> f32 { #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { - use super::*; use core::f32::*; + use super::*; + #[test] fn sanity_check() { assert_eq!(sqrtf(100.0), 10.0); @@ -154,12 +155,7 @@ mod tests { #[test] fn conformance_tests() { - let values = [ 
- 3.14159265359f32, - 10000.0f32, - f32::from_bits(0x0000000f), - INFINITY, - ]; + let values = [3.14159265359f32, 10000.0f32, f32::from_bits(0x0000000f), INFINITY]; let results = [1071833029u32, 1120403456u32, 456082799u32, 2139095040u32]; for i in 0..values.len() { diff --git a/libm/src/math/tan.rs b/libm/src/math/tan.rs index 5a72f6801..5aa44aeef 100644 --- a/libm/src/math/tan.rs +++ b/libm/src/math/tan.rs @@ -49,11 +49,7 @@ pub fn tan(x: f64) -> f64 { if ix < 0x3e400000 { /* |x| < 2**-27 */ /* raise inexact if x!=0 and underflow if subnormal */ - force_eval!(if ix < 0x00100000 { - x / x1p120 as f64 - } else { - x + x1p120 as f64 - }); + force_eval!(if ix < 0x00100000 { x / x1p120 as f64 } else { x + x1p120 as f64 }); return x; } return k_tan(x, 0.0, 0); diff --git a/libm/src/math/tanf.rs b/libm/src/math/tanf.rs index 10de59c39..f6b2399d0 100644 --- a/libm/src/math/tanf.rs +++ b/libm/src/math/tanf.rs @@ -14,10 +14,10 @@ * ==================================================== */ -use super::{k_tanf, rem_pio2f}; - use core::f64::consts::FRAC_PI_2; +use super::{k_tanf, rem_pio2f}; + /* Small multiples of pi/2 rounded to double precision. */ const T1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ const T2_PIO2: f64 = 2. 
* FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ @@ -39,11 +39,7 @@ pub fn tanf(x: f32) -> f32 { if ix < 0x39800000 { /* |x| < 2**-12 */ /* raise inexact if x!=0 and underflow if subnormal */ - force_eval!(if ix < 0x00800000 { - x / x1p120 - } else { - x + x1p120 - }); + force_eval!(if ix < 0x00800000 { x / x1p120 } else { x + x1p120 }); return x; } return k_tanf(x64, false); diff --git a/libm/src/math/tanh.rs b/libm/src/math/tanh.rs index 980c68554..cfea2c167 100644 --- a/libm/src/math/tanh.rs +++ b/libm/src/math/tanh.rs @@ -45,9 +45,5 @@ pub fn tanh(mut x: f64) -> f64 { t = x; } - if sign { - -t - } else { - t - } + if sign { -t } else { t } } diff --git a/libm/src/math/tanhf.rs b/libm/src/math/tanhf.rs index fc94e3ddd..ab13e1abf 100644 --- a/libm/src/math/tanhf.rs +++ b/libm/src/math/tanhf.rs @@ -31,9 +31,5 @@ pub fn tanhf(mut x: f32) -> f32 { force_eval!(x * x); x }; - if sign { - -tt - } else { - tt - } + if sign { -tt } else { tt } } From 50041678907a74864507630ed6a8e22704576042 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Oct 2024 20:25:50 -0500 Subject: [PATCH 0887/1459] Add a .git-blame-ignore-revs file Include the recent formatting commit. --- libm/.git-blame-ignore-revs | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 libm/.git-blame-ignore-revs diff --git a/libm/.git-blame-ignore-revs b/libm/.git-blame-ignore-revs new file mode 100644 index 000000000..c1e43134f --- /dev/null +++ b/libm/.git-blame-ignore-revs @@ -0,0 +1,5 @@ +# Use `git config blame.ignorerevsfile .git-blame-ignore-revs` to make +# `git blame` ignore the following commits. + +# Reformat with a new `.rustfmt.toml` +5882cabb83c30bf7c36023f9a55a80583636b0e8 From 9dfe1107e279825bccdfc3d02895ff2599015120 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 27 Oct 2024 01:08:38 -0500 Subject: [PATCH 0888/1459] Add a CI test that the crate builds on stable Currently everything we have runs with nightly Rust. 
Add a stable test to make sure we don't accidentally make use of behavior that isn't yet stable without gating it. --- libm/.github/workflows/main.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index f312e1f5e..5340e91e5 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -35,6 +35,16 @@ jobs: - run: cargo generate-lockfile - run: ./ci/run-docker.sh ${{ matrix.target }} + stable: + name: Build succeeds on stable + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - name: Install Rust + run: rustup update stable && rustup default stable + - run: cargo build -p libm + + rustfmt: name: Rustfmt runs-on: ubuntu-latest From 96707e021a5d3928063ab1d1ebc9e8b054ca6588 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 27 Oct 2024 22:45:37 -0500 Subject: [PATCH 0889/1459] Change `build.rs` to use the older `:` rather than `::` This allows supporting Rust < 1.77. 
--- libm/build.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libm/build.rs b/libm/build.rs index 653ccf799..b683557e4 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -2,10 +2,10 @@ use std::env; fn main() { println!("cargo:rerun-if-changed=build.rs"); - println!("cargo::rustc-check-cfg=cfg(assert_no_panic)"); - println!("cargo::rustc-check-cfg=cfg(feature, values(\"unstable\"))"); + println!("cargo:rustc-check-cfg=cfg(assert_no_panic)"); + println!("cargo:rustc-check-cfg=cfg(feature, values(\"unstable\"))"); - println!("cargo::rustc-check-cfg=cfg(feature, values(\"checked\"))"); + println!("cargo:rustc-check-cfg=cfg(feature, values(\"checked\"))"); #[allow(unexpected_cfgs)] if !cfg!(feature = "checked") { From c95dc87320021421eb300df3b505b773e920d1a6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 27 Oct 2024 22:46:22 -0500 Subject: [PATCH 0890/1459] Set the MSRV to 1.63 and document it as such Fixes: https://github.com/rust-lang/libm/issues/330 --- libm/.github/workflows/main.yml | 10 +++++++--- libm/Cargo.toml | 1 + libm/README.md | 4 ++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 5340e91e5..400ca2c0b 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -35,13 +35,17 @@ jobs: - run: cargo generate-lockfile - run: ./ci/run-docker.sh ${{ matrix.target }} - stable: - name: Build succeeds on stable + msrv: + name: Check MSRV runs-on: ubuntu-latest steps: - uses: actions/checkout@master + - run: | + msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' Cargo.toml)" + echo "MSRV: $msrv" + echo "MSRV=$msrv" >> "$GITHUB_ENV" - name: Install Rust - run: rustup update stable && rustup default stable + run: rustup update "$MSRV" && rustup default "$MSRV" - run: cargo build -p libm diff --git a/libm/Cargo.toml b/libm/Cargo.toml index f04fc12df..a498b4eb7 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ 
-11,6 +11,7 @@ repository = "https://github.com/rust-lang/libm" version = "0.2.9" edition = "2021" exclude = ["/ci/", "/.github/workflows/"] +rust-version = "1.63" [features] default = [] diff --git a/libm/README.md b/libm/README.md index 24ee3d1c1..e5d64bd2d 100644 --- a/libm/README.md +++ b/libm/README.md @@ -34,6 +34,10 @@ To run all benchmarks: Please check [CONTRIBUTING.md](CONTRIBUTING.md) +## Minimum Rust version policy + +This crate supports rustc 1.63 and newer. + ## License Usage is licensed under the MIT license ([LICENSE-MIT](LICENSE-MIT) or From e87d96ea38e53088709b97defc9a75285482741a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 28 Oct 2024 03:49:48 +0000 Subject: [PATCH 0891/1459] chore: release v0.2.10 --- libm/CHANGELOG.md | 6 ++++++ libm/Cargo.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index 36ed41d6b..317dfafc0 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -8,6 +8,12 @@ and this project adheres to ## [Unreleased] +## [0.2.10](https://github.com/rust-lang/libm/compare/libm-v0.2.9...libm-v0.2.10) - 2024-10-28 + +### Other + +- Set the MSRV to 1.63 and test this in CI + ## [0.2.9](https://github.com/rust-lang/libm/compare/libm-v0.2.8...libm-v0.2.9) - 2024-10-26 ### Fixed diff --git a/libm/Cargo.toml b/libm/Cargo.toml index a498b4eb7..9282b7157 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT AND (MIT OR Apache-2.0)" name = "libm" readme = "README.md" repository = "https://github.com/rust-lang/libm" -version = "0.2.9" +version = "0.2.10" edition = "2021" exclude = ["/ci/", "/.github/workflows/"] rust-version = "1.63" From 9ff6859006d71e9c1c1aa4b70be69f4eea3f39e9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 27 Oct 2024 23:17:52 -0500 Subject: [PATCH 0892/1459] Don't deny warnings when checking MSRV 1.63 reports some false positive lints that we don't need 
to worry about. Make sure we don't fail CI for this. --- libm/.github/workflows/main.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 400ca2c0b..f834b5def 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -38,6 +38,8 @@ jobs: msrv: name: Check MSRV runs-on: ubuntu-latest + env: + RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings` steps: - uses: actions/checkout@master - run: | From d60a3b94483e5f828b08207f000ac6f201acaf11 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 27 Oct 2024 23:41:29 -0500 Subject: [PATCH 0893/1459] ci: `rustup --no-self-update` and require MSRV checks Pass `--no-self-update` to `rustup`, which is typical for CI. Also add the MSRV job to `success` so GitHub won't merge without it passing. --- libm/.github/workflows/main.yml | 63 +++++++++++++++++---------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index f834b5def..926e3c19e 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -29,44 +29,19 @@ jobs: steps: - uses: actions/checkout@master - name: Install Rust - run: rustup update nightly && rustup default nightly + run: rustup update nightly --no-self-update && rustup default nightly - run: rustup target add ${{ matrix.target }} - run: rustup target add x86_64-unknown-linux-musl - run: cargo generate-lockfile - run: ./ci/run-docker.sh ${{ matrix.target }} - msrv: - name: Check MSRV - runs-on: ubuntu-latest - env: - RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings` - steps: - - uses: actions/checkout@master - - run: | - msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' Cargo.toml)" - echo "MSRV: $msrv" - echo "MSRV=$msrv" >> "$GITHUB_ENV" - - name: Install Rust - run: rustup update "$MSRV" && rustup default "$MSRV" - - run: cargo build -p libm - - - 
rustfmt: - name: Rustfmt - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@master - - name: Install Rust - run: rustup update nightly && rustup default nightly && rustup component add rustfmt - - run: cargo fmt -- --check - wasm: name: WebAssembly runs-on: ubuntu-latest steps: - uses: actions/checkout@master - name: Install Rust - run: rustup update nightly && rustup default nightly + run: rustup update nightly --no-self-update && rustup default nightly - run: rustup target add wasm32-unknown-unknown - run: cargo build --target wasm32-unknown-unknown @@ -76,7 +51,7 @@ jobs: steps: - uses: actions/checkout@master - name: Install Rust - run: rustup update nightly && rustup default nightly + run: rustup update nightly --no-self-update && rustup default nightly - run: cargo build -p cb benchmarks: @@ -85,16 +60,44 @@ jobs: steps: - uses: actions/checkout@master - name: Install Rust - run: rustup update nightly && rustup default nightly + run: rustup update nightly --no-self-update && rustup default nightly - run: cargo bench --all + msrv: + name: Check MSRV + runs-on: ubuntu-latest + env: + RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings` + steps: + - uses: actions/checkout@master + - run: | + msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' Cargo.toml)" + echo "MSRV: $msrv" + echo "MSRV=$msrv" >> "$GITHUB_ENV" + - name: Install Rust + run: rustup update "$MSRV" --no-self-update && rustup default "$MSRV" + - run: cargo build -p libm + + rustfmt: + name: Rustfmt + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - name: Install Rust + run: | + rustup update nightly --no-self-update + rustup default nightly + rustup component add rustfmt + - run: cargo fmt -- --check + success: needs: - docker - - rustfmt - wasm - cb - benchmarks + - msrv + - rustfmt runs-on: ubuntu-latest # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency # failed" as success. 
So we have to do some contortions to ensure the job fails if any of its From 15839258aa211eb5783c264c9a81ca1144497da7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johanna=20S=C3=B6rng=C3=A5rd?= <44257381+JSorngard@users.noreply.github.com> Date: Mon, 28 Oct 2024 18:12:45 +0100 Subject: [PATCH 0894/1459] Add basic docstrings to some functions (#337) * Add docstring to Bessel functions * Add docstrings to logarithm functions * Add docstrings to pow functions * Specify argument bit-size of the Bessel functions * Specify argument bit-size for pow functions * Specify argument bit-size for logarithms * Add docstrings to sin, cos, sincos and sinh functions * Add docstrings to sqrt * Add docstrings to tan and tanh functions * Add an inline link to https://en.wikipedia.org/wiki/Bessel_function to the docstrings of all Bessel functions. --- libm/src/math/cos.rs | 4 ++++ libm/src/math/cosf.rs | 3 +++ libm/src/math/j0.rs | 2 ++ libm/src/math/j0f.rs | 2 ++ libm/src/math/j1.rs | 2 ++ libm/src/math/j1f.rs | 2 ++ libm/src/math/jn.rs | 2 ++ libm/src/math/jnf.rs | 2 ++ libm/src/math/log.rs | 1 + libm/src/math/log10.rs | 1 + libm/src/math/log10f.rs | 1 + libm/src/math/log1p.rs | 1 + libm/src/math/log1pf.rs | 1 + libm/src/math/log2.rs | 1 + libm/src/math/log2f.rs | 1 + libm/src/math/logf.rs | 1 + libm/src/math/pow.rs | 1 + libm/src/math/powf.rs | 1 + libm/src/math/sin.rs | 4 ++++ libm/src/math/sincos.rs | 3 +++ libm/src/math/sincosf.rs | 3 +++ libm/src/math/sinf.rs | 3 +++ libm/src/math/sinh.rs | 2 ++ libm/src/math/sinhf.rs | 1 + libm/src/math/sqrt.rs | 1 + libm/src/math/sqrtf.rs | 1 + libm/src/math/tan.rs | 4 ++++ libm/src/math/tanf.rs | 3 +++ libm/src/math/tanh.rs | 4 ++++ libm/src/math/tanhf.rs | 3 +++ 30 files changed, 61 insertions(+) diff --git a/libm/src/math/cos.rs b/libm/src/math/cos.rs index db8bc4989..de99cd4c5 100644 --- a/libm/src/math/cos.rs +++ b/libm/src/math/cos.rs @@ -41,6 +41,10 @@ use super::{k_cos, k_sin, rem_pio2}; // Accuracy: // TRIG(x) returns trig(x) nearly 
rounded // + +/// The cosine of `x` (f64). +/// +/// `x` is specified in radians. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cos(x: f64) -> f64 { let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; diff --git a/libm/src/math/cosf.rs b/libm/src/math/cosf.rs index 0a01335f7..27c2fc3b9 100644 --- a/libm/src/math/cosf.rs +++ b/libm/src/math/cosf.rs @@ -24,6 +24,9 @@ const C2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ const C3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ const C4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ +/// The cosine of `x` (f32). +/// +/// `x` is specified in radians. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cosf(x: f32) -> f32 { let x64 = x as f64; diff --git a/libm/src/math/j0.rs b/libm/src/math/j0.rs index c4258ccca..5e5e839f8 100644 --- a/libm/src/math/j0.rs +++ b/libm/src/math/j0.rs @@ -109,6 +109,7 @@ const S02: f64 = 1.16926784663337450260e-04; /* 0x3F1EA6D2, 0xDD57DBF4 */ const S03: f64 = 5.13546550207318111446e-07; /* 0x3EA13B54, 0xCE84D5A9 */ const S04: f64 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */ +/// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64). pub fn j0(mut x: f64) -> f64 { let z: f64; let r: f64; @@ -162,6 +163,7 @@ const V02: f64 = 7.60068627350353253702e-05; /* 0x3F13ECBB, 0xF578C6C1 */ const V03: f64 = 2.59150851840457805467e-07; /* 0x3E91642D, 0x7FF202FD */ const V04: f64 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */ +/// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64). 
pub fn y0(x: f64) -> f64 { let z: f64; let u: f64; diff --git a/libm/src/math/j0f.rs b/libm/src/math/j0f.rs index 91c03dbbc..afb6ee9ba 100644 --- a/libm/src/math/j0f.rs +++ b/libm/src/math/j0f.rs @@ -62,6 +62,7 @@ const S02: f32 = 1.1692678527e-04; /* 0x38f53697 */ const S03: f32 = 5.1354652442e-07; /* 0x3509daa6 */ const S04: f32 = 1.1661400734e-09; /* 0x30a045e8 */ +/// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32). pub fn j0f(mut x: f32) -> f32 { let z: f32; let r: f32; @@ -107,6 +108,7 @@ const V02: f32 = 7.6006865129e-05; /* 0x389f65e0 */ const V03: f32 = 2.5915085189e-07; /* 0x348b216c */ const V04: f32 = 4.4111031494e-10; /* 0x2ff280c2 */ +/// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32). pub fn y0f(x: f32) -> f32 { let z: f32; let u: f32; diff --git a/libm/src/math/j1.rs b/libm/src/math/j1.rs index 02a65ca5a..cef17a63e 100644 --- a/libm/src/math/j1.rs +++ b/libm/src/math/j1.rs @@ -113,6 +113,7 @@ const S03: f64 = 1.17718464042623683263e-06; /* 0x3EB3BFF8, 0x333F8498 */ const S04: f64 = 5.04636257076217042715e-09; /* 0x3E35AC88, 0xC97DFF2C */ const S05: f64 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */ +/// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64). pub fn j1(x: f64) -> f64 { let mut z: f64; let r: f64; @@ -158,6 +159,7 @@ const V0: [f64; 5] = [ 1.66559246207992079114e-11, /* 0x3DB25039, 0xDACA772A */ ]; +/// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64). 
pub fn y1(x: f64) -> f64 { let z: f64; let u: f64; diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs index c39f8ff7e..02a3efd24 100644 --- a/libm/src/math/j1f.rs +++ b/libm/src/math/j1f.rs @@ -63,6 +63,7 @@ const S03: f32 = 1.1771846857e-06; /* 0x359dffc2 */ const S04: f32 = 5.0463624390e-09; /* 0x31ad6446 */ const S05: f32 = 1.2354227016e-11; /* 0x2d59567e */ +/// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32). pub fn j1f(x: f32) -> f32 { let mut z: f32; let r: f32; @@ -107,6 +108,7 @@ const V0: [f32; 5] = [ 1.6655924903e-11, /* 0x2d9281cf */ ]; +/// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32). pub fn y1f(x: f32) -> f32 { let z: f32; let u: f32; diff --git a/libm/src/math/jn.rs b/libm/src/math/jn.rs index 22ced20c1..aff051f24 100644 --- a/libm/src/math/jn.rs +++ b/libm/src/math/jn.rs @@ -38,6 +38,7 @@ use super::{cos, fabs, get_high_word, get_low_word, j0, j1, log, sin, sqrt, y0, const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ +/// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64). pub fn jn(n: i32, mut x: f64) -> f64 { let mut ix: u32; let lx: u32; @@ -247,6 +248,7 @@ pub fn jn(n: i32, mut x: f64) -> f64 { if sign { -b } else { b } } +/// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64). pub fn yn(n: i32, x: f64) -> f64 { let mut ix: u32; let lx: u32; diff --git a/libm/src/math/jnf.rs b/libm/src/math/jnf.rs index 9cd0bb37d..e5afda448 100644 --- a/libm/src/math/jnf.rs +++ b/libm/src/math/jnf.rs @@ -15,6 +15,7 @@ use super::{fabsf, j0f, j1f, logf, y0f, y1f}; +/// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32). 
pub fn jnf(n: i32, mut x: f32) -> f32 { let mut ix: u32; let mut nm1: i32; @@ -191,6 +192,7 @@ pub fn jnf(n: i32, mut x: f32) -> f32 { if sign { -b } else { b } } +/// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32). pub fn ynf(n: i32, x: f32) -> f32 { let mut ix: u32; let mut ib: u32; diff --git a/libm/src/math/log.rs b/libm/src/math/log.rs index 27a26da60..f2dc47ec5 100644 --- a/libm/src/math/log.rs +++ b/libm/src/math/log.rs @@ -70,6 +70,7 @@ const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ +/// The natural logarithm of `x` (f64). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log(mut x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 diff --git a/libm/src/math/log10.rs b/libm/src/math/log10.rs index 40dacf2c9..f9d118f12 100644 --- a/libm/src/math/log10.rs +++ b/libm/src/math/log10.rs @@ -31,6 +31,7 @@ const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ +/// The base 10 logarithm of `x` (f64). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log10(mut x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 diff --git a/libm/src/math/log10f.rs b/libm/src/math/log10f.rs index 108dfa8b5..18bf8fcc8 100644 --- a/libm/src/math/log10f.rs +++ b/libm/src/math/log10f.rs @@ -25,6 +25,7 @@ const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ +/// The base 10 logarithm of `x` (f32). 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log10f(mut x: f32) -> f32 { let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 diff --git a/libm/src/math/log1p.rs b/libm/src/math/log1p.rs index 552de549b..80561ec74 100644 --- a/libm/src/math/log1p.rs +++ b/libm/src/math/log1p.rs @@ -65,6 +65,7 @@ const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ +/// The natural logarithm of 1+`x` (f64). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log1p(x: f64) -> f64 { let mut ui: u64 = x.to_bits(); diff --git a/libm/src/math/log1pf.rs b/libm/src/math/log1pf.rs index 8068128db..bba5b8a2f 100644 --- a/libm/src/math/log1pf.rs +++ b/libm/src/math/log1pf.rs @@ -20,6 +20,7 @@ const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ +/// The natural logarithm of 1+`x` (f32). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log1pf(x: f32) -> f32 { let mut ui: u32 = x.to_bits(); diff --git a/libm/src/math/log2.rs b/libm/src/math/log2.rs index 83da3a193..59533340b 100644 --- a/libm/src/math/log2.rs +++ b/libm/src/math/log2.rs @@ -29,6 +29,7 @@ const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ +/// The base 2 logarithm of `x` (f64). 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log2(mut x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 diff --git a/libm/src/math/log2f.rs b/libm/src/math/log2f.rs index 3a20fb15b..5ba2427d1 100644 --- a/libm/src/math/log2f.rs +++ b/libm/src/math/log2f.rs @@ -23,6 +23,7 @@ const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ +/// The base 2 logarithm of `x` (f32). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log2f(mut x: f32) -> f32 { let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 diff --git a/libm/src/math/logf.rs b/libm/src/math/logf.rs index 2b57b934f..68d194302 100644 --- a/libm/src/math/logf.rs +++ b/libm/src/math/logf.rs @@ -21,6 +21,7 @@ const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ +/// The natural logarithm of `x` (f32). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn logf(mut x: f32) -> f32 { let x1p25 = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 9b617cadb..7ecad291d 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -89,6 +89,7 @@ const IVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547_652b82fe =1/ln2 */ const IVLN2_H: f64 = 1.44269502162933349609e+00; /* 0x3ff71547_60000000 =24b 1/ln2*/ const IVLN2_L: f64 = 1.92596299112661746887e-08; /* 0x3e54ae0b_f85ddf44 =1/ln2 tail*/ +/// Returns `x` to the power of `y` (f64). 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn pow(x: f64, y: f64) -> f64 { let t1: f64; diff --git a/libm/src/math/powf.rs b/libm/src/math/powf.rs index d47ab4b3d..2d9d1e4bb 100644 --- a/libm/src/math/powf.rs +++ b/libm/src/math/powf.rs @@ -43,6 +43,7 @@ const IVLN2: f32 = 1.4426950216e+00; const IVLN2_H: f32 = 1.4426879883e+00; const IVLN2_L: f32 = 7.0526075433e-06; +/// Returns `x` to the power of `y` (f32). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn powf(x: f32, y: f32) -> f32 { let mut z: f32; diff --git a/libm/src/math/sin.rs b/libm/src/math/sin.rs index a53843dcd..e04e0d6a0 100644 --- a/libm/src/math/sin.rs +++ b/libm/src/math/sin.rs @@ -40,6 +40,10 @@ use super::{k_cos, k_sin, rem_pio2}; // // Accuracy: // TRIG(x) returns trig(x) nearly rounded + +/// The sine of `x` (f64). +/// +/// `x` is specified in radians. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sin(x: f64) -> f64 { let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 diff --git a/libm/src/math/sincos.rs b/libm/src/math/sincos.rs index ff5d87a1c..ebf482f2d 100644 --- a/libm/src/math/sincos.rs +++ b/libm/src/math/sincos.rs @@ -12,6 +12,9 @@ use super::{get_high_word, k_cos, k_sin, rem_pio2}; +/// Both the sine and cosine of `x` (f64). +/// +/// `x` is specified in radians and the return value is (sin(x), cos(x)). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sincos(x: f64) -> (f64, f64) { let s: f64; diff --git a/libm/src/math/sincosf.rs b/libm/src/math/sincosf.rs index 9a4c36104..82c40fb8c 100644 --- a/libm/src/math/sincosf.rs +++ b/libm/src/math/sincosf.rs @@ -23,6 +23,9 @@ const S2PIO2: f32 = 2.0 * PI_2; /* 0x400921FB, 0x54442D18 */ const S3PIO2: f32 = 3.0 * PI_2; /* 0x4012D97C, 0x7F3321D2 */ const S4PIO2: f32 = 4.0 * PI_2; /* 0x401921FB, 0x54442D18 */ +/// Both the sine and cosine of `x` (f32). +/// +/// `x` is specified in radians and the return value is (sin(x), cos(x)). 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sincosf(x: f32) -> (f32, f32) { let s: f32; diff --git a/libm/src/math/sinf.rs b/libm/src/math/sinf.rs index ca1814627..b8fae2c98 100644 --- a/libm/src/math/sinf.rs +++ b/libm/src/math/sinf.rs @@ -24,6 +24,9 @@ const S2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ const S3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ const S4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ +/// The sine of `x` (f32). +/// +/// `x` is specified in radians. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sinf(x: f32) -> f32 { let x64 = x as f64; diff --git a/libm/src/math/sinh.rs b/libm/src/math/sinh.rs index fd24fd20c..791841982 100644 --- a/libm/src/math/sinh.rs +++ b/libm/src/math/sinh.rs @@ -4,6 +4,8 @@ use super::{expm1, expo2}; // = (exp(x)-1 + (exp(x)-1)/exp(x))/2 // = x + x^3/6 + o(x^5) // + +/// The hyperbolic sine of `x` (f64). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sinh(x: f64) -> f64 { // union {double f; uint64_t i;} u = {.f = x}; diff --git a/libm/src/math/sinhf.rs b/libm/src/math/sinhf.rs index 6788642f0..44d2e3560 100644 --- a/libm/src/math/sinhf.rs +++ b/libm/src/math/sinhf.rs @@ -1,5 +1,6 @@ use super::{expm1f, k_expo2f}; +/// The hyperbolic sine of `x` (f32). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sinhf(x: f32) -> f32 { let mut h = 0.5f32; diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 5862b119b..e2907384d 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -78,6 +78,7 @@ use core::f64; +/// The square root of `x` (f64). 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrt(x: f64) -> f64 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index f7324c941..a738fc0b6 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -13,6 +13,7 @@ * ==================================================== */ +/// The square root of `x` (f32). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrtf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized diff --git a/libm/src/math/tan.rs b/libm/src/math/tan.rs index 5aa44aeef..a074ca554 100644 --- a/libm/src/math/tan.rs +++ b/libm/src/math/tan.rs @@ -39,6 +39,10 @@ use super::{k_tan, rem_pio2}; // // Accuracy: // TRIG(x) returns trig(x) nearly rounded + +/// The tangent of `x` (f64). +/// +/// `x` is specified in radians. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tan(x: f64) -> f64 { let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 diff --git a/libm/src/math/tanf.rs b/libm/src/math/tanf.rs index f6b2399d0..7586aae4c 100644 --- a/libm/src/math/tanf.rs +++ b/libm/src/math/tanf.rs @@ -24,6 +24,9 @@ const T2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ const T3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ const T4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ +/// The tangent of `x` (f32). +/// +/// `x` is specified in radians. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tanf(x: f32) -> f32 { let x64 = x as f64; diff --git a/libm/src/math/tanh.rs b/libm/src/math/tanh.rs index cfea2c167..cc0abe4fc 100644 --- a/libm/src/math/tanh.rs +++ b/libm/src/math/tanh.rs @@ -4,6 +4,10 @@ use super::expm1; * = (exp(2*x) - 1)/(exp(2*x) - 1 + 2) * = (1 - exp(-2*x))/(exp(-2*x) - 1 + 2) */ + +/// The hyperbolic tangent of `x` (f64). +/// +/// `x` is specified in radians. 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tanh(mut x: f64) -> f64 { let mut uf: f64 = x; diff --git a/libm/src/math/tanhf.rs b/libm/src/math/tanhf.rs index ab13e1abf..fffbba6c6 100644 --- a/libm/src/math/tanhf.rs +++ b/libm/src/math/tanhf.rs @@ -1,5 +1,8 @@ use super::expm1f; +/// The hyperbolic tangent of `x` (f32). +/// +/// `x` is specified in radians. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tanhf(mut x: f32) -> f32 { /* x = |x| */ From 1c2c6a021292a9e48482c62d5d4f59f7815f8d30 Mon Sep 17 00:00:00 2001 From: quaternic <57393910+quaternic@users.noreply.github.com> Date: Mon, 28 Oct 2024 19:59:21 +0200 Subject: [PATCH 0895/1459] fix type of constants in ported sincosf (#331) * fix type of constants in ported sincosf --- libm/src/math/sincosf.rs | 50 +++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/libm/src/math/sincosf.rs b/libm/src/math/sincosf.rs index 82c40fb8c..423845e44 100644 --- a/libm/src/math/sincosf.rs +++ b/libm/src/math/sincosf.rs @@ -17,11 +17,11 @@ use super::{k_cosf, k_sinf, rem_pio2f}; /* Small multiples of pi/2 rounded to double precision. */ -const PI_2: f32 = 0.5 * 3.1415926535897931160E+00; -const S1PIO2: f32 = 1.0 * PI_2; /* 0x3FF921FB, 0x54442D18 */ -const S2PIO2: f32 = 2.0 * PI_2; /* 0x400921FB, 0x54442D18 */ -const S3PIO2: f32 = 3.0 * PI_2; /* 0x4012D97C, 0x7F3321D2 */ -const S4PIO2: f32 = 4.0 * PI_2; /* 0x401921FB, 0x54442D18 */ +const PI_2: f64 = 0.5 * 3.1415926535897931160E+00; +const S1PIO2: f64 = 1.0 * PI_2; /* 0x3FF921FB, 0x54442D18 */ +const S2PIO2: f64 = 2.0 * PI_2; /* 0x400921FB, 0x54442D18 */ +const S3PIO2: f64 = 3.0 * PI_2; /* 0x4012D97C, 0x7F3321D2 */ +const S4PIO2: f64 = 4.0 * PI_2; /* 0x401921FB, 0x54442D18 */ /// Both the sine and cosine of `x` (f32). 
/// @@ -59,21 +59,21 @@ pub fn sincosf(x: f32) -> (f32, f32) { if ix <= 0x4016cbe3 { /* |x| ~<= 3pi/4 */ if sign { - s = -k_cosf((x + S1PIO2) as f64); - c = k_sinf((x + S1PIO2) as f64); + s = -k_cosf(x as f64 + S1PIO2); + c = k_sinf(x as f64 + S1PIO2); } else { - s = k_cosf((S1PIO2 - x) as f64); - c = k_sinf((S1PIO2 - x) as f64); + s = k_cosf(S1PIO2 - x as f64); + c = k_sinf(S1PIO2 - x as f64); } } /* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */ else { if sign { - s = -k_sinf((x + S2PIO2) as f64); - c = -k_cosf((x + S2PIO2) as f64); + s = -k_sinf(x as f64 + S2PIO2); + c = -k_cosf(x as f64 + S2PIO2); } else { - s = -k_sinf((x - S2PIO2) as f64); - c = -k_cosf((x - S2PIO2) as f64); + s = -k_sinf(x as f64 - S2PIO2); + c = -k_cosf(x as f64 - S2PIO2); } } @@ -85,19 +85,19 @@ pub fn sincosf(x: f32) -> (f32, f32) { if ix <= 0x40afeddf { /* |x| ~<= 7*pi/4 */ if sign { - s = k_cosf((x + S3PIO2) as f64); - c = -k_sinf((x + S3PIO2) as f64); + s = k_cosf(x as f64 + S3PIO2); + c = -k_sinf(x as f64 + S3PIO2); } else { - s = -k_cosf((x - S3PIO2) as f64); - c = k_sinf((x - S3PIO2) as f64); + s = -k_cosf(x as f64 - S3PIO2); + c = k_sinf(x as f64 - S3PIO2); } } else { if sign { - s = k_sinf((x + S4PIO2) as f64); - c = k_cosf((x + S4PIO2) as f64); + s = k_sinf(x as f64 + S4PIO2); + c = k_cosf(x as f64 + S4PIO2); } else { - s = k_sinf((x - S4PIO2) as f64); - c = k_cosf((x - S4PIO2) as f64); + s = k_sinf(x as f64 - S4PIO2); + c = k_cosf(x as f64 - S4PIO2); } } @@ -131,14 +131,6 @@ pub fn sincosf(x: f32) -> (f32, f32) { #[cfg(test)] mod tests { use super::sincosf; - use crate::_eqf; - - #[test] - fn with_pi() { - let (s, c) = sincosf(core::f32::consts::PI); - _eqf(s.abs(), 0.0).unwrap(); - _eqf(c, -1.0).unwrap(); - } #[test] fn rotational_symmetry() { From 23b53f4b7fdf88fd470f0366d50e18cc0a65e6e9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 16 Oct 2024 22:25:16 -0500 Subject: [PATCH 0896/1459] Introduce `musl-math-sys` for bindings to musl math symbols This crate 
builds math symbols from a musl checkout and provides a Rust interface. The intent is that we will be able to compare our implementations against musl on more than just linux (which is currently the only platform where we run `*-musl` targets for comparison). Musl libc can't compile on anything other than Linux; however, the routines in `src/math` are cross platform enough to build on MacOS and windows-gnu with only minor adjustments. We take advantage of this and build only needed files using `cc`. The build script also performs remapping (via defines) so that e.g. `cos` gets defined as `musl_cos`. This gives us more certainty that we are actually testing against the intended symbol; without it, it is easy to unknowingly link to system libraries or even Rust's `libm` itself and wind up with an ineffective test. There is also a small procedure to verify remapping worked correctly by checking symbols in object files. --- libm/.gitignore | 5 +- libm/Cargo.toml | 2 + libm/crates/musl-math-sys/Cargo.toml | 12 + libm/crates/musl-math-sys/build.rs | 328 ++++++++++++++++++ libm/crates/musl-math-sys/c_patches/alias.c | 40 +++ .../crates/musl-math-sys/c_patches/features.h | 39 +++ libm/crates/musl-math-sys/src/lib.rs | 279 +++++++++++++++ 7 files changed, 703 insertions(+), 2 deletions(-) create mode 100644 libm/crates/musl-math-sys/Cargo.toml create mode 100644 libm/crates/musl-math-sys/build.rs create mode 100644 libm/crates/musl-math-sys/c_patches/alias.c create mode 100644 libm/crates/musl-math-sys/c_patches/features.h create mode 100644 libm/crates/musl-math-sys/src/lib.rs diff --git a/libm/.gitignore b/libm/.gitignore index 39950911a..b6a532751 100644 --- a/libm/.gitignore +++ b/libm/.gitignore @@ -1,8 +1,9 @@ -**/*.rs.bk +**.bk .#* /bin /math/src /math/target /target -/tests Cargo.lock +musl/ +**.tar.gz diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 9282b7157..99bfdbfdc 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -24,10 +24,12 @@ unstable =
[] force-soft-floats = [] [workspace] +resolver = "2" members = [ "crates/compiler-builtins-smoke-test", "crates/libm-bench", "crates/libm-test", + "crates/musl-math-sys", ] default-members = [ ".", diff --git a/libm/crates/musl-math-sys/Cargo.toml b/libm/crates/musl-math-sys/Cargo.toml new file mode 100644 index 000000000..449ce4f3e --- /dev/null +++ b/libm/crates/musl-math-sys/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "musl-math-sys" +version = "0.1.0" +edition = "2021" + +[dependencies] + +[dev-dependencies] +libm = { path = "../../" } + +[build-dependencies] +cc = "1.1.24" diff --git a/libm/crates/musl-math-sys/build.rs b/libm/crates/musl-math-sys/build.rs new file mode 100644 index 000000000..03df06c79 --- /dev/null +++ b/libm/crates/musl-math-sys/build.rs @@ -0,0 +1,328 @@ +use std::collections::BTreeMap; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; +use std::{env, fs, str}; + +/// Static library that will be built +const LIB_NAME: &str = "musl_math_prefixed"; + +/// Files that have more than one symbol. Map of file names to the symbols defined in that file. 
+const MULTIPLE_SYMBOLS: &[(&str, &[&str])] = &[ + ("__invtrigl", &["__invtrigl", "__invtrigl_R", "__pio2_hi", "__pio2_lo"]), + ("__polevll", &["__polevll", "__p1evll"]), + ("erf", &["erf", "erfc"]), + ("erff", &["erff", "erfcf"]), + ("erfl", &["erfl", "erfcl"]), + ("exp10", &["exp10", "pow10"]), + ("exp10f", &["exp10f", "pow10f"]), + ("exp10l", &["exp10l", "pow10l"]), + ("exp2f_data", &["exp2f_data", "__exp2f_data"]), + ("exp_data", &["exp_data", "__exp_data"]), + ("j0", &["j0", "y0"]), + ("j0f", &["j0f", "y0f"]), + ("j1", &["j1", "y1"]), + ("j1f", &["j1f", "y1f"]), + ("jn", &["jn", "yn"]), + ("jnf", &["jnf", "ynf"]), + ("lgamma", &["lgamma", "__lgamma_r"]), + ("remainder", &["remainder", "drem"]), + ("remainderf", &["remainderf", "dremf"]), + ("lgammaf", &["lgammaf", "lgammaf_r", "__lgammaf_r"]), + ("lgammal", &["lgammal", "lgammal_r", "__lgammal_r"]), + ("log2_data", &["log2_data", "__log2_data"]), + ("log2f_data", &["log2f_data", "__log2f_data"]), + ("log_data", &["log_data", "__log_data"]), + ("logf_data", &["logf_data", "__logf_data"]), + ("pow_data", &["pow_data", "__pow_log_data"]), + ("powf_data", &["powf_data", "__powf_log2_data"]), + ("signgam", &["signgam", "__signgam"]), + ("sqrt_data", &["sqrt_data", "__rsqrt_tab"]), +]; + +fn main() { + let cfg = Config::from_env(); + + if cfg.target_env == "msvc" + || cfg.target_family == "wasm" + || cfg.target_features.iter().any(|f| f == "thumb-mode") + { + println!( + "cargo::warning=Musl doesn't compile with the current \ + target {}; skipping build", + &cfg.target_string + ); + return; + } + + build_musl_math(&cfg); +} + +#[allow(dead_code)] +#[derive(Debug)] +struct Config { + manifest_dir: PathBuf, + out_dir: PathBuf, + musl_dir: PathBuf, + musl_arch: String, + target_arch: String, + target_env: String, + target_family: String, + target_os: String, + target_string: String, + target_vendor: String, + target_features: Vec, +} + +impl Config { + fn from_env() -> Self { + let manifest_dir = 
PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + let target_features = env::var("CARGO_CFG_TARGET_FEATURE") + .map(|feats| feats.split(',').map(ToOwned::to_owned).collect()) + .unwrap_or_default(); + + // Default to the `{workspace_root}/musl` if not specified + let musl_dir = env::var("MUSL_SOURCE_DIR") + .map(PathBuf::from) + .unwrap_or_else(|_| manifest_dir.parent().unwrap().parent().unwrap().join("musl")); + + let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); + let musl_arch = if target_arch == "x86" { "i386".to_owned() } else { target_arch.clone() }; + + println!("cargo::rerun-if-changed={}/c_patches", manifest_dir.display()); + println!("cargo::rerun-if-env-changed=MUSL_SOURCE_DIR"); + println!("cargo::rerun-if-changed={}", musl_dir.display()); + + Self { + manifest_dir, + out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()), + musl_dir, + musl_arch, + target_arch, + target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(), + target_family: env::var("CARGO_CFG_TARGET_FAMILY").unwrap(), + target_os: env::var("CARGO_CFG_TARGET_OS").unwrap(), + target_string: env::var("TARGET").unwrap(), + target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), + target_features, + } + } +} + +/// Build musl math symbols to a static library +fn build_musl_math(cfg: &Config) { + let musl_dir = &cfg.musl_dir; + assert!( + musl_dir.exists(), + "musl source is missing. it can be downloaded with ./ci/download-musl.sh" + ); + + let math = musl_dir.join("src/math"); + let arch_dir = musl_dir.join("arch").join(&cfg.musl_arch); + let source_map = find_math_source(&math, cfg); + let out_path = cfg.out_dir.join(format!("lib{LIB_NAME}.a")); + + // Run configuration steps. Usually done as part of the musl `Makefile`. 
+ let obj_include = cfg.out_dir.join("musl_obj/include"); + fs::create_dir_all(&obj_include).unwrap(); + fs::create_dir_all(&obj_include.join("bits")).unwrap(); + let sed_stat = Command::new("sed") + .arg("-f") + .arg(musl_dir.join("tools/mkalltypes.sed")) + .arg(arch_dir.join("bits/alltypes.h.in")) + .arg(musl_dir.join("include/alltypes.h.in")) + .stderr(Stdio::inherit()) + .output() + .unwrap(); + assert!(sed_stat.status.success(), "sed command failed: {:?}", sed_stat.status); + + fs::write(obj_include.join("bits/alltypes.h"), sed_stat.stdout).unwrap(); + + let mut cbuild = cc::Build::new(); + cbuild + .extra_warnings(false) + .warnings(false) + .flag_if_supported("-Wno-bitwise-op-parentheses") + .flag_if_supported("-Wno-literal-range") + .flag_if_supported("-Wno-parentheses") + .flag_if_supported("-Wno-shift-count-overflow") + .flag_if_supported("-Wno-shift-op-parentheses") + .flag_if_supported("-Wno-unused-but-set-variable") + .flag_if_supported("-std=c99") + .flag_if_supported("-ffreestanding") + .flag_if_supported("-nostdinc") + .define("_ALL_SOURCE", "1") + .opt_level(3) + .define( + "ROOT_INCLUDE_FEATURES", + Some(musl_dir.join("include/features.h").to_str().unwrap()), + ) + // Our overrides are in this directory + .include(cfg.manifest_dir.join("c_patches")) + .include(musl_dir.join("arch").join(&cfg.musl_arch)) + .include(musl_dir.join("arch/generic")) + .include(musl_dir.join("src/include")) + .include(musl_dir.join("src/internal")) + .include(obj_include) + .include(musl_dir.join("include")) + .file(cfg.manifest_dir.join("c_patches/alias.c")); + + for (sym_name, src_file) in source_map { + // Build the source file + cbuild.file(src_file); + + // Trickery! Redefine the symbol names to have the prefix `musl_`, which allows us to + // differentiate these symbols from whatever we provide. 
+ if let Some((_names, syms)) = + MULTIPLE_SYMBOLS.iter().find(|(name, _syms)| *name == sym_name) + { + // Handle the occasional file that defines multiple symbols + for sym in *syms { + cbuild.define(sym, Some(format!("musl_{sym}").as_str())); + } + } else { + // If the file doesn't define multiple symbols, the file name will be the symbol + cbuild.define(&sym_name, Some(format!("musl_{sym_name}").as_str())); + } + } + + if cfg!(windows) { + // On Windows we don't have a good way to check symbols, so skip that step. + cbuild.compile(LIB_NAME); + return; + } + + let objfiles = cbuild.compile_intermediates(); + + // We create the archive ourselves with relocations rather than letting `cc` do it so we can + // encourage it to resolve symbols now. This should help avoid accidentally linking the wrong + // thing. + let stat = cbuild + .get_compiler() + .to_command() + .arg("-r") + .arg("-o") + .arg(&out_path) + .args(objfiles) + .status() + .unwrap(); + assert!(stat.success()); + + println!("cargo::rustc-link-lib={LIB_NAME}"); + println!("cargo::rustc-link-search=native={}", cfg.out_dir.display()); + + validate_archive_symbols(&out_path); +} + +/// Build a map of `name -> path`. `name` is typically the symbol name, but this doesn't account +/// for files that provide multiple symbols. 
+fn find_math_source(math_root: &Path, cfg: &Config) -> BTreeMap { + let mut map = BTreeMap::new(); + let mut arch_dir = None; + + // Locate all files and directories + for item in fs::read_dir(math_root).unwrap() { + let path = item.unwrap().path(); + let meta = fs::metadata(&path).unwrap(); + + if meta.is_dir() { + // Make note of the arch-specific directory if it exists + if path.file_name().unwrap() == cfg.target_arch.as_str() { + arch_dir = Some(path); + } + continue; + } + + // Skip non-source files + if path.extension().is_some_and(|ext| ext == "h") { + continue; + } + + let sym_name = path.file_stem().unwrap(); + map.insert(sym_name.to_str().unwrap().to_owned(), path.to_owned()); + } + + // If arch-specific versions are available, build those instead. + if let Some(arch_dir) = arch_dir { + for item in fs::read_dir(arch_dir).unwrap() { + let path = item.unwrap().path(); + let sym_name = path.file_stem().unwrap(); + + if path.extension().unwrap() == "s" { + // FIXME: we never build assembly versions since we have no good way to + // rename the symbol (our options are probably preprocessor or objcopy). + continue; + } + map.insert(sym_name.to_str().unwrap().to_owned(), path); + } + } + + map +} + +/// Make sure we don't have something like a loose unprefixed `_cos` called somewhere, which could +/// wind up linking to system libraries rather than the built musl library. 
+fn validate_archive_symbols(out_path: &Path) { + const ALLOWED_UNDEF_PFX: &[&str] = &[ + // PIC and arch-specific + ".TOC", + "_GLOBAL_OFFSET_TABLE_", + "__x86.get_pc_thunk", + // gcc/compiler-rt/compiler-builtins symbols + "__add", + "__aeabi_", + "__div", + "__eq", + "__extend", + "__fix", + "__float", + "__gcc_", + "__ge", + "__gt", + "__le", + "__lshr", + "__lt", + "__mul", + "__ne", + "__stack_chk_fail", + "__stack_chk_guard", + "__sub", + "__trunc", + "__undef", + // string routines + "__bzero", + "bzero", + // FPENV interfaces + "feclearexcept", + "fegetround", + "feraiseexcept", + "fesetround", + "fetestexcept", + ]; + + // List global undefined symbols + let out = + Command::new("nm").arg("-guj").arg(out_path).stderr(Stdio::inherit()).output().unwrap(); + + let undef = str::from_utf8(&out.stdout).unwrap(); + let mut undef = undef.lines().collect::>(); + undef.retain(|sym| { + // Account for file formats that add a leading `_` + !ALLOWED_UNDEF_PFX.iter().any(|pfx| sym.starts_with(pfx) || sym[1..].starts_with(pfx)) + }); + + assert!(undef.is_empty(), "found disallowed undefined symbols: {undef:#?}"); + + // Find any symbols that are missing the `_musl_` prefix` + let out = + Command::new("nm").arg("-gUj").arg(out_path).stderr(Stdio::inherit()).output().unwrap(); + + let defined = str::from_utf8(&out.stdout).unwrap(); + let mut defined = defined.lines().collect::>(); + defined.retain(|sym| { + !(sym.starts_with("_musl_") + || sym.starts_with("musl_") + || sym.starts_with("__x86.get_pc_thunk")) + }); + + assert!(defined.is_empty(), "found unprefixed symbols: {defined:#?}"); +} diff --git a/libm/crates/musl-math-sys/c_patches/alias.c b/libm/crates/musl-math-sys/c_patches/alias.c new file mode 100644 index 000000000..63e0f08d5 --- /dev/null +++ b/libm/crates/musl-math-sys/c_patches/alias.c @@ -0,0 +1,40 @@ +/* On platforms that don't support weak symbols, define required aliases + * as wrappers. See comments in `features.h` for more. 
+ */ +#if defined(__APPLE__) || defined(__MINGW32__) + +double __lgamma_r(double a, int *b); +float __lgammaf_r(float a, int *b); +long double __lgammal_r(long double a, int *b); +double exp10(double a); +float exp10f(float a); +long double exp10l(long double a); +double remainder(double a, double b); +float remainderf(float a, float b); + +double lgamma_r(double a, int *b) { + return __lgamma_r(a, b); +} +float lgammaf_r(float a, int *b) { + return __lgammaf_r(a, b); +} +long double lgammal_r(long double a, int *b) { + return __lgammal_r(a, b); +} +double pow10(double a) { + return exp10(a); +} +float pow10f(float a) { + return exp10f(a); +} +long double pow10l(long double a) { + return exp10l(a); +} +double drem(double a, double b) { + return remainder(a, b); +} +float dremf(float a, float b) { + return remainderf(a, b); +} + +#endif diff --git a/libm/crates/musl-math-sys/c_patches/features.h b/libm/crates/musl-math-sys/c_patches/features.h new file mode 100644 index 000000000..97af93597 --- /dev/null +++ b/libm/crates/musl-math-sys/c_patches/features.h @@ -0,0 +1,39 @@ +/* This is meant to override Musl's src/include/features.h + * + * We use a separate file here to redefine some attributes that don't work on + * all platforms that we would like to build on. + */ + +#ifndef FEATURES_H +#define FEATURES_H + +/* Get the required `#include "../../include/features.h"` since we can't use + * the relative path. The C macros need double indirection to get a usable + * string. */ +#define _stringify_inner(s) #s +#define _stringify(s) _stringify_inner(s) +#include _stringify(ROOT_INCLUDE_FEATURES) + +#if defined(__APPLE__) +#define weak __attribute__((__weak__)) +#define hidden __attribute__((__visibility__("hidden"))) + +/* We _should_ be able to define this as: + * _Pragma(_stringify(weak musl_ ## new = musl_ ## old)) + * However, weak symbols aren't handled correctly [1]. So we manually write + * wrappers, which are in `alias.c`.
+ * + * [1]: https://github.com/llvm/llvm-project/issues/111321 + */ +#define weak_alias(old, new) /* nothing */ + +#else +#define weak __attribute__((__weak__)) +#define hidden __attribute__((__visibility__("hidden"))) +#define weak_alias(old, new) \ + extern __typeof(old) musl_ ## new \ + __attribute__((__weak__, __alias__(_stringify(musl_ ## old)))) + +#endif /* defined(__APPLE__) */ + +#endif diff --git a/libm/crates/musl-math-sys/src/lib.rs b/libm/crates/musl-math-sys/src/lib.rs new file mode 100644 index 000000000..fe3c89229 --- /dev/null +++ b/libm/crates/musl-math-sys/src/lib.rs @@ -0,0 +1,279 @@ +//! Bindings to Musl math functions (these are built in `build.rs`). + +use std::ffi::{c_char, c_int, c_long}; + +/// Macro for creating bindings and exposing a safe function (since the implementations have no +/// preconditions). Included functions must have correct signatures, otherwise this will be +/// unsound. +macro_rules! functions { + ( $( + $pfx_name:ident: $name:ident( $($arg:ident: $aty:ty),+ ) -> $rty:ty; + )* ) => { + extern "C" { + $( fn $pfx_name( $($arg: $aty),+ ) -> $rty; )* + } + + $( + // Expose a safe version + pub fn $name( $($arg: $aty),+ ) -> $rty { + // SAFETY: FFI calls with no preconditions + unsafe { $pfx_name( $($arg),+ ) } + } + )* + + #[cfg(test)] + mod tests { + use super::*; + use test_support::CallTest; + + $( functions!( + @single_test + $name($($arg: $aty),+) -> $rty + ); )* + } + }; + + (@single_test + $name:ident( $($arg:ident: $aty:ty),+ ) -> $rty:ty + ) => { + // Run a simple check to ensure we can link and call the function without crashing. + #[test] + // FIXME(#309): LE PPC crashes calling some musl functions + #[cfg_attr(all(target_arch = "powerpc64", target_endian = "little"), ignore)] + fn $name() { + $rty>::check(super::$name); + } + }; +} + +#[cfg(test)] +mod test_support { + use core::ffi::c_char; + + /// Just verify that we are able to call the function. 
+ pub trait CallTest { + fn check(f: Self); + } + + macro_rules! impl_calltest { + ($( ($($arg:ty),*) -> $ret:ty; )*) => { + $( + impl CallTest for fn($($arg),*) -> $ret { + fn check(f: Self) { + f($(1 as $arg),*); + } + } + )* + }; + } + + impl_calltest! { + (f32) -> f32; + (f64) -> f64; + (f32, f32) -> f32; + (f64, f64) -> f64; + (i32, f32) -> f32; + (i32, f64) -> f64; + (f32, f32, f32) -> f32; + (f64, f64, f64) -> f64; + (f32, i32) -> f32; + (f32, i64) -> f32; + (f32) -> i32; + (f64) -> i32; + (f64, i32) -> f64; + (f64, i64) -> f64; + } + + impl CallTest for fn(f32, &mut f32) -> f32 { + fn check(f: Self) { + let mut tmp = 0.0; + f(0.0, &mut tmp); + } + } + impl CallTest for fn(f64, &mut f64) -> f64 { + fn check(f: Self) { + let mut tmp = 0.0; + f(0.0, &mut tmp); + } + } + impl CallTest for fn(f32, &mut i32) -> f32 { + fn check(f: Self) { + let mut tmp = 1; + f(0.0, &mut tmp); + } + } + impl CallTest for fn(f64, &mut i32) -> f64 { + fn check(f: Self) { + let mut tmp = 1; + f(0.0, &mut tmp); + } + } + impl CallTest for fn(f32, f32, &mut i32) -> f32 { + fn check(f: Self) { + let mut tmp = 1; + f(0.0, 0.0, &mut tmp); + } + } + impl CallTest for fn(f64, f64, &mut i32) -> f64 { + fn check(f: Self) { + let mut tmp = 1; + f(0.0, 0.0, &mut tmp); + } + } + impl CallTest for fn(f32, &mut f32, &mut f32) { + fn check(f: Self) { + let mut tmp1 = 1.0; + let mut tmp2 = 1.0; + f(0.0, &mut tmp1, &mut tmp2); + } + } + impl CallTest for fn(f64, &mut f64, &mut f64) { + fn check(f: Self) { + let mut tmp1 = 1.0; + let mut tmp2 = 1.0; + f(0.0, &mut tmp1, &mut tmp2); + } + } + impl CallTest for fn(*const c_char) -> f32 { + fn check(f: Self) { + f(c"1".as_ptr()); + } + } + impl CallTest for fn(*const c_char) -> f64 { + fn check(f: Self) { + f(c"1".as_ptr()); + } + } +} + +functions! 
{ + musl_acos: acos(a: f64) -> f64; + musl_acosf: acosf(a: f32) -> f32; + musl_acosh: acosh(a: f64) -> f64; + musl_acoshf: acoshf(a: f32) -> f32; + musl_asin: asin(a: f64) -> f64; + musl_asinf: asinf(a: f32) -> f32; + musl_asinh: asinh(a: f64) -> f64; + musl_asinhf: asinhf(a: f32) -> f32; + musl_atan2: atan2(a: f64, b: f64) -> f64; + musl_atan2f: atan2f(a: f32, b: f32) -> f32; + musl_atan: atan(a: f64) -> f64; + musl_atanf: atanf(a: f32) -> f32; + musl_atanh: atanh(a: f64) -> f64; + musl_atanhf: atanhf(a: f32) -> f32; + musl_cbrt: cbrt(a: f64) -> f64; + musl_cbrtf: cbrtf(a: f32) -> f32; + musl_ceil: ceil(a: f64) -> f64; + musl_ceilf: ceilf(a: f32) -> f32; + musl_copysign: copysign(a: f64, b: f64) -> f64; + musl_copysignf: copysignf(a: f32, b: f32) -> f32; + musl_cos: cos(a: f64) -> f64; + musl_cosf: cosf(a: f32) -> f32; + musl_cosh: cosh(a: f64) -> f64; + musl_coshf: coshf(a: f32) -> f32; + musl_drem: drem(a: f64, b: f64) -> f64; + musl_dremf: dremf(a: f32, b: f32) -> f32; + musl_erf: erf(a: f64) -> f64; + musl_erfc: erfc(a: f64) -> f64; + musl_erfcf: erfcf(a: f32) -> f32; + musl_erff: erff(a: f32) -> f32; + musl_exp10: exp10(a: f64) -> f64; + musl_exp10f: exp10f(a: f32) -> f32; + musl_exp2: exp2(a: f64) -> f64; + musl_exp2f: exp2f(a: f32) -> f32; + musl_exp: exp(a: f64) -> f64; + musl_expf: expf(a: f32) -> f32; + musl_expm1: expm1(a: f64) -> f64; + musl_expm1f: expm1f(a: f32) -> f32; + musl_fabs: fabs(a: f64) -> f64; + musl_fabsf: fabsf(a: f32) -> f32; + musl_fdim: fdim(a: f64, b: f64) -> f64; + musl_fdimf: fdimf(a: f32, b: f32) -> f32; + musl_finite: finite(a: f64) -> c_int; + musl_finitef: finitef(a: f32) -> c_int; + musl_floor: floor(a: f64) -> f64; + musl_floorf: floorf(a: f32) -> f32; + musl_fma: fma(a: f64, b: f64, c: f64) -> f64; + musl_fmaf: fmaf(a: f32, b: f32, c: f32) -> f32; + musl_fmax: fmax(a: f64, b: f64) -> f64; + musl_fmaxf: fmaxf(a: f32, b: f32) -> f32; + musl_fmin: fmin(a: f64, b: f64) -> f64; + musl_fminf: fminf(a: f32, b: f32) -> f32; + 
musl_fmod: fmod(a: f64, b: f64) -> f64; + musl_fmodf: fmodf(a: f32, b: f32) -> f32; + musl_frexp: frexp(a: f64, b: &mut c_int) -> f64; + musl_frexpf: frexpf(a: f32, b: &mut c_int) -> f32; + musl_hypot: hypot(a: f64, b: f64) -> f64; + musl_hypotf: hypotf(a: f32, b: f32) -> f32; + musl_ilogb: ilogb(a: f64) -> c_int; + musl_ilogbf: ilogbf(a: f32) -> c_int; + musl_j0: j0(a: f64) -> f64; + musl_j0f: j0f(a: f32) -> f32; + musl_j1: j1(a: f64) -> f64; + musl_j1f: j1f(a: f32) -> f32; + musl_jn: jn(a: c_int, b: f64) -> f64; + musl_jnf: jnf(a: c_int, b: f32) -> f32; + musl_ldexp: ldexp(a: f64, b: c_int) -> f64; + musl_ldexpf: ldexpf(a: f32, b: c_int) -> f32; + musl_lgamma: lgamma(a: f64) -> f64; + musl_lgamma_r: lgamma_r(a: f64, b: &mut c_int) -> f64; + musl_lgammaf: lgammaf(a: f32) -> f32; + musl_lgammaf_r: lgammaf_r(a: f32, b: &mut c_int) -> f32; + musl_log10: log10(a: f64) -> f64; + musl_log10f: log10f(a: f32) -> f32; + musl_log1p: log1p(a: f64) -> f64; + musl_log1pf: log1pf(a: f32) -> f32; + musl_log2: log2(a: f64) -> f64; + musl_log2f: log2f(a: f32) -> f32; + musl_log: log(a: f64) -> f64; + musl_logb: logb(a: f64) -> f64; + musl_logbf: logbf(a: f32) -> f32; + musl_logf: logf(a: f32) -> f32; + musl_modf: modf(a: f64, b: &mut f64) -> f64; + musl_modff: modff(a: f32, b: &mut f32) -> f32; + musl_nan: nan(a: *const c_char) -> f64; + musl_nanf: nanf(a: *const c_char) -> f32; + musl_nearbyint: nearbyint(a: f64) -> f64; + musl_nearbyintf: nearbyintf(a: f32) -> f32; + musl_nextafter: nextafter(a: f64, b: f64) -> f64; + musl_nextafterf: nextafterf(a: f32, b: f32) -> f32; + musl_pow10: pow10(a: f64) -> f64; + musl_pow10f: pow10f(a: f32) -> f32; + musl_pow: pow(a: f64, b: f64) -> f64; + musl_powf: powf(a: f32, b: f32) -> f32; + musl_remainder: remainder(a: f64, b: f64) -> f64; + musl_remainderf: remainderf(a: f32, b: f32) -> f32; + musl_remquo: remquo(a: f64, b: f64, c: &mut c_int) -> f64; + musl_remquof: remquof(a: f32, b: f32, c: &mut c_int) -> f32; + musl_rint: rint(a: f64) -> 
f64; + musl_rintf: rintf(a: f32) -> f32; + musl_round: round(a: f64) -> f64; + musl_roundf: roundf(a: f32) -> f32; + musl_scalbln: scalbln(a: f64, b: c_long) -> f64; + musl_scalblnf: scalblnf(a: f32, b: c_long) -> f32; + musl_scalbn: scalbn(a: f64, b: c_int) -> f64; + musl_scalbnf: scalbnf(a: f32, b: c_int) -> f32; + musl_significand: significand(a: f64) -> f64; + musl_significandf: significandf(a: f32) -> f32; + musl_sin: sin(a: f64) -> f64; + musl_sincos: sincos(a: f64, b: &mut f64, c: &mut f64) -> (); + musl_sincosf: sincosf(a: f32, b: &mut f32, c: &mut f32) -> (); + musl_sinf: sinf(a: f32) -> f32; + musl_sinh: sinh(a: f64) -> f64; + musl_sinhf: sinhf(a: f32) -> f32; + musl_sqrt: sqrt(a: f64) -> f64; + musl_sqrtf: sqrtf(a: f32) -> f32; + musl_tan: tan(a: f64) -> f64; + musl_tanf: tanf(a: f32) -> f32; + musl_tanh: tanh(a: f64) -> f64; + musl_tanhf: tanhf(a: f32) -> f32; + musl_tgamma: tgamma(a: f64) -> f64; + musl_tgammaf: tgammaf(a: f32) -> f32; + musl_trunc: trunc(a: f64) -> f64; + musl_truncf: truncf(a: f32) -> f32; + musl_y0: y0(a: f64) -> f64; + musl_y0f: y0f(a: f32) -> f32; + musl_y1: y1(a: f64) -> f64; + musl_y1f: y1f(a: f32) -> f32; + musl_ynf: ynf(a: c_int, b: f32) -> f32; +} From fff398aa1e5247d9df5a6c920a2efa52b0bafd4a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 16 Oct 2024 22:36:05 -0500 Subject: [PATCH 0897/1459] Add a script for downloading musl --- libm/ci/download-musl.sh | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100755 libm/ci/download-musl.sh diff --git a/libm/ci/download-musl.sh b/libm/ci/download-musl.sh new file mode 100755 index 000000000..d0d8b310e --- /dev/null +++ b/libm/ci/download-musl.sh @@ -0,0 +1,24 @@ +#!/bin/sh +# Download the expected version of musl to a directory `musl` + +set -eux + +fname=musl-1.2.5.tar.gz +sha=a9a118bbe84d8764da0ea0d28b3ab3fae8477fc7e4085d90102b8596fc7c75e4 + +mkdir musl +curl "https://musl.libc.org/releases/$fname" -O + +case "$(uname -s)" in + MINGW*) + # Need to 
extract the second line because certutil does human output + fsha=$(certutil -hashfile "$fname" SHA256 | sed -n '2p') + [ "$sha" = "$fsha" ] || exit 1 + ;; + *) + echo "$sha $fname" | shasum -a 256 --check || exit 1 + ;; +esac + +tar -xzf "$fname" -C musl --strip-components 1 +rm "$fname" From 4e453a5a659f527a7789201e22b795b74f0d1f0a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 16 Oct 2024 22:37:44 -0500 Subject: [PATCH 0898/1459] Add a procedural macro for expanding all function signatures Introduce `libm_test::for_each_function`, a macro that takes a callback macro and invokes it once per function signature. This provides an easier way of registering various tests and benchmarks without duplicating the function names and signatures each time. --- libm/Cargo.toml | 2 + libm/crates/libm-macros/Cargo.toml | 12 + libm/crates/libm-macros/src/lib.rs | 541 +++++++++++++++++++++++++ libm/crates/libm-macros/src/parse.rs | 236 +++++++++++ libm/crates/libm-macros/tests/basic.rs | 96 +++++ 5 files changed, 887 insertions(+) create mode 100644 libm/crates/libm-macros/Cargo.toml create mode 100644 libm/crates/libm-macros/src/lib.rs create mode 100644 libm/crates/libm-macros/src/parse.rs create mode 100644 libm/crates/libm-macros/tests/basic.rs diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 99bfdbfdc..72b6dcd5e 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -28,11 +28,13 @@ resolver = "2" members = [ "crates/compiler-builtins-smoke-test", "crates/libm-bench", + "crates/libm-macros", "crates/libm-test", "crates/musl-math-sys", ] default-members = [ ".", + "crates/libm-macros", "crates/libm-test", ] diff --git a/libm/crates/libm-macros/Cargo.toml b/libm/crates/libm-macros/Cargo.toml new file mode 100644 index 000000000..9d2b08e2d --- /dev/null +++ b/libm/crates/libm-macros/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "libm-macros" +version = "0.1.0" +edition = "2021" + +[lib] +proc-macro = true + +[dependencies] +proc-macro2 = "1.0.88" +quote = "1.0.37"
+syn = { version = "2.0.79", features = ["full", "extra-traits", "visit-mut"] } diff --git a/libm/crates/libm-macros/src/lib.rs b/libm/crates/libm-macros/src/lib.rs new file mode 100644 index 000000000..dc78598ca --- /dev/null +++ b/libm/crates/libm-macros/src/lib.rs @@ -0,0 +1,541 @@ +mod parse; +use std::sync::LazyLock; + +use parse::{Invocation, StructuredInput}; +use proc_macro as pm; +use proc_macro2::{self as pm2, Span}; +use quote::{ToTokens, quote}; +use syn::Ident; +use syn::visit_mut::VisitMut; + +const ALL_FUNCTIONS: &[(Signature, Option, &[&str])] = &[ + ( + // `fn(f32) -> f32` + Signature { args: &[Ty::F32], returns: &[Ty::F32] }, + None, + &[ + "acosf", "acoshf", "asinf", "asinhf", "atanf", "atanhf", "cbrtf", "ceilf", "cosf", + "coshf", "erff", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf", "j0f", "j1f", + "lgammaf", "log10f", "log1pf", "log2f", "logf", "rintf", "roundf", "sinf", "sinhf", + "sqrtf", "tanf", "tanhf", "tgammaf", "truncf", + ], + ), + ( + // `(f64) -> f64` + Signature { args: &[Ty::F64], returns: &[Ty::F64] }, + None, + &[ + "acos", "acosh", "asin", "asinh", "atan", "atanh", "cbrt", "ceil", "cos", "cosh", + "erf", "exp10", "exp2", "exp", "expm1", "fabs", "floor", "j0", "j1", "lgamma", "log10", + "log1p", "log2", "log", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh", + "tgamma", "trunc", + ], + ), + ( + // `(f32, f32) -> f32` + Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32] }, + None, + &[ + "atan2f", + "copysignf", + "fdimf", + "fmaxf", + "fminf", + "fmodf", + "hypotf", + "nextafterf", + "powf", + "remainderf", + ], + ), + ( + // `(f64, f64) -> f64` + Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64] }, + None, + &[ + "atan2", + "copysign", + "fdim", + "fmax", + "fmin", + "fmod", + "hypot", + "nextafter", + "pow", + "remainder", + ], + ), + ( + // `(f32, f32, f32) -> f32` + Signature { args: &[Ty::F32, Ty::F32, Ty::F32], returns: &[Ty::F32] }, + None, + &["fmaf"], + ), + ( + // `(f64, f64, f64) -> 
f64` + Signature { args: &[Ty::F64, Ty::F64, Ty::F64], returns: &[Ty::F64] }, + None, + &["fma"], + ), + ( + // `(f32) -> i32` + Signature { args: &[Ty::F32], returns: &[Ty::I32] }, + None, + &["ilogbf"], + ), + ( + // `(f64) -> i32` + Signature { args: &[Ty::F64], returns: &[Ty::I32] }, + None, + &["ilogb"], + ), + ( + // `(i32, f32) -> f32` + Signature { args: &[Ty::I32, Ty::F32], returns: &[Ty::F32] }, + None, + &["jnf"], + ), + ( + // `(i32, f64) -> f64` + Signature { args: &[Ty::I32, Ty::F64], returns: &[Ty::F64] }, + None, + &["jn"], + ), + ( + // `(f32, i32) -> f32` + Signature { args: &[Ty::F32, Ty::I32], returns: &[Ty::F32] }, + None, + &["scalbnf", "ldexpf"], + ), + ( + // `(f64, i32) -> f64` + Signature { args: &[Ty::F64, Ty::I32], returns: &[Ty::F64] }, + None, + &["scalbn", "ldexp"], + ), + ( + // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)` + Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] }, + Some(Signature { args: &[Ty::F32, Ty::MutF32], returns: &[Ty::F32] }), + &["modff"], + ), + ( + // `(f64, &mut f64) -> f64` as `(f64) -> (f64, f64)` + Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] }, + Some(Signature { args: &[Ty::F64, Ty::MutF64], returns: &[Ty::F64] }), + &["modf"], + ), + ( + // `(f32, &mut c_int) -> f32` as `(f32) -> (f32, i32)` + Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::I32] }, + Some(Signature { args: &[Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }), + &["frexpf", "lgammaf_r"], + ), + ( + // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)` + Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::I32] }, + Some(Signature { args: &[Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }), + &["frexp", "lgamma_r"], + ), + ( + // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)` + Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32, Ty::I32] }, + Some(Signature { args: &[Ty::F32, Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }), + &["remquof"], + ), + ( + // `(f64, f64, &mut c_int) -> f64` as
`(f64, f64) -> (f64, i32)` + Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64, Ty::I32] }, + Some(Signature { args: &[Ty::F64, Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }), + &["remquo"], + ), + ( + // `(f32, &mut f32, &mut f32)` as `(f32) -> (f32, f32)` + Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] }, + Some(Signature { args: &[Ty::F32, Ty::MutF32, Ty::MutF32], returns: &[] }), + &["sincosf"], + ), + ( + // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)` + Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] }, + Some(Signature { args: &[Ty::F64, Ty::MutF64, Ty::MutF64], returns: &[] }), + &["sincos"], + ), +]; + +/// A type used in a function signature. +#[allow(dead_code)] +#[derive(Debug, Clone, Copy)] +enum Ty { + F16, + F32, + F64, + F128, + I32, + CInt, + MutF16, + MutF32, + MutF64, + MutF128, + MutI32, + MutCInt, +} + +impl ToTokens for Ty { + fn to_tokens(&self, tokens: &mut pm2::TokenStream) { + let ts = match self { + Ty::F16 => quote! { f16 }, + Ty::F32 => quote! { f32 }, + Ty::F64 => quote! { f64 }, + Ty::F128 => quote! { f128 }, + Ty::I32 => quote! { i32 }, + Ty::CInt => quote! { ::core::ffi::c_int }, + Ty::MutF16 => quote! { &mut f16 }, + Ty::MutF32 => quote! { &mut f32 }, + Ty::MutF64 => quote! { &mut f64 }, + Ty::MutF128 => quote! { &mut f128 }, + Ty::MutI32 => quote! { &mut i32 }, + Ty::MutCInt => quote! { &mut core::ffi::c_int }, + }; + + tokens.extend(ts); + } +} + +/// Representation of e.g. `(f32, f32) -> f32` +#[derive(Debug, Clone)] +struct Signature { + args: &'static [Ty], + returns: &'static [Ty], +} + +/// Combined information about a function implementation. +#[derive(Debug, Clone)] +struct FunctionInfo { + name: &'static str, + /// Function signature for C implementations + c_sig: Signature, + /// Function signature for Rust implementations + rust_sig: Signature, +} + +/// A flat representation of `ALL_FUNCTIONS`. 
+static ALL_FUNCTIONS_FLAT: LazyLock> = LazyLock::new(|| { + let mut ret = Vec::new(); + + for (rust_sig, c_sig, names) in ALL_FUNCTIONS { + for name in *names { + let api = FunctionInfo { + name, + rust_sig: rust_sig.clone(), + c_sig: c_sig.clone().unwrap_or_else(|| rust_sig.clone()), + }; + ret.push(api); + } + } + + ret.sort_by_key(|item| item.name); + ret +}); + +/// Do something for each function present in this crate. +/// +/// Takes a callback macro and invokes it multiple times, once for each function that +/// this crate exports. This makes it easy to create generic tests, benchmarks, or other checks +/// and apply it to each symbol. +/// +/// Additionally, the `extra` and `fn_extra` patterns can make use of magic identifiers: +/// +/// - `MACRO_FN_NAME`: gets replaced with the name of the function on that invocation. +/// - `MACRO_FN_NAME_NORMALIZED`: similar to the above, but removes sufixes so e.g. `sinf` becomes +/// `sin`, `cosf128` becomes `cos`, etc. +/// +/// Invoke as: +/// +/// ``` +/// // Macro that is invoked once per function +/// macro_rules! callback_macro { +/// ( +/// // Name of that function +/// fn_name: $fn_name:ident, +/// // Function signature of the C version (e.g. `fn(f32, &mut f32) -> f32`) +/// CFn: $CFn:ty, +/// // A tuple representing the C version's arguments (e.g. `(f32, &mut f32)`) +/// CArgs: $CArgs:ty, +/// // The C version's return type (e.g. `f32`) +/// CRet: $CRet:ty, +/// // Function signature of the Rust version (e.g. `fn(f32) -> (f32, f32)`) +/// RustFn: $RustFn:ty, +/// // A tuple representing the Rust version's arguments (e.g. `(f32,)`) +/// RustArgs: $RustArgs:ty, +/// // The Rust version's return type (e.g. 
`(f32, f32)`) +/// RustRet: $RustRet:ty, +/// // Attributes for the current function, if any +/// attrs: [$($meta:meta)*] +/// // Extra tokens passed directly (if any) +/// extra: [$extra:ident], +/// // Extra function-tokens passed directly (if any) +/// fn_extra: $fn_extra:expr, +/// ) => { }; +/// } +/// +/// libm_macros::for_each_function! { +/// // The macro to invoke as a callback +/// callback: callback_macro, +/// // Functions to skip, i.e. `callback` shouldn't be called at all for these. +/// // +/// // This is an optional field. +/// skip: [sin, cos], +/// // Attributes passed as `attrs` for specific functions. For example, here the invocation +/// // with `sinf` and that with `cosf` will both get `meta1` and `meta2`, but no others will. +/// // +/// // This is an optional field. +/// attributes: [ +/// #[meta1] +/// #[meta2] +/// [sinf, cosf], +/// ], +/// // Any tokens that should be passed directly to all invocations of the callback. This can +/// // be used to pass local variables or other things the macro needs access to. +/// // +/// // This is an optional field. +/// extra: [foo], +/// // Similar to `extra`, but allow providing a pattern for only specific functions. Uses +/// // a simplified match-like syntax. +/// fn_extra: match MACRO_FN_NAME { +/// hypot | hypotf => |x| x.hypot(), +/// _ => |x| x, +/// }, +/// } +/// ``` +#[proc_macro] +pub fn for_each_function(tokens: pm::TokenStream) -> pm::TokenStream { + let input = syn::parse_macro_input!(tokens as Invocation); + + let res = StructuredInput::from_fields(input) + .and_then(|s_in| validate(&s_in).map(|fn_list| (s_in, fn_list))) + .and_then(|(s_in, fn_list)| expand(s_in, &fn_list)); + + match res { + Ok(ts) => ts.into(), + Err(e) => e.into_compile_error().into(), + } +} + +/// Check for any input that is structurally correct but has other problems. +/// +/// Returns the list of function names that we should expand for. 
+fn validate(input: &StructuredInput) -> syn::Result> { + // Collect lists of all functions that are provied as macro inputs in various fields (only, + // skip, attributes). + let attr_mentions = input + .attributes + .iter() + .flat_map(|map_list| map_list.iter()) + .flat_map(|attr_map| attr_map.names.iter()); + let only_mentions = input.only.iter().flat_map(|only_list| only_list.iter()); + let fn_extra_mentions = + input.fn_extra.iter().flat_map(|v| v.keys()).filter(|name| *name != "_"); + let all_mentioned_fns = + input.skip.iter().chain(only_mentions).chain(attr_mentions).chain(fn_extra_mentions); + + // Make sure that every function mentioned is a real function + for mentioned in all_mentioned_fns { + if !ALL_FUNCTIONS_FLAT.iter().any(|func| mentioned == func.name) { + let e = syn::Error::new( + mentioned.span(), + format!("unrecognized function name `{mentioned}`"), + ); + return Err(e); + } + } + + if !input.skip.is_empty() && input.only.is_some() { + let e = syn::Error::new( + input.only_span.unwrap(), + format!("only one of `skip` or `only` may be specified"), + ); + return Err(e); + } + + // Construct a list of what we intend to expand + let mut fn_list = Vec::new(); + for func in ALL_FUNCTIONS_FLAT.iter() { + let fn_name = func.name; + // If we have an `only` list and it does _not_ contain this function name, skip it + if input.only.as_ref().is_some_and(|only| !only.iter().any(|o| o == fn_name)) { + continue; + } + + // If there is a `skip` list that contains this function name, skip it + if input.skip.iter().any(|s| s == fn_name) { + continue; + } + + // Run everything else + fn_list.push(func); + } + + if let Some(map) = &input.fn_extra { + if !map.keys().any(|key| key == "_") { + // No default provided; make sure every expected function is covered + let mut fns_not_covered = Vec::new(); + for func in &fn_list { + if !map.keys().any(|key| key == func.name) { + // `name` was not mentioned in the `match` statement + fns_not_covered.push(func); + } + } + 
+ if !fns_not_covered.is_empty() { + let e = syn::Error::new( + input.fn_extra_span.unwrap(), + format!( + "`fn_extra`: no default `_` pattern specified and the following \ + patterns are not covered: {fns_not_covered:#?}" + ), + ); + return Err(e); + } + } + }; + + Ok(fn_list) +} + +/// Expand our structured macro input into invocations of the callback macro. +fn expand(input: StructuredInput, fn_list: &[&FunctionInfo]) -> syn::Result { + let mut out = pm2::TokenStream::new(); + let default_ident = Ident::new("_", Span::call_site()); + let callback = input.callback; + + for func in fn_list { + let fn_name = Ident::new(func.name, Span::call_site()); + + // Prepare attributes in an `attrs: ...` field + let meta_field = match &input.attributes { + Some(attrs) => { + let meta = attrs + .iter() + .filter(|map| map.names.contains(&fn_name)) + .flat_map(|map| &map.meta); + quote! { attrs: [ #( #meta )* ] } + } + None => pm2::TokenStream::new(), + }; + + // Prepare extra in an `extra: ...` field, running the replacer + let extra_field = match input.extra.clone() { + Some(mut extra) => { + let mut v = MacroReplace::new(func.name); + v.visit_expr_mut(&mut extra); + v.finish()?; + + quote! { extra: #extra, } + } + None => pm2::TokenStream::new(), + }; + + // Prepare function-specific extra in a `fn_extra: ...` field, running the replacer + let fn_extra_field = match input.fn_extra { + Some(ref map) => { + let mut fn_extra = + map.get(&fn_name).or_else(|| map.get(&default_ident)).unwrap().clone(); + + let mut v = MacroReplace::new(func.name); + v.visit_expr_mut(&mut fn_extra); + v.finish()?; + + quote! { fn_extra: #fn_extra, } + } + None => pm2::TokenStream::new(), + }; + + let c_args = &func.c_sig.args; + let c_ret = &func.c_sig.returns; + let rust_args = &func.rust_sig.args; + let rust_ret = &func.rust_sig.returns; + + let new = quote! { + #callback! 
{ + fn_name: #fn_name, + CFn: fn( #(#c_args),* ,) -> ( #(#c_ret),* ), + CArgs: ( #(#c_args),* ,), + CRet: ( #(#c_ret),* ), + RustFn: fn( #(#rust_args),* ,) -> ( #(#rust_ret),* ), + RustArgs: ( #(#rust_args),* ,), + RustRet: ( #(#rust_ret),* ), + #meta_field + #extra_field + #fn_extra_field + } + }; + + out.extend(new); + } + + Ok(out) +} + +/// Visitor to replace "magic" identifiers that we allow: `MACRO_FN_NAME` and +/// `MACRO_FN_NAME_NORMALIZED`. +struct MacroReplace { + fn_name: &'static str, + /// Remove the trailing `f` or `f128` to make + norm_name: String, + error: Option, +} + +impl MacroReplace { + fn new(name: &'static str) -> Self { + // Keep this in sync with `libm_test::canonical_name` + let known_mappings = &[ + ("erff", "erf"), + ("erf", "erf"), + ("lgammaf_r", "lgamma_r"), + ("modff", "modf"), + ("modf", "modf"), + ]; + + let norm_name = match known_mappings.iter().find(|known| known.0 == name) { + Some(found) => found.1, + None => name + .strip_suffix("f") + .or_else(|| name.strip_suffix("f16")) + .or_else(|| name.strip_suffix("f128")) + .unwrap_or(name), + }; + + Self { fn_name: name, norm_name: norm_name.to_owned(), error: None } + } + + fn finish(self) -> syn::Result<()> { + match self.error { + Some(e) => Err(e), + None => Ok(()), + } + } + + fn visit_ident_inner(&mut self, i: &mut Ident) { + let s = i.to_string(); + if !s.starts_with("MACRO") || self.error.is_some() { + return; + } + + match s.as_str() { + "MACRO_FN_NAME" => *i = Ident::new(self.fn_name, i.span()), + "MACRO_FN_NAME_NORMALIZED" => *i = Ident::new(&self.norm_name, i.span()), + _ => { + self.error = + Some(syn::Error::new(i.span(), format!("unrecognized meta expression `{s}`"))); + } + } + } +} + +impl VisitMut for MacroReplace { + fn visit_ident_mut(&mut self, i: &mut Ident) { + self.visit_ident_inner(i); + syn::visit_mut::visit_ident_mut(self, i); + } +} diff --git a/libm/crates/libm-macros/src/parse.rs b/libm/crates/libm-macros/src/parse.rs new file mode 100644 index 
000000000..ee9bd524b --- /dev/null +++ b/libm/crates/libm-macros/src/parse.rs @@ -0,0 +1,236 @@ +use std::collections::BTreeMap; + +use proc_macro2::Span; +use quote::ToTokens; +use syn::parse::{Parse, ParseStream, Parser}; +use syn::punctuated::Punctuated; +use syn::spanned::Spanned; +use syn::token::Comma; +use syn::{Arm, Attribute, Expr, ExprMatch, Ident, Meta, Token, bracketed}; + +/// The input to our macro; just a list of `field: value` items. +#[derive(Debug)] +pub struct Invocation { + fields: Punctuated, +} + +impl Parse for Invocation { + fn parse(input: ParseStream) -> syn::Result { + Ok(Self { fields: input.parse_terminated(Mapping::parse, Token![,])? }) + } +} + +/// A `key: expression` mapping with nothing else. Basically a simplified `syn::Field`. +#[derive(Debug)] +struct Mapping { + name: Ident, + _sep: Token![:], + expr: Expr, +} + +impl Parse for Mapping { + fn parse(input: ParseStream) -> syn::Result { + Ok(Self { name: input.parse()?, _sep: input.parse()?, expr: input.parse()? }) + } +} + +/// The input provided to our proc macro, after parsing into the form we expect. 
+#[derive(Debug)] +pub struct StructuredInput { + /// Macro to invoke once per function + pub callback: Ident, + /// Skip these functions + pub skip: Vec, + /// Invoke only for these functions + pub only: Option>, + /// Attributes that get applied to specific functions + pub attributes: Option>, + /// Extra expressions to pass to all invocations of the macro + pub extra: Option, + /// Per-function extra expressions to pass to the macro + pub fn_extra: Option>, + // For diagnostics + pub only_span: Option, + pub fn_extra_span: Option, +} + +impl StructuredInput { + pub fn from_fields(input: Invocation) -> syn::Result { + let mut map: Vec<_> = input.fields.into_iter().collect(); + let cb_expr = expect_field(&mut map, "callback")?; + let skip_expr = expect_field(&mut map, "skip").ok(); + let only_expr = expect_field(&mut map, "only").ok(); + let attr_expr = expect_field(&mut map, "attributes").ok(); + let extra = expect_field(&mut map, "extra").ok(); + let fn_extra = expect_field(&mut map, "fn_extra").ok(); + + if !map.is_empty() { + Err(syn::Error::new( + map.first().unwrap().name.span(), + format!("unexpected fields {map:?}"), + ))?; + } + + let skip = match skip_expr { + Some(expr) => Parser::parse2(parse_ident_array, expr.into_token_stream())?, + None => Vec::new(), + }; + + let only_span = only_expr.as_ref().map(|expr| expr.span()); + let only = match only_expr { + Some(expr) => Some(Parser::parse2(parse_ident_array, expr.into_token_stream())?), + None => None, + }; + + let attributes = match attr_expr { + Some(expr) => { + let mut attributes = Vec::new(); + let attr_exprs = Parser::parse2(parse_expr_array, expr.into_token_stream())?; + + for attr in attr_exprs { + attributes.push(syn::parse2(attr.into_token_stream())?); + } + Some(attributes) + } + None => None, + }; + + let fn_extra_span = fn_extra.as_ref().map(|expr| expr.span()); + let fn_extra = match fn_extra { + Some(expr) => Some(extract_fn_extra_field(expr)?), + None => None, + }; + + Ok(Self { + 
callback: expect_ident(cb_expr)?, + skip, + only, + only_span, + attributes, + extra, + fn_extra, + fn_extra_span, + }) + } +} + +fn extract_fn_extra_field(expr: Expr) -> syn::Result> { + let Expr::Match(mexpr) = expr else { + let e = syn::Error::new(expr.span(), "`fn_extra` expects a match expression"); + return Err(e); + }; + + let ExprMatch { attrs, match_token: _, expr, brace_token: _, arms } = mexpr; + + expect_empty_attrs(&attrs)?; + + let match_on = expect_ident(*expr)?; + if match_on != "MACRO_FN_NAME" { + let e = syn::Error::new(match_on.span(), "only allowed to match on `MACRO_FN_NAME`"); + return Err(e); + } + + let mut res = BTreeMap::new(); + + for arm in arms { + let Arm { attrs, pat, guard, fat_arrow_token: _, body, comma: _ } = arm; + + expect_empty_attrs(&attrs)?; + + let keys = match pat { + syn::Pat::Wild(w) => vec![Ident::new("_", w.span())], + _ => Parser::parse2(parse_ident_pat, pat.into_token_stream())?, + }; + + if let Some(guard) = guard { + let e = syn::Error::new(guard.0.span(), "no guards allowed in this position"); + return Err(e); + } + + for key in keys { + let inserted = res.insert(key.clone(), *body.clone()); + if inserted.is_some() { + let e = syn::Error::new(key.span(), format!("key `{key}` specified twice")); + return Err(e); + } + } + } + + Ok(res) +} + +fn expect_empty_attrs(attrs: &[Attribute]) -> syn::Result<()> { + if attrs.is_empty() { + return Ok(()); + } + + let e = + syn::Error::new(attrs.first().unwrap().span(), "no attributes allowed in this position"); + Err(e) +} + +/// Extract a named field from a map, raising an error if it doesn't exist. +fn expect_field(v: &mut Vec, name: &str) -> syn::Result { + let pos = v.iter().position(|v| v.name == name).ok_or_else(|| { + syn::Error::new(Span::call_site(), format!("missing expected field `{name}`")) + })?; + + Ok(v.remove(pos).expr) +} + +/// Coerce an expression into a simple identifier. 
+fn expect_ident(expr: Expr) -> syn::Result { + syn::parse2(expr.into_token_stream()) +} + +/// Parse an array of expressions. +fn parse_expr_array(input: ParseStream) -> syn::Result> { + let content; + let _ = bracketed!(content in input); + let fields = content.parse_terminated(Expr::parse, Token![,])?; + Ok(fields.into_iter().collect()) +} + +/// Parse an array of idents, e.g. `[foo, bar, baz]`. +fn parse_ident_array(input: ParseStream) -> syn::Result> { + let content; + let _ = bracketed!(content in input); + let fields = content.parse_terminated(Ident::parse, Token![,])?; + Ok(fields.into_iter().collect()) +} + +/// Parse an pattern of idents, specifically `(foo | bar | baz)`. +fn parse_ident_pat(input: ParseStream) -> syn::Result> { + if !input.peek2(Token![|]) { + return Ok(vec![input.parse()?]); + } + + let fields = Punctuated::::parse_separated_nonempty(input)?; + Ok(fields.into_iter().collect()) +} + +/// A mapping of attributes to identifiers (just a simplified `Expr`). +/// +/// Expressed as: +/// +/// ```ignore +/// #[meta1] +/// #[meta2] +/// [foo, bar, baz] +/// ``` +#[derive(Debug)] +pub struct AttributeMap { + pub meta: Vec, + pub names: Vec, +} + +impl Parse for AttributeMap { + fn parse(input: ParseStream) -> syn::Result { + let attrs = input.call(Attribute::parse_outer)?; + + Ok(Self { + meta: attrs.into_iter().map(|a| a.meta).collect(), + names: parse_ident_array(input)?, + }) + } +} diff --git a/libm/crates/libm-macros/tests/basic.rs b/libm/crates/libm-macros/tests/basic.rs new file mode 100644 index 000000000..8f8c09f1b --- /dev/null +++ b/libm/crates/libm-macros/tests/basic.rs @@ -0,0 +1,96 @@ +// `STATUS_DLL_NOT_FOUND` on i686 MinGW, not worth looking into. +#![cfg(not(all(target_arch = "x86", target_os = "windows", target_env = "gnu")))] + +macro_rules! 
basic { + ( + fn_name: $fn_name:ident, + CFn: $CFn:ty, + CArgs: $CArgs:ty, + CRet: $CRet:ty, + RustFn: $RustFn:ty, + RustArgs: $RustArgs:ty, + RustRet: $RustRet:ty, + attrs: [$($meta:meta)*] + extra: [$($extra_tt:tt)*], + fn_extra: $fn_extra:expr, + ) => { + $(#[$meta])* + mod $fn_name { + #[allow(unused)] + type CFnTy = $CFn; + // type CArgsTy<'_> = $CArgs; + // type CRetTy<'_> = $CRet; + #[allow(unused)] + type RustFnTy = $RustFn; + #[allow(unused)] + type RustArgsTy = $RustArgs; + #[allow(unused)] + type RustRetTy = $RustRet; + #[allow(unused)] + const A: &[&str] = &[$($extra_tt)*]; + #[allow(unused)] + fn foo(a: f32) -> f32 { + $fn_extra(a) + } + } + }; +} + +mod test_basic { + libm_macros::for_each_function! { + callback: basic, + skip: [sin, cos], + attributes: [ + // just some random attributes + #[allow(clippy::pedantic)] + #[allow(dead_code)] + [sinf, cosf] + ], + extra: ["foo", "bar"], + fn_extra: match MACRO_FN_NAME { + sin => |x| x + 2.0, + cos | cosf => |x: f32| x.MACRO_FN_NAME_NORMALIZED(), + _ => |_x| 100.0 + } + } +} + +macro_rules! basic_no_extra { + ( + fn_name: $fn_name:ident, + CFn: $CFn:ty, + CArgs: $CArgs:ty, + CRet: $CRet:ty, + RustFn: $RustFn:ty, + RustArgs: $RustArgs:ty, + RustRet: $RustRet:ty, + ) => { + mod $fn_name { + #[allow(unused)] + type CFnTy = $CFn; + // type CArgsTy<'_> = $CArgs; + // type CRetTy<'_> = $CRet; + #[allow(unused)] + type RustFnTy = $RustFn; + #[allow(unused)] + type RustArgsTy = $RustArgs; + #[allow(unused)] + type RustRetTy = $RustRet; + } + }; +} + +mod test_basic_no_extra { + // Test with no extra, no skip, and no attributes + libm_macros::for_each_function! { + callback: basic_no_extra, + } +} + +mod test_only { + // Test that only works + libm_macros::for_each_function! 
{ + callback: basic_no_extra, + only: [sin, sinf], + } +} From 3667a9e174b051fbf12d2ba59747db513541240e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 16 Oct 2024 22:41:07 -0500 Subject: [PATCH 0899/1459] Collect all function names to an array Use a build script for `libm-test` to enumerate all symbols provided by `libm` and provide this list in a variable. This will allow us to make sure no functions are missed anytime they must be manually listed. Additionally, introduce some helper config options. --- libm/crates/libm-test/build.rs | 100 ++++++++++++++++++++++++++++++- libm/crates/libm-test/src/lib.rs | 3 +- 2 files changed, 100 insertions(+), 3 deletions(-) diff --git a/libm/crates/libm-test/build.rs b/libm/crates/libm-test/build.rs index 9653bd830..472dec9d3 100644 --- a/libm/crates/libm-test/build.rs +++ b/libm/crates/libm-test/build.rs @@ -1,10 +1,106 @@ +use std::fmt::Write; +use std::path::PathBuf; +use std::{env, fs}; + fn main() { + let cfg = Config::from_env(); + + emit_optimization_cfg(&cfg); + emit_cfg_shorthands(&cfg); + list_all_tests(&cfg); + #[cfg(feature = "test-musl-serialized")] - musl_reference_tests::generate(); + musl_serialized_tests::generate(); +} + +#[allow(dead_code)] +struct Config { + manifest_dir: PathBuf, + out_dir: PathBuf, + opt_level: u8, + target_arch: String, + target_env: String, + target_family: Option, + target_os: String, + target_string: String, + target_vendor: String, + target_features: Vec, +} + +impl Config { + fn from_env() -> Self { + let target_features = env::var("CARGO_CFG_TARGET_FEATURE") + .map(|feats| feats.split(',').map(ToOwned::to_owned).collect()) + .unwrap_or_default(); + + Self { + manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()), + out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()), + opt_level: env::var("OPT_LEVEL").unwrap().parse().unwrap(), + target_arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(), + target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(), + target_family: 
env::var("CARGO_CFG_TARGET_FAMILY").ok(), + target_os: env::var("CARGO_CFG_TARGET_OS").unwrap(), + target_string: env::var("TARGET").unwrap(), + target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), + target_features, + } + } +} + +/// Some tests are extremely slow. Emit a config option based on optimization level. +fn emit_optimization_cfg(cfg: &Config) { + println!("cargo::rustc-check-cfg=cfg(optimizations_enabled)"); + + if cfg.opt_level >= 2 { + println!("cargo::rustc-cfg=optimizations_enabled"); + } +} + +/// Provide an alias for common longer config combinations. +fn emit_cfg_shorthands(cfg: &Config) { + println!("cargo::rustc-check-cfg=cfg(x86_no_sse)"); + if cfg.target_arch == "x86" && !cfg.target_features.iter().any(|f| f == "sse") { + // Shorthand to detect i586 targets + println!("cargo::rustc-cfg=x86_no_sse"); + } +} + +/// Create a list of all source files in an array. This can be used for making sure that +/// all functions are tested or otherwise covered in some way. +// FIXME: it would probably be better to use rustdoc JSON output to get public functions. +fn list_all_tests(cfg: &Config) { + let math_src = cfg.manifest_dir.join("../../src/math"); + + let mut files = fs::read_dir(math_src) + .unwrap() + .map(|f| f.unwrap().path()) + .filter(|entry| entry.is_file()) + .map(|f| f.file_stem().unwrap().to_str().unwrap().to_owned()) + .collect::>(); + files.sort(); + + let mut s = "pub const ALL_FUNCTIONS: &[&str] = &[".to_owned(); + for f in files { + if f == "mod" { + // skip mod.rs + continue; + } + write!(s, "\"{f}\",").unwrap(); + } + write!(s, "];").unwrap(); + + let outfile = cfg.out_dir.join("all_files.rs"); + fs::write(outfile, s).unwrap(); } +/// At build time, generate the output of what the corresponding `*musl` target does with a range +/// of inputs. +/// +/// Serialize that target's output, run the same thing with our symbols, then load and compare +/// the resulting values. 
#[cfg(feature = "test-musl-serialized")] -mod musl_reference_tests { +mod musl_serialized_tests { use std::path::PathBuf; use std::process::Command; use std::{env, fs}; diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 8b1378917..32c061896 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -1 +1,2 @@ - +// List of all files present in libm's source +include!(concat!(env!("OUT_DIR"), "/all_files.rs")); From fc20ba7f029b7937e9248f3b3f3abfaac878a4f9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 16 Oct 2024 22:44:31 -0500 Subject: [PATCH 0900/1459] Add a test that `for_each_fn` correctly lists all functions Create a new test that checks `for_each_fn` against `ALL_FUNCTIONS`, i.e. the manually entered function list against the automatically collected list. If any are missing (e.g. new symbol added), then this will produce an error. --- libm/crates/libm-test/Cargo.toml | 1 + libm/crates/libm-test/tests/check_coverage.rs | 60 +++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 libm/crates/libm-test/tests/check_coverage.rs diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 6367bdca5..1e76fb707 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -13,6 +13,7 @@ test-musl-serialized = ["rand"] [dependencies] libm = { path = "../.." } +libm-macros = { path = "../libm-macros" } [build-dependencies] rand = { version = "0.8.5", optional = true } diff --git a/libm/crates/libm-test/tests/check_coverage.rs b/libm/crates/libm-test/tests/check_coverage.rs new file mode 100644 index 000000000..ef6d21fdb --- /dev/null +++ b/libm/crates/libm-test/tests/check_coverage.rs @@ -0,0 +1,60 @@ +//! Ensure that `for_each_function!` isn't missing any symbols. + +/// Files in `src/` that do not export a testable symbol. 
+const ALLOWED_SKIPS: &[&str] = &[ + // Not a generic test function + "fenv", + // Nonpublic functions + "expo2", + "k_cos", + "k_cosf", + "k_expo2", + "k_expo2f", + "k_sin", + "k_sinf", + "k_tan", + "k_tanf", + "rem_pio2", + "rem_pio2_large", + "rem_pio2f", +]; + +macro_rules! callback { + ( + fn_name: $name:ident, + CFn: $_CFn:ty, + CArgs: $_CArgs:ty, + CRet: $_CRet:ty, + RustFn: $_RustFn:ty, + RustArgs: $_RustArgs:ty, + RustRet: $_RustRet:ty, + extra: [$push_to:ident], + ) => { + $push_to.push(stringify!($name)); + }; +} + +#[test] +fn test_for_each_function_all_included() { + let mut included = Vec::new(); + let mut missing = Vec::new(); + + libm_macros::for_each_function! { + callback: callback, + extra: [included], + }; + + for f in libm_test::ALL_FUNCTIONS { + if !included.contains(f) && !ALLOWED_SKIPS.contains(f) { + missing.push(f) + } + } + + if !missing.is_empty() { + panic!( + "missing tests for the following: {missing:#?} \ + \nmake sure any new functions are entered in \ + `ALL_FUNCTIONS` (in `libm-macros`)." + ); + } +} From 609d3481e3ebf987c944b4f11b30596c1e601308 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 16 Oct 2024 22:47:55 -0500 Subject: [PATCH 0901/1459] Add numeric traits These traits are simplified versions of what we have in `compiler_builtins` and will be used for tests. 
--- libm/crates/libm-test/src/lib.rs | 4 + libm/crates/libm-test/src/num_traits.rs | 181 ++++++++++++++++++++++++ 2 files changed, 185 insertions(+) create mode 100644 libm/crates/libm-test/src/num_traits.rs diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 32c061896..5444709d8 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -1,2 +1,6 @@ +mod num_traits; + +pub use num_traits::{Float, Hex, Int}; + // List of all files present in libm's source include!(concat!(env!("OUT_DIR"), "/all_files.rs")); diff --git a/libm/crates/libm-test/src/num_traits.rs b/libm/crates/libm-test/src/num_traits.rs new file mode 100644 index 000000000..835d6e46d --- /dev/null +++ b/libm/crates/libm-test/src/num_traits.rs @@ -0,0 +1,181 @@ +use std::fmt; + +/// Common types and methods for floating point numbers. +pub trait Float: Copy + fmt::Display + fmt::Debug + PartialEq { + type Int: Int; + type SignedInt: Int + Int; + + const ZERO: Self; + const ONE: Self; + + /// The bitwidth of the float type + const BITS: u32; + + /// The bitwidth of the significand + const SIGNIFICAND_BITS: u32; + + /// The bitwidth of the exponent + const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1; + + fn is_nan(self) -> bool; + fn is_infinite(self) -> bool; + fn to_bits(self) -> Self::Int; + fn from_bits(bits: Self::Int) -> Self; + fn signum(self) -> Self; +} + +macro_rules! 
impl_float { + ($($fty:ty, $ui:ty, $si:ty, $significand_bits:expr;)+) => { + $( + impl Float for $fty { + type Int = $ui; + type SignedInt = $si; + + const ZERO: Self = 0.0; + const ONE: Self = 1.0; + + const BITS: u32 = <$ui>::BITS; + const SIGNIFICAND_BITS: u32 = $significand_bits; + + fn is_nan(self) -> bool { + self.is_nan() + } + fn is_infinite(self) -> bool { + self.is_infinite() + } + fn to_bits(self) -> Self::Int { + self.to_bits() + } + fn from_bits(bits: Self::Int) -> Self { + Self::from_bits(bits) + } + fn signum(self) -> Self { + self.signum() + } + } + + impl Hex for $fty { + fn hex(self) -> String { + self.to_bits().hex() + } + } + )+ + } +} + +impl_float!( + f32, u32, i32, 23; + f64, u64, i64, 52; +); + +/// Common types and methods for integers. +pub trait Int: Copy + fmt::Display + fmt::Debug + PartialEq { + type OtherSign: Int; + type Unsigned: Int; + const BITS: u32; + const SIGNED: bool; + + fn signed(self) -> ::OtherSign; + fn unsigned(self) -> Self::Unsigned; + fn checked_sub(self, other: Self) -> Option; + fn abs(self) -> Self; +} + +macro_rules! 
impl_int { + ($($ui:ty, $si:ty ;)+) => { + $( + impl Int for $ui { + type OtherSign = $si; + type Unsigned = Self; + const BITS: u32 = <$ui>::BITS; + const SIGNED: bool = false; + fn signed(self) -> Self::OtherSign { + self as $si + } + fn unsigned(self) -> Self { + self + } + fn checked_sub(self, other: Self) -> Option { + self.checked_sub(other) + } + fn abs(self) -> Self { + unimplemented!() + } + } + + impl Int for $si { + type OtherSign = $ui; + type Unsigned = $ui; + const BITS: u32 = <$ui>::BITS; + const SIGNED: bool = true; + fn signed(self) -> Self { + self + } + fn unsigned(self) -> $ui { + self as $ui + } + fn checked_sub(self, other: Self) -> Option { + self.checked_sub(other) + } + fn abs(self) -> Self { + self.abs() + } + } + + impl_int!(@for_both $si); + impl_int!(@for_both $ui); + + )+ + }; + + (@for_both $ty:ty) => { + impl Hex for $ty { + fn hex(self) -> String { + format!("{self:#0width$x}", width = ((Self::BITS / 4) + 2) as usize) + } + } + } +} + +impl_int!( + u32, i32; + u64, i64; +); + +/// A helper trait to print something as hex with the correct number of nibbles, e.g. a `u32` +/// will always print with `0x` followed by 8 digits. +/// +/// This is only used for printing errors so allocating is okay. 
+pub trait Hex: Copy { + fn hex(self) -> String; +} + +impl Hex for (T1,) +where + T1: Hex, +{ + fn hex(self) -> String { + format!("({},)", self.0.hex()) + } +} + +impl Hex for (T1, T2) +where + T1: Hex, + T2: Hex, +{ + fn hex(self) -> String { + format!("({}, {})", self.0.hex(), self.1.hex()) + } +} + +impl Hex for (T1, T2, T3) +where + T1: Hex, + T2: Hex, + T3: Hex, +{ + fn hex(self) -> String { + format!("({}, {}, {})", self.0.hex(), self.1.hex(), self.2.hex()) + } +} From f8ce0b699c62588500ee831d80f57dd569c4947a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 16 Oct 2024 22:55:05 -0500 Subject: [PATCH 0902/1459] Add traits for testing These traits give us a more generic way to interface with tuples used for (1) test input, (2) function arguments, and (3) test input. --- libm/crates/libm-test/Cargo.toml | 1 + libm/crates/libm-test/src/lib.rs | 6 + libm/crates/libm-test/src/num_traits.rs | 25 +++ libm/crates/libm-test/src/test_traits.rs | 217 +++++++++++++++++++++++ 4 files changed, 249 insertions(+) create mode 100644 libm/crates/libm-test/src/test_traits.rs diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 1e76fb707..b6e2ced58 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -12,6 +12,7 @@ default = [] test-musl-serialized = ["rand"] [dependencies] +anyhow = "1.0.90" libm = { path = "../.." } libm-macros = { path = "../libm-macros" } diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 5444709d8..41873099f 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -1,6 +1,12 @@ mod num_traits; +mod test_traits; pub use num_traits::{Float, Hex, Int}; +pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, TupleCall}; + +/// Result type for tests is usually from `anyhow`. Most times there is no success value to +/// propagate. 
+pub type TestResult = Result; // List of all files present in libm's source include!(concat!(env!("OUT_DIR"), "/all_files.rs")); diff --git a/libm/crates/libm-test/src/num_traits.rs b/libm/crates/libm-test/src/num_traits.rs index 835d6e46d..d7d806bab 100644 --- a/libm/crates/libm-test/src/num_traits.rs +++ b/libm/crates/libm-test/src/num_traits.rs @@ -1,5 +1,7 @@ use std::fmt; +use crate::TestResult; + /// Common types and methods for floating point numbers. pub trait Float: Copy + fmt::Display + fmt::Debug + PartialEq { type Int: Int; @@ -134,6 +136,29 @@ macro_rules! impl_int { format!("{self:#0width$x}", width = ((Self::BITS / 4) + 2) as usize) } } + + impl $crate::CheckOutput for $ty { + fn validate<'a>( + self, + expected: Self, + input: Input, + _ctx: &$crate::CheckCtx, + ) -> TestResult { + anyhow::ensure!( + self == expected, + "\ + \n input: {input:?} {ibits}\ + \n expected: {expected:<22?} {expbits}\ + \n actual: {self:<22?} {actbits}\ + ", + actbits = self.hex(), + expbits = expected.hex(), + ibits = input.hex(), + ); + + Ok(()) + } + } } } diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs new file mode 100644 index 000000000..c6f1f84ae --- /dev/null +++ b/libm/crates/libm-test/src/test_traits.rs @@ -0,0 +1,217 @@ +//! Traits related to testing. +//! +//! There are three main traits in this module: +//! +//! - `GenerateInput`: implemented on any types that create test cases. +//! - `TupleCall`: implemented on tuples to allow calling them as function arguments. +//! - `CheckOutput`: implemented on anything that is an output type for validation against an +//! expected value. + +use std::fmt; + +use anyhow::{Context, bail, ensure}; + +use crate::{Float, Hex, Int, TestResult}; + +/// Implement this on types that can generate a sequence of tuples for test input. +pub trait GenerateInput { + fn get_cases(&self) -> impl Iterator; +} + +/// Trait for calling a function with a tuple as arguments. 
+/// +/// Implemented on the tuple with the function signature as the generic (so we can use the same +/// tuple for multiple signatures). +pub trait TupleCall: fmt::Debug { + type Output; + fn call(self, f: Func) -> Self::Output; +} + +/// Context passed to [`CheckOutput`]. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct CheckCtx { + /// Allowed ULP deviation + pub ulp: u32, + /// Function name. + pub fname: &'static str, + /// Source of truth for tests. + pub basis: CheckBasis, +} + +/// Possible items to test against +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum CheckBasis {} + +/// A trait to implement on any output type so we can verify it in a generic way. +pub trait CheckOutput: Sized { + /// Validate `self` (actual) and `expected` are the same. + /// + /// `input` is only used here for error messages. + fn validate<'a>(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult; +} + +impl TupleCall R> for (T1,) +where + T1: fmt::Debug, +{ + type Output = R; + + fn call(self, f: fn(T1) -> R) -> Self::Output { + f(self.0) + } +} + +impl TupleCall R> for (T1, T2) +where + T1: fmt::Debug, + T2: fmt::Debug, +{ + type Output = R; + + fn call(self, f: fn(T1, T2) -> R) -> Self::Output { + f(self.0, self.1) + } +} + +impl TupleCall R> for (T1,) +where + T1: fmt::Debug, + T2: fmt::Debug + Default, +{ + type Output = (R, T2); + + fn call(self, f: fn(T1, &mut T2) -> R) -> Self::Output { + let mut t2 = T2::default(); + (f(self.0, &mut t2), t2) + } +} + +impl TupleCall R> for (T1, T2, T3) +where + T1: fmt::Debug, + T2: fmt::Debug, + T3: fmt::Debug, +{ + type Output = R; + + fn call(self, f: fn(T1, T2, T3) -> R) -> Self::Output { + f(self.0, self.1, self.2) + } +} + +impl TupleCall R> for (T1, T2) +where + T1: fmt::Debug, + T2: fmt::Debug, + T3: fmt::Debug + Default, +{ + type Output = (R, T3); + + fn call(self, f: fn(T1, T2, &mut T3) -> R) -> Self::Output { + let mut t3 = T3::default(); + (f(self.0, self.1, &mut t3), t3) + } +} + +impl TupleCall for (T1,) 
+where + T1: fmt::Debug, + T2: fmt::Debug + Default, + T3: fmt::Debug + Default, +{ + type Output = (T2, T3); + + fn call(self, f: fn(T1, &mut T2, &mut T3)) -> Self::Output { + let mut t2 = T2::default(); + let mut t3 = T3::default(); + f(self.0, &mut t2, &mut t3); + (t2, t3) + } +} + +// Implement for floats +impl CheckOutput for F +where + F: Float + Hex, + Input: Hex + fmt::Debug, + u32: TryFrom, +{ + fn validate<'a>(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult { + // Create a wrapper function so we only need to `.with_context` once. + let inner = || -> TestResult { + // Check when both are NaNs + if self.is_nan() && expected.is_nan() { + ensure!(self.to_bits() == expected.to_bits(), "NaNs have different bitpatterns"); + // Nothing else to check + return Ok(()); + } else if self.is_nan() || expected.is_nan() { + // Check when only one is a NaN + bail!("real value != NaN") + } + + // Make sure that the signs are the same before checing ULP to avoid wraparound + let act_sig = self.signum(); + let exp_sig = expected.signum(); + ensure!(act_sig == exp_sig, "mismatched signs {act_sig} {exp_sig}"); + + if self.is_infinite() ^ expected.is_infinite() { + bail!("mismatched infinities"); + } + + let act_bits = self.to_bits().signed(); + let exp_bits = expected.to_bits().signed(); + + let ulp_diff = act_bits.checked_sub(exp_bits).unwrap().abs(); + + let ulp_u32 = u32::try_from(ulp_diff) + .map_err(|e| anyhow::anyhow!("{e:?}: ulp of {ulp_diff} exceeds u32::MAX"))?; + + let allowed_ulp = ctx.ulp; + ensure!(ulp_u32 <= allowed_ulp, "ulp {ulp_diff} > {allowed_ulp}",); + + Ok(()) + }; + + inner().with_context(|| { + format!( + "\ + \n input: {input:?} {ibits}\ + \n expected: {expected:<22?} {expbits}\ + \n actual: {self:<22?} {actbits}\ + ", + actbits = self.hex(), + expbits = expected.hex(), + ibits = input.hex(), + ) + }) + } +} + +/// Implement `CheckOutput` for combinations of types. +macro_rules! 
impl_tuples { + ($(($a:ty, $b:ty);)*) => { + $( + impl CheckOutput for ($a, $b) { + fn validate<'a>( + self, + expected: Self, + input: Input, + ctx: &CheckCtx, + ) -> TestResult { + self.0.validate(expected.0, input, ctx,) + .and_then(|()| self.1.validate(expected.1, input, ctx)) + .with_context(|| format!( + "full input {input:?} full actual {self:?} expected {expected:?}" + )) + } + } + )* + }; +} + +impl_tuples!( + (f32, i32); + (f64, i32); + (f32, f32); + (f64, f64); +); From 83eb4c0d372ff84365d2a420283b088b2304c8f1 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 16 Oct 2024 22:56:28 -0500 Subject: [PATCH 0903/1459] Add a helper for cached test inputs Add a type that caches values used to implement `GenerateInput` on a variety of signatures. --- libm/crates/libm-test/src/gen.rs | 71 ++++++++++++++++++++++++++++++++ libm/crates/libm-test/src/lib.rs | 1 + 2 files changed, 72 insertions(+) create mode 100644 libm/crates/libm-test/src/gen.rs diff --git a/libm/crates/libm-test/src/gen.rs b/libm/crates/libm-test/src/gen.rs new file mode 100644 index 000000000..12e17d476 --- /dev/null +++ b/libm/crates/libm-test/src/gen.rs @@ -0,0 +1,71 @@ +//! Different generators that can create random or systematic bit patterns. + +use crate::GenerateInput; + +/// Helper type to turn any reusable input into a generator. 
+#[derive(Clone, Debug, Default)] +pub struct CachedInput { + pub inputs_f32: Vec<(f32, f32, f32)>, + pub inputs_f64: Vec<(f64, f64, f64)>, + pub inputs_i32: Vec<(i32, i32, i32)>, +} + +impl GenerateInput<(f32,)> for CachedInput { + fn get_cases(&self) -> impl Iterator { + self.inputs_f32.iter().map(|f| (f.0,)) + } +} + +impl GenerateInput<(f32, f32)> for CachedInput { + fn get_cases(&self) -> impl Iterator { + self.inputs_f32.iter().map(|f| (f.0, f.1)) + } +} + +impl GenerateInput<(i32, f32)> for CachedInput { + fn get_cases(&self) -> impl Iterator { + self.inputs_i32.iter().zip(self.inputs_f32.iter()).map(|(i, f)| (i.0, f.0)) + } +} + +impl GenerateInput<(f32, i32)> for CachedInput { + fn get_cases(&self) -> impl Iterator { + GenerateInput::<(i32, f32)>::get_cases(self).map(|(i, f)| (f, i)) + } +} + +impl GenerateInput<(f32, f32, f32)> for CachedInput { + fn get_cases(&self) -> impl Iterator { + self.inputs_f32.iter().copied() + } +} + +impl GenerateInput<(f64,)> for CachedInput { + fn get_cases(&self) -> impl Iterator { + self.inputs_f64.iter().map(|f| (f.0,)) + } +} + +impl GenerateInput<(f64, f64)> for CachedInput { + fn get_cases(&self) -> impl Iterator { + self.inputs_f64.iter().map(|f| (f.0, f.1)) + } +} + +impl GenerateInput<(i32, f64)> for CachedInput { + fn get_cases(&self) -> impl Iterator { + self.inputs_i32.iter().zip(self.inputs_f64.iter()).map(|(i, f)| (i.0, f.0)) + } +} + +impl GenerateInput<(f64, i32)> for CachedInput { + fn get_cases(&self) -> impl Iterator { + GenerateInput::<(i32, f64)>::get_cases(self).map(|(i, f)| (f, i)) + } +} + +impl GenerateInput<(f64, f64, f64)> for CachedInput { + fn get_cases(&self) -> impl Iterator { + self.inputs_f64.iter().copied() + } +} diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 41873099f..fc5385ecb 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -1,3 +1,4 @@ +pub mod gen; mod num_traits; mod test_traits; From 
33456f2f308cf842d55fd95ef6c7b1fd9b0225b2 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 21 Oct 2024 16:59:14 -0500 Subject: [PATCH 0904/1459] Add a deterministic random generator Create a test generator that creates a known number of random inputs and caches them, such that the same inputs are used for all functions. --- libm/crates/libm-test/Cargo.toml | 6 ++ libm/crates/libm-test/src/gen.rs | 1 + libm/crates/libm-test/src/gen/random.rs | 125 ++++++++++++++++++++++++ 3 files changed, 132 insertions(+) create mode 100644 libm/crates/libm-test/src/gen/random.rs diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index b6e2ced58..760340e51 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -15,6 +15,12 @@ test-musl-serialized = ["rand"] anyhow = "1.0.90" libm = { path = "../.." } libm-macros = { path = "../libm-macros" } +rand = "0.8.5" +rand_chacha = "0.3.1" + +[target.'cfg(target_family = "wasm")'.dependencies] +# Enable randomness on WASM +getrandom = { version = "0.2", features = ["js"] } [build-dependencies] rand = { version = "0.8.5", optional = true } diff --git a/libm/crates/libm-test/src/gen.rs b/libm/crates/libm-test/src/gen.rs index 12e17d476..3e9eca37a 100644 --- a/libm/crates/libm-test/src/gen.rs +++ b/libm/crates/libm-test/src/gen.rs @@ -1,6 +1,7 @@ //! Different generators that can create random or systematic bit patterns. use crate::GenerateInput; +pub mod random; /// Helper type to turn any reusable input into a generator. #[derive(Clone, Debug, Default)] diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs new file mode 100644 index 000000000..e59643195 --- /dev/null +++ b/libm/crates/libm-test/src/gen/random.rs @@ -0,0 +1,125 @@ +//! A simple generator that produces deterministic random input, caching to use the same +//! inputs for all functions. 
+ +use std::sync::LazyLock; + +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaCha8Rng; + +use super::CachedInput; +use crate::GenerateInput; + +const SEED: [u8; 32] = *b"3.141592653589793238462643383279"; + +/// Number of tests to run. +const NTESTS: usize = { + let ntests = if cfg!(optimizations_enabled) { + if cfg!(target_arch = "x86_64") || cfg!(target_arch = "aarch64") { + 5_000_000 + } else if !cfg!(target_pointer_width = "64") + || cfg!(all(target_arch = "x86_64", target_vendor = "apple")) + || option_env!("EMULATED").is_some() + && cfg!(any(target_arch = "aarch64", target_arch = "powerpc64")) + { + // Tests are pretty slow on: + // - Non-64-bit targets + // - Emulated ppc + // - Emulated aarch64 + // - x86 MacOS + // So reduce the number of iterations + 100_000 + } else { + // Most everything else gets tested in docker and works okay, but we still + // don't need 20 minutes of tests. + 1_000_000 + } + } else { + 800 + }; + + ntests +}; + +/// Tested inputs. +static TEST_CASES: LazyLock = LazyLock::new(|| make_test_cases(NTESTS)); + +/// The first argument to `jn` and `jnf` is the number of iterations. Make this a reasonable +/// value so tests don't run forever. 
+static TEST_CASES_JN: LazyLock = LazyLock::new(|| { + // Start with regular test cases + let mut cases = (&*TEST_CASES).clone(); + + // These functions are extremely slow, limit them + cases.inputs_i32.truncate((NTESTS / 1000).max(80)); + cases.inputs_f32.truncate((NTESTS / 1000).max(80)); + cases.inputs_f64.truncate((NTESTS / 1000).max(80)); + + // It is easy to overflow the stack with these in debug mode + let max_iterations = if cfg!(optimizations_enabled) && cfg!(target_pointer_width = "64") { + 0xffff + } else if cfg!(windows) { + 0x00ff + } else { + 0x0fff + }; + + let mut rng = ChaCha8Rng::from_seed(SEED); + + for case in cases.inputs_i32.iter_mut() { + case.0 = rng.gen_range(3..=max_iterations); + } + + cases +}); + +fn make_test_cases(ntests: usize) -> CachedInput { + let mut rng = ChaCha8Rng::from_seed(SEED); + + // make sure we include some basic cases + let mut inputs_i32 = vec![(0, 0, 0), (1, 1, 1), (-1, -1, -1)]; + let mut inputs_f32 = vec![ + (0.0, 0.0, 0.0), + (f32::EPSILON, f32::EPSILON, f32::EPSILON), + (f32::INFINITY, f32::INFINITY, f32::INFINITY), + (f32::NEG_INFINITY, f32::NEG_INFINITY, f32::NEG_INFINITY), + (f32::MAX, f32::MAX, f32::MAX), + (f32::MIN, f32::MIN, f32::MIN), + (f32::MIN_POSITIVE, f32::MIN_POSITIVE, f32::MIN_POSITIVE), + (f32::NAN, f32::NAN, f32::NAN), + ]; + let mut inputs_f64 = vec![ + (0.0, 0.0, 0.0), + (f64::EPSILON, f64::EPSILON, f64::EPSILON), + (f64::INFINITY, f64::INFINITY, f64::INFINITY), + (f64::NEG_INFINITY, f64::NEG_INFINITY, f64::NEG_INFINITY), + (f64::MAX, f64::MAX, f64::MAX), + (f64::MIN, f64::MIN, f64::MIN), + (f64::MIN_POSITIVE, f64::MIN_POSITIVE, f64::MIN_POSITIVE), + (f64::NAN, f64::NAN, f64::NAN), + ]; + + inputs_i32.extend((0..(ntests - inputs_i32.len())).map(|_| rng.gen::<(i32, i32, i32)>())); + + // Generate integers to get a full range of bitpatterns, then convert back to + // floats. 
+ inputs_f32.extend((0..(ntests - inputs_f32.len())).map(|_| { + let ints = rng.gen::<(u32, u32, u32)>(); + (f32::from_bits(ints.0), f32::from_bits(ints.1), f32::from_bits(ints.2)) + })); + inputs_f64.extend((0..(ntests - inputs_f64.len())).map(|_| { + let ints = rng.gen::<(u64, u64, u64)>(); + (f64::from_bits(ints.0), f64::from_bits(ints.1), f64::from_bits(ints.2)) + })); + + CachedInput { inputs_f32, inputs_f64, inputs_i32 } +} + +/// Create a test case iterator. +pub fn get_test_cases(fname: &str) -> impl Iterator +where + CachedInput: GenerateInput, +{ + let inputs = if fname == "jn" || fname == "jnf" { &TEST_CASES_JN } else { &TEST_CASES }; + + CachedInput::get_cases(inputs) +} From 50919ef69b2f8535e59c6f067b840724903759cf Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 22 Oct 2024 00:52:00 -0500 Subject: [PATCH 0905/1459] Introduce a generic way to control checks for specific cases Sometimes we want to be able to xfail specific inputs without changing the checked ULP for all cases or skipping the tests. There are also some cases where we need to perform extra checks for only specific functions. Add a trait that provides a hook for providing extra checks or skipping existing checks on a per-function or per-input basis. 
--- libm/crates/libm-test/src/lib.rs | 23 ++++++ libm/crates/libm-test/src/num_traits.rs | 14 +++- libm/crates/libm-test/src/special_case.rs | 95 +++++++++++++++++++++++ libm/crates/libm-test/src/test_traits.rs | 39 ++++++++-- 4 files changed, 163 insertions(+), 8 deletions(-) create mode 100644 libm/crates/libm-test/src/special_case.rs diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index fc5385ecb..511b5139f 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -1,8 +1,10 @@ pub mod gen; mod num_traits; +mod special_case; mod test_traits; pub use num_traits::{Float, Hex, Int}; +pub use special_case::{MaybeOverride, SpecialCase}; pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, TupleCall}; /// Result type for tests is usually from `anyhow`. Most times there is no success value to @@ -11,3 +13,24 @@ pub type TestResult = Result; // List of all files present in libm's source include!(concat!(env!("OUT_DIR"), "/all_files.rs")); + +/// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`, +/// `lgamma_r` and `lgammaf_r` both return `lgamma_r`. +pub fn canonical_name(name: &str) -> &str { + let known_mappings = &[ + ("erff", "erf"), + ("erf", "erf"), + ("lgammaf_r", "lgamma_r"), + ("modff", "modf"), + ("modf", "modf"), + ]; + + match known_mappings.iter().find(|known| known.0 == name) { + Some(found) => found.1, + None => name + .strip_suffix("f") + .or_else(|| name.strip_suffix("f16")) + .or_else(|| name.strip_suffix("f128")) + .unwrap_or(name), + } +} diff --git a/libm/crates/libm-test/src/num_traits.rs b/libm/crates/libm-test/src/num_traits.rs index d7d806bab..e16f4e4dc 100644 --- a/libm/crates/libm-test/src/num_traits.rs +++ b/libm/crates/libm-test/src/num_traits.rs @@ -1,6 +1,6 @@ use std::fmt; -use crate::TestResult; +use crate::{MaybeOverride, SpecialCase, TestResult}; /// Common types and methods for floating point numbers. 
pub trait Float: Copy + fmt::Display + fmt::Debug + PartialEq { @@ -137,13 +137,21 @@ macro_rules! impl_int { } } - impl $crate::CheckOutput for $ty { + impl $crate::CheckOutput for $ty + where + Input: Hex + fmt::Debug, + SpecialCase: MaybeOverride, + { fn validate<'a>( self, expected: Self, input: Input, - _ctx: &$crate::CheckCtx, + ctx: &$crate::CheckCtx, ) -> TestResult { + if let Some(res) = SpecialCase::check_int(input, self, expected, ctx) { + return res; + } + anyhow::ensure!( self == expected, "\ diff --git a/libm/crates/libm-test/src/special_case.rs b/libm/crates/libm-test/src/special_case.rs new file mode 100644 index 000000000..116a0a188 --- /dev/null +++ b/libm/crates/libm-test/src/special_case.rs @@ -0,0 +1,95 @@ +//! Configuration for skipping or changing the result for individual test cases (inputs) rather +//! than ignoring entire tests. + +use crate::{CheckCtx, Float, Int, TestResult}; + +/// Type implementing [`IgnoreCase`]. +pub struct SpecialCase; + +/// Don't run further validation on this test case. +const SKIP: Option = Some(Ok(())); + +/// Return this to skip checks on a test that currently fails but shouldn't. Looks +/// the same as skip, but we keep them separate to better indicate purpose. +const XFAIL: Option = Some(Ok(())); + +/// Allow overriding the outputs of specific test cases. +/// +/// There are some cases where we want to xfail specific cases or handle certain inputs +/// differently than the rest of calls to `validate`. This provides a hook to do that. +/// +/// If `None` is returned, checks will proceed as usual. If `Some(result)` is returned, checks +/// are skipped and the provided result is returned instead. +/// +/// This gets implemented once per input type, then the functions provide further filtering +/// based on function name and values. +/// +/// `ulp` can also be set to adjust the ULP for that specific test, even if `None` is still +/// returned. 
+pub trait MaybeOverride { + fn check_float( + _input: Input, + _actual: F, + _expected: F, + _ulp: &mut u32, + _ctx: &CheckCtx, + ) -> Option { + None + } + + fn check_int( + _input: Input, + _actual: I, + _expected: I, + _ctx: &CheckCtx, + ) -> Option { + None + } +} + +impl MaybeOverride<(f32,)> for SpecialCase { + fn check_float( + _input: (f32,), + actual: F, + expected: F, + _ulp: &mut u32, + ctx: &CheckCtx, + ) -> Option { + maybe_check_nan_bits(actual, expected, ctx) + } +} + +impl MaybeOverride<(f64,)> for SpecialCase { + fn check_float( + _input: (f64,), + actual: F, + expected: F, + _ulp: &mut u32, + ctx: &CheckCtx, + ) -> Option { + maybe_check_nan_bits(actual, expected, ctx) + } +} + +impl MaybeOverride<(f32, f32)> for SpecialCase {} +impl MaybeOverride<(f64, f64)> for SpecialCase {} +impl MaybeOverride<(f32, f32, f32)> for SpecialCase {} +impl MaybeOverride<(f64, f64, f64)> for SpecialCase {} +impl MaybeOverride<(i32, f32)> for SpecialCase {} +impl MaybeOverride<(i32, f64)> for SpecialCase {} +impl MaybeOverride<(f32, i32)> for SpecialCase {} +impl MaybeOverride<(f64, i32)> for SpecialCase {} + +/// Check NaN bits if the function requires it +fn maybe_check_nan_bits(actual: F, expected: F, ctx: &CheckCtx) -> Option { + if !(ctx.canonical_name == "abs" || ctx.canonical_name == "copysigh") { + return None; + } + + // abs and copysign require signaling NaNs to be propagated, so verify bit equality. 
+ if actual.to_bits() == expected.to_bits() { + return SKIP; + } else { + Some(Err(anyhow::anyhow!("NaNs have different bitpatterns"))) + } +} diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index c6f1f84ae..296f1b0fd 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -11,7 +11,7 @@ use std::fmt; use anyhow::{Context, bail, ensure}; -use crate::{Float, Hex, Int, TestResult}; +use crate::{Float, Hex, Int, MaybeOverride, SpecialCase, TestResult}; /// Implement this on types that can generate a sequence of tuples for test input. pub trait GenerateInput { @@ -34,10 +34,19 @@ pub struct CheckCtx { pub ulp: u32, /// Function name. pub fname: &'static str, + /// Return the unsuffixed version of the function name. + pub canonical_name: &'static str, /// Source of truth for tests. pub basis: CheckBasis, } +impl CheckCtx { + pub fn new(ulp: u32, fname: &'static str, basis: CheckBasis) -> Self { + let canonical_fname = crate::canonical_name(fname); + Self { ulp, fname, canonical_name: canonical_fname, basis } + } +} + /// Possible items to test against #[derive(Clone, Debug, PartialEq, Eq)] pub enum CheckBasis {} @@ -135,10 +144,20 @@ where F: Float + Hex, Input: Hex + fmt::Debug, u32: TryFrom, + SpecialCase: MaybeOverride, { fn validate<'a>(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult { // Create a wrapper function so we only need to `.with_context` once. let inner = || -> TestResult { + let mut allowed_ulp = ctx.ulp; + + // If the tested function requires a nonstandard test, run it here. 
+ if let Some(res) = + SpecialCase::check_float(input, self, expected, &mut allowed_ulp, ctx) + { + return res; + } + // Check when both are NaNs if self.is_nan() && expected.is_nan() { ensure!(self.to_bits() == expected.to_bits(), "NaNs have different bitpatterns"); @@ -166,7 +185,6 @@ where let ulp_u32 = u32::try_from(ulp_diff) .map_err(|e| anyhow::anyhow!("{e:?}: ulp of {ulp_diff} exceeds u32::MAX"))?; - let allowed_ulp = ctx.ulp; ensure!(ulp_u32 <= allowed_ulp, "ulp {ulp_diff} > {allowed_ulp}",); Ok(()) @@ -191,17 +209,28 @@ where macro_rules! impl_tuples { ($(($a:ty, $b:ty);)*) => { $( - impl CheckOutput for ($a, $b) { + impl CheckOutput for ($a, $b) + where + Input: Hex + fmt::Debug, + SpecialCase: MaybeOverride, + { fn validate<'a>( self, expected: Self, input: Input, ctx: &CheckCtx, ) -> TestResult { - self.0.validate(expected.0, input, ctx,) + self.0.validate(expected.0, input, ctx) .and_then(|()| self.1.validate(expected.1, input, ctx)) .with_context(|| format!( - "full input {input:?} full actual {self:?} expected {expected:?}" + "full context:\ + \n input: {input:?} {ibits}\ + \n expected: {expected:?} {expbits}\ + \n actual: {self:?} {actbits}\ + ", + actbits = self.hex(), + expbits = expected.hex(), + ibits = input.hex(), )) } } From ad6a0839c37e8c9053307baaa1c28139be775393 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 16 Oct 2024 22:59:17 -0500 Subject: [PATCH 0906/1459] Add a test against musl libm Check our functions against `musl-math-sys`. This is similar to the existing musl tests that go through binary serialization, but works on more platforms. 
--- libm/crates/libm-test/Cargo.toml | 5 + libm/crates/libm-test/src/lib.rs | 28 +++ libm/crates/libm-test/src/special_case.rs | 170 ++++++++++++++++-- libm/crates/libm-test/src/test_traits.rs | 8 +- .../libm-test/tests/compare_built_musl.rs | 52 ++++++ 5 files changed, 247 insertions(+), 16 deletions(-) create mode 100644 libm/crates/libm-test/tests/compare_built_musl.rs diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 760340e51..703524bcd 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -11,10 +11,15 @@ default = [] # musl libc. test-musl-serialized = ["rand"] +# Build our own musl for testing and benchmarks +build-musl = ["dep:musl-math-sys"] + [dependencies] anyhow = "1.0.90" libm = { path = "../.." } libm-macros = { path = "../libm-macros" } +musl-math-sys = { path = "../musl-math-sys", optional = true } +paste = "1.0.15" rand = "0.8.5" rand_chacha = "0.3.1" diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 511b5139f..3baf77524 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -14,6 +14,34 @@ pub type TestResult = Result; // List of all files present in libm's source include!(concat!(env!("OUT_DIR"), "/all_files.rs")); +/// ULP allowed to differ from musl (note that musl itself may not be accurate). +const MUSL_DEFAULT_ULP: u32 = 2; + +/// Certain functions have different allowed ULP (consider these xfail). +/// +/// Note that these results were obtained using 400,000,000 rounds of random inputs, which +/// is not a value used by default. 
+pub fn musl_allowed_ulp(name: &str) -> u32 { + match name { + #[cfg(x86_no_sse)] + "asinh" | "asinhf" => 6, + "lgamma" | "lgamma_r" | "lgammaf" | "lgammaf_r" => 400, + "tanh" | "tanhf" => 4, + "tgamma" => 20, + "j0" | "j0f" | "j1" | "j1f" => { + // Results seem very target-dependent + if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 } + } + "jn" | "jnf" => 1000, + "sincosf" => 500, + #[cfg(not(target_pointer_width = "64"))] + "exp10" => 4, + #[cfg(not(target_pointer_width = "64"))] + "exp10f" => 4, + _ => MUSL_DEFAULT_ULP, + } +} + /// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`, /// `lgamma_r` and `lgammaf_r` both return `lgamma_r`. pub fn canonical_name(name: &str) -> &str { diff --git a/libm/crates/libm-test/src/special_case.rs b/libm/crates/libm-test/src/special_case.rs index 116a0a188..df263d742 100644 --- a/libm/crates/libm-test/src/special_case.rs +++ b/libm/crates/libm-test/src/special_case.rs @@ -1,7 +1,9 @@ //! Configuration for skipping or changing the result for individual test cases (inputs) rather //! than ignoring entire tests. -use crate::{CheckCtx, Float, Int, TestResult}; +use core::f32; + +use crate::{CheckBasis, CheckCtx, Float, Int, TestResult}; /// Type implementing [`IgnoreCase`]. pub struct SpecialCase; @@ -49,43 +51,97 @@ pub trait MaybeOverride { impl MaybeOverride<(f32,)> for SpecialCase { fn check_float( - _input: (f32,), + input: (f32,), actual: F, expected: F, _ulp: &mut u32, ctx: &CheckCtx, ) -> Option { + if ctx.basis == CheckBasis::Musl { + if ctx.fname == "acoshf" && input.0 < -1.0 { + // acoshf is undefined for x <= 1.0, but we return a random result at lower + // values. 
+ return XFAIL; + } + + if ctx.fname == "sincosf" { + let factor_frac_pi_2 = input.0.abs() / f32::consts::FRAC_PI_2; + if (factor_frac_pi_2 - factor_frac_pi_2.round()).abs() < 1e-2 { + // we have a bad approximation near multiples of pi/2 + return XFAIL; + } + } + + if ctx.fname == "expm1f" && input.0 > 80.0 && actual.is_infinite() { + // we return infinity but the number is representable + return XFAIL; + } + + if ctx.fname == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() { + // we return some NaN that should be real values or infinite + // doesn't seem to happen on x86 + return XFAIL; + } + + if ctx.fname == "lgammaf" || ctx.fname == "lgammaf_r" && input.0 < 0.0 { + // loggamma should not be defined for x < 0, yet we both return results + return XFAIL; + } + } + maybe_check_nan_bits(actual, expected, ctx) } } impl MaybeOverride<(f64,)> for SpecialCase { fn check_float( - _input: (f64,), + input: (f64,), actual: F, expected: F, _ulp: &mut u32, ctx: &CheckCtx, ) -> Option { + if ctx.basis == CheckBasis::Musl { + if cfg!(target_arch = "x86") && ctx.fname == "acosh" && input.0 < 1.0 { + // The function is undefined, both implementations return random results + return SKIP; + } + + if cfg!(x86_no_sse) + && ctx.fname == "ceil" + && input.0 < 0.0 + && input.0 > -1.0 + && expected == F::ZERO + && actual == F::ZERO + { + // musl returns -0.0, we return +0.0 + return XFAIL; + } + + if ctx.fname == "lgamma" || ctx.fname == "lgamma_r" && input.0 < 0.0 { + // loggamma should not be defined for x < 0, yet we both return results + return XFAIL; + } + } + maybe_check_nan_bits(actual, expected, ctx) } } -impl MaybeOverride<(f32, f32)> for SpecialCase {} -impl MaybeOverride<(f64, f64)> for SpecialCase {} -impl MaybeOverride<(f32, f32, f32)> for SpecialCase {} -impl MaybeOverride<(f64, f64, f64)> for SpecialCase {} -impl MaybeOverride<(i32, f32)> for SpecialCase {} -impl MaybeOverride<(i32, f64)> for SpecialCase {} -impl MaybeOverride<(f32, i32)> for SpecialCase {} -impl 
MaybeOverride<(f64, i32)> for SpecialCase {} - /// Check NaN bits if the function requires it fn maybe_check_nan_bits(actual: F, expected: F, ctx: &CheckCtx) -> Option { - if !(ctx.canonical_name == "abs" || ctx.canonical_name == "copysigh") { + if !(ctx.canonical_name == "fabs" || ctx.canonical_name == "copysign") { return None; } + // LLVM currently uses x87 instructions which quieten signalling NaNs to handle the i686 + // `extern "C"` `f32`/`f64` return ABI. + // LLVM issue + // Rust issue + if cfg!(target_arch = "x86") && ctx.basis == CheckBasis::Musl { + return SKIP; + } + // abs and copysign require signaling NaNs to be propagated, so verify bit equality. if actual.to_bits() == expected.to_bits() { return SKIP; @@ -93,3 +149,91 @@ fn maybe_check_nan_bits(actual: F, expected: F, ctx: &CheckCtx) -> Opt Some(Err(anyhow::anyhow!("NaNs have different bitpatterns"))) } } + +impl MaybeOverride<(f32, f32)> for SpecialCase { + fn check_float( + input: (f32, f32), + _actual: F, + expected: F, + _ulp: &mut u32, + ctx: &CheckCtx, + ) -> Option { + maybe_skip_min_max_nan(input, expected, ctx) + } +} +impl MaybeOverride<(f64, f64)> for SpecialCase { + fn check_float( + input: (f64, f64), + _actual: F, + expected: F, + _ulp: &mut u32, + ctx: &CheckCtx, + ) -> Option { + maybe_skip_min_max_nan(input, expected, ctx) + } +} + +/// Musl propagates NaNs if one is provided as the input, but we return the other input. 
+// F1 and F2 are always the same type, this is just to please generics +fn maybe_skip_min_max_nan( + input: (F1, F1), + expected: F2, + ctx: &CheckCtx, +) -> Option { + if (ctx.canonical_name == "fmax" || ctx.canonical_name == "fmin") + && (input.0.is_nan() || input.1.is_nan()) + && expected.is_nan() + { + return XFAIL; + } else { + None + } +} + +impl MaybeOverride<(i32, f32)> for SpecialCase { + fn check_float( + input: (i32, f32), + _actual: F, + _expected: F, + ulp: &mut u32, + ctx: &CheckCtx, + ) -> Option { + bessel_prec_dropoff(input, ulp, ctx) + } +} +impl MaybeOverride<(i32, f64)> for SpecialCase { + fn check_float( + input: (i32, f64), + _actual: F, + _expected: F, + ulp: &mut u32, + ctx: &CheckCtx, + ) -> Option { + bessel_prec_dropoff(input, ulp, ctx) + } +} + +/// Our bessel functions blow up with large N values +fn bessel_prec_dropoff( + input: (i32, F), + ulp: &mut u32, + ctx: &CheckCtx, +) -> Option { + if ctx.canonical_name == "jn" { + if input.0 > 4000 { + return XFAIL; + } else if input.0 > 2000 { + // *ulp = 20_000; + *ulp = 20000; + } else if input.0 > 1000 { + *ulp = 4000; + } + } + + None +} + +impl MaybeOverride<(f32, f32, f32)> for SpecialCase {} +impl MaybeOverride<(f64, f64, f64)> for SpecialCase {} +impl MaybeOverride<(f32, i32)> for SpecialCase {} +impl MaybeOverride<(f64, i32)> for SpecialCase {} diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index 296f1b0fd..c24ac6e43 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -49,7 +49,10 @@ impl CheckCtx { /// Possible items to test against #[derive(Clone, Debug, PartialEq, Eq)] -pub enum CheckBasis {} +pub enum CheckBasis { + /// Check against Musl's math sources. + Musl, +} /// A trait to implement on any output type so we can verify it in a generic way. 
pub trait CheckOutput: Sized { @@ -160,8 +163,7 @@ where // Check when both are NaNs if self.is_nan() && expected.is_nan() { - ensure!(self.to_bits() == expected.to_bits(), "NaNs have different bitpatterns"); - // Nothing else to check + // By default, NaNs have nothing special to check. return Ok(()); } else if self.is_nan() || expected.is_nan() { // Check when only one is a NaN diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs new file mode 100644 index 000000000..208b8e286 --- /dev/null +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -0,0 +1,52 @@ +//! Compare our implementations with the result of musl functions, as provided by `musl-math-sys`. +//! +//! Currently this only tests randomized inputs. In the future this may be improved to test edge +//! cases or run exhaustive tests. +//! +//! Note that musl functions do not always provide 0.5ULP rounding, so our functions can do better +//! than these results. + +// There are some targets we can't build musl for +#![cfg(feature = "build-musl")] + +use libm_test::gen::random; +use libm_test::{CheckBasis, CheckCtx, CheckOutput, TupleCall, musl_allowed_ulp}; +use musl_math_sys as musl; + +macro_rules! musl_rand_tests { + ( + fn_name: $fn_name:ident, + CFn: $CFn:ty, + CArgs: $CArgs:ty, + CRet: $CRet:ty, + RustFn: $RustFn:ty, + RustArgs: $RustArgs:ty, + RustRet: $RustRet:ty, + attrs: [$($meta:meta)*] + ) => { paste::paste! { + #[test] + $(#[$meta])* + fn [< musl_random_ $fn_name >]() { + let fname = stringify!($fn_name); + let ulp = musl_allowed_ulp(fname); + let cases = random::get_test_cases::<$RustArgs>(fname); + let ctx = CheckCtx::new(ulp, fname, CheckBasis::Musl); + + for input in cases { + let musl_res = input.call(musl::$fn_name as $CFn); + let crate_res = input.call(libm::$fn_name as $RustFn); + + crate_res.validate(musl_res, input, &ctx).unwrap(); + } + } + } }; +} + +libm_macros::for_each_function! 
{ + callback: musl_rand_tests, + skip: [], + attributes: [ + #[cfg_attr(x86_no_sse, ignore)] // FIXME(correctness): wrong result on i586 + [exp10, exp10f, exp2, exp2f, rint] + ], +} From fbdcf05d0fae9f6e648870570944c8d770723291 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 16 Oct 2024 22:33:27 -0500 Subject: [PATCH 0907/1459] Add dockerfiles for i586, riscv, and thumb These targets are tested in `compiler-builtins`, but not yet `libm`. Add dockerfiles to prepare for this. --- libm/ci/docker/i586-unknown-linux-gnu/Dockerfile | 5 +++++ .../docker/riscv64gc-unknown-linux-gnu/Dockerfile | 15 +++++++++++++++ libm/ci/docker/thumbv6m-none-eabi/Dockerfile | 9 +++++++++ libm/ci/docker/thumbv7em-none-eabi/Dockerfile | 9 +++++++++ libm/ci/docker/thumbv7em-none-eabihf/Dockerfile | 9 +++++++++ libm/ci/docker/thumbv7m-none-eabi/Dockerfile | 9 +++++++++ 6 files changed, 56 insertions(+) create mode 100644 libm/ci/docker/i586-unknown-linux-gnu/Dockerfile create mode 100644 libm/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile create mode 100644 libm/ci/docker/thumbv6m-none-eabi/Dockerfile create mode 100644 libm/ci/docker/thumbv7em-none-eabi/Dockerfile create mode 100644 libm/ci/docker/thumbv7em-none-eabihf/Dockerfile create mode 100644 libm/ci/docker/thumbv7m-none-eabi/Dockerfile diff --git a/libm/ci/docker/i586-unknown-linux-gnu/Dockerfile b/libm/ci/docker/i586-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000..3b0bfc0d3 --- /dev/null +++ b/libm/ci/docker/i586-unknown-linux-gnu/Dockerfile @@ -0,0 +1,5 @@ +FROM ubuntu:24.04 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc-multilib libc6-dev ca-certificates diff --git a/libm/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile b/libm/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000..5f8a28924 --- /dev/null +++ b/libm/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile @@ -0,0 +1,15 @@ +FROM ubuntu:24.04 + +RUN apt-get update && \ + apt-get 
install -y --no-install-recommends \ + gcc libc6-dev qemu-user-static ca-certificates \ + gcc-riscv64-linux-gnu libc6-dev-riscv64-cross \ + qemu-system-riscv64 + +ENV TOOLCHAIN_PREFIX=riscv64-linux-gnu- +ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ + CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64-static \ + AR_riscv64gc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_riscv64gc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ + QEMU_LD_PREFIX=/usr/riscv64-linux-gnu \ + RUST_TEST_THREADS=1 diff --git a/libm/ci/docker/thumbv6m-none-eabi/Dockerfile b/libm/ci/docker/thumbv6m-none-eabi/Dockerfile new file mode 100644 index 000000000..ad0d4351e --- /dev/null +++ b/libm/ci/docker/thumbv6m-none-eabi/Dockerfile @@ -0,0 +1,9 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-none-eabi \ + libnewlib-arm-none-eabi +ENV BUILD_ONLY=1 diff --git a/libm/ci/docker/thumbv7em-none-eabi/Dockerfile b/libm/ci/docker/thumbv7em-none-eabi/Dockerfile new file mode 100644 index 000000000..ad0d4351e --- /dev/null +++ b/libm/ci/docker/thumbv7em-none-eabi/Dockerfile @@ -0,0 +1,9 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-none-eabi \ + libnewlib-arm-none-eabi +ENV BUILD_ONLY=1 diff --git a/libm/ci/docker/thumbv7em-none-eabihf/Dockerfile b/libm/ci/docker/thumbv7em-none-eabihf/Dockerfile new file mode 100644 index 000000000..ad0d4351e --- /dev/null +++ b/libm/ci/docker/thumbv7em-none-eabihf/Dockerfile @@ -0,0 +1,9 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-none-eabi \ + libnewlib-arm-none-eabi +ENV BUILD_ONLY=1 diff --git a/libm/ci/docker/thumbv7m-none-eabi/Dockerfile b/libm/ci/docker/thumbv7m-none-eabi/Dockerfile new file mode 
100644 index 000000000..ad0d4351e --- /dev/null +++ b/libm/ci/docker/thumbv7m-none-eabi/Dockerfile @@ -0,0 +1,9 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-none-eabi \ + libnewlib-arm-none-eabi +ENV BUILD_ONLY=1 From 36d39192452af6d32729f0de489a3f36c6aeec66 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 16 Oct 2024 23:00:17 -0500 Subject: [PATCH 0908/1459] Disable a unit test that is failing on i586 --- libm/src/math/rem_pio2.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 6be23a43c..4dfb8c658 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -194,6 +194,8 @@ mod tests { use super::rem_pio2; #[test] + // FIXME(correctness): inaccurate results on i586 + #[cfg_attr(all(target_arch = "x86", not(target_feature = "sse")), ignore)] fn test_near_pi() { let arg = 3.141592025756836; let arg = force_eval!(arg); From e1af31632d83c9f4887c76bb040111b3caeea40a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 16 Oct 2024 22:59:59 -0500 Subject: [PATCH 0909/1459] Enable more targets on CI This brings the targets tested here in line with those tested in `compiler-builtins`. 
--- libm/.github/workflows/main.yml | 121 +++++++++++++++++++++++++------- libm/ci/run-docker.sh | 1 + libm/ci/run.sh | 69 ++++++++++++++---- 3 files changed, 151 insertions(+), 40 deletions(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 926e3c19e..a2c779526 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -2,38 +2,103 @@ name: CI on: [push, pull_request] env: + CARGO_TERM_VERBOSE: true RUSTDOCFLAGS: -Dwarnings RUSTFLAGS: -Dwarnings + RUST_BACKTRACE: full jobs: - docker: + test: name: Docker - runs-on: ubuntu-latest + timeout-minutes: 20 strategy: + fail-fast: false matrix: - target: - - aarch64-unknown-linux-gnu - - arm-unknown-linux-gnueabi - - arm-unknown-linux-gnueabihf - - armv7-unknown-linux-gnueabihf - # - i686-unknown-linux-gnu - # MIPS targets disabled since they are dropped to tier 3. - # See https://github.com/rust-lang/compiler-team/issues/648 - #- mips-unknown-linux-gnu - #- mips64-unknown-linux-gnuabi64 - #- mips64el-unknown-linux-gnuabi64 - - powerpc-unknown-linux-gnu - - powerpc64-unknown-linux-gnu - - powerpc64le-unknown-linux-gnu - - x86_64-unknown-linux-gnu + include: + - target: aarch64-apple-darwin + os: macos-latest + - target: aarch64-unknown-linux-gnu + os: ubuntu-latest + - target: aarch64-pc-windows-msvc + os: windows-latest + build_only: 1 # Can't run on x86 hosts + - target: arm-unknown-linux-gnueabi + os: ubuntu-latest + - target: arm-unknown-linux-gnueabihf + os: ubuntu-latest + - target: armv7-unknown-linux-gnueabihf + os: ubuntu-latest + - target: i586-unknown-linux-gnu + os: ubuntu-latest + - target: i686-unknown-linux-gnu + os: ubuntu-latest + - target: powerpc-unknown-linux-gnu + os: ubuntu-latest + - target: powerpc64-unknown-linux-gnu + os: ubuntu-latest + - target: powerpc64le-unknown-linux-gnu + os: ubuntu-latest + - target: riscv64gc-unknown-linux-gnu + os: ubuntu-latest + - target: thumbv6m-none-eabi + os: ubuntu-latest + - target: 
thumbv7em-none-eabi + os: ubuntu-latest + - target: thumbv7em-none-eabihf + os: ubuntu-latest + - target: thumbv7m-none-eabi + os: ubuntu-latest + - target: x86_64-unknown-linux-gnu + os: ubuntu-latest + - target: x86_64-apple-darwin + os: macos-13 + - target: i686-pc-windows-msvc + os: windows-latest + - target: x86_64-pc-windows-msvc + os: windows-latest + - target: i686-pc-windows-gnu + os: windows-latest + channel: nightly-i686-gnu + - target: x86_64-pc-windows-gnu + os: windows-latest + channel: nightly-x86_64-gnu + runs-on: ${{ matrix.os }} + env: + BUILD_ONLY: ${{ matrix.build_only }} steps: - - uses: actions/checkout@master - - name: Install Rust - run: rustup update nightly --no-self-update && rustup default nightly - - run: rustup target add ${{ matrix.target }} - - run: rustup target add x86_64-unknown-linux-musl - - run: cargo generate-lockfile - - run: ./ci/run-docker.sh ${{ matrix.target }} + - name: Print runner information + run: uname -a + - uses: actions/checkout@v4 + - name: Install Rust (rustup) + shell: bash + run: | + channel="nightly" + # Account for channels that have required components (MinGW) + [ -n "${{ matrix.channel }}" ] && channel="${{ matrix.channel }}" + rustup update "$channel" --no-self-update + rustup default "$channel" + rustup target add ${{ matrix.target }} + rustup component add llvm-tools-preview + - uses: Swatinem/rust-cache@v2 + with: + key: ${{ matrix.target }} + + - name: Download musl source + run: ./ci/download-musl.sh + shell: bash + + # Non-linux tests just use our raw script + - name: Run locally + if: matrix.os != 'ubuntu-latest' + shell: bash + run: ./ci/run.sh ${{ matrix.target }} + + # Otherwise we use our docker containers to run builds + - name: Run in Docker + if: matrix.os == 'ubuntu-latest' + run: | + rustup target add x86_64-unknown-linux-musl + cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} wasm: name: WebAssembly @@ -45,7 +110,7 @@ jobs: - run: rustup target add 
wasm32-unknown-unknown - run: cargo build --target wasm32-unknown-unknown - cb: + builtins: name: "The compiler-builtins crate works" runs-on: ubuntu-latest steps: @@ -61,6 +126,8 @@ jobs: - uses: actions/checkout@master - name: Install Rust run: rustup update nightly --no-self-update && rustup default nightly + - name: Download musl source + run: ./ci/download-musl.sh - run: cargo bench --all msrv: @@ -92,9 +159,9 @@ jobs: success: needs: - - docker + - test - wasm - - cb + - builtins - benchmarks - msrv - rustfmt diff --git a/libm/ci/run-docker.sh b/libm/ci/run-docker.sh index 9191a17e2..2e09dd41a 100755 --- a/libm/ci/run-docker.sh +++ b/libm/ci/run-docker.sh @@ -21,6 +21,7 @@ run() { -e RUSTFLAGS \ -e CARGO_HOME=/cargo \ -e CARGO_TARGET_DIR=/target \ + -e EMULATED=1 \ -v "${HOME}/.cargo:/cargo" \ -v "$(pwd)/target:/target" \ -v "$(pwd):/checkout:ro" \ diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 505e25891..f61fff843 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -2,21 +2,64 @@ set -eux -target="$1" +export RUST_BACKTRACE="${RUST_BACKTRACE:-full}" +# Needed for no-panic to correct detect a lack of panics +export RUSTFLAGS="${RUSTFLAGS:-} -Ccodegen-units=1" -cmd="cargo test --all --target $target" +target="${1:-}" -# Needed for no-panic to correct detect a lack of panics -export RUSTFLAGS="$RUSTFLAGS -Ccodegen-units=1" +if [ -z "$target" ]; then + host_target=$(rustc -vV | awk '/^host/ { print $2 }') + echo "Defaulted to host target $host_target" + target="$host_target" +fi + +extra_flags="" + +# We need to specifically skip tests for musl-math-sys on systems that can't +# build musl since otherwise `--all` will activate it. 
+case "$target" in + # Can't build at all on MSVC, WASM, or thumb + *windows-msvc*) extra_flags="$extra_flags --exclude musl-math-sys" ;; + *wasm*) extra_flags="$extra_flags --exclude musl-math-sys" ;; + *thumb*) extra_flags="$extra_flags --exclude musl-math-sys" ;; + + # We can build musl on MinGW but running tests gets a stack overflow + *windows-gnu*) ;; + # FIXME(#309): LE PPC crashes calling the musl version of some functions. It + # seems like a qemu bug but should be investigated further at some point. + # See . + *powerpc64le*) ;; + + # Everything else gets musl enabled + *) extra_flags="$extra_flags --features libm-test/build-musl" ;; +esac + +# FIXME: `STATUS_DLL_NOT_FOUND` testing macros on CI. +# +case "$target" in + *windows-gnu) extra_flags="$extra_flags --exclude libm-macros" ;; +esac + +if [ "$(uname -a)" = "Linux" ]; then + # also run the reference tests when we can. requires a Linux host. + extra_flags="$extra_flags --features libm-test/test-musl-serialized" +fi + +if [ "${BUILD_ONLY:-}" = "1" ]; then + cmd="cargo build --target $target --package libm" + $cmd + $cmd --features 'unstable' -# stable by default -$cmd -$cmd --release + echo "can't run tests on $target" +else + cmd="cargo test --all --target $target $extra_flags" -# unstable with a feature -$cmd --features 'unstable' -$cmd --release --features 'unstable' + # stable by default + $cmd + $cmd --release -# also run the reference tests -$cmd --features 'unstable libm-test/test-musl-serialized' -$cmd --release --features 'unstable libm-test/test-musl-serialized' + # unstable with a feature + $cmd --features 'unstable' + $cmd --release --features 'unstable' +fi From 1a6a1cf64195b65bd5d76fe84abe2b8ea338e478 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 13:22:57 -0500 Subject: [PATCH 0910/1459] Mark libm-macros and musl-math-sys as publish=false --- libm/crates/libm-macros/Cargo.toml | 1 + libm/crates/musl-math-sys/Cargo.toml | 1 + 2 files changed, 2 insertions(+) diff 
--git a/libm/crates/libm-macros/Cargo.toml b/libm/crates/libm-macros/Cargo.toml index 9d2b08e2d..3da9d45a2 100644 --- a/libm/crates/libm-macros/Cargo.toml +++ b/libm/crates/libm-macros/Cargo.toml @@ -2,6 +2,7 @@ name = "libm-macros" version = "0.1.0" edition = "2021" +publish = false [lib] proc-macro = true diff --git a/libm/crates/musl-math-sys/Cargo.toml b/libm/crates/musl-math-sys/Cargo.toml index 449ce4f3e..7f6272d79 100644 --- a/libm/crates/musl-math-sys/Cargo.toml +++ b/libm/crates/musl-math-sys/Cargo.toml @@ -2,6 +2,7 @@ name = "musl-math-sys" version = "0.1.0" edition = "2021" +publish = false [dependencies] From 5a07879e39e7336fcb6b1db772898d71a3df9d24 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 13:30:39 -0500 Subject: [PATCH 0911/1459] Reduce the number of iterations on emulated aarch64 Linux CI for aarch64 Linux is significantly slower than the others. Adjust how iteration selection is done to better handle this case, which also simplifies things. Also set the `EMULATED` environment variable in Docker to be more accurate, and reindents run-docker.sh. 
--- libm/ci/run-docker.sh | 48 +++++++++++++++---------- libm/crates/libm-test/src/gen/random.rs | 26 +++++--------- libm/crates/libm-test/src/lib.rs | 9 +++++ 3 files changed, 46 insertions(+), 37 deletions(-) diff --git a/libm/ci/run-docker.sh b/libm/ci/run-docker.sh index 2e09dd41a..a040126df 100755 --- a/libm/ci/run-docker.sh +++ b/libm/ci/run-docker.sh @@ -5,39 +5,49 @@ set -euxo pipefail +host_arch="$(uname -m | sed 's/arm64/aarch64/')" + run() { local target=$1 echo "testing target: $target" + target_arch="$(echo "$target" | cut -d'-' -f1)" + + emulated="" + if [ "$target_arch" != "$host_arch" ]; then + emulated=1 + echo "target is emulated" + fi + # This directory needs to exist before calling docker, otherwise docker will create it but it # will be owned by root mkdir -p target docker build -t "$target" "ci/docker/$target" docker run \ - --rm \ - --user "$(id -u):$(id -g)" \ - -e RUSTFLAGS \ - -e CARGO_HOME=/cargo \ - -e CARGO_TARGET_DIR=/target \ - -e EMULATED=1 \ - -v "${HOME}/.cargo:/cargo" \ - -v "$(pwd)/target:/target" \ - -v "$(pwd):/checkout:ro" \ - -v "$(rustc --print sysroot):/rust:ro" \ - --init \ - -w /checkout \ - "$target" \ - sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/run.sh $target" + --rm \ + --user "$(id -u):$(id -g)" \ + -e RUSTFLAGS \ + -e CARGO_HOME=/cargo \ + -e CARGO_TARGET_DIR=/target \ + -e "EMULATED=$emulated" \ + -v "${HOME}/.cargo:/cargo" \ + -v "$(pwd)/target:/target" \ + -v "$(pwd):/checkout:ro" \ + -v "$(rustc --print sysroot):/rust:ro" \ + --init \ + -w /checkout \ + "$target" \ + sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/run.sh $target" } if [ -z "$1" ]; then - echo "running tests for all targets" + echo "running tests for all targets" - for d in ci/docker/*; do - run $d - done + for d in ci/docker/*; do + run $d + done else - run $1 + run $1 fi diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs index e59643195..601ef4f1d 100644 --- 
a/libm/crates/libm-test/src/gen/random.rs +++ b/libm/crates/libm-test/src/gen/random.rs @@ -13,31 +13,21 @@ const SEED: [u8; 32] = *b"3.141592653589793238462643383279"; /// Number of tests to run. const NTESTS: usize = { - let ntests = if cfg!(optimizations_enabled) { - if cfg!(target_arch = "x86_64") || cfg!(target_arch = "aarch64") { - 5_000_000 - } else if !cfg!(target_pointer_width = "64") + if cfg!(optimizations_enabled) { + if crate::emulated() + || !cfg!(target_pointer_width = "64") || cfg!(all(target_arch = "x86_64", target_vendor = "apple")) - || option_env!("EMULATED").is_some() - && cfg!(any(target_arch = "aarch64", target_arch = "powerpc64")) { - // Tests are pretty slow on: - // - Non-64-bit targets - // - Emulated ppc - // - Emulated aarch64 - // - x86 MacOS - // So reduce the number of iterations + // Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run + // in QEMU. 100_000 } else { - // Most everything else gets tested in docker and works okay, but we still - // don't need 20 minutes of tests. - 1_000_000 + 5_000_000 } } else { + // Without optimizations just run a quick check 800 - }; - - ntests + } }; /// Tested inputs. diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 3baf77524..2abe7f605 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -62,3 +62,12 @@ pub fn canonical_name(name: &str) -> &str { .unwrap_or(name), } } + +/// True if `EMULATED` is set and nonempty. Used to determine how many iterations to run. 
+pub const fn emulated() -> bool { + match option_env!("EMULATED") { + Some(s) if s.is_empty() => false, + None => false, + Some(_) => true, + } +} From dba82cb522e7f42c436945503bf8086b7e66bf13 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 15:07:34 -0500 Subject: [PATCH 0912/1459] Enable caching on all CI jobs that build the crate (#341) --- libm/.github/workflows/main.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index a2c779526..15eba6e89 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -108,6 +108,7 @@ jobs: - name: Install Rust run: rustup update nightly --no-self-update && rustup default nightly - run: rustup target add wasm32-unknown-unknown + - uses: Swatinem/rust-cache@v2 - run: cargo build --target wasm32-unknown-unknown builtins: @@ -117,6 +118,7 @@ jobs: - uses: actions/checkout@master - name: Install Rust run: rustup update nightly --no-self-update && rustup default nightly + - uses: Swatinem/rust-cache@v2 - run: cargo build -p cb benchmarks: @@ -126,6 +128,7 @@ jobs: - uses: actions/checkout@master - name: Install Rust run: rustup update nightly --no-self-update && rustup default nightly + - uses: Swatinem/rust-cache@v2 - name: Download musl source run: ./ci/download-musl.sh - run: cargo bench --all @@ -143,6 +146,7 @@ jobs: echo "MSRV=$msrv" >> "$GITHUB_ENV" - name: Install Rust run: rustup update "$MSRV" --no-self-update && rustup default "$MSRV" + - uses: Swatinem/rust-cache@v2 - run: cargo build -p libm rustfmt: From 3d89566aa8335f414abaaf7a153f722455f7dc0e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 28 Oct 2024 20:08:07 +0000 Subject: [PATCH 0913/1459] chore: release v0.2.11 --- libm/CHANGELOG.md | 13 +++++++++++++ libm/Cargo.toml | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md 
index 317dfafc0..4e5acb899 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -8,6 +8,19 @@ and this project adheres to ## [Unreleased] +## [0.2.11](https://github.com/rust-lang/libm/compare/libm-v0.2.10...libm-v0.2.11) - 2024-10-28 + +### Fixed + +- fix type of constants in ported sincosf ([#331](https://github.com/rust-lang/libm/pull/331)) + +### Other + +- Disable a unit test that is failing on i586 +- Add a procedural macro for expanding all function signatures +- Introduce `musl-math-sys` for bindings to musl math symbols +- Add basic docstrings to some functions ([#337](https://github.com/rust-lang/libm/pull/337)) + ## [0.2.10](https://github.com/rust-lang/libm/compare/libm-v0.2.9...libm-v0.2.10) - 2024-10-28 ### Other diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 72b6dcd5e..aa6c08ddb 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT AND (MIT OR Apache-2.0)" name = "libm" readme = "README.md" repository = "https://github.com/rust-lang/libm" -version = "0.2.10" +version = "0.2.11" edition = "2021" exclude = ["/ci/", "/.github/workflows/"] rust-version = "1.63" From c16ac580096c01703d2826f47bdbeeb1d61f63ce Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 17:44:02 -0500 Subject: [PATCH 0914/1459] Move the existing "unstable" feature to "unstable-intrinsics" Currently there is a single feature called "unstable" that is used to control whether intrinsics may be called. In anticipation of adding other unstable features that we will want to control separately, create a new feature called "unstable-intrinsics" that is enabled by "unstable". Then move everything gated by "unstable" to "unstable-intrinsics". 
--- libm/Cargo.toml | 5 ++++- libm/ci/run.sh | 6 +++--- libm/crates/compiler-builtins-smoke-test/Cargo.toml | 2 ++ libm/src/lib.rs | 4 ++-- libm/src/math/mod.rs | 8 +++++--- 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index aa6c08ddb..de450468a 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -18,7 +18,10 @@ default = [] # This tells the compiler to assume that a Nightly toolchain is being used and # that it should activate any useful Nightly things accordingly. -unstable = [] +unstable = ["unstable-intrinsics"] + +# Enable calls to functions in `core::intrinsics` +unstable-intrinsics = [] # Used to prevent using any intrinsics or arch-specific code. force-soft-floats = [] diff --git a/libm/ci/run.sh b/libm/ci/run.sh index f61fff843..f1ca4b0cb 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -49,7 +49,7 @@ fi if [ "${BUILD_ONLY:-}" = "1" ]; then cmd="cargo build --target $target --package libm" $cmd - $cmd --features 'unstable' + $cmd --features "unstable-intrinsics" echo "can't run tests on $target" else @@ -60,6 +60,6 @@ else $cmd --release # unstable with a feature - $cmd --features 'unstable' - $cmd --release --features 'unstable' + $cmd --features "unstable-intrinsics" + $cmd --release --features "unstable-intrinsics" fi diff --git a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml index 8d084ee34..2aa7c8371 100644 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -10,6 +10,8 @@ test = false bench = false [features] +# Duplicated from libm's Cargo.toml unstable = [] +unstable-intrinsics = [] checked = [] force-soft-floats = [] diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 6d95fa173..1305d35ab 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -1,7 +1,7 @@ //! 
libm in pure Rust #![no_std] -#![cfg_attr(feature = "unstable", allow(internal_features))] -#![cfg_attr(feature = "unstable", feature(core_intrinsics))] +#![cfg_attr(feature = "unstable-intrinsics", allow(internal_features))] +#![cfg_attr(feature = "unstable-intrinsics", feature(core_intrinsics))] #![allow(clippy::assign_op_pattern)] #![allow(clippy::deprecated_cfg_attr)] #![allow(clippy::eq_op)] diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 85c9fc5bf..17b9e6b4c 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -60,14 +60,14 @@ macro_rules! i { // the time of this writing this is only used in a few places, and once // rust-lang/rust#72751 is fixed then this macro will no longer be necessary and // the native `/` operator can be used and panics won't be codegen'd. -#[cfg(any(debug_assertions, not(feature = "unstable")))] +#[cfg(any(debug_assertions, not(feature = "unstable-intrinsics")))] macro_rules! div { ($a:expr, $b:expr) => { $a / $b }; } -#[cfg(all(not(debug_assertions), feature = "unstable"))] +#[cfg(all(not(debug_assertions), feature = "unstable-intrinsics"))] macro_rules! div { ($a:expr, $b:expr) => { unsafe { core::intrinsics::unchecked_div($a, $b) } @@ -76,7 +76,9 @@ macro_rules! div { macro_rules! llvm_intrinsically_optimized { (#[cfg($($clause:tt)*)] $e:expr) => { - #[cfg(all(feature = "unstable", not(feature = "force-soft-floats"), $($clause)*))] + #[cfg(all( + feature = "unstable-intrinsics", not(feature = "force-soft-floats"), $($clause)* + ))] { if true { // thwart the dead code lint $e From 9a44b6fdaf9632c5ff5b310e6ec240ea31098f96 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 18:15:19 -0500 Subject: [PATCH 0915/1459] Replace `feature = "unstable-intrinsics"` with `intrinsics_enabled` We currently have a non-additive feature, "force-soft-floats", and we will need to gain another "no-f16-f128". This makes `cfg` usage in code somewhat confusing and redundant. 
Use `build.rs` to figure out if "unstable-intrinsics" is enabled while "force-soft-floats" is not enabled and if so, emit a cfg `intrinsics_enabled`. This is cleaner to use and should make adding more features easier to reason about. Also use this as an opportunity to eliminate the build.rs from the compiler-builtins test crate, replaced with the `[lints]` table in Cargo.toml. --- libm/Cargo.toml | 5 +++++ libm/build.rs | 15 ++++++++++++++- libm/ci/run.sh | 4 ++++ .../compiler-builtins-smoke-test/Cargo.toml | 6 ++++++ libm/crates/compiler-builtins-smoke-test/build.rs | 3 --- libm/src/lib.rs | 4 ++-- libm/src/math/mod.rs | 8 +++----- 7 files changed, 34 insertions(+), 11 deletions(-) delete mode 100644 libm/crates/compiler-builtins-smoke-test/build.rs diff --git a/libm/Cargo.toml b/libm/Cargo.toml index de450468a..5e4565556 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -24,6 +24,11 @@ unstable = ["unstable-intrinsics"] unstable-intrinsics = [] # Used to prevent using any intrinsics or arch-specific code. +# +# HACK: this is a negative feature which is generally a bad idea in Cargo, but +# we need it to be able to forbid other features when this crate is used in +# Rust dependencies. Setting this overrides all features that may enable +# hard float operations. force-soft-floats = [] [workspace] diff --git a/libm/build.rs b/libm/build.rs index b683557e4..adb521407 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -3,7 +3,6 @@ use std::env; fn main() { println!("cargo:rerun-if-changed=build.rs"); println!("cargo:rustc-check-cfg=cfg(assert_no_panic)"); - println!("cargo:rustc-check-cfg=cfg(feature, values(\"unstable\"))"); println!("cargo:rustc-check-cfg=cfg(feature, values(\"checked\"))"); @@ -14,4 +13,18 @@ fn main() { println!("cargo:rustc-cfg=assert_no_panic"); } } + + configure_intrinsics(); +} + +/// Simplify the feature logic for enabling intrinsics so code only needs to use +/// `cfg(intrinsics_enabled)`. 
+fn configure_intrinsics() { + println!("cargo:rustc-check-cfg=cfg(intrinsics_enabled)"); + + // Disabled by default; `unstable-intrinsics` enables again; `force-soft-floats` overrides + // to disable. + if cfg!(feature = "unstable-intrinsics") && !cfg!(feature = "force-soft-floats") { + println!("cargo:rustc-cfg=intrinsics_enabled"); + } } diff --git a/libm/ci/run.sh b/libm/ci/run.sh index f1ca4b0cb..d3fc4ce24 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -46,6 +46,10 @@ if [ "$(uname -a)" = "Linux" ]; then extra_flags="$extra_flags --features libm-test/test-musl-serialized" fi +# Make sure we can build with overriding features. We test the indibidual +# features it controls separately. +cargo check --features "force-soft-floats" + if [ "${BUILD_ONLY:-}" = "1" ]; then cmd="cargo build --target $target --package libm" $cmd diff --git a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml index 2aa7c8371..2a6c62961 100644 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -15,3 +15,9 @@ unstable = [] unstable-intrinsics = [] checked = [] force-soft-floats = [] + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = [ + "cfg(assert_no_panic)", + "cfg(intrinsics_enabled)", +] } diff --git a/libm/crates/compiler-builtins-smoke-test/build.rs b/libm/crates/compiler-builtins-smoke-test/build.rs deleted file mode 100644 index 27d4a0e89..000000000 --- a/libm/crates/compiler-builtins-smoke-test/build.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - println!("cargo::rustc-check-cfg=cfg(assert_no_panic)"); -} diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 1305d35ab..98ac55988 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -1,7 +1,7 @@ //! 
libm in pure Rust #![no_std] -#![cfg_attr(feature = "unstable-intrinsics", allow(internal_features))] -#![cfg_attr(feature = "unstable-intrinsics", feature(core_intrinsics))] +#![cfg_attr(intrinsics_enabled, allow(internal_features))] +#![cfg_attr(intrinsics_enabled, feature(core_intrinsics))] #![allow(clippy::assign_op_pattern)] #![allow(clippy::deprecated_cfg_attr)] #![allow(clippy::eq_op)] diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 17b9e6b4c..9baa57fc8 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -60,14 +60,14 @@ macro_rules! i { // the time of this writing this is only used in a few places, and once // rust-lang/rust#72751 is fixed then this macro will no longer be necessary and // the native `/` operator can be used and panics won't be codegen'd. -#[cfg(any(debug_assertions, not(feature = "unstable-intrinsics")))] +#[cfg(any(debug_assertions, not(intrinsics_enabled)))] macro_rules! div { ($a:expr, $b:expr) => { $a / $b }; } -#[cfg(all(not(debug_assertions), feature = "unstable-intrinsics"))] +#[cfg(all(not(debug_assertions), intrinsics_enabled))] macro_rules! div { ($a:expr, $b:expr) => { unsafe { core::intrinsics::unchecked_div($a, $b) } @@ -76,9 +76,7 @@ macro_rules! div { macro_rules! llvm_intrinsically_optimized { (#[cfg($($clause:tt)*)] $e:expr) => { - #[cfg(all( - feature = "unstable-intrinsics", not(feature = "force-soft-floats"), $($clause)* - ))] + #[cfg(all(intrinsics_enabled, not(feature = "force-soft-floats"), $($clause)*))] { if true { // thwart the dead code lint $e From 82e472bbcd797091373a531240861e221fef3d30 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 19:30:04 -0500 Subject: [PATCH 0916/1459] Introduce `math::arch::intrinsics` This module provides implementations of basic functions that defer to LLVM for what to do, rather than either using a builtin operation or calling another function in this library. 
`math::arch` will become the home of anything architecture-specific in the future. --- libm/src/math/arch/intrinsics.rs | 52 ++++++++++++++++++++++++++++++++ libm/src/math/arch/mod.rs | 9 ++++++ libm/src/math/mod.rs | 1 + 3 files changed, 62 insertions(+) create mode 100644 libm/src/math/arch/intrinsics.rs create mode 100644 libm/src/math/arch/mod.rs diff --git a/libm/src/math/arch/intrinsics.rs b/libm/src/math/arch/intrinsics.rs new file mode 100644 index 000000000..1cf9291f4 --- /dev/null +++ b/libm/src/math/arch/intrinsics.rs @@ -0,0 +1,52 @@ +// Config is needed for times when this module is available but we don't call everything +#![allow(dead_code)] + +pub fn ceil(x: f64) -> f64 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::ceilf64(x) } +} + +pub fn ceilf(x: f32) -> f32 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::ceilf32(x) } +} + +pub fn fabs(x: f64) -> f64 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::fabsf64(x) } +} + +pub fn fabsf(x: f32) -> f32 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::fabsf32(x) } +} + +pub fn floor(x: f64) -> f64 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::floorf64(x) } +} + +pub fn floorf(x: f32) -> f32 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::floorf32(x) } +} + +pub fn sqrt(x: f64) -> f64 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::sqrtf64(x) } +} + +pub fn sqrtf(x: f32) -> f32 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::sqrtf32(x) } +} + +pub fn trunc(x: f64) -> f64 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::truncf64(x) } +} + +pub fn truncf(x: f32) -> f32 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::truncf32(x) } +} diff --git a/libm/src/math/arch/mod.rs 
b/libm/src/math/arch/mod.rs new file mode 100644 index 000000000..a4bc218b7 --- /dev/null +++ b/libm/src/math/arch/mod.rs @@ -0,0 +1,9 @@ +//! Architecture-specific routines and operations. +//! +//! LLVM will already optimize calls to some of these in cases that there are hardware +//! instructions. Providing an implementation here just ensures that the faster implementation +//! is used when calling the function directly. This helps anyone who uses `libm` directly, as +//! well as improving things when these routines are called as part of other implementations. + +#[cfg(intrinsics_enabled)] +pub mod intrinsics; diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 9baa57fc8..e3e6846d3 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -302,6 +302,7 @@ pub use self::trunc::trunc; pub use self::truncf::truncf; // Private modules +mod arch; mod expo2; mod fenv; mod k_cos; From 56ca0ef3798b8a5bd4962ed0437c7f4437ecb803 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Oct 2024 02:56:22 -0500 Subject: [PATCH 0917/1459] Introduce a `select_implementation` macro Currently there is a macro called `llvm_intrinsically_optimized` that uses an intrinsic rather than the function implementation if the configuration is correct. Add a new macro `select_implementation` that is somewhat cleaner to use. In the future, we can update this macro with more fields to specify other implementations that may be selected, such as something architecture-specific or e.g. using a generic implementation for `f32` routines, rather than those that convert to `f64`. This introduces a `macros` module within `math/support`. We will be able to move more things here later. 
--- libm/src/math/mod.rs | 63 +++++++++++++++++---------------- libm/src/math/support/macros.rs | 34 ++++++++++++++++++ libm/src/math/support/mod.rs | 2 ++ 3 files changed, 69 insertions(+), 30 deletions(-) create mode 100644 libm/src/math/support/macros.rs create mode 100644 libm/src/math/support/mod.rs diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index e3e6846d3..a7e16bfc8 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -74,6 +74,7 @@ macro_rules! div { }; } +// FIXME: phase this out, to be replaced by the more flexible `select_implementation` macro_rules! llvm_intrinsically_optimized { (#[cfg($($clause:tt)*)] $e:expr) => { #[cfg(all(intrinsics_enabled, not(feature = "force-soft-floats"), $($clause)*))] @@ -85,6 +86,38 @@ macro_rules! llvm_intrinsically_optimized { }; } +// Private modules +#[macro_use] +mod support; +mod arch; +mod expo2; +mod fenv; +mod k_cos; +mod k_cosf; +mod k_expo2; +mod k_expo2f; +mod k_sin; +mod k_sinf; +mod k_tan; +mod k_tanf; +mod rem_pio2; +mod rem_pio2_large; +mod rem_pio2f; + +// Private re-imports +use self::expo2::expo2; +use self::k_cos::k_cos; +use self::k_cosf::k_cosf; +use self::k_expo2::k_expo2; +use self::k_expo2f::k_expo2f; +use self::k_sin::k_sin; +use self::k_sinf::k_sinf; +use self::k_tan::k_tan; +use self::k_tanf::k_tanf; +use self::rem_pio2::rem_pio2; +use self::rem_pio2_large::rem_pio2_large; +use self::rem_pio2f::rem_pio2f; + // Public modules mod acos; mod acosf; @@ -301,36 +334,6 @@ pub use self::tgammaf::tgammaf; pub use self::trunc::trunc; pub use self::truncf::truncf; -// Private modules -mod arch; -mod expo2; -mod fenv; -mod k_cos; -mod k_cosf; -mod k_expo2; -mod k_expo2f; -mod k_sin; -mod k_sinf; -mod k_tan; -mod k_tanf; -mod rem_pio2; -mod rem_pio2_large; -mod rem_pio2f; - -// Private re-imports -use self::expo2::expo2; -use self::k_cos::k_cos; -use self::k_cosf::k_cosf; -use self::k_expo2::k_expo2; -use self::k_expo2f::k_expo2f; -use self::k_sin::k_sin; -use self::k_sinf::k_sinf; 
-use self::k_tan::k_tan; -use self::k_tanf::k_tanf; -use self::rem_pio2::rem_pio2; -use self::rem_pio2_large::rem_pio2_large; -use self::rem_pio2f::rem_pio2f; - #[inline] fn get_high_word(x: f64) -> u32 { (x.to_bits() >> 32) as u32 diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs new file mode 100644 index 000000000..6bc75837a --- /dev/null +++ b/libm/src/math/support/macros.rs @@ -0,0 +1,34 @@ +/// Choose among using an intrinsic (if available) and falling back to the default function body. +/// Returns directly if the intrinsic version is used, otherwise continues to the rest of the +/// function. +/// +/// Use this if the intrinsic is likely to be more performant on the platform(s) specified +/// in `use_intrinsic`. +/// +/// The `cfg` used here is controlled by `build.rs` so the passed meta does not need to account +/// for e.g. the `unstable-intrinsics` or `force-soft-floats` features. +macro_rules! select_implementation { + ( + name: $fname:ident, + // Configuration meta for when to call intrinsics and let LLVM figure it out + $( use_intrinsic: $use_intrinsic:meta, )? + args: $($arg:ident),+ , + ) => { + // FIXME: these use paths that are pretty fragile (`super`). We should figure out + // something better w.r.t. how this is vendored into compiler-builtins. + + // Never use intrinsics if we are forcing soft floats, and only enable with the + // `unstable-intrinsics` feature. + #[cfg(intrinsics_enabled)] + select_implementation!
{ + @cfg $( $use_intrinsic )?; + if true { + return super::arch::intrinsics::$fname( $($arg),+ ); + } + } + }; + + // Coalesce helper to construct an expression only if a config is provided + (@cfg ; $ex:expr) => { }; + (@cfg $provided:meta; $ex:expr) => { #[cfg($provided)] $ex }; +} diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs new file mode 100644 index 000000000..10532f0d1 --- /dev/null +++ b/libm/src/math/support/mod.rs @@ -0,0 +1,2 @@ +#[macro_use] +pub mod macros; From cf291fd7bcc8bcafece7e2eb3ed009240c453776 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 19:38:19 -0500 Subject: [PATCH 0918/1459] Make use of `select_implementation` Replace all uses of `llvm_intrinsically_optimized` with `select_implementation`. --- libm/src/math/ceil.rs | 12 +++++------- libm/src/math/ceilf.rs | 12 +++++------- libm/src/math/fabs.rs | 12 +++++------- libm/src/math/fabsf.rs | 12 +++++------- libm/src/math/floor.rs | 12 +++++------- libm/src/math/floorf.rs | 12 +++++------- libm/src/math/mod.rs | 12 ------------ libm/src/math/sqrt.rs | 16 +++++----------- libm/src/math/sqrtf.rs | 16 +++++----------- libm/src/math/trunc.rs | 12 +++++------- libm/src/math/truncf.rs | 12 +++++------- 11 files changed, 50 insertions(+), 90 deletions(-) diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index 1593fdaff..0da01b4d0 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -8,14 +8,12 @@ const TOINT: f64 = 1. / f64::EPSILON; /// Finds the nearest integer greater than or equal to `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ceil(x: f64) -> f64 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f64.ceil` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::ceilf64(x) } - } + select_implementation!
{ + name: ceil, + use_intrinsic: target_arch = "wasm32", + args: x, } + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] { //use an alternative implementation on x86, because the diff --git a/libm/src/math/ceilf.rs b/libm/src/math/ceilf.rs index bf9ba1227..0da384350 100644 --- a/libm/src/math/ceilf.rs +++ b/libm/src/math/ceilf.rs @@ -5,14 +5,12 @@ use core::f32; /// Finds the nearest integer greater than or equal to `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ceilf(x: f32) -> f32 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f32.ceil` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::ceilf32(x) } - } + select_implementation! { + name: ceilf, + use_intrinsic: target_arch = "wasm32", + args: x, } + let mut ui = x.to_bits(); let e = (((ui >> 23) & 0xff).wrapping_sub(0x7f)) as i32; diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs index 3b0628aa6..8d3ea2fd6 100644 --- a/libm/src/math/fabs.rs +++ b/libm/src/math/fabs.rs @@ -5,14 +5,12 @@ use core::u64; /// by direct manipulation of the bit representation of `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fabs(x: f64) -> f64 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f64.abs` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::fabsf64(x) } - } + select_implementation! 
{ + name: fabs, + use_intrinsic: target_arch = "wasm32", + args: x, } + f64::from_bits(x.to_bits() & (u64::MAX / 2)) } diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs index f81c8ca44..1dac6389d 100644 --- a/libm/src/math/fabsf.rs +++ b/libm/src/math/fabsf.rs @@ -3,14 +3,12 @@ /// by direct manipulation of the bit representation of `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fabsf(x: f32) -> f32 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f32.abs` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::fabsf32(x) } - } + select_implementation! { + name: fabsf, + use_intrinsic: target_arch = "wasm32", + args: x, } + f32::from_bits(x.to_bits() & 0x7fffffff) } diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index e8fb21e58..2b9955eba 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -8,14 +8,12 @@ const TOINT: f64 = 1. / f64::EPSILON; /// Finds the nearest integer less than or equal to `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn floor(x: f64) -> f64 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f64.floor` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::floorf64(x) } - } + select_implementation! { + name: floor, + use_intrinsic: target_arch = "wasm32", + args: x, } + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] { //use an alternative implementation on x86, because the diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs index f66cab74f..4f38cb15b 100644 --- a/libm/src/math/floorf.rs +++ b/libm/src/math/floorf.rs @@ -5,14 +5,12 @@ use core::f32; /// Finds the nearest integer less than or equal to `x`. 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn floorf(x: f32) -> f32 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f32.floor` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::floorf32(x) } - } + select_implementation! { + name: floorf, + use_intrinsic: target_arch = "wasm32", + args: x, } + let mut ui = x.to_bits(); let e = (((ui >> 23) as i32) & 0xff) - 0x7f; diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index a7e16bfc8..393bc5150 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -74,18 +74,6 @@ macro_rules! div { }; } -// FIXME: phase this out, to be replaced by the more flexible `select_implementation` -macro_rules! llvm_intrinsically_optimized { - (#[cfg($($clause:tt)*)] $e:expr) => { - #[cfg(all(intrinsics_enabled, not(feature = "force-soft-floats"), $($clause)*))] - { - if true { // thwart the dead code lint - $e - } - } - }; -} - // Private modules #[macro_use] mod support; diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index e2907384d..2e856100f 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -81,18 +81,12 @@ use core::f64; /// The square root of `x` (f64). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrt(x: f64) -> f64 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f64.sqrt` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return if x < 0.0 { - f64::NAN - } else { - unsafe { ::core::intrinsics::sqrtf64(x) } - } - } + select_implementation! 
{ + name: sqrt, + use_intrinsic: target_arch = "wasm32", + args: x, } + #[cfg(all(target_feature = "sse2", not(feature = "force-soft-floats")))] { // Note: This path is unlikely since LLVM will usually have already diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index a738fc0b6..b2996b350 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -16,18 +16,12 @@ /// The square root of `x` (f32). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrtf(x: f32) -> f32 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f32.sqrt` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return if x < 0.0 { - ::core::f32::NAN - } else { - unsafe { ::core::intrinsics::sqrtf32(x) } - } - } + select_implementation! { + name: sqrtf, + use_intrinsic: target_arch = "wasm32", + args: x, } + #[cfg(all(target_feature = "sse", not(feature = "force-soft-floats")))] { // Note: This path is unlikely since LLVM will usually have already diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs index f7892a2c5..6961bb950 100644 --- a/libm/src/math/trunc.rs +++ b/libm/src/math/trunc.rs @@ -2,14 +2,12 @@ use core::f64; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn trunc(x: f64) -> f64 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f64.trunc` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::truncf64(x) } - } + select_implementation! 
{ + name: trunc, + use_intrinsic: target_arch = "wasm32", + args: x, } + let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 let mut i: u64 = x.to_bits(); diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs index 20d5b73bd..8270c8eb3 100644 --- a/libm/src/math/truncf.rs +++ b/libm/src/math/truncf.rs @@ -2,14 +2,12 @@ use core::f32; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn truncf(x: f32) -> f32 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f32.trunc` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::truncf32(x) } - } + select_implementation! { + name: truncf, + use_intrinsic: target_arch = "wasm32", + args: x, } + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 let mut i: u32 = x.to_bits(); From 89f0324c5f4b14f490b5190884f1d6f02aeab3fd Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 20:45:12 -0500 Subject: [PATCH 0919/1459] Combine the WASM CI job with the others There isn't any reason to be distinct here, and it would be better to test with all feature configurations in run.sh anyway. 
--- libm/.github/workflows/main.yml | 23 +++++++---------------- libm/ci/run.sh | 2 +- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 15eba6e89..cc0d23ffc 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -9,7 +9,7 @@ env: jobs: test: - name: Docker + name: Build and test timeout-minutes: 20 strategy: fail-fast: false @@ -52,6 +52,9 @@ jobs: os: ubuntu-latest - target: x86_64-apple-darwin os: macos-13 + - target: wasm32-unknown-unknown + os: ubuntu-latest + build_only: 1 - target: i686-pc-windows-msvc os: windows-latest - target: x86_64-pc-windows-msvc @@ -89,30 +92,19 @@ jobs: # Non-linux tests just use our raw script - name: Run locally - if: matrix.os != 'ubuntu-latest' + if: matrix.os != 'ubuntu-latest' || contains(matrix.target, 'wasm') shell: bash run: ./ci/run.sh ${{ matrix.target }} # Otherwise we use our docker containers to run builds - name: Run in Docker - if: matrix.os == 'ubuntu-latest' + if: matrix.os == 'ubuntu-latest' && !contains(matrix.target, 'wasm') run: | rustup target add x86_64-unknown-linux-musl cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} - wasm: - name: WebAssembly - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@master - - name: Install Rust - run: rustup update nightly --no-self-update && rustup default nightly - - run: rustup target add wasm32-unknown-unknown - - uses: Swatinem/rust-cache@v2 - - run: cargo build --target wasm32-unknown-unknown - builtins: - name: "The compiler-builtins crate works" + name: Check use with compiler-builtins runs-on: ubuntu-latest steps: - uses: actions/checkout@master @@ -164,7 +156,6 @@ jobs: success: needs: - test - - wasm - builtins - benchmarks - msrv diff --git a/libm/ci/run.sh b/libm/ci/run.sh index d3fc4ce24..30265e513 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -55,7 +55,7 @@ if [ "${BUILD_ONLY:-}" = "1" ]; then $cmd $cmd --features 
"unstable-intrinsics" - echo "can't run tests on $target" + echo "can't run tests on $target; skipping" else cmd="cargo test --all --target $target $extra_flags" From a1a066611dc26d1d42290472a9d989848fb49731 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 21 Oct 2024 17:38:57 -0500 Subject: [PATCH 0920/1459] Create interfaces for testing against MPFR Add a way to call MPFR versions of functions in a predictable way, using the `MpOp` trait. Everything new here is guarded by the feature `test-multiprecision` since MPFR cannot easily build on Windows or any cross compiled targets. --- libm/crates/libm-test/Cargo.toml | 3 + libm/crates/libm-test/src/lib.rs | 2 + libm/crates/libm-test/src/mpfloat.rs | 389 +++++++++++++++++++++++++++ 3 files changed, 394 insertions(+) create mode 100644 libm/crates/libm-test/src/mpfloat.rs diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 703524bcd..72ac57232 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -10,18 +10,21 @@ default = [] # Generate tests which are random inputs and the outputs are calculated with # musl libc. test-musl-serialized = ["rand"] +test-multiprecision = ["dep:az", "dep:rug"] # Build our own musl for testing and benchmarks build-musl = ["dep:musl-math-sys"] [dependencies] anyhow = "1.0.90" +az = { version = "1.2.1", optional = true } libm = { path = "../.." 
} libm-macros = { path = "../libm-macros" } musl-math-sys = { path = "../musl-math-sys", optional = true } paste = "1.0.15" rand = "0.8.5" rand_chacha = "0.3.1" +rug = { version = "1.26.1", optional = true, default-features = false, features = ["float", "std"] } [target.'cfg(target_family = "wasm")'.dependencies] # Enable randomness on WASM diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 2abe7f605..64343e00d 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -1,4 +1,6 @@ pub mod gen; +#[cfg(feature = "test-multiprecision")] +pub mod mpfloat; mod num_traits; mod special_case; mod test_traits; diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs new file mode 100644 index 000000000..db072780a --- /dev/null +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -0,0 +1,389 @@ +//! Interfaces needed to support testing with multi-precision floating point numbers. +//! +//! Within this module, the macros create a submodule for each `libm` function. These contain +//! a struct named `Operation` that implements [`MpOp`]. + +use std::cmp::Ordering; + +use az::Az; +use rug::Assign; +pub use rug::Float as MpFloat; +use rug::float::Round::Nearest; +use rug::ops::{PowAssignRound, RemAssignRound}; + +use crate::Float; + +/// Create a multiple-precision float with the correct number of bits for a concrete float type. +fn new_mpfloat() -> MpFloat { + MpFloat::new(F::SIGNIFICAND_BITS + 1) +} + +/// Set subnormal emulation and convert to a concrete float type. +fn prep_retval(mp: &mut MpFloat, ord: Ordering) -> F +where + for<'a> &'a MpFloat: az::Cast, +{ + mp.subnormalize_ieee_round(ord, Nearest); + (&*mp).az::() +} + +/// Structures that represent a float operation. +/// +/// The struct itself should hold any context that can be reused among calls to `run` (allocated +/// `MpFloat`s). +pub trait MpOp { + /// Inputs to the operation (concrete float types). 
+ type Input; + + /// Outputs from the operation (concrete float types). + type Output; + + /// Create a new instance. + fn new() -> Self; + + /// Perform the operation. + /// + /// Usually this means assigning inputs to cached floats, performing the operation, applying + /// subnormal approximation, and converting the result back to concrete values. + fn run(&mut self, input: Self::Input) -> Self::Output; +} + +/// Implement `MpOp` for functions with a single return value. +macro_rules! impl_mp_op { + // Matcher for unary functions + ( + fn_name: $fn_name:ident, + CFn: $CFn:ty, + CArgs: $CArgs:ty, + CRet: $CRet:ty, + RustFn: fn($fty:ty,) -> $_ret:ty, + RustArgs: $RustArgs:ty, + RustRet: $RustRet:ty, + fn_extra: $fn_name_normalized:expr, + ) => { + paste::paste! { + pub mod $fn_name { + use super::*; + pub struct Operation(MpFloat); + + impl MpOp for Operation { + type Input = $RustArgs; + type Output = $RustRet; + + fn new() -> Self { + Self(new_mpfloat::<$fty>()) + } + + fn run(&mut self, input: Self::Input) -> Self::Output { + self.0.assign(input.0); + let ord = self.0.[< $fn_name_normalized _round >](Nearest); + prep_retval::(&mut self.0, ord) + } + } + } + } + }; + // Matcher for binary functions + ( + fn_name: $fn_name:ident, + CFn: $CFn:ty, + CArgs: $CArgs:ty, + CRet: $CRet:ty, + RustFn: fn($fty:ty, $_fty2:ty,) -> $_ret:ty, + RustArgs: $RustArgs:ty, + RustRet: $RustRet:ty, + fn_extra: $fn_name_normalized:expr, + ) => { + paste::paste! 
{ + pub mod $fn_name { + use super::*; + pub struct Operation(MpFloat, MpFloat); + + impl MpOp for Operation { + type Input = $RustArgs; + type Output = $RustRet; + + fn new() -> Self { + Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>()) + } + + fn run(&mut self, input: Self::Input) -> Self::Output { + self.0.assign(input.0); + self.1.assign(input.1); + let ord = self.0.[< $fn_name_normalized _round >](&self.1, Nearest); + prep_retval::(&mut self.0, ord) + } + } + } + } + }; + // Matcher for ternary functions + ( + fn_name: $fn_name:ident, + CFn: $CFn:ty, + CArgs: $CArgs:ty, + CRet: $CRet:ty, + RustFn: fn($fty:ty, $_fty2:ty, $_fty3:ty,) -> $_ret:ty, + RustArgs: $RustArgs:ty, + RustRet: $RustRet:ty, + fn_extra: $fn_name_normalized:expr, + ) => { + paste::paste! { + pub mod $fn_name { + use super::*; + pub struct Operation(MpFloat, MpFloat, MpFloat); + + impl MpOp for Operation { + type Input = $RustArgs; + type Output = $RustRet; + + fn new() -> Self { + Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>(), new_mpfloat::<$fty>()) + } + + fn run(&mut self, input: Self::Input) -> Self::Output { + self.0.assign(input.0); + self.1.assign(input.1); + self.2.assign(input.2); + let ord = self.0.[< $fn_name_normalized _round >](&self.1, &self.2, Nearest); + prep_retval::(&mut self.0, ord) + } + } + } + } + }; +} + +libm_macros::for_each_function! 
{ + callback: impl_mp_op, + skip: [ + // Most of these need a manual implementation + fabs, ceil, copysign, floor, rint, round, trunc, + fabsf, ceilf, copysignf, floorf, rintf, roundf, truncf, + fmod, fmodf, frexp, frexpf, ilogb, ilogbf, jn, jnf, ldexp, ldexpf, + lgamma_r, lgammaf_r, modf, modff, nextafter, nextafterf, pow,powf, + remquo, remquof, scalbn, scalbnf, sincos, sincosf, + ], + fn_extra: match MACRO_FN_NAME { + // Remap function names that are different between mpfr and libm + expm1 | expm1f => exp_m1, + fabs | fabsf => abs, + fdim | fdimf => positive_diff, + fma | fmaf => mul_add, + fmax | fmaxf => max, + fmin | fminf => min, + lgamma | lgammaf => ln_gamma, + log | logf => ln, + log1p | log1pf => ln_1p, + tgamma | tgammaf => gamma, + _ => MACRO_FN_NAME_NORMALIZED + } +} + +/// Implement unary functions that don't have a `_round` version +macro_rules! impl_no_round { + // Unary matcher + ($($fn_name:ident, $rug_name:ident;)*) => { + paste::paste! { + // Implement for both f32 and f64 + $( impl_no_round!{ @inner_unary [< $fn_name f >], (f32,), $rug_name } )* + $( impl_no_round!{ @inner_unary $fn_name, (f64,), $rug_name } )* + } + }; + + (@inner_unary $fn_name:ident, ($fty:ty,), $rug_name:ident) => { + pub mod $fn_name { + use super::*; + pub struct Operation(MpFloat); + + impl MpOp for Operation { + type Input = ($fty,); + type Output = $fty; + + fn new() -> Self { + Self(new_mpfloat::<$fty>()) + } + + fn run(&mut self, input: Self::Input) -> Self::Output { + self.0.assign(input.0); + self.0.$rug_name(); + prep_retval::(&mut self.0, Ordering::Equal) + } + } + } + }; +} + +impl_no_round! { + fabs, abs_mut; + ceil, ceil_mut; + floor, floor_mut; + rint, round_even_mut; // FIXME: respect rounding mode + round, round_mut; + trunc, trunc_mut; +} + +/// Some functions are difficult to do in a generic way. Implement them here. +macro_rules! impl_op_for_ty { + ($fty:ty, $suffix:literal) => { + paste::paste! 
{ + pub mod [] { + use super::*; + pub struct Operation(MpFloat, MpFloat); + + impl MpOp for Operation { + type Input = ($fty, $fty); + type Output = $fty; + + fn new() -> Self { + Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>()) + } + + fn run(&mut self, input: Self::Input) -> Self::Output { + self.0.assign(input.0); + self.1.assign(input.1); + self.0.copysign_mut(&self.1); + prep_retval::(&mut self.0, Ordering::Equal) + } + } + } + + pub mod [] { + use super::*; + pub struct Operation(MpFloat, MpFloat); + + impl MpOp for Operation { + type Input = ($fty, $fty); + type Output = $fty; + + fn new() -> Self { + Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>()) + } + + fn run(&mut self, input: Self::Input) -> Self::Output { + self.0.assign(input.0); + self.1.assign(input.1); + self.0.next_toward(&self.1); + prep_retval::(&mut self.0, Ordering::Equal) + } + } + } + + pub mod [] { + use super::*; + pub struct Operation(MpFloat, MpFloat); + + impl MpOp for Operation { + type Input = ($fty, $fty); + type Output = $fty; + + fn new() -> Self { + Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>()) + } + + fn run(&mut self, input: Self::Input) -> Self::Output { + self.0.assign(input.0); + self.1.assign(input.1); + let ord = self.0.pow_assign_round(&self.1, Nearest); + prep_retval::(&mut self.0, ord) + } + } + } + + pub mod [] { + use super::*; + pub struct Operation(MpFloat, MpFloat); + + impl MpOp for Operation { + type Input = ($fty, $fty); + type Output = $fty; + + fn new() -> Self { + Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>()) + } + + fn run(&mut self, input: Self::Input) -> Self::Output { + self.0.assign(input.0); + self.1.assign(input.1); + let ord = self.0.rem_assign_round(&self.1, Nearest); + prep_retval::(&mut self.0, ord) + } + } + } + + pub mod [] { + use super::*; + pub struct Operation(MpFloat); + + impl MpOp for Operation { + type Input = ($fty,); + type Output = ($fty, i32); + + fn new() -> Self { + Self(new_mpfloat::<$fty>()) + } + + fn run(&mut 
self, input: Self::Input) -> Self::Output { + self.0.assign(input.0); + let (sign, ord) = self.0.ln_abs_gamma_round(Nearest); + let ret = prep_retval::<$fty>(&mut self.0, ord); + (ret, sign as i32) + } + } + } + + pub mod [] { + use super::*; + pub struct Operation(i32, MpFloat); + + impl MpOp for Operation { + type Input = (i32, $fty); + type Output = $fty; + + fn new() -> Self { + Self(0, new_mpfloat::<$fty>()) + } + + fn run(&mut self, input: Self::Input) -> Self::Output { + self.0 = input.0; + self.1.assign(input.1); + let ord = self.1.jn_round(self.0, Nearest); + prep_retval::<$fty>(&mut self.1, ord) + } + } + } + + pub mod [] { + use super::*; + pub struct Operation(MpFloat, MpFloat); + + impl MpOp for Operation { + type Input = ($fty,); + type Output = ($fty, $fty); + + fn new() -> Self { + Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>()) + } + + fn run(&mut self, input: Self::Input) -> Self::Output { + self.0.assign(input.0); + self.1.assign(0.0); + let (sord, cord) = self.0.sin_cos_round(&mut self.1, Nearest); + ( + prep_retval::<$fty>(&mut self.0, sord), + prep_retval::<$fty>(&mut self.1, cord) + ) + } + } + } + } + }; +} + +impl_op_for_ty!(f32, "f"); +impl_op_for_ty!(f64, ""); + +// Account for `lgamma_r` not having a simple `f` suffix +pub mod lgammaf_r { + pub use super::lgamma_rf::*; +} From 83bdb01b280b74d6da5d0601d3f05879da9a42e5 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 21 Oct 2024 17:41:24 -0500 Subject: [PATCH 0921/1459] Add a test against MPFR using random inputs --- libm/crates/libm-test/src/gen/random.rs | 16 +- libm/crates/libm-test/src/lib.rs | 29 +++- libm/crates/libm-test/src/mpfloat.rs | 21 --- libm/crates/libm-test/src/special_case.rs | 157 +++++++++++++----- libm/crates/libm-test/src/test_traits.rs | 2 + .../libm-test/tests/compare_built_musl.rs | 2 +- libm/crates/libm-test/tests/multiprecision.rs | 71 ++++++++ 7 files changed, 228 insertions(+), 70 deletions(-) create mode 100644 
libm/crates/libm-test/tests/multiprecision.rs diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs index 601ef4f1d..c73937aac 100644 --- a/libm/crates/libm-test/src/gen/random.rs +++ b/libm/crates/libm-test/src/gen/random.rs @@ -7,7 +7,7 @@ use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; use super::CachedInput; -use crate::GenerateInput; +use crate::{CheckCtx, GenerateInput}; const SEED: [u8; 32] = *b"3.141592653589793238462643383279"; @@ -40,9 +40,10 @@ static TEST_CASES_JN: LazyLock = LazyLock::new(|| { let mut cases = (&*TEST_CASES).clone(); // These functions are extremely slow, limit them - cases.inputs_i32.truncate((NTESTS / 1000).max(80)); - cases.inputs_f32.truncate((NTESTS / 1000).max(80)); - cases.inputs_f64.truncate((NTESTS / 1000).max(80)); + let ntests_jn = (NTESTS / 1000).max(80); + cases.inputs_i32.truncate(ntests_jn); + cases.inputs_f32.truncate(ntests_jn); + cases.inputs_f64.truncate(ntests_jn); // It is easy to overflow the stack with these in debug mode let max_iterations = if cfg!(optimizations_enabled) && cfg!(target_pointer_width = "64") { @@ -105,11 +106,10 @@ fn make_test_cases(ntests: usize) -> CachedInput { } /// Create a test case iterator. 
-pub fn get_test_cases(fname: &str) -> impl Iterator +pub fn get_test_cases(ctx: &CheckCtx) -> impl Iterator where CachedInput: GenerateInput, { - let inputs = if fname == "jn" || fname == "jnf" { &TEST_CASES_JN } else { &TEST_CASES }; - - CachedInput::get_cases(inputs) + let inputs = if ctx.fname == "jn" || ctx.fname == "jnf" { &TEST_CASES_JN } else { &TEST_CASES }; + inputs.get_cases() } diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 64343e00d..13b76d6c5 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -16,14 +16,18 @@ pub type TestResult = Result; // List of all files present in libm's source include!(concat!(env!("OUT_DIR"), "/all_files.rs")); -/// ULP allowed to differ from musl (note that musl itself may not be accurate). +/// Default ULP allowed to differ from musl (note that musl itself may not be accurate). const MUSL_DEFAULT_ULP: u32 = 2; -/// Certain functions have different allowed ULP (consider these xfail). +/// Default ULP allowed to differ from multiprecision (i.e. infinite) results. +const MULTIPREC_DEFAULT_ULP: u32 = 1; + +/// ULP allowed to differ from muls results. /// /// Note that these results were obtained using 400,000,000 rounds of random inputs, which /// is not a value used by default. pub fn musl_allowed_ulp(name: &str) -> u32 { + // Consider overrides xfail match name { #[cfg(x86_no_sse)] "asinh" | "asinhf" => 6, @@ -44,6 +48,27 @@ pub fn musl_allowed_ulp(name: &str) -> u32 { } } +/// ULP allowed to differ from multiprecision results. 
+pub fn multiprec_allowed_ulp(name: &str) -> u32 { + // Consider overrides xfail + match name { + "asinh" | "asinhf" => 2, + "acoshf" => 4, + "atanh" | "atanhf" => 2, + "exp10" | "exp10f" => 3, + "j0" | "j0f" | "j1" | "j1f" => { + // Results seem very target-dependent + if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 } + } + "jn" | "jnf" => 1000, + "lgamma" | "lgammaf" | "lgamma_r" | "lgammaf_r" => 16, + "sinh" | "sinhf" => 2, + "tanh" | "tanhf" => 2, + "tgamma" => 20, + _ => MULTIPREC_DEFAULT_ULP, + } +} + /// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`, /// `lgamma_r` and `lgammaf_r` both return `lgamma_r`. pub fn canonical_name(name: &str) -> &str { diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index db072780a..44962d116 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -248,27 +248,6 @@ macro_rules! impl_op_for_ty { } } - pub mod [] { - use super::*; - pub struct Operation(MpFloat, MpFloat); - - impl MpOp for Operation { - type Input = ($fty, $fty); - type Output = $fty; - - fn new() -> Self { - Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>()) - } - - fn run(&mut self, input: Self::Input) -> Self::Output { - self.0.assign(input.0); - self.1.assign(input.1); - self.0.next_toward(&self.1); - prep_retval::(&mut self.0, Ordering::Equal) - } - } - } - pub mod [] { use super::*; pub struct Operation(MpFloat, MpFloat); diff --git a/libm/crates/libm-test/src/special_case.rs b/libm/crates/libm-test/src/special_case.rs index df263d742..dac7a349d 100644 --- a/libm/crates/libm-test/src/special_case.rs +++ b/libm/crates/libm-test/src/special_case.rs @@ -58,20 +58,6 @@ impl MaybeOverride<(f32,)> for SpecialCase { ctx: &CheckCtx, ) -> Option { if ctx.basis == CheckBasis::Musl { - if ctx.fname == "acoshf" && input.0 < -1.0 { - // acoshf is undefined for x <= 1.0, but we return a random result at lower - // values. 
- return XFAIL; - } - - if ctx.fname == "sincosf" { - let factor_frac_pi_2 = input.0.abs() / f32::consts::FRAC_PI_2; - if (factor_frac_pi_2 - factor_frac_pi_2.round()).abs() < 1e-2 { - // we have a bad approximation near multiples of pi/2 - return XFAIL; - } - } - if ctx.fname == "expm1f" && input.0 > 80.0 && actual.is_infinite() { // we return infinity but the number is representable return XFAIL; @@ -82,15 +68,40 @@ impl MaybeOverride<(f32,)> for SpecialCase { // doesn't seem to happen on x86 return XFAIL; } + } - if ctx.fname == "lgammaf" || ctx.fname == "lgammaf_r" && input.0 < 0.0 { - // loggamma should not be defined for x < 0, yet we both return results - return XFAIL; - } + if ctx.fname == "acoshf" && input.0 < -1.0 { + // acoshf is undefined for x <= 1.0, but we return a random result at lower + // values. + return XFAIL; + } + + if ctx.fname == "lgammaf" || ctx.fname == "lgammaf_r" && input.0 < 0.0 { + // loggamma should not be defined for x < 0, yet we both return results + return XFAIL; } maybe_check_nan_bits(actual, expected, ctx) } + + fn check_int( + input: (f32,), + actual: I, + expected: I, + ctx: &CheckCtx, + ) -> Option> { + // On MPFR for lgammaf_r, we set -1 as the integer result for negative infinity but MPFR + // sets +1 + if ctx.basis == CheckBasis::Mpfr + && ctx.fname == "lgammaf_r" + && input.0 == f32::NEG_INFINITY + && actual.abs() == expected.abs() + { + XFAIL + } else { + None + } + } } impl MaybeOverride<(f64,)> for SpecialCase { @@ -117,15 +128,40 @@ impl MaybeOverride<(f64,)> for SpecialCase { // musl returns -0.0, we return +0.0 return XFAIL; } + } - if ctx.fname == "lgamma" || ctx.fname == "lgamma_r" && input.0 < 0.0 { - // loggamma should not be defined for x < 0, yet we both return results - return XFAIL; - } + if ctx.fname == "acosh" && input.0 < 1.0 { + // The function is undefined for the inputs, musl and our libm both return + // random results. 
+ return XFAIL; + } + + if ctx.fname == "lgamma" || ctx.fname == "lgamma_r" && input.0 < 0.0 { + // loggamma should not be defined for x < 0, yet we both return results + return XFAIL; } maybe_check_nan_bits(actual, expected, ctx) } + + fn check_int( + input: (f64,), + actual: I, + expected: I, + ctx: &CheckCtx, + ) -> Option> { + // On MPFR for lgamma_r, we set -1 as the integer result for negative infinity but MPFR + // sets +1 + if ctx.basis == CheckBasis::Mpfr + && ctx.fname == "lgamma_r" + && input.0 == f64::NEG_INFINITY + && actual.abs() == expected.abs() + { + XFAIL + } else { + None + } + } } /// Check NaN bits if the function requires it @@ -142,6 +178,11 @@ fn maybe_check_nan_bits(actual: F, expected: F, ctx: &CheckCtx) -> Opt return SKIP; } + // MPFR only has one NaN bitpattern; allow the default `.is_nan()` checks to validate. + if ctx.basis == CheckBasis::Mpfr { + return SKIP; + } + // abs and copysign require signaling NaNs to be propagated, so verify bit equality. if actual.to_bits() == expected.to_bits() { return SKIP; @@ -158,9 +199,10 @@ impl MaybeOverride<(f32, f32)> for SpecialCase { _ulp: &mut u32, ctx: &CheckCtx, ) -> Option { - maybe_skip_min_max_nan(input, expected, ctx) + maybe_skip_binop_nan(input, expected, ctx) } } + impl MaybeOverride<(f64, f64)> for SpecialCase { fn check_float( input: (f64, f64), @@ -169,47 +211,86 @@ impl MaybeOverride<(f64, f64)> for SpecialCase { _ulp: &mut u32, ctx: &CheckCtx, ) -> Option { - maybe_skip_min_max_nan(input, expected, ctx) + maybe_skip_binop_nan(input, expected, ctx) } } /// Musl propagates NaNs if one is provided as the input, but we return the other input. 
// F1 and F2 are always the same type, this is just to please generics -fn maybe_skip_min_max_nan( +fn maybe_skip_binop_nan( input: (F1, F1), expected: F2, ctx: &CheckCtx, ) -> Option { - if (ctx.canonical_name == "fmax" || ctx.canonical_name == "fmin") - && (input.0.is_nan() || input.1.is_nan()) - && expected.is_nan() - { - return XFAIL; - } else { - None + match ctx.basis { + CheckBasis::Musl => { + if (ctx.canonical_name == "fmax" || ctx.canonical_name == "fmin") + && (input.0.is_nan() || input.1.is_nan()) + && expected.is_nan() + { + XFAIL + } else { + None + } + } + CheckBasis::Mpfr => { + if ctx.canonical_name == "copysign" && input.1.is_nan() { + SKIP + } else { + None + } + } } } impl MaybeOverride<(i32, f32)> for SpecialCase { fn check_float( input: (i32, f32), - _actual: F, - _expected: F, + actual: F, + expected: F, ulp: &mut u32, ctx: &CheckCtx, ) -> Option { - bessel_prec_dropoff(input, ulp, ctx) + match ctx.basis { + CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx), + CheckBasis::Mpfr => { + // We return +0.0, MPFR returns -0.0 + if ctx.fname == "jnf" + && input.1 == f32::NEG_INFINITY + && actual == F::ZERO + && expected == F::ZERO + { + XFAIL + } else { + None + } + } + } } } impl MaybeOverride<(i32, f64)> for SpecialCase { fn check_float( input: (i32, f64), - _actual: F, - _expected: F, + actual: F, + expected: F, ulp: &mut u32, ctx: &CheckCtx, ) -> Option { - bessel_prec_dropoff(input, ulp, ctx) + match ctx.basis { + CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx), + CheckBasis::Mpfr => { + // We return +0.0, MPFR returns -0.0 + if ctx.fname == "jn" + && input.1 == f64::NEG_INFINITY + && actual == F::ZERO + && expected == F::ZERO + { + XFAIL + } else { + bessel_prec_dropoff(input, ulp, ctx) + } + } + } } } diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index c24ac6e43..deb837887 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ 
-52,6 +52,8 @@ impl CheckCtx { pub enum CheckBasis { /// Check against Musl's math sources. Musl, + /// Check against infinite precision (MPFR). + Mpfr, } /// A trait to implement on any output type so we can verify it in a generic way. diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 208b8e286..5a118f7c2 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -29,8 +29,8 @@ macro_rules! musl_rand_tests { fn [< musl_random_ $fn_name >]() { let fname = stringify!($fn_name); let ulp = musl_allowed_ulp(fname); - let cases = random::get_test_cases::<$RustArgs>(fname); let ctx = CheckCtx::new(ulp, fname, CheckBasis::Musl); + let cases = random::get_test_cases::<$RustArgs>(&ctx); for input in cases { let musl_res = input.call(musl::$fn_name as $CFn); diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs new file mode 100644 index 000000000..f8d94a160 --- /dev/null +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -0,0 +1,71 @@ +//! Test with "infinite precision" + +#![cfg(feature = "test-multiprecision")] + +use libm_test::gen::random; +use libm_test::mpfloat::{self, MpOp}; +use libm_test::{CheckBasis, CheckCtx, CheckOutput, TupleCall, multiprec_allowed_ulp}; + +/// Implement a test against MPFR with random inputs. +macro_rules! multiprec_rand_tests { + ( + fn_name: $fn_name:ident, + CFn: $CFn:ty, + CArgs: $CArgs:ty, + CRet: $CRet:ty, + RustFn: $RustFn:ty, + RustArgs: $RustArgs:ty, + RustRet: $RustRet:ty, + attrs: [$($meta:meta)*] + ) => { + paste::paste! 
{ + #[test] + $(#[$meta])* + fn [< multiprec_random_ $fn_name >]() { + type MpOpTy = mpfloat::$fn_name::Operation; + + let fname = stringify!($fn_name); + let ulp = multiprec_allowed_ulp(fname); + let mut mp_vals = MpOpTy::new(); + let ctx = CheckCtx::new(ulp, fname, CheckBasis::Mpfr); + let cases = random::get_test_cases::<$RustArgs>(&ctx); + + for input in cases { + let mp_res = mp_vals.run(input); + let crate_res = input.call(libm::$fn_name as $RustFn); + + crate_res.validate(mp_res, input, &ctx).unwrap(); + } + } + } + }; +} + +libm_macros::for_each_function! { + callback: multiprec_rand_tests, + attributes: [ + // Also an assertion failure on i686: at `MPFR_ASSERTN (! mpfr_erangeflag_p ())` + #[ignore = "large values are infeasible in MPFR"] + [jn, jnf], + ], + skip: [ + // FIXME: MPFR tests needed + frexp, + frexpf, + ilogb, + ilogbf, + ldexp, + ldexpf, + modf, + modff, + remquo, + remquof, + scalbn, + scalbnf, + + // FIXME: test needed, see + // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392 + nextafter, + nextafterf, + ], +} From aed2e6736c45f76b92f8a6ca18e3a50cc79bb1a4 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 21 Oct 2024 17:41:42 -0500 Subject: [PATCH 0922/1459] Run tests against MPFR on CI where possible This effectively gives us tests against infinite-precision results on MacOS and x86+sse Linux. 
--- .../docker/aarch64-unknown-linux-gnu/Dockerfile | 2 +- libm/ci/docker/i686-unknown-linux-gnu/Dockerfile | 2 +- .../docker/x86_64-unknown-linux-gnu/Dockerfile | 2 +- libm/ci/run.sh | 16 ++++++++++++++++ 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index a7b23cb9e..7fa06b286 100644 --- a/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -3,7 +3,7 @@ FROM ubuntu:24.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ - gcc-aarch64-linux-gnu libc6-dev-arm64-cross \ + gcc-aarch64-linux-gnu m4 make libc6-dev-arm64-cross \ qemu-user-static ENV TOOLCHAIN_PREFIX=aarch64-linux-gnu- diff --git a/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile b/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile index 3b0bfc0d3..37e206a84 100644 --- a/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile @@ -2,4 +2,4 @@ FROM ubuntu:24.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ - gcc-multilib libc6-dev ca-certificates + gcc-multilib m4 make libc6-dev ca-certificates diff --git a/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index 15723ab57..c84a31c57 100644 --- a/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -2,4 +2,4 @@ FROM ubuntu:24.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ - gcc libc6-dev ca-certificates + gcc m4 make libc6-dev ca-certificates diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 30265e513..94612adc7 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -35,6 +35,22 @@ case "$target" in *) extra_flags="$extra_flags --features libm-test/build-musl" ;; esac +# Configure which targets test against MPFR +case "$target" 
in + # MSVC cannot link MPFR + *windows-msvc*) ;; + # FIXME: MinGW should be able to build MPFR, but setup in CI is nontrivial. + *windows-gnu*) ;; + # Targets that aren't cross compiled work fine + # FIXME(ci): we should be able to enable aarch64 Linux here once GHA + # support rolls out. + x86_64*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;; + # i686 works fine, i586 does not + i686*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;; + # Apple aarch64 is native + aarch64*apple*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;; +esac + # FIXME: `STATUS_DLL_NOT_FOUND` testing macros on CI. # case "$target" in From 9fe10c1c7a5423793cd8ec4836d3992069f9b5ca Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 21:44:28 -0500 Subject: [PATCH 0923/1459] Rename the `special_case` module to `precision` and move default ULP Having the default ULP in lib.rs doesn't make much sense when everything else precision-related is in special_case.rs. Rename `special_case` to `precision` and move the `*_allowed_ulp` functions there. 
--- libm/crates/libm-test/src/lib.rs | 57 +------------------ .../src/{special_case.rs => precision.rs} | 53 +++++++++++++++++ 2 files changed, 55 insertions(+), 55 deletions(-) rename libm/crates/libm-test/src/{special_case.rs => precision.rs} (84%) diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 13b76d6c5..31b95e46c 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -2,11 +2,11 @@ pub mod gen; #[cfg(feature = "test-multiprecision")] pub mod mpfloat; mod num_traits; -mod special_case; +mod precision; mod test_traits; pub use num_traits::{Float, Hex, Int}; -pub use special_case::{MaybeOverride, SpecialCase}; +pub use precision::{MaybeOverride, SpecialCase, multiprec_allowed_ulp, musl_allowed_ulp}; pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, TupleCall}; /// Result type for tests is usually from `anyhow`. Most times there is no success value to @@ -16,59 +16,6 @@ pub type TestResult = Result; // List of all files present in libm's source include!(concat!(env!("OUT_DIR"), "/all_files.rs")); -/// Default ULP allowed to differ from musl (note that musl itself may not be accurate). -const MUSL_DEFAULT_ULP: u32 = 2; - -/// Default ULP allowed to differ from multiprecision (i.e. infinite) results. -const MULTIPREC_DEFAULT_ULP: u32 = 1; - -/// ULP allowed to differ from muls results. -/// -/// Note that these results were obtained using 400,000,000 rounds of random inputs, which -/// is not a value used by default. 
-pub fn musl_allowed_ulp(name: &str) -> u32 { - // Consider overrides xfail - match name { - #[cfg(x86_no_sse)] - "asinh" | "asinhf" => 6, - "lgamma" | "lgamma_r" | "lgammaf" | "lgammaf_r" => 400, - "tanh" | "tanhf" => 4, - "tgamma" => 20, - "j0" | "j0f" | "j1" | "j1f" => { - // Results seem very target-dependent - if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 } - } - "jn" | "jnf" => 1000, - "sincosf" => 500, - #[cfg(not(target_pointer_width = "64"))] - "exp10" => 4, - #[cfg(not(target_pointer_width = "64"))] - "exp10f" => 4, - _ => MUSL_DEFAULT_ULP, - } -} - -/// ULP allowed to differ from multiprecision results. -pub fn multiprec_allowed_ulp(name: &str) -> u32 { - // Consider overrides xfail - match name { - "asinh" | "asinhf" => 2, - "acoshf" => 4, - "atanh" | "atanhf" => 2, - "exp10" | "exp10f" => 3, - "j0" | "j0f" | "j1" | "j1f" => { - // Results seem very target-dependent - if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 } - } - "jn" | "jnf" => 1000, - "lgamma" | "lgammaf" | "lgamma_r" | "lgammaf_r" => 16, - "sinh" | "sinhf" => 2, - "tanh" | "tanhf" => 2, - "tgamma" => 20, - _ => MULTIPREC_DEFAULT_ULP, - } -} - /// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`, /// `lgamma_r` and `lgammaf_r` both return `lgamma_r`. pub fn canonical_name(name: &str) -> &str { diff --git a/libm/crates/libm-test/src/special_case.rs b/libm/crates/libm-test/src/precision.rs similarity index 84% rename from libm/crates/libm-test/src/special_case.rs rename to libm/crates/libm-test/src/precision.rs index dac7a349d..e2ad638c4 100644 --- a/libm/crates/libm-test/src/special_case.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -8,6 +8,59 @@ use crate::{CheckBasis, CheckCtx, Float, Int, TestResult}; /// Type implementing [`IgnoreCase`]. pub struct SpecialCase; +/// Default ULP allowed to differ from musl (note that musl itself may not be accurate). 
+const MUSL_DEFAULT_ULP: u32 = 2; + +/// Default ULP allowed to differ from multiprecision (i.e. infinite) results. +const MULTIPREC_DEFAULT_ULP: u32 = 1; + +/// ULP allowed to differ from muls results. +/// +/// Note that these results were obtained using 400,000,000 rounds of random inputs, which +/// is not a value used by default. +pub fn musl_allowed_ulp(name: &str) -> u32 { + // Consider overrides xfail + match name { + #[cfg(x86_no_sse)] + "asinh" | "asinhf" => 6, + "lgamma" | "lgamma_r" | "lgammaf" | "lgammaf_r" => 400, + "tanh" | "tanhf" => 4, + "tgamma" => 20, + "j0" | "j0f" | "j1" | "j1f" => { + // Results seem very target-dependent + if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 } + } + "jn" | "jnf" => 1000, + "sincosf" => 500, + #[cfg(not(target_pointer_width = "64"))] + "exp10" => 4, + #[cfg(not(target_pointer_width = "64"))] + "exp10f" => 4, + _ => MUSL_DEFAULT_ULP, + } +} + +/// ULP allowed to differ from multiprecision results. +pub fn multiprec_allowed_ulp(name: &str) -> u32 { + // Consider overrides xfail + match name { + "asinh" | "asinhf" => 2, + "acoshf" => 4, + "atanh" | "atanhf" => 2, + "exp10" | "exp10f" => 3, + "j0" | "j0f" | "j1" | "j1f" => { + // Results seem very target-dependent + if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 } + } + "jn" | "jnf" => 1000, + "lgamma" | "lgammaf" | "lgamma_r" | "lgammaf_r" => 16, + "sinh" | "sinhf" => 2, + "tanh" | "tanhf" => 2, + "tgamma" => 20, + _ => MULTIPREC_DEFAULT_ULP, + } +} + /// Don't run further validation on this test case. const SKIP: Option = Some(Ok(())); From 9fb2d3280cf74ef0f95556792eab9812e1e86452 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Oct 2024 03:05:01 -0500 Subject: [PATCH 0924/1459] Update `libm-test/build.rs` to skip directories Don't try to generate tests for directories, or for files that contain `f16` or `f128` (as these types are not provided by musl's math implementations). 
(cherry picked from commit fd7ad36b70d0bbc0f0b9bc7e54d10258423fda29) --- libm/crates/libm-test/build.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/libm/crates/libm-test/build.rs b/libm/crates/libm-test/build.rs index 472dec9d3..40b3e56c0 100644 --- a/libm/crates/libm-test/build.rs +++ b/libm/crates/libm-test/build.rs @@ -156,7 +156,11 @@ mod musl_serialized_tests { return; } - let files = fs::read_dir(math_src).unwrap().map(|f| f.unwrap().path()).collect::>(); + let files = fs::read_dir(math_src) + .unwrap() + .map(|f| f.unwrap().path()) + .filter(file_needs_test) + .collect::>(); let mut math = Vec::new(); for file in files { @@ -187,6 +191,19 @@ mod musl_serialized_tests { generate_unit_tests(&math); } + /// Check whether a path within `src/math` should get tests generated. + fn file_needs_test(path: &PathBuf) -> bool { + // Skip directories + if path.is_dir() { + return false; + } + + let fname = path.file_name().unwrap().to_str().unwrap(); + + // Musl doesn't support `f16` or `f128` + !(fname.contains("f16") || fname.contains("f128")) + } + /// A "poor man's" parser for the signature of a function fn parse(s: &str) -> Function { let s = eat(s, "pub fn "); From 400dbab9009065d89a025cdd5d3bde35a6f07532 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Oct 2024 02:56:22 -0500 Subject: [PATCH 0925/1459] Vendor `cfg_if::cfg_if!` `cfg_if` is helpful for applying `cfg` attributes to groups of items, like we will need to do with architecture-specific modules of `f16` and `f128`. However, `libm` can't have dependencies. The `cfg_if` macro is complex but small, so just vendor it here. 
--- libm/src/math/support/macros.rs | 46 ++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs index 6bc75837a..39a6fe827 100644 --- a/libm/src/math/support/macros.rs +++ b/libm/src/math/support/macros.rs @@ -1,6 +1,46 @@ -/// Choose among using an intrinsic (if available) and falling back to the default function body. -/// Returns directly if the intrinsic version is used, otherwise continues to the rest of the -/// function. +/// `libm` cannot have dependencies, so this is vendored directly from the `cfg-if` crate +/// (with some comments stripped for compactness). +macro_rules! cfg_if { + // match if/else chains with a final `else` + ($( + if #[cfg($meta:meta)] { $($tokens:tt)* } + ) else * else { + $($tokens2:tt)* + }) => { + cfg_if! { @__items () ; $( ( ($meta) ($($tokens)*) ), )* ( () ($($tokens2)*) ), } + }; + + // match if/else chains lacking a final `else` + ( + if #[cfg($i_met:meta)] { $($i_tokens:tt)* } + $( else if #[cfg($e_met:meta)] { $($e_tokens:tt)* } )* + ) => { + cfg_if! { + @__items + () ; + ( ($i_met) ($($i_tokens)*) ), + $( ( ($e_met) ($($e_tokens)*) ), )* + ( () () ), + } + }; + + // Internal and recursive macro to emit all the items + // + // Collects all the negated cfgs in a list at the beginning and after the + // semicolon is all the remaining items + (@__items ($($not:meta,)*) ; ) => {}; + (@__items ($($not:meta,)*) ; ( ($($m:meta),*) ($($tokens:tt)*) ), $($rest:tt)*) => { + #[cfg(all($($m,)* not(any($($not),*))))] cfg_if! { @__identity $($tokens)* } + cfg_if! { @__items ($($not,)* $($m,)*) ; $($rest)* } + }; + + // Internal macro to make __apply work out right for different match types, + // because of how macros matching/expand stuff. + (@__identity $($tokens:tt)*) => { $($tokens)* }; +} + +/// Choose between using an intrinsic (if available) and the function body. 
Returns directly if +/// the intrinsic is used, otherwise the rest of the function body is used. /// /// Use this if the intrinsic is likely to be more performant on the platform(s) specified /// in `intrinsic_available`. From f7d6b08c9db5760dcfd5fd92ca8e1775ebe3eeb2 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 20:24:46 -0500 Subject: [PATCH 0926/1459] Add an "arch" Cargo feature that is on by default Introduce a Cargo feature to enable or disable architecture-specific features (SIMD, assembly), which is on by default. This allows for more fine grained control compared to relying on the `force-soft-floats` feature. Similar to "unstable-intrinsics", introduce a build.rs config option for `unstable-intrinsics AND NOT force-soft-floats`, which makes this easier to work with in code. Effectively, this allows moving our non-additive Cargo feature (force-soft-floats) to a positive one by default, allowing for an override when needed. --- libm/Cargo.toml | 5 ++++- libm/build.rs | 12 ++++++++++++ libm/ci/run.sh | 1 + libm/crates/compiler-builtins-smoke-test/Cargo.toml | 1 + 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 5e4565556..2e74012ea 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -14,7 +14,10 @@ exclude = ["/ci/", "/.github/workflows/"] rust-version = "1.63" [features] -default = [] +default = ["arch"] + +# Enable architecture-specific features such as SIMD or assembly routines. +arch = [] # This tells the compiler to assume that a Nightly toolchain is being used and # that it should activate any useful Nightly things accordingly. 
diff --git a/libm/build.rs b/libm/build.rs index adb521407..001029236 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -15,6 +15,7 @@ fn main() { } configure_intrinsics(); + configure_arch(); } /// Simplify the feature logic for enabling intrinsics so code only needs to use @@ -28,3 +29,14 @@ fn configure_intrinsics() { println!("cargo:rustc-cfg=intrinsics_enabled"); } } + +/// Simplify the feature logic for enabling arch-specific features so code only needs to use +/// `cfg(arch_enabled)`. +fn configure_arch() { + println!("cargo:rustc-check-cfg=cfg(arch_enabled)"); + + // Enabled by default via the "arch" feature, `force-soft-floats` overrides to disable. + if cfg!(feature = "arch") && !cfg!(feature = "force-soft-floats") { + println!("cargo:rustc-cfg=arch_enabled"); + } +} diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 94612adc7..9f642326b 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -64,6 +64,7 @@ fi # Make sure we can build with overriding features. We test the indibidual # features it controls separately. 
+cargo check --no-default-features cargo check --features "force-soft-floats" if [ "${BUILD_ONLY:-}" = "1" ]; then diff --git a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml index 2a6c62961..7118bfe06 100644 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -18,6 +18,7 @@ force-soft-floats = [] [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = [ + "cfg(arch_enabled)", "cfg(assert_no_panic)", "cfg(intrinsics_enabled)", ] } From e97afa08d34b551e0d96390649c877291a7eba37 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 20:29:55 -0500 Subject: [PATCH 0927/1459] Update `select_implementation` to accept arch configuration --- libm/src/math/support/macros.rs | 45 ++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs index 39a6fe827..f85a6122e 100644 --- a/libm/src/math/support/macros.rs +++ b/libm/src/math/support/macros.rs @@ -39,17 +39,31 @@ macro_rules! cfg_if { (@__identity $($tokens:tt)*) => { $($tokens)* }; } -/// Choose between using an intrinsic (if available) and the function body. Returns directly if -/// the intrinsic is used, otherwise the rest of the function body is used. +/// Choose among using an intrinsic, an arch-specific implementation, and the function body. +/// Returns directly if the intrinsic or arch is used, otherwise continue with the rest of the +/// function. /// -/// Use this if the intrinsic is likely to be more performant on the platform(s) specified -/// in `intrinsic_available`. +/// Specify a `use_intrinsic` meta field if the intrinsic is (1) available on the platforms (i.e. +/// LLVM lowers it without libcalls that may recurse), (2) it is likely to be more performant. +/// Intrinsics require wrappers in the `math::arch::intrinsics` module. 
/// -/// The `cfg` used here is controlled by `build.rs` so the passed meta does not need to account -/// for e.g. the `unstable-intrinsics` or `force-soft-float` features. +/// Specify a `use_arch` meta field if an architecture-specific implementation is provided. +/// These live in the `math::arch::some_target_arch` module. +/// +/// Specify a `use_arch_required` meta field if something architecture-specific must be used +/// regardless of feature configuration (`force-soft-floats`). +/// +/// The passed meta options do not need to account for relevant Cargo features +/// (`unstable-intrinsics`, `arch`, `force-soft-floats`), this macro handles that part. macro_rules! select_implementation { ( name: $fname:ident, + // Configuration meta for when to use arch-specific implementation that requires hard + // float ops + $( use_arch: $use_arch:meta, )? + // Configuration meta for when to use the arch module regardless of whether softfloats + // have been requested. + $( use_arch_required: $use_arch_required:meta, )? // Configuration meta for when to call intrinsics and let LLVM figure it out $( use_intrinsic: $use_intrinsic:meta, )? args: $($arg:ident),+ , @@ -57,6 +71,25 @@ macro_rules! select_implementation { // FIXME: these use paths that are a pretty fragile (`super`). We should figure out // something better w.r.t. how this is vendored into compiler-builtins. + // However, we do need a few things from `arch` that are used even with soft floats. + // + select_implementation! { + @cfg $($use_arch_required)?; + if true { + return super::arch::$fname( $($arg),+ ); + } + } + + // By default, never use arch-specific implementations if we have force-soft-floats + #[cfg(arch_enabled)] + select_implementation! { + @cfg $($use_arch)?; + // Wrap in `if true` to avoid unused warnings + if true { + return super::arch::$fname( $($arg),+ ); + } + } + // Never use intrinsics if we are forcing soft floats, and only enable with the // `unstable-intrinsics` feature. 
#[cfg(intrinsics_enabled)] From b2d3330554d84e29ffed52cbba9639705af8b11f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 20:31:48 -0500 Subject: [PATCH 0928/1459] Move architecture-specific code to `src/math/arch` Move the code and call into its new location with `select_implementation`. --- libm/src/math/arch/i586.rs | 37 ++++++ libm/src/math/arch/i686.rs | 24 ++++ libm/src/math/arch/mod.rs | 19 +++ libm/src/math/ceil.rs | 19 +-- libm/src/math/floor.rs | 19 +-- libm/src/math/sqrt.rs | 233 +++++++++++++++++-------------------- libm/src/math/sqrtf.rs | 159 +++++++++++-------------- 7 files changed, 261 insertions(+), 249 deletions(-) create mode 100644 libm/src/math/arch/i586.rs create mode 100644 libm/src/math/arch/i686.rs diff --git a/libm/src/math/arch/i586.rs b/libm/src/math/arch/i586.rs new file mode 100644 index 000000000..f92b9a2af --- /dev/null +++ b/libm/src/math/arch/i586.rs @@ -0,0 +1,37 @@ +//! Architecture-specific support for x86-32 without SSE2 + +use super::super::fabs; + +/// Use an alternative implementation on x86, because the +/// main implementation fails with the x87 FPU used by +/// debian i386, probably due to excess precision issues. +/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. +pub fn ceil(x: f64) -> f64 { + if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { + let truncated = x as i64 as f64; + if truncated < x { + return truncated + 1.0; + } else { + return truncated; + } + } else { + return x; + } +} + +/// Use an alternative implementation on x86, because the +/// main implementation fails with the x87 FPU used by +/// debian i386, probably due to excess precision issues. +/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. 
+pub fn floor(x: f64) -> f64 { + if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { + let truncated = x as i64 as f64; + if truncated > x { + return truncated - 1.0; + } else { + return truncated; + } + } else { + return x; + } +} diff --git a/libm/src/math/arch/i686.rs b/libm/src/math/arch/i686.rs new file mode 100644 index 000000000..80f7face1 --- /dev/null +++ b/libm/src/math/arch/i686.rs @@ -0,0 +1,24 @@ +//! Architecture-specific support for x86-32 and x86-64 with SSE2 + +#![cfg(not(feature = "force-soft-floats"))] + +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +pub fn sqrtf(x: f32) -> f32 { + unsafe { + let m = _mm_set_ss(x); + let m_sqrt = _mm_sqrt_ss(m); + _mm_cvtss_f32(m_sqrt) + } +} + +pub fn sqrt(x: f64) -> f64 { + unsafe { + let m = _mm_set_sd(x); + let m_sqrt = _mm_sqrt_pd(m); + _mm_cvtsd_f64(m_sqrt) + } +} diff --git a/libm/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs index a4bc218b7..cf9547117 100644 --- a/libm/src/math/arch/mod.rs +++ b/libm/src/math/arch/mod.rs @@ -7,3 +7,22 @@ #[cfg(intrinsics_enabled)] pub mod intrinsics; + +// Most implementations should be defined here, to ensure they are not made available when +// soft floats are required. +#[cfg(arch_enabled)] +cfg_if! { + if #[cfg(target_feature = "sse2")] { + mod i686; + pub use i686::{sqrt, sqrtf}; + } +} + +// There are certain architecture-specific implementations that are needed for correctness +// even with `force-soft-float`. These are configured here. +cfg_if! { + if #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] { + mod i586; + pub use i586::{ceil, floor}; + } +} diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index 0da01b4d0..c7e857dbb 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -10,28 +10,11 @@ const TOINT: f64 = 1. / f64::EPSILON; pub fn ceil(x: f64) -> f64 { select_implementation! 
{ name: ceil, + use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")), use_intrinsic: target_arch = "wasm32", args: x, } - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - { - //use an alternative implementation on x86, because the - //main implementation fails with the x87 FPU used by - //debian i386, probably due to excess precision issues. - //basic implementation taken from https://github.com/rust-lang/libm/issues/219 - use super::fabs; - if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { - let truncated = x as i64 as f64; - if truncated < x { - return truncated + 1.0; - } else { - return truncated; - } - } else { - return x; - } - } let u: u64 = x.to_bits(); let e: i64 = (u >> 52 & 0x7ff) as i64; let y: f64; diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index 2b9955eba..532226b9f 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -10,28 +10,11 @@ const TOINT: f64 = 1. / f64::EPSILON; pub fn floor(x: f64) -> f64 { select_implementation! { name: floor, + use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")), use_intrinsic: target_arch = "wasm32", args: x, } - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - { - //use an alternative implementation on x86, because the - //main implementation fails with the x87 FPU used by - //debian i386, probably due to excess precision issues. 
- //basic implementation taken from https://github.com/rust-lang/libm/issues/219 - use super::fabs; - if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { - let truncated = x as i64 as f64; - if truncated > x { - return truncated - 1.0; - } else { - return truncated; - } - } else { - return x; - } - } let ui = x.to_bits(); let e = ((ui >> 52) & 0x7ff) as i32; diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 2e856100f..a443b7e4c 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -83,156 +83,139 @@ use core::f64; pub fn sqrt(x: f64) -> f64 { select_implementation! { name: sqrt, + use_arch: target_feature = "sse2", use_intrinsic: target_arch = "wasm32", args: x, } - #[cfg(all(target_feature = "sse2", not(feature = "force-soft-floats")))] - { - // Note: This path is unlikely since LLVM will usually have already - // optimized sqrt calls into hardware instructions if sse2 is available, - // but if someone does end up here they'll appreciate the speed increase. 
- #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - unsafe { - let m = _mm_set_sd(x); - let m_sqrt = _mm_sqrt_pd(m); - _mm_cvtsd_f64(m_sqrt) - } - } - #[cfg(any(not(target_feature = "sse2"), feature = "force-soft-floats"))] - { - use core::num::Wrapping; + use core::num::Wrapping; - const TINY: f64 = 1.0e-300; + const TINY: f64 = 1.0e-300; - let mut z: f64; - let sign: Wrapping = Wrapping(0x80000000); - let mut ix0: i32; - let mut s0: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: Wrapping; - let mut t1: Wrapping; - let mut s1: Wrapping; - let mut ix1: Wrapping; - let mut q1: Wrapping; + let mut z: f64; + let sign: Wrapping = Wrapping(0x80000000); + let mut ix0: i32; + let mut s0: i32; + let mut q: i32; + let mut m: i32; + let mut t: i32; + let mut i: i32; + let mut r: Wrapping; + let mut t1: Wrapping; + let mut s1: Wrapping; + let mut ix1: Wrapping; + let mut q1: Wrapping; - ix0 = (x.to_bits() >> 32) as i32; - ix1 = Wrapping(x.to_bits() as u32); + ix0 = (x.to_bits() >> 32) as i32; + ix1 = Wrapping(x.to_bits() as u32); - /* take care of Inf and NaN */ - if (ix0 & 0x7ff00000) == 0x7ff00000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + /* take care of Inf and NaN */ + if (ix0 & 0x7ff00000) == 0x7ff00000 { + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + } + /* take care of zero */ + if ix0 <= 0 { + if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { + return x; /* sqrt(+-0) = +-0 */ } - /* take care of zero */ - if ix0 <= 0 { - if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix0 < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } + if ix0 < 0 { + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ } - /* normalize x */ - m = ix0 >> 20; - if m == 0 { - /* subnormal x */ - while ix0 == 0 { - m -= 21; - ix0 |= (ix1 >> 11).0 as i32; - ix1 <<= 21; - } - 
i = 0; - while (ix0 & 0x00100000) == 0 { - i += 1; - ix0 <<= 1; - } - m -= i - 1; - ix0 |= (ix1 >> (32 - i) as usize).0 as i32; - ix1 = ix1 << i as usize; + } + /* normalize x */ + m = ix0 >> 20; + if m == 0 { + /* subnormal x */ + while ix0 == 0 { + m -= 21; + ix0 |= (ix1 >> 11).0 as i32; + ix1 <<= 21; } - m -= 1023; /* unbias exponent */ - ix0 = (ix0 & 0x000fffff) | 0x00100000; - if (m & 1) == 1 { - /* odd m, double x to make it even */ - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; + i = 0; + while (ix0 & 0x00100000) == 0 { + i += 1; + ix0 <<= 1; } - m >>= 1; /* m = [m/2] */ - - /* generate sqrt(x) bit by bit */ + m -= i - 1; + ix0 |= (ix1 >> (32 - i) as usize).0 as i32; + ix1 = ix1 << i as usize; + } + m -= 1023; /* unbias exponent */ + ix0 = (ix0 & 0x000fffff) | 0x00100000; + if (m & 1) == 1 { + /* odd m, double x to make it even */ ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; ix1 += ix1; - q = 0; /* [q,q1] = sqrt(x) */ - q1 = Wrapping(0); - s0 = 0; - s1 = Wrapping(0); - r = Wrapping(0x00200000); /* r = moving bit from right to left */ + } + m >>= 1; /* m = [m/2] */ - while r != Wrapping(0) { - t = s0 + r.0 as i32; - if t <= ix0 { - s0 = t + r.0 as i32; - ix0 -= t; - q += r.0 as i32; - } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; + /* generate sqrt(x) bit by bit */ + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + q = 0; /* [q,q1] = sqrt(x) */ + q1 = Wrapping(0); + s0 = 0; + s1 = Wrapping(0); + r = Wrapping(0x00200000); /* r = moving bit from right to left */ + + while r != Wrapping(0) { + t = s0 + r.0 as i32; + if t <= ix0 { + s0 = t + r.0 as i32; + ix0 -= t; + q += r.0 as i32; } + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + r >>= 1; + } - r = sign; - while r != Wrapping(0) { - t1 = s1 + r; - t = s0; - if t < ix0 || (t == ix0 && t1 <= ix1) { - s1 = t1 + r; - if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { - s0 += 1; - } - ix0 -= t; - if ix1 < t1 { - ix0 -= 1; - } - ix1 -= t1; - q1 += 
r; + r = sign; + while r != Wrapping(0) { + t1 = s1 + r; + t = s0; + if t < ix0 || (t == ix0 && t1 <= ix1) { + s1 = t1 + r; + if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { + s0 += 1; } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; + ix0 -= t; + if ix1 < t1 { + ix0 -= 1; + } + ix1 -= t1; + q1 += r; } + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + r >>= 1; + } - /* use floating add to find out rounding direction */ - if (ix0 as u32 | ix1.0) != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if q1.0 == 0xffffffff { - q1 = Wrapping(0); + /* use floating add to find out rounding direction */ + if (ix0 as u32 | ix1.0) != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if q1.0 == 0xffffffff { + q1 = Wrapping(0); + q += 1; + } else if z > 1.0 { + if q1.0 == 0xfffffffe { q += 1; - } else if z > 1.0 { - if q1.0 == 0xfffffffe { - q += 1; - } - q1 += Wrapping(2); - } else { - q1 += q1 & Wrapping(1); } + q1 += Wrapping(2); + } else { + q1 += q1 & Wrapping(1); } } - ix0 = (q >> 1) + 0x3fe00000; - ix1 = q1 >> 1; - if (q & 1) == 1 { - ix1 |= sign; - } - ix0 += m << 20; - f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) } + ix0 = (q >> 1) + 0x3fe00000; + ix1 = q1 >> 1; + if (q & 1) == 1 { + ix1 |= sign; + } + ix0 += m << 20; + f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) } #[cfg(test)] diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index b2996b350..d2f7ae703 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -18,109 +18,92 @@ pub fn sqrtf(x: f32) -> f32 { select_implementation! 
{ name: sqrtf, + use_arch: target_feature = "sse2", use_intrinsic: target_arch = "wasm32", args: x, } - #[cfg(all(target_feature = "sse", not(feature = "force-soft-floats")))] - { - // Note: This path is unlikely since LLVM will usually have already - // optimized sqrt calls into hardware instructions if sse is available, - // but if someone does end up here they'll appreciate the speed increase. - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - unsafe { - let m = _mm_set_ss(x); - let m_sqrt = _mm_sqrt_ss(m); - _mm_cvtss_f32(m_sqrt) - } - } - #[cfg(any(not(target_feature = "sse"), feature = "force-soft-floats"))] - { - const TINY: f32 = 1.0e-30; - - let mut z: f32; - let sign: i32 = 0x80000000u32 as i32; - let mut ix: i32; - let mut s: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: u32; - - ix = x.to_bits() as i32; - - /* take care of Inf and NaN */ - if (ix as u32 & 0x7f800000) == 0x7f800000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ - } + const TINY: f32 = 1.0e-30; - /* take care of zero */ - if ix <= 0 { - if (ix & !sign) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } - } + let mut z: f32; + let sign: i32 = 0x80000000u32 as i32; + let mut ix: i32; + let mut s: i32; + let mut q: i32; + let mut m: i32; + let mut t: i32; + let mut i: i32; + let mut r: u32; - /* normalize x */ - m = ix >> 23; - if m == 0 { - /* subnormal x */ - i = 0; - while ix & 0x00800000 == 0 { - ix <<= 1; - i = i + 1; - } - m -= i - 1; + ix = x.to_bits() as i32; + + /* take care of Inf and NaN */ + if (ix as u32 & 0x7f800000) == 0x7f800000 { + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + } + + /* take care of zero */ + if ix <= 0 { + if (ix & !sign) == 0 { + return x; /* sqrt(+-0) = +-0 */ } - m -= 127; /* unbias exponent */ - ix = (ix & 0x007fffff) | 
0x00800000; - if m & 1 == 1 { - /* odd m, double x to make it even */ - ix += ix; + if ix < 0 { + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ } - m >>= 1; /* m = [m/2] */ + } - /* generate sqrt(x) bit by bit */ + /* normalize x */ + m = ix >> 23; + if m == 0 { + /* subnormal x */ + i = 0; + while ix & 0x00800000 == 0 { + ix <<= 1; + i = i + 1; + } + m -= i - 1; + } + m -= 127; /* unbias exponent */ + ix = (ix & 0x007fffff) | 0x00800000; + if m & 1 == 1 { + /* odd m, double x to make it even */ ix += ix; - q = 0; - s = 0; - r = 0x01000000; /* r = moving bit from right to left */ - - while r != 0 { - t = s + r as i32; - if t <= ix { - s = t + r as i32; - ix -= t; - q += r as i32; - } - ix += ix; - r >>= 1; + } + m >>= 1; /* m = [m/2] */ + + /* generate sqrt(x) bit by bit */ + ix += ix; + q = 0; + s = 0; + r = 0x01000000; /* r = moving bit from right to left */ + + while r != 0 { + t = s + r as i32; + if t <= ix { + s = t + r as i32; + ix -= t; + q += r as i32; } + ix += ix; + r >>= 1; + } - /* use floating add to find out rounding direction */ - if ix != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if z > 1.0 { - q += 2; - } else { - q += q & 1; - } + /* use floating add to find out rounding direction */ + if ix != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if z > 1.0 { + q += 2; + } else { + q += q & 1; } } - - ix = (q >> 1) + 0x3f000000; - ix += m << 23; - f32::from_bits(ix as u32) } + + ix = (q >> 1) + 0x3f000000; + ix += m << 23; + f32::from_bits(ix as u32) } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 From dcce1b541f779d8d5c27cac840196c7545c0e0dd Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Oct 2024 00:42:02 -0500 Subject: [PATCH 0929/1459] Add float and integer traits from compiler-builtins In preparation of adding generic algorithms to `libm`, add the traits from `compiler-builtins`. 
Eventually we should be able to unify the two crates so we don't have duplicate implementations. --- libm/src/math/mod.rs | 2 + libm/src/math/support/float_traits.rs | 168 +++++++++++++ libm/src/math/support/int_traits.rs | 343 ++++++++++++++++++++++++++ libm/src/math/support/mod.rs | 5 + 4 files changed, 518 insertions(+) create mode 100644 libm/src/math/support/float_traits.rs create mode 100644 libm/src/math/support/int_traits.rs diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 393bc5150..2cd77f132 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -105,6 +105,8 @@ use self::k_tanf::k_tanf; use self::rem_pio2::rem_pio2; use self::rem_pio2_large::rem_pio2_large; use self::rem_pio2f::rem_pio2f; +#[allow(unused_imports)] +use self::support::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt}; // Public modules mod acos; diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs new file mode 100644 index 000000000..4cf5d7c61 --- /dev/null +++ b/libm/src/math/support/float_traits.rs @@ -0,0 +1,168 @@ +use core::ops; + +use super::int_traits::{Int, MinInt}; + +/// Trait for some basic operations on floats +#[allow(dead_code)] +pub trait Float: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::MulAssign + + ops::Add + + ops::Sub + + ops::Div + + ops::Rem +{ + /// A uint of the same width as the float + type Int: Int; + + /// An int of the same width as the float + type SignedInt: Int + MinInt; + + /// An int capable of containing the exponent bits plus a sign bit. This is signed. + type ExpInt: Int; + + const ZERO: Self; + const ONE: Self; + + /// The bitwidth of the float type + const BITS: u32; + + /// The bitwidth of the significand + const SIGNIFICAND_BITS: u32; + + /// The bitwidth of the exponent + const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1; + + /// The saturated value of the exponent (infinite representation), in the rightmost position.
+ const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1; + + /// The exponent bias value + const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1; + + /// A mask for the sign bit + const SIGN_MASK: Self::Int; + + /// A mask for the significand + const SIGNIFICAND_MASK: Self::Int; + + /// The implicit bit of the float format + const IMPLICIT_BIT: Self::Int; + + /// A mask for the exponent + const EXPONENT_MASK: Self::Int; + + /// Returns `self` transmuted to `Self::Int` + fn to_bits(self) -> Self::Int; + + /// Returns `self` transmuted to `Self::SignedInt` + fn to_bits_signed(self) -> Self::SignedInt; + + /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be + /// represented in multiple different ways. This method returns `true` if two NaNs are + /// compared. + fn eq_repr(self, rhs: Self) -> bool; + + /// Returns true if the sign is negative + fn is_sign_negative(self) -> bool; + + /// Returns the exponent, not adjusting for bias. + fn exp(self) -> Self::ExpInt; + + /// Returns the significand with no implicit bit (or the "fractional" part) + fn frac(self) -> Self::Int; + + /// Returns the significand with implicit bit + fn imp_frac(self) -> Self::Int; + + /// Returns a `Self::Int` transmuted back to `Self` + fn from_bits(a: Self::Int) -> Self; + + /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. + fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self; + + fn abs(self) -> Self { + let abs_mask = !Self::SIGN_MASK; + Self::from_bits(self.to_bits() & abs_mask) + } + + /// Returns (normalized exponent, normalized significand) + fn normalize(significand: Self::Int) -> (i32, Self::Int); + + /// Returns if `self` is subnormal + fn is_subnormal(self) -> bool; +} + +macro_rules! 
float_impl { + ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { + impl Float for $ty { + type Int = $ity; + type SignedInt = $sity; + type ExpInt = $expty; + + const ZERO: Self = 0.0; + const ONE: Self = 1.0; + + const BITS: u32 = $bits; + const SIGNIFICAND_BITS: u32 = $significand_bits; + + const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); + const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1; + const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS; + const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK); + + fn to_bits(self) -> Self::Int { + self.to_bits() + } + fn to_bits_signed(self) -> Self::SignedInt { + self.to_bits() as Self::SignedInt + } + fn eq_repr(self, rhs: Self) -> bool { + fn is_nan(x: $ty) -> bool { + // When using mangled-names, the "real" compiler-builtins might not have the + // necessary builtin (__unordtf2) to test whether `f128` is NaN. + // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin + // x is NaN if all the bits of the exponent are set and the significand is non-0 + x.to_bits() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK + && x.to_bits() & $ty::SIGNIFICAND_MASK != 0 + } + if is_nan(self) && is_nan(rhs) { true } else { self.to_bits() == rhs.to_bits() } + } + fn is_sign_negative(self) -> bool { + self.is_sign_negative() + } + fn exp(self) -> Self::ExpInt { + ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt + } + fn frac(self) -> Self::Int { + self.to_bits() & Self::SIGNIFICAND_MASK + } + fn imp_frac(self) -> Self::Int { + self.frac() | Self::IMPLICIT_BIT + } + fn from_bits(a: Self::Int) -> Self { + Self::from_bits(a) + } + fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self { + Self::from_bits( + ((negative as Self::Int) << (Self::BITS - 1)) + | ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK) + | (significand & Self::SIGNIFICAND_MASK), + ) + } 
+ fn normalize(significand: Self::Int) -> (i32, Self::Int) { + let shift = significand.leading_zeros().wrapping_sub(Self::EXPONENT_BITS); + (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int) + } + fn is_subnormal(self) -> bool { + (self.to_bits() & Self::EXPONENT_MASK) == Self::Int::ZERO + } + } + }; +} + +float_impl!(f32, u32, i32, i16, 32, 23); +float_impl!(f64, u64, i64, i16, 64, 52); diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs new file mode 100644 index 000000000..bdf3afd48 --- /dev/null +++ b/libm/src/math/support/int_traits.rs @@ -0,0 +1,343 @@ +use core::{fmt, ops}; + +/// Minimal integer implementations needed on all integer types, including wide integers. +#[allow(dead_code)] +pub trait MinInt: + Copy + + fmt::Debug + + ops::BitOr + + ops::Not + + ops::Shl +{ + /// Type with the same width but other signedness + type OtherSign: MinInt; + /// Unsigned version of Self + type UnsignedInt: MinInt; + + /// If `Self` is a signed integer + const SIGNED: bool; + + /// The bitwidth of the int type + const BITS: u32; + + const ZERO: Self; + const ONE: Self; + const MIN: Self; + const MAX: Self; +} + +/// Trait for some basic operations on integers +#[allow(dead_code)] +pub trait Int: + MinInt + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::SubAssign + + ops::BitAndAssign + + ops::BitOrAssign + + ops::BitXorAssign + + ops::ShlAssign + + ops::ShrAssign + + ops::Add + + ops::Sub + + ops::Mul + + ops::Div + + ops::Shr + + ops::BitXor + + ops::BitAnd +{ + fn unsigned(self) -> Self::UnsignedInt; + fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; + + fn from_bool(b: bool) -> Self; + + /// Prevents the need for excessive conversions between signed and unsigned + fn logical_shr(self, other: u32) -> Self; + + /// Absolute difference between two integers. 
+ fn abs_diff(self, other: Self) -> Self::UnsignedInt; + + // copied from primitive integers, but put in a trait + fn is_zero(self) -> bool; + fn wrapping_neg(self) -> Self; + fn wrapping_add(self, other: Self) -> Self; + fn wrapping_mul(self, other: Self) -> Self; + fn wrapping_sub(self, other: Self) -> Self; + fn wrapping_shl(self, other: u32) -> Self; + fn wrapping_shr(self, other: u32) -> Self; + fn rotate_left(self, other: u32) -> Self; + fn overflowing_add(self, other: Self) -> (Self, bool); + fn leading_zeros(self) -> u32; + fn ilog2(self) -> u32; +} + +macro_rules! int_impl_common { + ($ty:ty) => { + fn from_bool(b: bool) -> Self { + b as $ty + } + + fn logical_shr(self, other: u32) -> Self { + Self::from_unsigned(self.unsigned().wrapping_shr(other)) + } + + fn is_zero(self) -> bool { + self == Self::ZERO + } + + fn wrapping_neg(self) -> Self { + ::wrapping_neg(self) + } + + fn wrapping_add(self, other: Self) -> Self { + ::wrapping_add(self, other) + } + + fn wrapping_mul(self, other: Self) -> Self { + ::wrapping_mul(self, other) + } + + fn wrapping_sub(self, other: Self) -> Self { + ::wrapping_sub(self, other) + } + + fn wrapping_shl(self, other: u32) -> Self { + ::wrapping_shl(self, other) + } + + fn wrapping_shr(self, other: u32) -> Self { + ::wrapping_shr(self, other) + } + + fn rotate_left(self, other: u32) -> Self { + ::rotate_left(self, other) + } + + fn overflowing_add(self, other: Self) -> (Self, bool) { + ::overflowing_add(self, other) + } + + fn leading_zeros(self) -> u32 { + ::leading_zeros(self) + } + + fn ilog2(self) -> u32 { + ::ilog2(self) + } + }; +} + +macro_rules! 
int_impl { + ($ity:ty, $uty:ty) => { + impl MinInt for $uty { + type OtherSign = $ity; + type UnsignedInt = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $uty { + fn unsigned(self) -> $uty { + self + } + + // It makes writing macros easier if this is implemented for both signed and unsigned + #[allow(clippy::wrong_self_convention)] + fn from_unsigned(me: $uty) -> Self { + me + } + + fn abs_diff(self, other: Self) -> Self { + if self < other { other.wrapping_sub(self) } else { self.wrapping_sub(other) } + } + + int_impl_common!($uty); + } + + impl MinInt for $ity { + type OtherSign = $uty; + type UnsignedInt = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $ity { + fn unsigned(self) -> $uty { + self as $uty + } + + fn from_unsigned(me: $uty) -> Self { + me as $ity + } + + fn abs_diff(self, other: Self) -> $uty { + self.wrapping_sub(other).wrapping_abs() as $uty + } + + int_impl_common!($ity); + } + }; +} + +int_impl!(isize, usize); +int_impl!(i8, u8); +int_impl!(i16, u16); +int_impl!(i32, u32); +int_impl!(i64, u64); +int_impl!(i128, u128); + +/// Trait for integers twice the bit width of another integer. This is implemented for all +/// primitives except for `u8`, because there is not a smaller primitive. 
+#[allow(unused)] +pub trait DInt: MinInt { + /// Integer that is half the bit width of the integer this trait is implemented for + type H: HInt; + + /// Returns the low half of `self` + fn lo(self) -> Self::H; + /// Returns the high half of `self` + fn hi(self) -> Self::H; + /// Returns the low and high halves of `self` as a tuple + fn lo_hi(self) -> (Self::H, Self::H) { + (self.lo(), self.hi()) + } + /// Constructs an integer using lower and higher half parts + fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { + lo.zero_widen() | hi.widen_hi() + } +} + +/// Trait for integers half the bit width of another integer. This is implemented for all +/// primitives except for `u128`, because there is not a larger primitive. +#[allow(unused)] +pub trait HInt: Int { + /// Integer that is double the bit width of the integer this trait is implemented for + type D: DInt + MinInt; + + // NB: some of the below methods could have default implementations (e.g. `widen_hi`), but for + // unknown reasons this can cause infinite recursion when optimizations are disabled. See + // for context. + + /// Widens (using default extension) the integer to have double bit width + fn widen(self) -> Self::D; + /// Widens (zero extension only) the integer to have double bit width. This is needed to get + /// around problems with associated type bounds (such as `Int`) being unstable + fn zero_widen(self) -> Self::D; + /// Widens the integer to have double bit width and shifts the integer into the higher bits + fn widen_hi(self) -> Self::D; + /// Widening multiplication with zero widening. This cannot overflow. + fn zero_widen_mul(self, rhs: Self) -> Self::D; + /// Widening multiplication. This cannot overflow. + fn widen_mul(self, rhs: Self) -> Self::D; +} + +macro_rules!
impl_d_int { + ($($X:ident $D:ident),*) => { + $( + impl DInt for $D { + type H = $X; + + fn lo(self) -> Self::H { + self as $X + } + fn hi(self) -> Self::H { + (self >> <$X as MinInt>::BITS) as $X + } + } + )* + }; +} + +macro_rules! impl_h_int { + ($($H:ident $uH:ident $X:ident),*) => { + $( + impl HInt for $H { + type D = $X; + + fn widen(self) -> Self::D { + self as $X + } + fn zero_widen(self) -> Self::D { + (self as $uH) as $X + } + fn zero_widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen().wrapping_mul(rhs.zero_widen()) + } + fn widen_mul(self, rhs: Self) -> Self::D { + self.widen().wrapping_mul(rhs.widen()) + } + fn widen_hi(self) -> Self::D { + (self as $X) << ::BITS + } + } + )* + }; +} + +impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); +impl_h_int!( + u8 u8 u16, + u16 u16 u32, + u32 u32 u64, + u64 u64 u128, + i8 u8 i16, + i16 u16 i32, + i32 u32 i64, + i64 u64 i128 +); + +/// Trait to express (possibly lossy) casting of integers +#[allow(unused)] +pub trait CastInto: Copy { + fn cast(self) -> T; +} + +#[allow(unused)] +pub trait CastFrom: Copy { + fn cast_from(value: T) -> Self; +} + +impl + Copy> CastFrom for T { + fn cast_from(value: U) -> Self { + value.cast() + } +} + +macro_rules! 
cast_into { + ($ty:ty) => { + cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); + }; + ($ty:ty; $($into:ty),*) => {$( + impl CastInto<$into> for $ty { + fn cast(self) -> $into { + self as $into + } + } + )*}; +} + +cast_into!(usize); +cast_into!(isize); +cast_into!(u8); +cast_into!(i8); +cast_into!(u16); +cast_into!(i16); +cast_into!(u32); +cast_into!(i32); +cast_into!(u64); +cast_into!(i64); +cast_into!(u128); +cast_into!(i128); diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index 10532f0d1..f054df6cd 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -1,2 +1,7 @@ #[macro_use] pub mod macros; +mod float_traits; +mod int_traits; + +pub use float_traits::Float; +pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; From 0bf4f0f4f351233106c76b9e871acbc7bbdd54e0 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 29 May 2024 02:56:59 -0500 Subject: [PATCH 0930/1459] Add an apfloat fallback for int to float tests --- testcrate/tests/conv.rs | 129 ++++++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 52 deletions(-) diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 24f3a04a4..609680387 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -12,60 +12,82 @@ mod int_to_float { use super::*; macro_rules! i_to_f { - ($($from:ty, $into:ty, $fn:ident);*;) => { + ($f_ty:ty, $apfloat_ty:ident, $sys_available:meta, $($i_ty:ty, $fn:ident);*;) => { $( #[test] fn $fn() { use compiler_builtins::float::conv::$fn; use compiler_builtins::int::Int; - fuzz(N, |x: $from| { - let f0 = x as $into; - let f1: $into = $fn(x); - // This makes sure that the conversion produced the best rounding possible, and does - // this independent of `x as $into` rounding correctly. - // This assumes that float to integer conversion is correct. 
- let y_minus_ulp = <$into>::from_bits(f1.to_bits().wrapping_sub(1)) as $from; - let y = f1 as $from; - let y_plus_ulp = <$into>::from_bits(f1.to_bits().wrapping_add(1)) as $from; - let error_minus = <$from as Int>::abs_diff(y_minus_ulp, x); - let error = <$from as Int>::abs_diff(y, x); - let error_plus = <$from as Int>::abs_diff(y_plus_ulp, x); - // The first two conditions check that none of the two closest float values are - // strictly closer in representation to `x`. The second makes sure that rounding is - // towards even significand if two float values are equally close to the integer. - if error_minus < error - || error_plus < error - || ((error_minus == error || error_plus == error) - && ((f0.to_bits() & 1) != 0)) - { - if !cfg!(any( - target_arch = "powerpc", - target_arch = "powerpc64" - )) { - panic!( - "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})", - stringify!($fn), - x, - f1.to_bits(), - y_minus_ulp, - y, - y_plus_ulp, - error_minus, - error, - error_plus, - ); + fuzz(N, |x: $i_ty| { + let f0 = apfloat_fallback!( + $f_ty, $apfloat_ty, $sys_available, + |x| x as $f_ty; + // When the builtin is not available, we need to use a different conversion + // method (since apfloat doesn't support `as` casting). + |x: $i_ty| { + use compiler_builtins::int::MinInt; + + let apf = if <$i_ty>::SIGNED { + FloatTy::from_i128(x.try_into().unwrap()).value + } else { + FloatTy::from_u128(x.try_into().unwrap()).value + }; + + <$f_ty>::from_bits(apf.to_bits()) + }, + x + ); + let f1: $f_ty = $fn(x); + + #[cfg($sys_available)] { + // This makes sure that the conversion produced the best rounding possible, and does + // this independent of `x as $into` rounding correctly. + // This assumes that float to integer conversion is correct. 
+ let y_minus_ulp = <$f_ty>::from_bits(f1.to_bits().wrapping_sub(1)) as $i_ty; + let y = f1 as $i_ty; + let y_plus_ulp = <$f_ty>::from_bits(f1.to_bits().wrapping_add(1)) as $i_ty; + let error_minus = <$i_ty as Int>::abs_diff(y_minus_ulp, x); + let error = <$i_ty as Int>::abs_diff(y, x); + let error_plus = <$i_ty as Int>::abs_diff(y_plus_ulp, x); + + // The first two conditions check that none of the two closest float values are + // strictly closer in representation to `x`. The second makes sure that rounding is + // towards even significand if two float values are equally close to the integer. + if error_minus < error + || error_plus < error + || ((error_minus == error || error_plus == error) + && ((f0.to_bits() & 1) != 0)) + { + if !cfg!(any( + target_arch = "powerpc", + target_arch = "powerpc64" + )) { + panic!( + "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})", + stringify!($fn), + x, + f1.to_bits(), + y_minus_ulp, + y, + y_plus_ulp, + error_minus, + error, + error_plus, + ); + } } } + // Test against native conversion. We disable testing on all `x86` because of // rounding bugs with `i686`. `powerpc` also has the same rounding bug. - if f0 != f1 && !cfg!(any( + if !Float::eq_repr(f0, f1) && !cfg!(any( target_arch = "x86", target_arch = "powerpc", target_arch = "powerpc64" )) { panic!( - "{}({}): std: {}, builtins: {}", + "{}({}): std: {:?}, builtins: {:?}", stringify!($fn), x, f0, @@ -78,19 +100,22 @@ mod int_to_float { }; } - i_to_f! { - u32, f32, __floatunsisf; - u32, f64, __floatunsidf; - i32, f32, __floatsisf; - i32, f64, __floatsidf; - u64, f32, __floatundisf; - u64, f64, __floatundidf; - i64, f32, __floatdisf; - i64, f64, __floatdidf; - u128, f32, __floatuntisf; - u128, f64, __floatuntidf; - i128, f32, __floattisf; - i128, f64, __floattidf; + i_to_f! { f32, Single, all(), + u32, __floatunsisf; + i32, __floatsisf; + u64, __floatundisf; + i64, __floatdisf; + u128, __floatuntisf; + i128, __floattisf; + } + + i_to_f! 
{ f64, Double, all(), + u32, __floatunsidf; + i32, __floatsidf; + u64, __floatundidf; + i64, __floatdidf; + u128, __floatuntidf; + i128, __floattidf; } } From 84021e790b884e5eecc6f1b470b3e116ef33ff5c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 13 Jun 2024 06:50:44 -0500 Subject: [PATCH 0931/1459] Refactor integer to float conversion Extract some common routines to separate functions in order to deduplicate code and remove some of the magic. --- src/float/conv.rs | 174 +++++++++++++++++++++++++++++----------- src/int/mod.rs | 10 ++- testcrate/tests/conv.rs | 2 +- 3 files changed, 138 insertions(+), 48 deletions(-) diff --git a/src/float/conv.rs b/src/float/conv.rs index e86fee6dc..da87b3cae 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -6,21 +6,91 @@ use super::Float; /// Conversions from integers to floats. /// -/// These are hand-optimized bit twiddling code, -/// which unfortunately isn't the easiest kind of code to read. +/// The algorithm is explained here: . It roughly does the following: +/// - Calculate a base mantissa by shifting the integer into mantissa position. This gives us a +/// mantissa _with the implicit bit set_! +/// - Figure out if rounding needs to occur by classifying the bits that are to be truncated. Some +/// patterns are used to simplify this. Adjust the mantissa with the result if needed. +/// - Calculate the exponent based on the base-2 logarithm of `i` (leading zeros). Subtract one. +/// - Shift the exponent and add the mantissa to create the final representation. Subtracting one +/// from the exponent (above) accounts for the explicit bit being set in the mantissa. /// -/// The algorithm is explained here: +/// # Terminology +/// +/// - `i`: the original integer +/// - `i_m`: the integer, shifted fully left (no leading zeros) +/// - `n`: number of leading zeroes +/// - `e`: the resulting exponent. Usually 1 is subtracted to offset the mantissa implicit bit. 
+/// - `m_base`: the mantissa before adjusting for truncated bits. Implicit bit is usually set. +/// - `adj`: the bits that will be truncated, possibly compressed in some way. +/// - `m`: the resulting mantissa. Implicit bit is usually set. mod int_to_float { + use super::*; + + /// Calculate the exponent from the number of leading zeros. + /// + /// Usually 1 is subtracted from this function's result, so that a mantissa with the implicit + /// bit set can be added back later. + fn exp>>(n: u32) -> F::Int { + F::Int::cast_from(F::EXPONENT_BIAS - 1 + I::BITS - n) + } + + /// Adjust a mantissa with dropped bits to perform correct rounding. + /// + /// The dropped bits should be exactly the bits that get truncated (left-aligned), but they + /// can be combined or compressed in some way that simplifies operations. + fn m_adj(m_base: F::Int, dropped_bits: F::Int) -> F::Int { + // Branchlessly extract a `1` if rounding up should happen, 0 otherwise + // This accounts for rounding to even. + let adj = (dropped_bits - (dropped_bits >> (F::BITS - 1) & !m_base)) >> (F::BITS - 1); + + // Add one when we need to round up. Break ties to even. + m_base + adj + } + + /// Shift the exponent to its position and add the mantissa. + /// + /// If the mantissa has the implicit bit set, the exponent should be one less than its actual + /// value to cancel it out. + fn repr(e: F::Int, m: F::Int) -> F::Int { + // + rather than | so the mantissa can overflow into the exponent + (e << F::SIGNIFICAND_BITS) + m + } + + /// Shift distance from a left-aligned integer to a smaller float. + fn shift_f_lt_i() -> u32 { + (I::BITS - F::BITS) + F::EXPONENT_BITS + } + + /// Shift distance from an integer with `n` leading zeros to a smaller float. + fn shift_f_gt_i(n: u32) -> u32 { + F::SIGNIFICAND_BITS - I::BITS + 1 + n + } + + /// Perform a signed operation as unsigned, then add the sign back. 
+ pub fn signed(i: I, conv: Conv) -> F + where + F: Float, + I: Int, + F::Int: CastFrom, + Conv: Fn(I::UnsignedInt) -> F::Int, + { + let sign_bit = F::Int::cast_from(i >> (I::BITS - 1)) << (F::BITS - 1); + F::from_bits(conv(i.unsigned_abs()) | sign_bit) + } + pub fn u32_to_f32_bits(i: u32) -> u32 { if i == 0 { return 0; } let n = i.leading_zeros(); - let a = (i << n) >> 8; // Significant bits, with bit 24 still in tact. - let b = (i << n) << 24; // Insignificant bits, only relevant for rounding. - let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. - let e = 157 - n; // Exponent plus 127, minus one. - (e << 23) + m // + not |, so the mantissa can overflow into the exponent. + // Mantissa with implicit bit set (significant bits) + let m_base = (i << n) >> f32::EXPONENT_BITS; + // Bits that will be dropped (insignificant bits) + let adj = (i << n) << (f32::SIGNIFICAND_BITS + 1); + let m = m_adj::(m_base, adj); + let e = exp::(n) - 1; + repr::(e, m) } pub fn u32_to_f64_bits(i: u32) -> u64 { @@ -28,19 +98,23 @@ mod int_to_float { return 0; } let n = i.leading_zeros(); - let m = (i as u64) << (21 + n); // Significant bits, with bit 53 still in tact. - let e = 1053 - n as u64; // Exponent plus 1023, minus one. - (e << 52) + m // Bit 53 of m will overflow into e. + // Mantissa with implicit bit set + let m = (i as u64) << shift_f_gt_i::(n); + let e = exp::(n) - 1; + repr::(e, m) } pub fn u64_to_f32_bits(i: u64) -> u32 { let n = i.leading_zeros(); - let y = i.wrapping_shl(n); - let a = (y >> 40) as u32; // Significant bits, with bit 24 still in tact. - let b = (y >> 8 | y & 0xFFFF) as u32; // Insignificant bits, only relevant for rounding. - let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. - let e = if i == 0 { 0 } else { 189 - n }; // Exponent plus 127, minus one, except for zero. - (e << 23) + m // + not |, so the mantissa can overflow into the exponent. 
+ let i_m = i.wrapping_shl(n); + // Mantissa with implicit bit set + let m_base: u32 = (i_m >> shift_f_lt_i::()) as u32; + // The entire lower half of `i` will be truncated (masked portion), plus the + // next `EXPONENT_BITS` bits. + let adj = (i_m >> f32::EXPONENT_BITS | i_m & 0xFFFF) as u32; + let m = m_adj::(m_base, adj); + let e = if i == 0 { 0 } else { exp::(n) - 1 }; + repr::(e, m) } pub fn u64_to_f64_bits(i: u64) -> u64 { @@ -48,31 +122,45 @@ mod int_to_float { return 0; } let n = i.leading_zeros(); - let a = (i << n) >> 11; // Significant bits, with bit 53 still in tact. - let b = (i << n) << 53; // Insignificant bits, only relevant for rounding. - let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even. - let e = 1085 - n as u64; // Exponent plus 1023, minus one. - (e << 52) + m // + not |, so the mantissa can overflow into the exponent. + // Mantissa with implicit bit set + let m_base = (i << n) >> f64::EXPONENT_BITS; + let adj = (i << n) << (f64::SIGNIFICAND_BITS + 1); + let m = m_adj::(m_base, adj); + let e = exp::(n) - 1; + repr::(e, m) } pub fn u128_to_f32_bits(i: u128) -> u32 { let n = i.leading_zeros(); - let y = i.wrapping_shl(n); - let a = (y >> 104) as u32; // Significant bits, with bit 24 still in tact. - let b = (y >> 72) as u32 | ((y << 32) >> 32 != 0) as u32; // Insignificant bits, only relevant for rounding. - let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. - let e = if i == 0 { 0 } else { 253 - n }; // Exponent plus 127, minus one, except for zero. - (e << 23) + m // + not |, so the mantissa can overflow into the exponent. 
+ let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero + let m_base: u32 = (i_m >> shift_f_lt_i::()) as u32; + + // Within the upper `F::BITS`, everything except for the signifcand + // gets truncated + let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIGNIFICAND_BITS - 1)).cast(); + + // The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just + // check if it is nonzero. + let d2: u32 = (i_m << f32::BITS >> f32::BITS != 0).into(); + let adj = d1 | d2; + + // Mantissa with implicit bit set + let m = m_adj::(m_base, adj); + let e = if i == 0 { 0 } else { exp::(n) - 1 }; + repr::(e, m) } pub fn u128_to_f64_bits(i: u128) -> u64 { let n = i.leading_zeros(); - let y = i.wrapping_shl(n); - let a = (y >> 75) as u64; // Significant bits, with bit 53 still in tact. - let b = (y >> 11 | y & 0xFFFF_FFFF) as u64; // Insignificant bits, only relevant for rounding. - let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even. - let e = if i == 0 { 0 } else { 1149 - n as u64 }; // Exponent plus 1023, minus one, except for zero. - (e << 52) + m // + not |, so the mantissa can overflow into the exponent. + let i_m = i.wrapping_shl(n); + // Mantissa with implicit bit set + let m_base: u64 = (i_m >> shift_f_lt_i::()) as u64; + // The entire lower half of `i` will be truncated (masked portion), plus the + // next `EXPONENT_BITS` bits. + let adj = (i_m >> f64::EXPONENT_BITS | i_m & 0xFFFF_FFFF) as u64; + let m = m_adj::(m_base, adj); + let e = if i == 0 { 0 } else { exp::(n) - 1 }; + repr::(e, m) } } @@ -113,38 +201,32 @@ intrinsics! { intrinsics! 
{ #[arm_aeabi_alias = __aeabi_i2f] pub extern "C" fn __floatsisf(i: i32) -> f32 { - let sign_bit = ((i >> 31) as u32) << 31; - f32::from_bits(int_to_float::u32_to_f32_bits(i.unsigned_abs()) | sign_bit) + int_to_float::signed(i, int_to_float::u32_to_f32_bits) } #[arm_aeabi_alias = __aeabi_i2d] pub extern "C" fn __floatsidf(i: i32) -> f64 { - let sign_bit = ((i >> 31) as u64) << 63; - f64::from_bits(int_to_float::u32_to_f64_bits(i.unsigned_abs()) | sign_bit) + int_to_float::signed(i, int_to_float::u32_to_f64_bits) } #[arm_aeabi_alias = __aeabi_l2f] pub extern "C" fn __floatdisf(i: i64) -> f32 { - let sign_bit = ((i >> 63) as u32) << 31; - f32::from_bits(int_to_float::u64_to_f32_bits(i.unsigned_abs()) | sign_bit) + int_to_float::signed(i, int_to_float::u64_to_f32_bits) } #[arm_aeabi_alias = __aeabi_l2d] pub extern "C" fn __floatdidf(i: i64) -> f64 { - let sign_bit = ((i >> 63) as u64) << 63; - f64::from_bits(int_to_float::u64_to_f64_bits(i.unsigned_abs()) | sign_bit) + int_to_float::signed(i, int_to_float::u64_to_f64_bits) } #[cfg_attr(target_os = "uefi", unadjusted_on_win64)] pub extern "C" fn __floattisf(i: i128) -> f32 { - let sign_bit = ((i >> 127) as u32) << 31; - f32::from_bits(int_to_float::u128_to_f32_bits(i.unsigned_abs()) | sign_bit) + int_to_float::signed(i, int_to_float::u128_to_f32_bits) } #[cfg_attr(target_os = "uefi", unadjusted_on_win64)] pub extern "C" fn __floattidf(i: i128) -> f64 { - let sign_bit = ((i >> 127) as u64) << 63; - f64::from_bits(int_to_float::u128_to_f64_bits(i.unsigned_abs()) | sign_bit) + int_to_float::signed(i, int_to_float::u128_to_f64_bits) } } diff --git a/src/int/mod.rs b/src/int/mod.rs index e6f31c530..0d3b0ce40 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -83,6 +83,7 @@ pub(crate) trait Int: MinInt fn unsigned(self) -> Self::UnsignedInt; fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; + fn unsigned_abs(self) -> Self::UnsignedInt; fn from_bool(b: bool) -> Self; @@ -178,7 +179,6 @@ macro_rules! 
int_impl_common { fn wrapping_mul(self, other: Self) -> Self { ::wrapping_mul(self, other) } - fn wrapping_sub(self, other: Self) -> Self { ::wrapping_sub(self, other) } @@ -235,6 +235,10 @@ macro_rules! int_impl { me } + fn unsigned_abs(self) -> Self { + self + } + fn abs_diff(self, other: Self) -> Self { if self < other { other.wrapping_sub(self) @@ -268,6 +272,10 @@ macro_rules! int_impl { me as $ity } + fn unsigned_abs(self) -> Self::UnsignedInt { + self.unsigned_abs() + } + fn abs_diff(self, other: Self) -> $uty { self.wrapping_sub(other).wrapping_abs() as $uty } diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 609680387..01cc588cf 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -8,7 +8,7 @@ use compiler_builtins::float::Float; use rustc_apfloat::{Float as _, FloatConvert as _}; use testcrate::*; -mod int_to_float { +mod i_to_f { use super::*; macro_rules! i_to_f { From b639224c49a6cfd2bc092732e6d173870a5a9f30 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 13 Jun 2024 06:52:46 -0500 Subject: [PATCH 0932/1459] Add integer to `f128` conversions --- README.md | 12 +- build.rs | 18 +-- examples/intrinsics.rs | 62 ++++++++- src/float/conv.rs | 80 ++++++++++++ testcrate/benches/float_conv.rs | 222 ++++++++++++++++++++++++++------ testcrate/tests/conv.rs | 22 ++++ 6 files changed, 348 insertions(+), 68 deletions(-) diff --git a/README.md b/README.md index f792d1883..a2b38cce0 100644 --- a/README.md +++ b/README.md @@ -233,12 +233,12 @@ of being added to Rust. 
- [x] fixunstfdi.c - [x] fixunstfsi.c - [x] fixunstfti.c -- [ ] floatditf.c -- [ ] floatsitf.c -- [ ] floattitf.c -- [ ] floatunditf.c -- [ ] floatunsitf.c -- [ ] floatuntitf.c +- [x] floatditf.c +- [x] floatsitf.c +- [x] floattitf.c +- [x] floatunditf.c +- [x] floatunsitf.c +- [x] floatuntitf.c - [x] multf3.c - [x] powitf2.c - [x] subtf3.c diff --git a/build.rs b/build.rs index 2863c979f..22ec9e4d2 100644 --- a/build.rs +++ b/build.rs @@ -532,10 +532,6 @@ mod c { if (target.arch == "aarch64" || target.arch == "arm64ec") && consider_float_intrinsics { sources.extend(&[ ("__comparetf2", "comparetf2.c"), - ("__floatditf", "floatditf.c"), - ("__floatsitf", "floatsitf.c"), - ("__floatunditf", "floatunditf.c"), - ("__floatunsitf", "floatunsitf.c"), ("__fe_getround", "fp_mode.c"), ("__fe_raise_inexact", "fp_mode.c"), ]); @@ -550,21 +546,11 @@ mod c { } if target.arch == "mips64" { - sources.extend(&[ - ("__netf2", "comparetf2.c"), - ("__floatsitf", "floatsitf.c"), - ("__floatunsitf", "floatunsitf.c"), - ("__fe_getround", "fp_mode.c"), - ]); + sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]); } if target.arch == "loongarch64" { - sources.extend(&[ - ("__netf2", "comparetf2.c"), - ("__floatsitf", "floatsitf.c"), - ("__floatunsitf", "floatunsitf.c"), - ("__fe_getround", "fp_mode.c"), - ]); + sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]); } // Remove the assembly implementations that won't compile for the target diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 06d772330..368da6af2 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -264,14 +264,18 @@ mod intrinsics { /* i32 operations */ + // floatsisf + pub fn aeabi_i2f(x: i32) -> f32 { + x as f32 + } + // floatsidf pub fn aeabi_i2d(x: i32) -> f64 { x as f64 } - // floatsisf - pub fn aeabi_i2f(x: i32) -> f32 { - x as f32 + pub fn floatsitf(x: i32) -> f128 { + x as f128 } pub fn aeabi_idiv(a: i32, b: i32) -> i32 { @@ -294,6 
+298,10 @@ mod intrinsics { x as f64 } + pub fn floatditf(x: i64) -> f128 { + x as f128 + } + pub fn mulodi4(a: i64, b: i64) -> i64 { a * b } @@ -314,6 +322,18 @@ mod intrinsics { /* i128 operations */ + pub fn floattisf(x: i128) -> f32 { + x as f32 + } + + pub fn floattidf(x: i128) -> f64 { + x as f64 + } + + pub fn floattitf(x: i128) -> f128 { + x as f128 + } + pub fn lshrti3(a: i128, b: usize) -> i128 { a >> b } @@ -328,14 +348,18 @@ mod intrinsics { /* u32 operations */ + // floatunsisf + pub fn aeabi_ui2f(x: u32) -> f32 { + x as f32 + } + // floatunsidf pub fn aeabi_ui2d(x: u32) -> f64 { x as f64 } - // floatunsisf - pub fn aeabi_ui2f(x: u32) -> f32 { - x as f32 + pub fn floatunsitf(x: u32) -> f128 { + x as f128 } pub fn aeabi_uidiv(a: u32, b: u32) -> u32 { @@ -358,6 +382,10 @@ mod intrinsics { x as f64 } + pub fn floatunditf(x: u64) -> f128 { + x as f128 + } + // udivdi3 pub fn aeabi_uldivmod(a: u64, b: u64) -> u64 { a * b @@ -369,6 +397,18 @@ mod intrinsics { /* u128 operations */ + pub fn floatuntisf(x: u128) -> f32 { + x as f32 + } + + pub fn floatuntidf(x: u128) -> f64 { + x as f64 + } + + pub fn floatuntitf(x: u128) -> f128 { + x as f128 + } + pub fn muloti4(a: u128, b: u128) -> Option { a.checked_mul(b) } @@ -466,6 +506,16 @@ fn run() { bb(fixunstfsi(bb(2.))); #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] bb(fixunstfti(bb(2.))); + bb(floatditf(bb(2))); + bb(floatsitf(bb(2))); + bb(floattidf(bb(2))); + bb(floattisf(bb(2))); + bb(floattitf(bb(2))); + bb(floatunditf(bb(2))); + bb(floatunsitf(bb(2))); + bb(floatuntidf(bb(2))); + bb(floatuntisf(bb(2))); + bb(floatuntitf(bb(2))); bb(gttf(bb(2.), bb(2.))); bb(lshrti3(bb(2), bb(2))); bb(lttf(bb(2.), bb(2.))); diff --git a/src/float/conv.rs b/src/float/conv.rs index da87b3cae..4aea67c91 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -104,6 +104,24 @@ mod int_to_float { repr::(e, m) } + #[cfg(f128_enabled)] + pub fn u32_to_f128_bits(i: u32) -> u128 { + if i == 0 { + return 0; + } 
+ let n = i.leading_zeros(); + + // Shift into mantissa position that is correct for the type, but shifted into the lower + // 64 bits over so can can avoid 128-bit math. + let m = (i as u64) << (shift_f_gt_i::(n) - 64); + let e = exp::(n) as u64 - 1; + // High 64 bits of f128 representation. + let h = (e << (f128::SIGNIFICAND_BITS - 64)) + m; + + // Shift back to the high bits, the rest of the mantissa will always be 0. + (h as u128) << 64 + } + pub fn u64_to_f32_bits(i: u64) -> u32 { let n = i.leading_zeros(); let i_m = i.wrapping_shl(n); @@ -130,6 +148,18 @@ mod int_to_float { repr::(e, m) } + #[cfg(f128_enabled)] + pub fn u64_to_f128_bits(i: u64) -> u128 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + // Mantissa with implicit bit set + let m = (i as u128) << shift_f_gt_i::(n); + let e = exp::(n) - 1; + repr::(e, m) + } + pub fn u128_to_f32_bits(i: u128) -> u32 { let n = i.leading_zeros(); let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero @@ -162,6 +192,20 @@ mod int_to_float { let e = if i == 0 { 0 } else { exp::(n) - 1 }; repr::(e, m) } + + #[cfg(f128_enabled)] + pub fn u128_to_f128_bits(i: u128) -> u128 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + // Mantissa with implicit bit set + let m_base = (i << n) >> f128::EXPONENT_BITS; + let adj = (i << n) << (f128::SIGNIFICAND_BITS + 1); + let m = m_adj::(m_base, adj); + let e = exp::(n) - 1; + repr::(e, m) + } } // Conversions from unsigned integers to floats. @@ -195,6 +239,24 @@ intrinsics! 
{ pub extern "C" fn __floatuntidf(i: u128) -> f64 { f64::from_bits(int_to_float::u128_to_f64_bits(i)) } + + #[ppc_alias = __floatunsikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floatunsitf(i: u32) -> f128 { + f128::from_bits(int_to_float::u32_to_f128_bits(i)) + } + + #[ppc_alias = __floatundikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floatunditf(i: u64) -> f128 { + f128::from_bits(int_to_float::u64_to_f128_bits(i)) + } + + #[ppc_alias = __floatuntikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floatuntitf(i: u128) -> f128 { + f128::from_bits(int_to_float::u128_to_f128_bits(i)) + } } // Conversions from signed integers to floats. @@ -228,6 +290,24 @@ intrinsics! { pub extern "C" fn __floattidf(i: i128) -> f64 { int_to_float::signed(i, int_to_float::u128_to_f64_bits) } + + #[ppc_alias = __floatsikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floatsitf(i: i32) -> f128 { + int_to_float::signed(i, int_to_float::u32_to_f128_bits) + } + + #[ppc_alias = __floatdikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floatditf(i: i64) -> f128 { + int_to_float::signed(i, int_to_float::u64_to_f128_bits) + } + + #[ppc_alias = __floattikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floattitf(i: i128) -> f128 { + int_to_float::signed(i, int_to_float::u128_to_f128_bits) + } } /// Generic float to unsigned int conversions. diff --git a/testcrate/benches/float_conv.rs b/testcrate/benches/float_conv.rs index de2043b04..0625a1ae5 100644 --- a/testcrate/benches/float_conv.rs +++ b/testcrate/benches/float_conv.rs @@ -1,7 +1,8 @@ #![allow(improper_ctypes)] +#![cfg_attr(f128_enabled, feature(f128))] use compiler_builtins::float::conv; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{criterion_main, Criterion}; use testcrate::float_bench; /* unsigned int -> float */ @@ -76,6 +77,18 @@ float_bench! { ], } +#[cfg(f128_enabled)] +float_bench! 
{ + name: conv_u32_f128, + sig: (a: u32) -> f128, + crate_fn: conv::__floatunsitf, + crate_fn_ppc: conv::__floatunsikf, + sys_fn: __floatunsitf, + sys_fn_ppc: __floatunsikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + float_bench! { name: conv_u64_f32, sig: (a: u64) -> f32, @@ -118,6 +131,18 @@ float_bench! { ], } +#[cfg(f128_enabled)] +float_bench! { + name: conv_u64_f128, + sig: (a: u64) -> f128, + crate_fn: conv::__floatunditf, + crate_fn_ppc: conv::__floatundikf, + sys_fn: __floatunditf, + sys_fn_ppc: __floatundikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + float_bench! { name: conv_u128_f32, sig: (a: u128) -> f32, @@ -136,6 +161,18 @@ float_bench! { asm: [] } +#[cfg(f128_enabled)] +float_bench! { + name: conv_u128_f128, + sig: (a: u128) -> f128, + crate_fn: conv::__floatuntitf, + crate_fn_ppc: conv::__floatuntikf, + sys_fn: __floatuntitf, + sys_fn_ppc: __floatuntikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + /* signed int -> float */ float_bench! { @@ -205,6 +242,18 @@ float_bench! { ], } +#[cfg(f128_enabled)] +float_bench! { + name: conv_i32_f128, + sig: (a: i32) -> f128, + crate_fn: conv::__floatsitf, + crate_fn_ppc: conv::__floatsikf, + sys_fn: __floatsitf, + sys_fn_ppc: __floatsikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + float_bench! { name: conv_i64_f32, sig: (a: i64) -> f32, @@ -272,6 +321,18 @@ float_bench! { ], } +#[cfg(f128_enabled)] +float_bench! { + name: conv_i64_f128, + sig: (a: i64) -> f128, + crate_fn: conv::__floatditf, + crate_fn_ppc: conv::__floatdikf, + sys_fn: __floatditf, + sys_fn_ppc: __floatdikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + float_bench! { name: conv_i128_f32, sig: (a: i128) -> f32, @@ -290,6 +351,18 @@ float_bench! { asm: [] } +#[cfg(f128_enabled)] +float_bench! 
{ + name: conv_i128_f128, + sig: (a: i128) -> f128, + crate_fn: conv::__floattitf, + crate_fn_ppc: conv::__floattikf, + sys_fn: __floattitf, + sys_fn_ppc: __floattikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + /* float -> unsigned int */ #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] @@ -397,6 +470,39 @@ float_bench! { asm: [] } +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_u32, + sig: (a: f128) -> u32, + crate_fn: conv::__fixunstfsi, + crate_fn_ppc: conv::__fixunskfsi, + sys_fn: __fixunstfsi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_u64, + sig: (a: f128) -> u64, + crate_fn: conv::__fixunstfdi, + crate_fn_ppc: conv::__fixunskfdi, + sys_fn: __fixunstfdi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_u128, + sig: (a: f128) -> u128, + crate_fn: conv::__fixunstfti, + crate_fn_ppc: conv::__fixunskfti, + sys_fn: __fixunstfti, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + /* float -> signed int */ #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] @@ -504,43 +610,79 @@ float_bench! 
{ asm: [] } -criterion_group!( - float_conv, - conv_u32_f32, - conv_u32_f64, - conv_u64_f32, - conv_u64_f64, - conv_u128_f32, - conv_u128_f64, - conv_i32_f32, - conv_i32_f64, - conv_i64_f32, - conv_i64_f64, - conv_i128_f32, - conv_i128_f64, - conv_f64_u32, - conv_f64_u64, - conv_f64_u128, - conv_f64_i32, - conv_f64_i64, - conv_f64_i128, -); - -// FIXME: ppc64le has a sporadic overflow panic in the crate functions -// -#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] -criterion_group!( - float_conv_not_ppc64le, - conv_f32_u32, - conv_f32_u64, - conv_f32_u128, - conv_f32_i32, - conv_f32_i64, - conv_f32_i128, -); - -#[cfg(all(target_arch = "powerpc64", target_endian = "little"))] -criterion_main!(float_conv); +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_i32, + sig: (a: f128) -> i32, + crate_fn: conv::__fixtfsi, + crate_fn_ppc: conv::__fixkfsi, + sys_fn: __fixtfsi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} -#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] -criterion_main!(float_conv, float_conv_not_ppc64le); +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_i64, + sig: (a: f128) -> i64, + crate_fn: conv::__fixtfdi, + crate_fn_ppc: conv::__fixkfdi, + sys_fn: __fixtfdi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! 
{ + name: conv_f128_i128, + sig: (a: f128) -> i128, + crate_fn: conv::__fixtfti, + crate_fn_ppc: conv::__fixkfti, + sys_fn: __fixtfti, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +pub fn float_conv() { + let mut criterion = Criterion::default().configure_from_args(); + + conv_u32_f32(&mut criterion); + conv_u32_f64(&mut criterion); + conv_u64_f32(&mut criterion); + conv_u64_f64(&mut criterion); + conv_u128_f32(&mut criterion); + conv_u128_f64(&mut criterion); + conv_i32_f32(&mut criterion); + conv_i32_f64(&mut criterion); + conv_i64_f32(&mut criterion); + conv_i64_f64(&mut criterion); + conv_i128_f32(&mut criterion); + conv_i128_f64(&mut criterion); + conv_f64_u32(&mut criterion); + conv_f64_u64(&mut criterion); + conv_f64_u128(&mut criterion); + conv_f64_i32(&mut criterion); + conv_f64_i64(&mut criterion); + conv_f64_i128(&mut criterion); + + #[cfg(all(f128_enabled))] + // FIXME: ppc64le has a sporadic overflow panic in the crate functions + // + #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] + { + conv_u32_f128(&mut criterion); + conv_u64_f128(&mut criterion); + conv_u128_f128(&mut criterion); + conv_i32_f128(&mut criterion); + conv_i64_f128(&mut criterion); + conv_i128_f128(&mut criterion); + conv_f128_u32(&mut criterion); + conv_f128_u64(&mut criterion); + conv_f128_u128(&mut criterion); + conv_f128_i32(&mut criterion); + conv_f128_i64(&mut criterion); + conv_f128_i128(&mut criterion); + } +} + +criterion_main!(float_conv); diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 01cc588cf..a08748af7 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -117,6 +117,28 @@ mod i_to_f { u128, __floatuntidf; i128, __floattidf; } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + i_to_f! 
{ f128, Quad, not(feature = "no-sys-f128-int-convert"), + u32, __floatunsitf; + i32, __floatsitf; + u64, __floatunditf; + i64, __floatditf; + u128, __floatuntitf; + i128, __floattitf; + } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"), + u32, __floatunsikf; + i32, __floatsikf; + u64, __floatundikf; + i64, __floatdikf; + u128, __floatuntikf; + i128, __floattikf; + } } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 From 02e939b0c94977090d1302f25eb95dd5e4f119cc Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 30 Oct 2024 12:27:59 -0500 Subject: [PATCH 0933/1459] Remove the unneeded `isqrt` feature gate [1] has been stabilized so we no longer need to enable it. [1]: https://github.com/rust-lang/rust/issues/116226 --- testcrate/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/testcrate/src/lib.rs b/testcrate/src/lib.rs index 58419bf1b..4154e0fb3 100644 --- a/testcrate/src/lib.rs +++ b/testcrate/src/lib.rs @@ -15,7 +15,6 @@ #![no_std] #![cfg_attr(f128_enabled, feature(f128))] #![cfg_attr(f16_enabled, feature(f16))] -#![feature(isqrt)] pub mod bench; extern crate alloc; From 540315cf4c546e8e77e218eb389d69e21a196e18 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 30 Oct 2024 13:45:53 -0500 Subject: [PATCH 0934/1459] Rename `canonical_name` to `base_name` "Canonical" isn't really the right word here, update to "base". 
--- libm/crates/libm-test/src/gen/random.rs | 3 ++- libm/crates/libm-test/src/lib.rs | 2 +- libm/crates/libm-test/src/precision.rs | 32 ++++++++++++------------ libm/crates/libm-test/src/test_traits.rs | 8 +++--- libm/src/math/support/macros.rs | 8 +++--- 5 files changed, 27 insertions(+), 26 deletions(-) diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs index c73937aac..d03d1ff79 100644 --- a/libm/crates/libm-test/src/gen/random.rs +++ b/libm/crates/libm-test/src/gen/random.rs @@ -110,6 +110,7 @@ pub fn get_test_cases(ctx: &CheckCtx) -> impl Iterator, { - let inputs = if ctx.fname == "jn" || ctx.fname == "jnf" { &TEST_CASES_JN } else { &TEST_CASES }; + let inputs = + if ctx.fn_name == "jn" || ctx.fn_name == "jnf" { &TEST_CASES_JN } else { &TEST_CASES }; inputs.get_cases() } diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 31b95e46c..6c7a3f5ec 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -18,7 +18,7 @@ include!(concat!(env!("OUT_DIR"), "/all_files.rs")); /// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`, /// `lgamma_r` and `lgammaf_r` both return `lgamma_r`. 
-pub fn canonical_name(name: &str) -> &str { +pub fn base_name(name: &str) -> &str { let known_mappings = &[ ("erff", "erf"), ("erf", "erf"), diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index e2ad638c4..9ef0e818d 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -111,25 +111,25 @@ impl MaybeOverride<(f32,)> for SpecialCase { ctx: &CheckCtx, ) -> Option { if ctx.basis == CheckBasis::Musl { - if ctx.fname == "expm1f" && input.0 > 80.0 && actual.is_infinite() { + if ctx.fn_name == "expm1f" && input.0 > 80.0 && actual.is_infinite() { // we return infinity but the number is representable return XFAIL; } - if ctx.fname == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() { + if ctx.fn_name == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() { // we return some NaN that should be real values or infinite // doesn't seem to happen on x86 return XFAIL; } } - if ctx.fname == "acoshf" && input.0 < -1.0 { + if ctx.fn_name == "acoshf" && input.0 < -1.0 { // acoshf is undefined for x <= 1.0, but we return a random result at lower // values. 
return XFAIL; } - if ctx.fname == "lgammaf" || ctx.fname == "lgammaf_r" && input.0 < 0.0 { + if ctx.fn_name == "lgammaf" || ctx.fn_name == "lgammaf_r" && input.0 < 0.0 { // loggamma should not be defined for x < 0, yet we both return results return XFAIL; } @@ -146,7 +146,7 @@ impl MaybeOverride<(f32,)> for SpecialCase { // On MPFR for lgammaf_r, we set -1 as the integer result for negative infinity but MPFR // sets +1 if ctx.basis == CheckBasis::Mpfr - && ctx.fname == "lgammaf_r" + && ctx.fn_name == "lgammaf_r" && input.0 == f32::NEG_INFINITY && actual.abs() == expected.abs() { @@ -166,13 +166,13 @@ impl MaybeOverride<(f64,)> for SpecialCase { ctx: &CheckCtx, ) -> Option { if ctx.basis == CheckBasis::Musl { - if cfg!(target_arch = "x86") && ctx.fname == "acosh" && input.0 < 1.0 { + if cfg!(target_arch = "x86") && ctx.fn_name == "acosh" && input.0 < 1.0 { // The function is undefined, both implementations return random results return SKIP; } if cfg!(x86_no_sse) - && ctx.fname == "ceil" + && ctx.fn_name == "ceil" && input.0 < 0.0 && input.0 > -1.0 && expected == F::ZERO @@ -183,13 +183,13 @@ impl MaybeOverride<(f64,)> for SpecialCase { } } - if ctx.fname == "acosh" && input.0 < 1.0 { + if ctx.fn_name == "acosh" && input.0 < 1.0 { // The function is undefined for the inputs, musl and our libm both return // random results. 
return XFAIL; } - if ctx.fname == "lgamma" || ctx.fname == "lgamma_r" && input.0 < 0.0 { + if ctx.fn_name == "lgamma" || ctx.fn_name == "lgamma_r" && input.0 < 0.0 { // loggamma should not be defined for x < 0, yet we both return results return XFAIL; } @@ -206,7 +206,7 @@ impl MaybeOverride<(f64,)> for SpecialCase { // On MPFR for lgamma_r, we set -1 as the integer result for negative infinity but MPFR // sets +1 if ctx.basis == CheckBasis::Mpfr - && ctx.fname == "lgamma_r" + && ctx.fn_name == "lgamma_r" && input.0 == f64::NEG_INFINITY && actual.abs() == expected.abs() { @@ -219,7 +219,7 @@ impl MaybeOverride<(f64,)> for SpecialCase { /// Check NaN bits if the function requires it fn maybe_check_nan_bits(actual: F, expected: F, ctx: &CheckCtx) -> Option { - if !(ctx.canonical_name == "fabs" || ctx.canonical_name == "copysign") { + if !(ctx.base_name == "fabs" || ctx.base_name == "copysign") { return None; } @@ -277,7 +277,7 @@ fn maybe_skip_binop_nan( ) -> Option { match ctx.basis { CheckBasis::Musl => { - if (ctx.canonical_name == "fmax" || ctx.canonical_name == "fmin") + if (ctx.base_name == "fmax" || ctx.base_name == "fmin") && (input.0.is_nan() || input.1.is_nan()) && expected.is_nan() { @@ -287,7 +287,7 @@ fn maybe_skip_binop_nan( } } CheckBasis::Mpfr => { - if ctx.canonical_name == "copysign" && input.1.is_nan() { + if ctx.base_name == "copysign" && input.1.is_nan() { SKIP } else { None @@ -308,7 +308,7 @@ impl MaybeOverride<(i32, f32)> for SpecialCase { CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx), CheckBasis::Mpfr => { // We return +0.0, MPFR returns -0.0 - if ctx.fname == "jnf" + if ctx.fn_name == "jnf" && input.1 == f32::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO @@ -333,7 +333,7 @@ impl MaybeOverride<(i32, f64)> for SpecialCase { CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx), CheckBasis::Mpfr => { // We return +0.0, MPFR returns -0.0 - if ctx.fname == "jn" + if ctx.fn_name == "jn" && input.1 == f64::NEG_INFINITY && 
actual == F::ZERO && expected == F::ZERO @@ -353,7 +353,7 @@ fn bessel_prec_dropoff( ulp: &mut u32, ctx: &CheckCtx, ) -> Option { - if ctx.canonical_name == "jn" { + if ctx.base_name == "jn" { if input.0 > 4000 { return XFAIL; } else if input.0 > 2000 { diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index deb837887..34e15e0b2 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -33,17 +33,17 @@ pub struct CheckCtx { /// Allowed ULP deviation pub ulp: u32, /// Function name. - pub fname: &'static str, + pub fn_name: &'static str, /// Return the unsuffixed version of the function name. - pub canonical_name: &'static str, + pub base_name: &'static str, /// Source of truth for tests. pub basis: CheckBasis, } impl CheckCtx { pub fn new(ulp: u32, fname: &'static str, basis: CheckBasis) -> Self { - let canonical_fname = crate::canonical_name(fname); - Self { ulp, fname, canonical_name: canonical_fname, basis } + let base_name = crate::base_name(fname); + Self { ulp, fn_name: fname, base_name, basis } } } diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs index f85a6122e..b14bbec38 100644 --- a/libm/src/math/support/macros.rs +++ b/libm/src/math/support/macros.rs @@ -57,7 +57,7 @@ macro_rules! cfg_if { /// (`unstable-intrinsics`, `arch`, `force-soft-floats`), this macro handles that part. macro_rules! select_implementation { ( - name: $fname:ident, + name: $fn_name:ident, // Configuration meta for when to use arch-specific implementation that requires hard // float ops $( use_arch: $use_arch:meta, )? @@ -76,7 +76,7 @@ macro_rules! select_implementation { select_implementation! { @cfg $($use_arch_required)?; if true { - return super::arch::$fname( $($arg),+ ); + return super::arch::$fn_name( $($arg),+ ); } } @@ -86,7 +86,7 @@ macro_rules! 
select_implementation { @cfg $($use_arch)?; // Wrap in `if true` to avoid unused warnings if true { - return super::arch::$fname( $($arg),+ ); + return super::arch::$fn_name( $($arg),+ ); } } @@ -96,7 +96,7 @@ macro_rules! select_implementation { select_implementation! { @cfg $( $use_intrinsic )?; if true { - return super::arch::intrinsics::$fname( $($arg),+ ); + return super::arch::intrinsics::$fn_name( $($arg),+ ); } } }; From bda7ea2574cd8ad89061bf4a697a296d108e7754 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Thu, 31 Oct 2024 19:29:13 +0800 Subject: [PATCH 0935/1459] ci: add support for loongarch64-unknown-linux-gnu --- libm/.github/workflows/main.yml | 2 ++ .../docker/loongarch64-unknown-linux-gnu/Dockerfile | 13 +++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 libm/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index cc0d23ffc..f0c5fe7c0 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -32,6 +32,8 @@ jobs: os: ubuntu-latest - target: i686-unknown-linux-gnu os: ubuntu-latest + - target: loongarch64-unknown-linux-gnu + os: ubuntu-latest - target: powerpc-unknown-linux-gnu os: ubuntu-latest - target: powerpc64-unknown-linux-gnu diff --git a/libm/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile b/libm/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000..a9ce320e8 --- /dev/null +++ b/libm/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile @@ -0,0 +1,13 @@ +FROM ubuntu:24.04 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev qemu-user-static ca-certificates \ + gcc-14-loongarch64-linux-gnu libc6-dev-loong64-cross + +ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc-14 \ + CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-loongarch64-static \ + AR_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-ar \ + 
CC_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-gcc-14 \ + QEMU_LD_PREFIX=/usr/loongarch64-linux-gnu \ + RUST_TEST_THREADS=1 From d4a39557885f7b57c3b98c661bb000d587165bc9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 30 Oct 2024 17:36:39 +0000 Subject: [PATCH 0936/1459] chore: release v0.1.137 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4a85e5215..ecd5ecd08 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.136" +version = "0.1.137" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From ecb462d70eb8ae873a56c7e8a24e4d2f1c4ef977 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 21 Oct 2024 18:27:01 -0500 Subject: [PATCH 0937/1459] Add benchmarks against musl libm Add a benchmark for each function that checks against `musl_math_sys`. 
--- libm/crates/libm-test/Cargo.toml | 13 +++ libm/crates/libm-test/benches/random.rs | 119 ++++++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 libm/crates/libm-test/benches/random.rs diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 72ac57232..fedf745ed 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -15,6 +15,12 @@ test-multiprecision = ["dep:az", "dep:rug"] # Build our own musl for testing and benchmarks build-musl = ["dep:musl-math-sys"] +# Enable report generation without bringing in more dependencies by default +benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] + +# Run with a reduced set of benchmarks, such as for CI +short-benchmarks = [] + [dependencies] anyhow = "1.0.90" az = { version = "1.2.1", optional = true } @@ -32,3 +38,10 @@ getrandom = { version = "0.2", features = ["js"] } [build-dependencies] rand = { version = "0.8.5", optional = true } + +[dev-dependencies] +criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] } + +[[bench]] +name = "random" +harness = false diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs new file mode 100644 index 000000000..6c9047c3c --- /dev/null +++ b/libm/crates/libm-test/benches/random.rs @@ -0,0 +1,119 @@ +use std::hint::black_box; +use std::time::Duration; + +use criterion::{Criterion, criterion_main}; +use libm_test::gen::random; +use libm_test::{CheckBasis, CheckCtx, TupleCall}; + +/// Benchmark with this many items to get a variety +const BENCH_ITER_ITEMS: usize = if cfg!(feature = "short-benchmarks") { 50 } else { 500 }; + +macro_rules! musl_rand_benches { + ( + fn_name: $fn_name:ident, + CFn: $CFn:ty, + CArgs: $CArgs:ty, + CRet: $CRet:ty, + RustFn: $RustFn:ty, + RustArgs: $RustArgs:ty, + RustRet: $RustRet:ty, + fn_extra: $skip_on_i586:expr, + ) => { + paste::paste! 
{ + fn [< musl_bench_ $fn_name >](c: &mut Criterion) { + let fn_name = stringify!($fn_name); + + let ulp = libm_test::musl_allowed_ulp(fn_name); + let ctx = CheckCtx::new(ulp, fn_name, CheckBasis::Musl); + let benchvec: Vec<_> = random::get_test_cases::<$RustArgs>(&ctx) + .take(BENCH_ITER_ITEMS) + .collect(); + + // Perform a sanity check that we are benchmarking the same thing + // Don't test against musl if it is not available + #[cfg(feature = "build-musl")] + for input in benchvec.iter().copied() { + use anyhow::Context; + use libm_test::{CheckBasis, CheckCtx, CheckOutput}; + + if cfg!(x86_no_sse) && $skip_on_i586 { + break; + } + + let musl_res = input.call(musl_math_sys::$fn_name as $CFn); + let crate_res = input.call(libm::$fn_name as $RustFn); + + let ctx = CheckCtx::new(ulp, fn_name, CheckBasis::Musl); + crate_res.validate(musl_res, input, &ctx).context(fn_name).unwrap(); + } + + /* Function pointers are black boxed to avoid inlining in the benchmark loop */ + + let mut group = c.benchmark_group(fn_name); + group.bench_function("crate", |b| b.iter(|| { + let f = black_box(libm::$fn_name as $RustFn); + for input in benchvec.iter().copied() { + input.call(f); + } + })); + + // Don't test against musl if it is not available + #[cfg(feature = "build-musl")] + group.bench_function("musl", |b| b.iter(|| { + let f = black_box(musl_math_sys::$fn_name as $CFn); + for input in benchvec.iter().copied() { + input.call(f); + } + })); + } + } + }; +} + +libm_macros::for_each_function! { + callback: musl_rand_benches, + skip: [], + fn_extra: match MACRO_FN_NAME { + // FIXME(correctness): wrong result on i586 + exp10 | exp10f | exp2 | exp2f => true, + _ => false + } +} + +macro_rules! run_callback { + ( + fn_name: $fn_name:ident, + CFn: $_CFn:ty, + CArgs: $_CArgs:ty, + CRet: $_CRet:ty, + RustFn: $_RustFn:ty, + RustArgs: $_RustArgs:ty, + RustRet: $_RustRet:ty, + extra: [$criterion:ident], + ) => { + paste::paste! 
{ + [< musl_bench_ $fn_name >](&mut $criterion) + } + }; +} + +pub fn musl_random() { + let mut criterion = Criterion::default(); + + // For CI, run a short 0.5s warmup and 1.0s tests. This makes benchmarks complete in + // about the same time as other tests. + if cfg!(feature = "short-benchmarks") { + criterion = criterion + .warm_up_time(Duration::from_millis(500)) + .measurement_time(Duration::from_millis(1000)); + } + + criterion = criterion.configure_from_args(); + + libm_macros::for_each_function! { + callback: run_callback, + extra: [criterion], + }; +} + +criterion_main!(musl_random); From ca24fab4b64b92b04c37292c26246d8d55091f49 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 21 Oct 2024 18:27:58 -0500 Subject: [PATCH 0938/1459] Remove `libm-bench` This has been superseded by the benchmarks in `libm-test`. --- libm/Cargo.toml | 1 - libm/crates/libm-bench/Cargo.toml | 16 ---- libm/crates/libm-bench/benches/bench.rs | 116 ------------------------ 3 files changed, 133 deletions(-) delete mode 100644 libm/crates/libm-bench/Cargo.toml delete mode 100644 libm/crates/libm-bench/benches/bench.rs diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 2e74012ea..178627766 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -38,7 +38,6 @@ force-soft-floats = [] resolver = "2" members = [ "crates/compiler-builtins-smoke-test", - "crates/libm-bench", "crates/libm-macros", "crates/libm-test", "crates/musl-math-sys", diff --git a/libm/crates/libm-bench/Cargo.toml b/libm/crates/libm-bench/Cargo.toml deleted file mode 100644 index ee8c58200..000000000 --- a/libm/crates/libm-bench/Cargo.toml +++ /dev/null @@ -1,16 +0,0 @@ -[package] -name = "libm-bench" -version = "0.1.0" -authors = ["Gonzalo Brito Gadeschi "] -edition = "2021" -license = "MIT OR Apache-2.0" -publish = false - -[dependencies] -libm = { path = "../..", default-features = false } -rand = "0.8.5" -paste = "1.0.15" - -[features] -default = [] -unstable = [ "libm/unstable" ] diff --git 
a/libm/crates/libm-bench/benches/bench.rs b/libm/crates/libm-bench/benches/bench.rs deleted file mode 100644 index ca999b90f..000000000 --- a/libm/crates/libm-bench/benches/bench.rs +++ /dev/null @@ -1,116 +0,0 @@ -#![feature(test)] -extern crate test; - -use rand::Rng; -use test::Bencher; - -macro_rules! unary { - ($($func:ident),*) => ($( - paste::item! { - #[bench] - pub fn [<$func>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let x = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func>](x))) - } - #[bench] - pub fn [<$func f>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let x = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func f>](x))) - } - } - )*); -} -macro_rules! binary { - ($($func:ident),*) => ($( - paste::item! { - #[bench] - pub fn [<$func>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let x = rng.gen::(); - let y = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func>](x, y))) - } - #[bench] - pub fn [<$func f>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let x = rng.gen::(); - let y = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func f>](x, y))) - } - } - )*); - ($($func:ident);*) => ($( - paste::item! { - #[bench] - pub fn [<$func>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let x = rng.gen::(); - let n = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func>](x, n))) - } - #[bench] - pub fn [<$func f>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let x = rng.gen::(); - let n = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func f>](x, n))) - } - } - )*); -} -macro_rules! trinary { - ($($func:ident),*) => ($( - paste::item! 
{ - #[bench] - pub fn [<$func>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let x = rng.gen::(); - let y = rng.gen::(); - let z = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func>](x, y, z))) - } - #[bench] - pub fn [<$func f>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let x = rng.gen::(); - let y = rng.gen::(); - let z = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func f>](x, y, z))) - } - } - )*); -} -macro_rules! bessel { - ($($func:ident),*) => ($( - paste::item! { - #[bench] - pub fn [<$func>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let mut n = rng.gen::(); - n &= 0xffff; - let x = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func>](n, x))) - } - #[bench] - pub fn [<$func f>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let mut n = rng.gen::(); - n &= 0xffff; - let x = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func f>](n, x))) - } - } - )*); -} - -unary!( - acos, acosh, asin, atan, cbrt, ceil, cos, cosh, erf, exp, exp2, exp10, expm1, fabs, floor, j0, - j1, lgamma, log, log1p, log2, log10, rint, round, sin, sinh, sqrt, tan, tanh, tgamma, trunc, - y0, y1 -); -binary!(atan2, copysign, fdim, fmax, fmin, fmod, hypot, pow); -trinary!(fma); -bessel!(jn, yn); -binary!(ldexp; scalbn); From 582be68861bdc8466fb777c4595356ddfe8e8150 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 21 Oct 2024 18:35:11 -0500 Subject: [PATCH 0939/1459] Check benchmarks in CI --- libm/.github/workflows/main.yml | 2 +- libm/ci/run.sh | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index f0c5fe7c0..bfd86497b 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -125,7 +125,7 @@ jobs: - uses: Swatinem/rust-cache@v2 - name: Download musl source run: ./ci/download-musl.sh - - run: cargo bench --all + - run: cargo bench --all --features libm-test/short-benchmarks,libm-test/build-musl 
msrv: name: Check MSRV diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 9f642326b..a211bc98c 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -83,4 +83,8 @@ else # unstable with a feature $cmd --features "unstable-intrinsics" $cmd --release --features "unstable-intrinsics" + + # Make sure benchmarks have correct results + $cmd --benches + $cmd --benches --release fi From 81da6d0e3fc85903a17a502ebbc04d1e477ecb37 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 31 Oct 2024 22:47:11 -0500 Subject: [PATCH 0940/1459] Change prefixes used by the `Float` trait Change `EXPONENT_` to `EXP_` and `SIGNIFICAND_` to `SIG_`. These are pretty unambiguous, and just makes for less to type once these get used. --- libm/crates/libm-test/src/mpfloat.rs | 2 +- libm/src/math/support/float_traits.rs | 45 +++++++++++++++------------ 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 44962d116..2e6fdae7f 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -15,7 +15,7 @@ use crate::Float; /// Create a multiple-precision float with the correct number of bits for a concrete float type. fn new_mpfloat() -> MpFloat { - MpFloat::new(F::SIGNIFICAND_BITS + 1) + MpFloat::new(F::SIG_BITS + 1) } /// Set subnormal emulation and convert to a concrete float type. 
diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 4cf5d7c61..f90e99d52 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -27,34 +27,37 @@ pub trait Float: const ZERO: Self; const ONE: Self; + const INFINITY: Self; + const NEG_INFINITY: Self; + const NAN: Self; /// The bitwidth of the float type const BITS: u32; /// The bitwidth of the significand - const SIGNIFICAND_BITS: u32; + const SIG_BITS: u32; /// The bitwidth of the exponent - const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1; + const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1; /// The saturated value of the exponent (infinite representation), in the rightmost postiion. - const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1; + const EXP_MAX: u32 = (1 << Self::EXP_BITS) - 1; /// The exponent bias value - const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1; + const EXP_BIAS: u32 = Self::EXP_MAX >> 1; /// A mask for the sign bit const SIGN_MASK: Self::Int; /// A mask for the significand - const SIGNIFICAND_MASK: Self::Int; + const SIG_MASK: Self::Int; + + /// A mask for the exponent + const EXP_MASK: Self::Int; /// The implicit bit of the float format const IMPLICIT_BIT: Self::Int; - /// A mask for the exponent - const EXPONENT_MASK: Self::Int; - /// Returns `self` transmuted to `Self::Int` fn to_bits(self) -> Self::Int; @@ -105,14 +108,17 @@ macro_rules! 
float_impl { const ZERO: Self = 0.0; const ONE: Self = 1.0; + const INFINITY: Self = Self::INFINITY; + const NEG_INFINITY: Self = Self::NEG_INFINITY; + const NAN: Self = Self::NAN; const BITS: u32 = $bits; - const SIGNIFICAND_BITS: u32 = $significand_bits; + const SIG_BITS: u32 = $significand_bits; const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); - const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1; - const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS; - const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK); + const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1; + const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK); + const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS; fn to_bits(self) -> Self::Int { self.to_bits() @@ -126,8 +132,7 @@ macro_rules! float_impl { // necessary builtin (__unordtf2) to test whether `f128` is NaN. // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin // x is NaN if all the bits of the exponent are set and the significand is non-0 - x.to_bits() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK - && x.to_bits() & $ty::SIGNIFICAND_MASK != 0 + x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0 } if is_nan(self) && is_nan(rhs) { true } else { self.to_bits() == rhs.to_bits() } } @@ -135,10 +140,10 @@ macro_rules! float_impl { self.is_sign_negative() } fn exp(self) -> Self::ExpInt { - ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt + ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt } fn frac(self) -> Self::Int { - self.to_bits() & Self::SIGNIFICAND_MASK + self.to_bits() & Self::SIG_MASK } fn imp_frac(self) -> Self::Int { self.frac() | Self::IMPLICIT_BIT @@ -149,16 +154,16 @@ macro_rules! 
float_impl { fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self { Self::from_bits( ((negative as Self::Int) << (Self::BITS - 1)) - | ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK) - | (significand & Self::SIGNIFICAND_MASK), + | ((exponent << Self::SIG_BITS) & Self::EXP_MASK) + | (significand & Self::SIG_MASK), ) } fn normalize(significand: Self::Int) -> (i32, Self::Int) { - let shift = significand.leading_zeros().wrapping_sub(Self::EXPONENT_BITS); + let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int) } fn is_subnormal(self) -> bool { - (self.to_bits() & Self::EXPONENT_MASK) == Self::Int::ZERO + (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO } } }; From 72091faa3ae7e153c8bfb4498492ce174e014615 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 31 Oct 2024 23:47:41 -0500 Subject: [PATCH 0941/1459] Update libm `Float` and `Int` with functions from the test traits The test versions of `Float` and `Int` have a few more methods and constants available. Update the traits in `libm` with everything missing from `libm_test` so we will be able to merge these.
--- libm/src/math/support/float_traits.rs | 50 ++++++++++++++++++++++----- libm/src/math/support/int_traits.rs | 43 +++++++++++++++++++---- 2 files changed, 77 insertions(+), 16 deletions(-) diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index f90e99d52..a1d84faf2 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -1,4 +1,4 @@ -use core::ops; +use core::{fmt, ops}; use super::int_traits::{Int, MinInt}; @@ -6,7 +6,8 @@ use super::int_traits::{Int, MinInt}; #[allow(dead_code)] pub trait Float: Copy - + core::fmt::Debug + + fmt::Debug + + fmt::Display + PartialEq + PartialOrd + ops::AddAssign @@ -17,16 +18,17 @@ pub trait Float: + ops::Rem { /// A uint of the same width as the float - type Int: Int; + type Int: Int; /// A int of the same width as the float - type SignedInt: Int + MinInt; + type SignedInt: Int + MinInt; /// An int capable of containing the exponent bits plus a sign bit. This is signed. type ExpInt: Int; const ZERO: Self; const ONE: Self; + const NEG_ONE: Self; const INFINITY: Self; const NEG_INFINITY: Self; const NAN: Self; @@ -69,9 +71,18 @@ pub trait Float: /// compared. fn eq_repr(self, rhs: Self) -> bool; - /// Returns true if the sign is negative + /// Returns true if the value is NaN. + fn is_nan(self) -> bool; + + /// Returns true if the value is +inf or -inf. + fn is_infinite(self) -> bool; + + /// Returns true if the sign is negative. fn is_sign_negative(self) -> bool; + /// Returns if `self` is subnormal + fn is_subnormal(self) -> bool; + /// Returns the exponent, not adjusting for bias. fn exp(self) -> Self::ExpInt; @@ -95,8 +106,11 @@ pub trait Float: /// Returns (normalized exponent, normalized significand) fn normalize(significand: Self::Int) -> (i32, Self::Int); - /// Returns if `self` is subnormal - fn is_subnormal(self) -> bool; + /// Returns a number composed of the magnitude of self and the sign of sign. 
+ fn copysign(self, other: Self) -> Self; + + /// Returns a number that represents the sign of self. + fn signum(self) -> Self; } macro_rules! float_impl { @@ -108,6 +122,7 @@ macro_rules! float_impl { const ZERO: Self = 0.0; const ONE: Self = 1.0; + const NEG_ONE: Self = -1.0; const INFINITY: Self = Self::INFINITY; const NEG_INFINITY: Self = Self::NEG_INFINITY; const NAN: Self = Self::NAN; @@ -136,9 +151,18 @@ macro_rules! float_impl { } if is_nan(self) && is_nan(rhs) { true } else { self.to_bits() == rhs.to_bits() } } + fn is_nan(self) -> bool { + self.is_nan() + } + fn is_infinite(self) -> bool { + self.is_infinite() + } fn is_sign_negative(self) -> bool { self.is_sign_negative() } + fn is_subnormal(self) -> bool { + (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO + } fn exp(self) -> Self::ExpInt { ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt } @@ -162,8 +186,16 @@ macro_rules! float_impl { let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int) } - fn is_subnormal(self) -> bool { - (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO + fn copysign(self, other: Self) -> Self { + let mut x = self.to_bits(); + let y = other.to_bits(); + x &= !Self::SIGN_MASK; + x |= y & Self::SIGN_MASK; + Self::from_bits(x) + } + + fn signum(self) -> Self { + if self.is_nan() { self } else { Self::ONE.copysign(self) } } } }; diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index bdf3afd48..c5feef8d7 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -12,7 +12,7 @@ pub trait MinInt: /// Type with the same width but other signedness type OtherSign: MinInt; /// Unsigned version of Self - type UnsignedInt: MinInt; + type Unsigned: MinInt; /// If `Self` is a signed integer const SIGNED: bool; @@ -30,6 +30,7 @@ pub trait MinInt: #[allow(dead_code)] pub trait Int: MinInt + + fmt::Display + 
PartialEq + PartialOrd + ops::AddAssign @@ -47,8 +48,10 @@ pub trait Int: + ops::BitXor + ops::BitAnd { - fn unsigned(self) -> Self::UnsignedInt; - fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; + fn signed(self) -> ::OtherSign; + fn unsigned(self) -> Self::Unsigned; + fn from_unsigned(unsigned: Self::Unsigned) -> Self; + fn abs(self) -> Self; fn from_bool(b: bool) -> Self; @@ -56,10 +59,12 @@ pub trait Int: fn logical_shr(self, other: u32) -> Self; /// Absolute difference between two integers. - fn abs_diff(self, other: Self) -> Self::UnsignedInt; + fn abs_diff(self, other: Self) -> Self::Unsigned; // copied from primitive integers, but put in a trait fn is_zero(self) -> bool; + fn checked_add(self, other: Self) -> Option; + fn checked_sub(self, other: Self) -> Option; fn wrapping_neg(self) -> Self; fn wrapping_add(self, other: Self) -> Self; fn wrapping_mul(self, other: Self) -> Self; @@ -86,6 +91,14 @@ macro_rules! int_impl_common { self == Self::ZERO } + fn checked_add(self, other: Self) -> Option { + self.checked_add(other) + } + + fn checked_sub(self, other: Self) -> Option { + self.checked_sub(other) + } + fn wrapping_neg(self) -> Self { ::wrapping_neg(self) } @@ -132,7 +145,7 @@ macro_rules! int_impl { ($ity:ty, $uty:ty) => { impl MinInt for $uty { type OtherSign = $ity; - type UnsignedInt = $uty; + type Unsigned = $uty; const BITS: u32 = ::ZERO.count_zeros(); const SIGNED: bool = Self::MIN != Self::ZERO; @@ -144,10 +157,18 @@ macro_rules! int_impl { } impl Int for $uty { - fn unsigned(self) -> $uty { + fn signed(self) -> $ity { + self as $ity + } + + fn unsigned(self) -> Self { self } + fn abs(self) -> Self { + unimplemented!() + } + // It makes writing macros easier if this is implemented for both signed and unsigned #[allow(clippy::wrong_self_convention)] fn from_unsigned(me: $uty) -> Self { @@ -163,7 +184,7 @@ macro_rules! 
int_impl { impl MinInt for $ity { type OtherSign = $uty; - type UnsignedInt = $uty; + type Unsigned = $uty; const BITS: u32 = ::ZERO.count_zeros(); const SIGNED: bool = Self::MIN != Self::ZERO; @@ -175,10 +196,18 @@ macro_rules! int_impl { } impl Int for $ity { + fn signed(self) -> Self { + self + } + fn unsigned(self) -> $uty { self as $uty } + fn abs(self) -> Self { + self.abs() + } + fn from_unsigned(me: $uty) -> Self { me as $ity } From aa381b68817f307df4c19e3b59077afa7f2c0c34 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 31 Oct 2024 23:52:27 -0500 Subject: [PATCH 0942/1459] Expose the `support` module publicly with a test feature --- libm/Cargo.toml | 3 +++ libm/crates/compiler-builtins-smoke-test/Cargo.toml | 1 + libm/src/math/mod.rs | 9 ++++++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 178627766..98a60bfe3 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -26,6 +26,9 @@ unstable = ["unstable-intrinsics"] # Enable calls to functions in `core::intrinsics` unstable-intrinsics = [] +# Make some internal things public for testing. +unstable-test-support = [] + # Used to prevent using any intrinsics or arch-specific code. # # HACK: this is a negative feature which is generally a bad idea in Cargo, but diff --git a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml index 7118bfe06..e75c4f42b 100644 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -13,6 +13,7 @@ bench = false # Duplicated from libm's Cargo.toml unstable = [] unstable-intrinsics = [] +unstable-test-support = [] checked = [] force-soft-floats = [] diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 2cd77f132..afebdf586 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -74,9 +74,16 @@ macro_rules! 
div { }; } -// Private modules +// `support` may be public for testing +#[macro_use] +#[cfg(feature = "unstable-test-support")] +pub mod support; + #[macro_use] +#[cfg(not(feature = "unstable-test-support"))] mod support; + +// Private modules mod arch; mod expo2; mod fenv; From d5d7bc9a02728c25751b6fee5cdb4778ec343d1b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 31 Oct 2024 23:53:03 -0500 Subject: [PATCH 0943/1459] Replace `libm_test::{Float, Int}` with `libm::{Float, Int}` This involves moving some things from full generic implementations (e.g. `impl SomeTrait for F { /* ... */ }` to generic functions and macros to implement traits that call them, due to orphan rule violations after `Float` became a not-in-crate trait. `Hex` was moved to `test_traits` so we can eliminate `num_traits`. --- libm/crates/libm-test/Cargo.toml | 2 +- libm/crates/libm-test/src/lib.rs | 5 +- libm/crates/libm-test/src/num_traits.rs | 214 ------------------- libm/crates/libm-test/src/test_traits.rs | 261 +++++++++++++++++------ 4 files changed, 199 insertions(+), 283 deletions(-) delete mode 100644 libm/crates/libm-test/src/num_traits.rs diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index fedf745ed..3587b44e6 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -24,7 +24,7 @@ short-benchmarks = [] [dependencies] anyhow = "1.0.90" az = { version = "1.2.1", optional = true } -libm = { path = "../.." 
} +libm = { path = "../..", features = ["unstable-test-support"] } libm-macros = { path = "../libm-macros" } musl-math-sys = { path = "../musl-math-sys", optional = true } paste = "1.0.15" diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 6c7a3f5ec..56a872779 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -1,13 +1,12 @@ pub mod gen; #[cfg(feature = "test-multiprecision")] pub mod mpfloat; -mod num_traits; mod precision; mod test_traits; -pub use num_traits::{Float, Hex, Int}; +pub use libm::support::{Float, Int}; pub use precision::{MaybeOverride, SpecialCase, multiprec_allowed_ulp, musl_allowed_ulp}; -pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, TupleCall}; +pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall}; /// Result type for tests is usually from `anyhow`. Most times there is no success value to /// propagate. diff --git a/libm/crates/libm-test/src/num_traits.rs b/libm/crates/libm-test/src/num_traits.rs deleted file mode 100644 index e16f4e4dc..000000000 --- a/libm/crates/libm-test/src/num_traits.rs +++ /dev/null @@ -1,214 +0,0 @@ -use std::fmt; - -use crate::{MaybeOverride, SpecialCase, TestResult}; - -/// Common types and methods for floating point numbers. -pub trait Float: Copy + fmt::Display + fmt::Debug + PartialEq { - type Int: Int; - type SignedInt: Int + Int; - - const ZERO: Self; - const ONE: Self; - - /// The bitwidth of the float type - const BITS: u32; - - /// The bitwidth of the significand - const SIGNIFICAND_BITS: u32; - - /// The bitwidth of the exponent - const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1; - - fn is_nan(self) -> bool; - fn is_infinite(self) -> bool; - fn to_bits(self) -> Self::Int; - fn from_bits(bits: Self::Int) -> Self; - fn signum(self) -> Self; -} - -macro_rules! 
impl_float { - ($($fty:ty, $ui:ty, $si:ty, $significand_bits:expr;)+) => { - $( - impl Float for $fty { - type Int = $ui; - type SignedInt = $si; - - const ZERO: Self = 0.0; - const ONE: Self = 1.0; - - const BITS: u32 = <$ui>::BITS; - const SIGNIFICAND_BITS: u32 = $significand_bits; - - fn is_nan(self) -> bool { - self.is_nan() - } - fn is_infinite(self) -> bool { - self.is_infinite() - } - fn to_bits(self) -> Self::Int { - self.to_bits() - } - fn from_bits(bits: Self::Int) -> Self { - Self::from_bits(bits) - } - fn signum(self) -> Self { - self.signum() - } - } - - impl Hex for $fty { - fn hex(self) -> String { - self.to_bits().hex() - } - } - )+ - } -} - -impl_float!( - f32, u32, i32, 23; - f64, u64, i64, 52; -); - -/// Common types and methods for integers. -pub trait Int: Copy + fmt::Display + fmt::Debug + PartialEq { - type OtherSign: Int; - type Unsigned: Int; - const BITS: u32; - const SIGNED: bool; - - fn signed(self) -> ::OtherSign; - fn unsigned(self) -> Self::Unsigned; - fn checked_sub(self, other: Self) -> Option; - fn abs(self) -> Self; -} - -macro_rules! 
impl_int { - ($($ui:ty, $si:ty ;)+) => { - $( - impl Int for $ui { - type OtherSign = $si; - type Unsigned = Self; - const BITS: u32 = <$ui>::BITS; - const SIGNED: bool = false; - fn signed(self) -> Self::OtherSign { - self as $si - } - fn unsigned(self) -> Self { - self - } - fn checked_sub(self, other: Self) -> Option { - self.checked_sub(other) - } - fn abs(self) -> Self { - unimplemented!() - } - } - - impl Int for $si { - type OtherSign = $ui; - type Unsigned = $ui; - const BITS: u32 = <$ui>::BITS; - const SIGNED: bool = true; - fn signed(self) -> Self { - self - } - fn unsigned(self) -> $ui { - self as $ui - } - fn checked_sub(self, other: Self) -> Option { - self.checked_sub(other) - } - fn abs(self) -> Self { - self.abs() - } - } - - impl_int!(@for_both $si); - impl_int!(@for_both $ui); - - )+ - }; - - (@for_both $ty:ty) => { - impl Hex for $ty { - fn hex(self) -> String { - format!("{self:#0width$x}", width = ((Self::BITS / 4) + 2) as usize) - } - } - - impl $crate::CheckOutput for $ty - where - Input: Hex + fmt::Debug, - SpecialCase: MaybeOverride, - { - fn validate<'a>( - self, - expected: Self, - input: Input, - ctx: &$crate::CheckCtx, - ) -> TestResult { - if let Some(res) = SpecialCase::check_int(input, self, expected, ctx) { - return res; - } - - anyhow::ensure!( - self == expected, - "\ - \n input: {input:?} {ibits}\ - \n expected: {expected:<22?} {expbits}\ - \n actual: {self:<22?} {actbits}\ - ", - actbits = self.hex(), - expbits = expected.hex(), - ibits = input.hex(), - ); - - Ok(()) - } - } - } -} - -impl_int!( - u32, i32; - u64, i64; -); - -/// A helper trait to print something as hex with the correct number of nibbles, e.g. a `u32` -/// will always print with `0x` followed by 8 digits. -/// -/// This is only used for printing errors so allocating is okay. 
-pub trait Hex: Copy { - fn hex(self) -> String; -} - -impl Hex for (T1,) -where - T1: Hex, -{ - fn hex(self) -> String { - format!("({},)", self.0.hex()) - } -} - -impl Hex for (T1, T2) -where - T1: Hex, - T2: Hex, -{ - fn hex(self) -> String { - format!("({}, {})", self.0.hex(), self.1.hex()) - } -} - -impl Hex for (T1, T2, T3) -where - T1: Hex, - T2: Hex, - T3: Hex, -{ - fn hex(self) -> String { - format!("({}, {}, {})", self.0.hex(), self.1.hex(), self.2.hex()) - } -} diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index 34e15e0b2..67df83fb4 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -11,21 +11,7 @@ use std::fmt; use anyhow::{Context, bail, ensure}; -use crate::{Float, Hex, Int, MaybeOverride, SpecialCase, TestResult}; - -/// Implement this on types that can generate a sequence of tuples for test input. -pub trait GenerateInput { - fn get_cases(&self) -> impl Iterator; -} - -/// Trait for calling a function with a tuple as arguments. -/// -/// Implemented on the tuple with the function signature as the generic (so we can use the same -/// tuple for multiple signatures). -pub trait TupleCall: fmt::Debug { - type Output; - fn call(self, f: Func) -> Self::Output; -} +use crate::{Float, Int, MaybeOverride, SpecialCase, TestResult}; /// Context passed to [`CheckOutput`]. #[derive(Clone, Debug, PartialEq, Eq)] @@ -56,14 +42,38 @@ pub enum CheckBasis { Mpfr, } +/// Implement this on types that can generate a sequence of tuples for test input. +pub trait GenerateInput { + fn get_cases(&self) -> impl Iterator; +} + +/// Trait for calling a function with a tuple as arguments. +/// +/// Implemented on the tuple with the function signature as the generic (so we can use the same +/// tuple for multiple signatures). 
+pub trait TupleCall: fmt::Debug { + type Output; + fn call(self, f: Func) -> Self::Output; +} + /// A trait to implement on any output type so we can verify it in a generic way. pub trait CheckOutput: Sized { /// Validate `self` (actual) and `expected` are the same. /// /// `input` is only used here for error messages. - fn validate<'a>(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult; + fn validate(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult; +} + +/// A helper trait to print something as hex with the correct number of nibbles, e.g. a `u32` +/// will always print with `0x` followed by 8 digits. +/// +/// This is only used for printing errors so allocating is okay. +pub trait Hex: Copy { + fn hex(self) -> String; } +/* implement `TupleCall` */ + impl TupleCall R> for (T1,) where T1: fmt::Debug, @@ -143,72 +153,193 @@ where } } -// Implement for floats -impl CheckOutput for F +/* implement `Hex` */ + +impl Hex for (T1,) where - F: Float + Hex, + T1: Hex, +{ + fn hex(self) -> String { + format!("({},)", self.0.hex()) + } +} + +impl Hex for (T1, T2) +where + T1: Hex, + T2: Hex, +{ + fn hex(self) -> String { + format!("({}, {})", self.0.hex(), self.1.hex()) + } +} + +impl Hex for (T1, T2, T3) +where + T1: Hex, + T2: Hex, + T3: Hex, +{ + fn hex(self) -> String { + format!("({}, {}, {})", self.0.hex(), self.1.hex(), self.2.hex()) + } +} + +/* trait implementations for ints */ + +macro_rules! 
impl_int { + ($($ty:ty),*) => { + $( + impl Hex for $ty { + fn hex(self) -> String { + format!("{self:#0width$x}", width = ((Self::BITS / 4) + 2) as usize) + } + } + + impl $crate::CheckOutput for $ty + where + Input: Hex + fmt::Debug, + SpecialCase: MaybeOverride, + { + fn validate<'a>( + self, + expected: Self, + input: Input, + ctx: &$crate::CheckCtx, + ) -> TestResult { + validate_int(self, expected, input, ctx) + } + } + )* + }; +} + +fn validate_int<'a, I, Input>(actual: I, expected: I, input: Input, ctx: &CheckCtx) -> TestResult +where + I: Int + Hex, Input: Hex + fmt::Debug, - u32: TryFrom, SpecialCase: MaybeOverride, { - fn validate<'a>(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult { - // Create a wrapper function so we only need to `.with_context` once. - let inner = || -> TestResult { - let mut allowed_ulp = ctx.ulp; - - // If the tested function requires a nonstandard test, run it here. - if let Some(res) = - SpecialCase::check_float(input, self, expected, &mut allowed_ulp, ctx) - { - return res; + if let Some(res) = SpecialCase::check_int(input, actual, expected, ctx) { + return res; + } + + anyhow::ensure!( + actual == expected, + "\ + \n input: {input:?} {ibits}\ + \n expected: {expected:<22?} {expbits}\ + \n actual: {actual:<22?} {actbits}\ + ", + actbits = actual.hex(), + expbits = expected.hex(), + ibits = input.hex(), + ); + + Ok(()) +} + +impl_int!(u32, i32, u64, i64); + +/* trait implementations for floats */ + +macro_rules! impl_float { + ($($ty:ty),*) => { + $( + impl Hex for $ty { + fn hex(self) -> String { + format!( + "{:#0width$x}", + self.to_bits(), + width = ((Self::BITS / 4) + 2) as usize + ) + } } - // Check when both are NaNs - if self.is_nan() && expected.is_nan() { - // By default, NaNs have nothing special to check. 
- return Ok(()); - } else if self.is_nan() || expected.is_nan() { - // Check when only one is a NaN - bail!("real value != NaN") + impl $crate::CheckOutput for $ty + where + Input: Hex + fmt::Debug, + SpecialCase: MaybeOverride, + { + fn validate<'a>( + self, + expected: Self, + input: Input, + ctx: &$crate::CheckCtx, + ) -> TestResult { + validate_float(self, expected, input, ctx) + } } + )* + }; +} + +fn validate_float<'a, F, Input>(actual: F, expected: F, input: Input, ctx: &CheckCtx) -> TestResult +where + F: Float + Hex, + Input: Hex + fmt::Debug, + u32: TryFrom, + SpecialCase: MaybeOverride, +{ + // Create a wrapper function so we only need to `.with_context` once. + let inner = || -> TestResult { + let mut allowed_ulp = ctx.ulp; - // Make sure that the signs are the same before checing ULP to avoid wraparound - let act_sig = self.signum(); - let exp_sig = expected.signum(); - ensure!(act_sig == exp_sig, "mismatched signs {act_sig} {exp_sig}"); + // If the tested function requires a nonstandard test, run it here. + if let Some(res) = SpecialCase::check_float(input, actual, expected, &mut allowed_ulp, ctx) + { + return res; + } - if self.is_infinite() ^ expected.is_infinite() { - bail!("mismatched infinities"); - } + // Check when both are NaNs + if actual.is_nan() && expected.is_nan() { + // By default, NaNs have nothing special to check. 
+ return Ok(()); + } else if actual.is_nan() || expected.is_nan() { + // Check when only one is a NaN + bail!("real value != NaN") + } - let act_bits = self.to_bits().signed(); - let exp_bits = expected.to_bits().signed(); + // Make sure that the signs are the same before checing ULP to avoid wraparound + let act_sig = actual.signum(); + let exp_sig = expected.signum(); + ensure!(act_sig == exp_sig, "mismatched signs {act_sig} {exp_sig}"); - let ulp_diff = act_bits.checked_sub(exp_bits).unwrap().abs(); + if actual.is_infinite() ^ expected.is_infinite() { + bail!("mismatched infinities"); + } - let ulp_u32 = u32::try_from(ulp_diff) - .map_err(|e| anyhow::anyhow!("{e:?}: ulp of {ulp_diff} exceeds u32::MAX"))?; + let act_bits = actual.to_bits().signed(); + let exp_bits = expected.to_bits().signed(); - ensure!(ulp_u32 <= allowed_ulp, "ulp {ulp_diff} > {allowed_ulp}",); + let ulp_diff = act_bits.checked_sub(exp_bits).unwrap().abs(); - Ok(()) - }; + let ulp_u32 = u32::try_from(ulp_diff) + .map_err(|e| anyhow::anyhow!("{e:?}: ulp of {ulp_diff} exceeds u32::MAX"))?; - inner().with_context(|| { - format!( - "\ - \n input: {input:?} {ibits}\ - \n expected: {expected:<22?} {expbits}\ - \n actual: {self:<22?} {actbits}\ - ", - actbits = self.hex(), - expbits = expected.hex(), - ibits = input.hex(), - ) - }) - } + ensure!(ulp_u32 <= allowed_ulp, "ulp {ulp_diff} > {allowed_ulp}",); + + Ok(()) + }; + + inner().with_context(|| { + format!( + "\ + \n input: {input:?} {ibits}\ + \n expected: {expected:<22?} {expbits}\ + \n actual: {actual:<22?} {actbits}\ + ", + actbits = actual.hex(), + expbits = expected.hex(), + ibits = input.hex(), + ) + }) } +impl_float!(f32, f64); + +/* trait implementations for compound types */ + /// Implement `CheckOutput` for combinations of types. macro_rules! 
impl_tuples { ($(($a:ty, $b:ty);)*) => { From b1459f323b141ec8a493ac148896f02505f4bb30 Mon Sep 17 00:00:00 2001 From: hev Date: Fri, 1 Nov 2024 18:00:00 +0800 Subject: [PATCH 0944/1459] Disable `f16` for LoongArch64 (#722) Disable `f161` for LoongArch64 due to incorrect code generation on LLVM 19, which causes failures in `testcrate/tests/conv.rs`. This workaround will remain in place until llvm/llvm-project#109093 is merged or we upgrade to LLVM 20. --- configure.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configure.rs b/configure.rs index e23c0e839..68b4d68e6 100644 --- a/configure.rs +++ b/configure.rs @@ -64,6 +64,8 @@ pub fn configure_f16_f128(target: &Target) { "arm64ec" => (false, false), // `f16` crashes "s390x" => (false, true), + // FIXME(llvm): `f16` test failures fixed by + "loongarch64" => (false, true), // `f128` crashes "mips64" | "mips64r6" => (true, false), // `f128` crashes From 9ddeafce911b3878a908109b68ae4f504208c93b Mon Sep 17 00:00:00 2001 From: hev Date: Fri, 1 Nov 2024 18:34:51 +0800 Subject: [PATCH 0945/1459] Use `f16_enabled`/`f128_enabled` in `examples/intrinsics.rs` (#724) Enable conditional compilation for intrinsics with `f16_enabled` and `f128_enabled` --- examples/intrinsics.rs | 122 +++++++++++++++++++++++++++++++++++------ 1 file changed, 106 insertions(+), 16 deletions(-) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 368da6af2..ef7a3d430 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -28,21 +28,28 @@ extern "C" {} mod intrinsics { /* f16 operations */ + #[cfg(f16_enabled)] pub fn extendhfsf(x: f16) -> f32 { x as f32 } + #[cfg(f16_enabled)] pub fn extendhfdf(x: f16) -> f64 { x as f64 } - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[cfg(all( + f16_enabled, + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] pub fn extendhftf(x: f16) -> f128 { x as f128 } /* f32 operations */ + #[cfg(f16_enabled)] pub fn truncsfhf(x: f32) 
-> f16 { x as f16 } @@ -52,6 +59,7 @@ mod intrinsics { x as f64 } + #[cfg(f128_enabled)] pub fn extendsftf(x: f32) -> f128 { x as f128 } @@ -191,73 +199,104 @@ mod intrinsics { /* f128 operations */ - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[cfg(all( + f16_enabled, + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] pub fn trunctfhf(x: f128) -> f16 { x as f16 } + #[cfg(f128_enabled)] pub fn trunctfsf(x: f128) -> f32 { x as f32 } + #[cfg(f128_enabled)] pub fn trunctfdf(x: f128) -> f64 { x as f64 } - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] pub fn fixtfsi(x: f128) -> i32 { x as i32 } - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] pub fn fixtfdi(x: f128) -> i64 { x as i64 } - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] pub fn fixtfti(x: f128) -> i128 { x as i128 } - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] pub fn fixunstfsi(x: f128) -> u32 { x as u32 } - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] pub fn fixunstfdi(x: f128) -> u64 { x as u64 } - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] pub fn fixunstfti(x: f128) -> u128 { x as u128 } + #[cfg(f128_enabled)] pub fn addtf(a: f128, b: f128) -> f128 { a + b } + #[cfg(f128_enabled)] pub fn eqtf(a: f128, b: f128) -> bool { a == b 
} + #[cfg(f128_enabled)] pub fn gttf(a: f128, b: f128) -> bool { a > b } + #[cfg(f128_enabled)] pub fn lttf(a: f128, b: f128) -> bool { a < b } + #[cfg(f128_enabled)] pub fn multf(a: f128, b: f128) -> f128 { a * b } + #[cfg(f128_enabled)] pub fn divtf(a: f128, b: f128) -> f128 { a / b } + #[cfg(f128_enabled)] pub fn subtf(a: f128, b: f128) -> f128 { a - b } @@ -274,6 +313,7 @@ mod intrinsics { x as f64 } + #[cfg(f128_enabled)] pub fn floatsitf(x: i32) -> f128 { x as f128 } @@ -298,6 +338,7 @@ mod intrinsics { x as f64 } + #[cfg(f128_enabled)] pub fn floatditf(x: i64) -> f128 { x as f128 } @@ -330,6 +371,7 @@ mod intrinsics { x as f64 } + #[cfg(f128_enabled)] pub fn floattitf(x: i128) -> f128 { x as f128 } @@ -358,6 +400,7 @@ mod intrinsics { x as f64 } + #[cfg(f128_enabled)] pub fn floatunsitf(x: u32) -> f128 { x as f128 } @@ -382,6 +425,7 @@ mod intrinsics { x as f64 } + #[cfg(f128_enabled)] pub fn floatunditf(x: u64) -> f128 { x as f128 } @@ -405,6 +449,7 @@ mod intrinsics { x as f64 } + #[cfg(f128_enabled)] pub fn floatuntitf(x: u128) -> f128 { x as f128 } @@ -440,6 +485,7 @@ fn run() { // FIXME(f16_f128): some PPC f128 <-> int conversion functions have the wrong names + #[cfg(f128_enabled)] bb(addtf(bb(2.), bb(2.))); bb(aeabi_d2f(bb(2.))); bb(aeabi_d2i(bb(2.))); @@ -482,54 +528,98 @@ fn run() { bb(aeabi_uldivmod(bb(2), bb(3))); bb(ashlti3(bb(2), bb(2))); bb(ashrti3(bb(2), bb(2))); + #[cfg(f128_enabled)] bb(divtf(bb(2.), bb(2.))); bb(divti3(bb(2), bb(2))); + #[cfg(f128_enabled)] bb(eqtf(bb(2.), bb(2.))); + #[cfg(f16_enabled)] bb(extendhfdf(bb(2.))); + #[cfg(f16_enabled)] bb(extendhfsf(bb(2.))); - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[cfg(all( + f16_enabled, + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] bb(extendhftf(bb(2.))); + #[cfg(f128_enabled)] bb(extendsftf(bb(2.))); bb(fixdfti(bb(2.))); bb(fixsfti(bb(2.))); - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + 
#[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] bb(fixtfdi(bb(2.))); - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] bb(fixtfsi(bb(2.))); - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] bb(fixtfti(bb(2.))); bb(fixunsdfti(bb(2.))); bb(fixunssfti(bb(2.))); - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] bb(fixunstfdi(bb(2.))); - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] bb(fixunstfsi(bb(2.))); - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] bb(fixunstfti(bb(2.))); + #[cfg(f128_enabled)] bb(floatditf(bb(2))); + #[cfg(f128_enabled)] bb(floatsitf(bb(2))); bb(floattidf(bb(2))); bb(floattisf(bb(2))); + #[cfg(f128_enabled)] bb(floattitf(bb(2))); + #[cfg(f128_enabled)] bb(floatunditf(bb(2))); + #[cfg(f128_enabled)] bb(floatunsitf(bb(2))); bb(floatuntidf(bb(2))); bb(floatuntisf(bb(2))); + #[cfg(f128_enabled)] bb(floatuntitf(bb(2))); + #[cfg(f128_enabled)] bb(gttf(bb(2.), bb(2.))); bb(lshrti3(bb(2), bb(2))); + #[cfg(f128_enabled)] bb(lttf(bb(2.), bb(2.))); bb(moddi3(bb(2), bb(3))); bb(modti3(bb(2), bb(2))); bb(mulodi4(bb(2), bb(3))); bb(muloti4(bb(2), bb(2))); + #[cfg(f128_enabled)] bb(multf(bb(2.), bb(2.))); bb(multi3(bb(2), bb(2))); + #[cfg(f128_enabled)] bb(subtf(bb(2.), bb(2.))); + #[cfg(f16_enabled)] bb(truncsfhf(bb(2.))); + #[cfg(f128_enabled)] bb(trunctfdf(bb(2.))); - #[cfg(not(any(target_arch = "powerpc", target_arch = 
"powerpc64")))] + #[cfg(all( + f16_enabled, + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] bb(trunctfhf(bb(2.))); + #[cfg(f128_enabled)] bb(trunctfsf(bb(2.))); bb(udivti3(bb(2), bb(2))); bb(umoddi3(bb(2), bb(3))); From 5145d951e8f95b296f85c2afe5b15fb77ab6438b Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Thu, 31 Oct 2024 21:36:57 +0800 Subject: [PATCH 0946/1459] ci: add support for loongarch64-unknown-linux-gnu --- .github/workflows/main.yml | 3 +++ ci/docker/loongarch64-unknown-linux-gnu/Dockerfile | 13 +++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 ci/docker/loongarch64-unknown-linux-gnu/Dockerfile diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ec5c059ba..fee5c45ea 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -38,6 +38,9 @@ jobs: - target: i686-unknown-linux-gnu os: ubuntu-latest rust: nightly + - target: loongarch64-unknown-linux-gnu + os: ubuntu-latest + rust: nightly # MIPS targets disabled since they are dropped to tier 3. 
# See https://github.com/rust-lang/compiler-team/issues/648 #- target: mips-unknown-linux-gnu diff --git a/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile b/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000..5107d20a2 --- /dev/null +++ b/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile @@ -0,0 +1,13 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev qemu-user-static ca-certificates \ + gcc-14-loongarch64-linux-gnu libc6-dev-loong64-cross + +ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc-14 \ + CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-loongarch64-static \ + CC_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-gcc-14 \ + QEMU_LD_PREFIX=/usr/loongarch64-linux-gnu \ + RUST_TEST_THREADS=1 From 942d552ce81c45212bd0423d791a3a7eb4bb31b1 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 1 Nov 2024 05:48:10 -0500 Subject: [PATCH 0947/1459] Enable the changelog for `release-plz` This crate isn't meant for direct use, but having an easy way to see what changed between versions would still be helpful when this crate is updated in rust-lang/rust. --- .release-plz.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/.release-plz.toml b/.release-plz.toml index fce19d157..e32f67610 100644 --- a/.release-plz.toml +++ b/.release-plz.toml @@ -1,5 +1,4 @@ [workspace] -changelog_update = false semver_check = false # As part of the release process, we delete `libm/Cargo.toml`. 
Since From 3ae5e336f690885755da12400c9400ba64b125fb Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 1 Nov 2024 11:00:27 +0000 Subject: [PATCH 0948/1459] chore: release v0.1.138 --- CHANGELOG.md | 15 +++++++++++++++ Cargo.toml | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..f43b63b2b --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,15 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [0.1.138](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.137...compiler_builtins-v0.1.138) - 2024-11-01 + +### Other + +- Use `f16_enabled`/`f128_enabled` in `examples/intrinsics.rs` ([#724](https://github.com/rust-lang/compiler-builtins/pull/724)) +- Disable `f16` for LoongArch64 ([#722](https://github.com/rust-lang/compiler-builtins/pull/722)) diff --git a/Cargo.toml b/Cargo.toml index ecd5ecd08..3d8a1f255 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.137" +version = "0.1.138" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 60e96dbcffa139c4b6d8d6563ba09cf843c74da6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 1 Nov 2024 06:05:42 -0500 Subject: [PATCH 0949/1459] Fix errors reported by Clippy in `libm` --- libm/src/lib.rs | 3 + libm/src/math/asin.rs | 2 +- libm/src/math/asinf.rs | 2 +- libm/src/math/atan2f.rs | 22 +-- libm/src/math/atanhf.rs | 2 +- libm/src/math/exp2f.rs | 4 +- libm/src/math/expm1.rs | 2 +- 
libm/src/math/expm1f.rs | 2 +- libm/src/math/fabs.rs | 2 - libm/src/math/fdim.rs | 6 +- libm/src/math/fdimf.rs | 6 +- libm/src/math/fmaf.rs | 2 +- libm/src/math/fmod.rs | 4 +- libm/src/math/fmodf.rs | 2 +- libm/src/math/ilogb.rs | 2 +- libm/src/math/ilogbf.rs | 2 +- libm/src/math/jn.rs | 238 ++++++++++++++-------------- libm/src/math/jnf.rs | 230 +++++++++++++-------------- libm/src/math/lgamma_r.rs | 3 +- libm/src/math/lgammaf_r.rs | 3 +- libm/src/math/nextafter.rs | 4 +- libm/src/math/pow.rs | 6 +- libm/src/math/powf.rs | 12 +- libm/src/math/rem_pio2.rs | 2 +- libm/src/math/rem_pio2_large.rs | 2 - libm/src/math/sincosf.rs | 26 ++- libm/src/math/sqrt.rs | 12 +- libm/src/math/support/int_traits.rs | 3 + libm/src/math/tgamma.rs | 5 +- 29 files changed, 306 insertions(+), 305 deletions(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 98ac55988..511ab598d 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -5,12 +5,15 @@ #![allow(clippy::assign_op_pattern)] #![allow(clippy::deprecated_cfg_attr)] #![allow(clippy::eq_op)] +#![allow(clippy::excessive_precision)] #![allow(clippy::float_cmp)] #![allow(clippy::int_plus_one)] #![allow(clippy::many_single_char_names)] #![allow(clippy::mixed_case_hex_literals)] +#![allow(clippy::needless_late_init)] #![allow(clippy::needless_return)] #![allow(clippy::unreadable_literal)] +#![allow(clippy::zero_divided_by_zero)] mod libm_helper; mod math; diff --git a/libm/src/math/asin.rs b/libm/src/math/asin.rs index 12fe08fc7..12d0cd35f 100644 --- a/libm/src/math/asin.rs +++ b/libm/src/math/asin.rs @@ -90,7 +90,7 @@ pub fn asin(mut x: f64) -> f64 { /* |x| < 0.5 */ if ix < 0x3fe00000 { /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */ - if ix < 0x3e500000 && ix >= 0x00100000 { + if (0x00100000..0x3e500000).contains(&ix) { return x; } else { return x + x * comp_r(x * x); diff --git a/libm/src/math/asinf.rs b/libm/src/math/asinf.rs index 2c785abe2..0ea49c076 100644 --- a/libm/src/math/asinf.rs +++ b/libm/src/math/asinf.rs @@ 
-54,7 +54,7 @@ pub fn asinf(mut x: f32) -> f32 { if ix < 0x3f000000 { /* |x| < 0.5 */ /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */ - if (ix < 0x39800000) && (ix >= 0x00800000) { + if (0x00800000..0x39800000).contains(&ix) { return x; } return x + x * r(x * x); diff --git a/libm/src/math/atan2f.rs b/libm/src/math/atan2f.rs index fa33f54f6..95b466fff 100644 --- a/libm/src/math/atan2f.rs +++ b/libm/src/math/atan2f.rs @@ -42,9 +42,9 @@ pub fn atan2f(y: f32, x: f32) -> f32 { /* when y = 0 */ if iy == 0 { return match m { - 0 | 1 => y, /* atan(+-0,+anything)=+-0 */ - 2 => PI, /* atan(+0,-anything) = pi */ - 3 | _ => -PI, /* atan(-0,-anything) =-pi */ + 0 | 1 => y, /* atan(+-0,+anything)=+-0 */ + 2 => PI, /* atan(+0,-anything) = pi */ + _ => -PI, /* atan(-0,-anything) =-pi */ }; } /* when x = 0 */ @@ -55,17 +55,17 @@ pub fn atan2f(y: f32, x: f32) -> f32 { if ix == 0x7f800000 { return if iy == 0x7f800000 { match m { - 0 => PI / 4., /* atan(+INF,+INF) */ - 1 => -PI / 4., /* atan(-INF,+INF) */ - 2 => 3. * PI / 4., /* atan(+INF,-INF)*/ - 3 | _ => -3. * PI / 4., /* atan(-INF,-INF)*/ + 0 => PI / 4., /* atan(+INF,+INF) */ + 1 => -PI / 4., /* atan(-INF,+INF) */ + 2 => 3. * PI / 4., /* atan(+INF,-INF)*/ + _ => -3. 
* PI / 4., /* atan(-INF,-INF)*/ } } else { match m { - 0 => 0., /* atan(+...,+INF) */ - 1 => -0., /* atan(-...,+INF) */ - 2 => PI, /* atan(+...,-INF) */ - 3 | _ => -PI, /* atan(-...,-INF) */ + 0 => 0., /* atan(+...,+INF) */ + 1 => -0., /* atan(-...,+INF) */ + 2 => PI, /* atan(+...,-INF) */ + _ => -PI, /* atan(-...,-INF) */ } }; } diff --git a/libm/src/math/atanhf.rs b/libm/src/math/atanhf.rs index 3545411bb..80ccec1f6 100644 --- a/libm/src/math/atanhf.rs +++ b/libm/src/math/atanhf.rs @@ -18,7 +18,7 @@ pub fn atanhf(mut x: f32) -> f32 { if u < 0x3f800000 - (32 << 23) { /* handle underflow */ if u < (1 << 23) { - force_eval!((x * x) as f32); + force_eval!(x * x); } } else { /* |x| < 0.5, up to 1.7ulp error */ diff --git a/libm/src/math/exp2f.rs b/libm/src/math/exp2f.rs index f4867b80e..f452b6a20 100644 --- a/libm/src/math/exp2f.rs +++ b/libm/src/math/exp2f.rs @@ -95,7 +95,7 @@ pub fn exp2f(mut x: f32) -> f32 { /* NaN */ return x; } - if ui >= 0x43000000 && ui < 0x80000000 { + if (0x43000000..0x80000000).contains(&ui) { /* x >= 128 */ x *= x1p127; return x; @@ -127,7 +127,7 @@ pub fn exp2f(mut x: f32) -> f32 { let z: f64 = (x - uf) as f64; /* Compute r = exp2(y) = exp2ft[i0] * p(z). 
*/ let r: f64 = f64::from_bits(i!(EXP2FT, i0 as usize)); - let t: f64 = r as f64 * z; + let t: f64 = r * z; let r: f64 = r + t * (p1 as f64 + z * p2 as f64) + t * (z * z) * (p3 as f64 + z * p4 as f64); /* Scale by 2**k */ diff --git a/libm/src/math/expm1.rs b/libm/src/math/expm1.rs index 42608509a..f25153f32 100644 --- a/libm/src/math/expm1.rs +++ b/libm/src/math/expm1.rs @@ -115,7 +115,7 @@ pub fn expm1(mut x: f64) -> f64 { } ui = ((0x3ff + k) as u64) << 52; /* 2^k */ let twopk = f64::from_bits(ui); - if k < 0 || k > 56 { + if !(0..=56).contains(&k) { /* suffice to return exp(x)-1 */ y = x - e + 1.0; if k == 1024 { diff --git a/libm/src/math/expm1f.rs b/libm/src/math/expm1f.rs index a862fe255..12c6f532b 100644 --- a/libm/src/math/expm1f.rs +++ b/libm/src/math/expm1f.rs @@ -115,7 +115,7 @@ pub fn expm1f(mut x: f32) -> f32 { return 1. + 2. * (x - e); } let twopk = f32::from_bits(((0x7f + k) << 23) as u32); /* 2^k */ - if (k < 0) || (k > 56) { + if !(0..=56).contains(&k) { /* suffice to return exp(x)-1 */ let mut y = x - e + 1.; if k == 128 { diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs index 8d3ea2fd6..d7980eb65 100644 --- a/libm/src/math/fabs.rs +++ b/libm/src/math/fabs.rs @@ -1,5 +1,3 @@ -use core::u64; - /// Absolute value (magnitude) (f64) /// Calculates the absolute value (magnitude) of the argument `x`, /// by direct manipulation of the bit representation of `x`. diff --git a/libm/src/math/fdim.rs b/libm/src/math/fdim.rs index 014930097..7c58cb5a9 100644 --- a/libm/src/math/fdim.rs +++ b/libm/src/math/fdim.rs @@ -3,9 +3,9 @@ use core::f64; /// Positive difference (f64) /// /// Determines the positive difference between arguments, returning: -/// * x - y if x > y, or -/// * +0 if x <= y, or -/// * NAN if either argument is NAN. +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. /// /// A range error may occur. 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] diff --git a/libm/src/math/fdimf.rs b/libm/src/math/fdimf.rs index ea0b592d7..2abd49a64 100644 --- a/libm/src/math/fdimf.rs +++ b/libm/src/math/fdimf.rs @@ -3,9 +3,9 @@ use core::f32; /// Positive difference (f32) /// /// Determines the positive difference between arguments, returning: -/// * x - y if x > y, or -/// * +0 if x <= y, or -/// * NAN if either argument is NAN. +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. /// /// A range error may occur. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] diff --git a/libm/src/math/fmaf.rs b/libm/src/math/fmaf.rs index 10bdaeab3..79371c836 100644 --- a/libm/src/math/fmaf.rs +++ b/libm/src/math/fmaf.rs @@ -71,7 +71,7 @@ pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 { underflow may not be raised correctly, example: fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) */ - if e < 0x3ff - 126 && e >= 0x3ff - 149 && fetestexcept(FE_INEXACT) != 0 { + if ((0x3ff - 149)..(0x3ff - 126)).contains(&e) && fetestexcept(FE_INEXACT) != 0 { feclearexcept(FE_INEXACT); // prevent `xy + vz` from being CSE'd with `xy + z` above let vz: f32 = unsafe { read_volatile(&z) }; diff --git a/libm/src/math/fmod.rs b/libm/src/math/fmod.rs index d892ffd8b..df16162bc 100644 --- a/libm/src/math/fmod.rs +++ b/libm/src/math/fmod.rs @@ -1,5 +1,3 @@ -use core::u64; - #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmod(x: f64, y: f64) -> f64 { let mut uxi = x.to_bits(); @@ -74,7 +72,7 @@ pub fn fmod(x: f64, y: f64) -> f64 { } else { uxi >>= -ex + 1; } - uxi |= (sx as u64) << 63; + uxi |= sx << 63; f64::from_bits(uxi) } diff --git a/libm/src/math/fmodf.rs b/libm/src/math/fmodf.rs index 1d8001384..671af8580 100644 --- a/libm/src/math/fmodf.rs +++ b/libm/src/math/fmodf.rs @@ -1,4 +1,4 @@ -use core::{f32, u32}; +use core::f32; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmodf(x: f32, y: f32) -> f32 { diff --git 
a/libm/src/math/ilogb.rs b/libm/src/math/ilogb.rs index 9d58d0608..ccc4914be 100644 --- a/libm/src/math/ilogb.rs +++ b/libm/src/math/ilogb.rs @@ -21,7 +21,7 @@ pub fn ilogb(x: f64) -> i32 { e } else if e == 0x7ff { force_eval!(0.0 / 0.0); - if (i << 12) != 0 { FP_ILOGBNAN } else { i32::max_value() } + if (i << 12) != 0 { FP_ILOGBNAN } else { i32::MAX } } else { e - 0x3ff } diff --git a/libm/src/math/ilogbf.rs b/libm/src/math/ilogbf.rs index 85deb43c8..3585d6d36 100644 --- a/libm/src/math/ilogbf.rs +++ b/libm/src/math/ilogbf.rs @@ -21,7 +21,7 @@ pub fn ilogbf(x: f32) -> i32 { e } else if e == 0xff { force_eval!(0.0 / 0.0); - if (i << 9) != 0 { FP_ILOGBNAN } else { i32::max_value() } + if (i << 9) != 0 { FP_ILOGBNAN } else { i32::MAX } } else { e - 0x7f } diff --git a/libm/src/math/jn.rs b/libm/src/math/jn.rs index aff051f24..7f98ddc05 100644 --- a/libm/src/math/jn.rs +++ b/libm/src/math/jn.rs @@ -104,7 +104,8 @@ pub fn jn(n: i32, mut x: f64) -> f64 { 0 => -cos(x) + sin(x), 1 => -cos(x) - sin(x), 2 => cos(x) - sin(x), - 3 | _ => cos(x) + sin(x), + // 3 + _ => cos(x) + sin(x), }; b = INVSQRTPI * temp / sqrt(x); } else { @@ -118,130 +119,128 @@ pub fn jn(n: i32, mut x: f64) -> f64 { a = temp; } } - } else { - if ix < 0x3e100000 { - /* x < 2**-29 */ - /* x is tiny, return the first Taylor expansion of J(n,x) - * J(n,x) = 1/n!*(x/2)^n - ... - */ - if nm1 > 32 { - /* underflow */ - b = 0.0; - } else { - temp = x * 0.5; - b = temp; - a = 1.0; - i = 2; - while i <= nm1 + 1 { - a *= i as f64; /* a = n! */ - b *= temp; /* b = (x/2)^n */ - i += 1; - } - b = b / a; - } + } else if ix < 0x3e100000 { + /* x < 2**-29 */ + /* x is tiny, return the first Taylor expansion of J(n,x) + * J(n,x) = 1/n!*(x/2)^n - ... + */ + if nm1 > 32 { + /* underflow */ + b = 0.0; } else { - /* use backward recurrence */ - /* x x^2 x^2 - * J(n,x)/J(n-1,x) = ---- ------ ------ ..... - * 2n - 2(n+1) - 2(n+2) - * - * 1 1 1 - * (for large x) = ---- ------ ------ ..... 
- * 2n 2(n+1) 2(n+2) - * -- - ------ - ------ - - * x x x - * - * Let w = 2n/x and h=2/x, then the above quotient - * is equal to the continued fraction: - * 1 - * = ----------------------- - * 1 - * w - ----------------- - * 1 - * w+h - --------- - * w+2h - ... - * - * To determine how many terms needed, let - * Q(0) = w, Q(1) = w(w+h) - 1, - * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), - * When Q(k) > 1e4 good for single - * When Q(k) > 1e9 good for double - * When Q(k) > 1e17 good for quadruple - */ - /* determine k */ - let mut t: f64; - let mut q0: f64; - let mut q1: f64; - let mut w: f64; - let h: f64; - let mut z: f64; - let mut tmp: f64; - let nf: f64; + temp = x * 0.5; + b = temp; + a = 1.0; + i = 2; + while i <= nm1 + 1 { + a *= i as f64; /* a = n! */ + b *= temp; /* b = (x/2)^n */ + i += 1; + } + b = b / a; + } + } else { + /* use backward recurrence */ + /* x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h=2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... 
+ * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k */ + let mut t: f64; + let mut q0: f64; + let mut q1: f64; + let mut w: f64; + let h: f64; + let mut z: f64; + let mut tmp: f64; + let nf: f64; - let mut k: i32; + let mut k: i32; - nf = (nm1 as f64) + 1.0; - w = 2.0 * nf / x; - h = 2.0 / x; - z = w + h; - q0 = w; - q1 = w * z - 1.0; - k = 1; - while q1 < 1.0e9 { - k += 1; - z += h; - tmp = z * q1 - q0; - q0 = q1; - q1 = tmp; - } - t = 0.0; - i = k; - while i >= 0 { - t = 1.0 / (2.0 * ((i as f64) + nf) / x - t); + nf = (nm1 as f64) + 1.0; + w = 2.0 * nf / x; + h = 2.0 / x; + z = w + h; + q0 = w; + q1 = w * z - 1.0; + k = 1; + while q1 < 1.0e9 { + k += 1; + z += h; + tmp = z * q1 - q0; + q0 = q1; + q1 = tmp; + } + t = 0.0; + i = k; + while i >= 0 { + t = 1.0 / (2.0 * ((i as f64) + nf) / x - t); + i -= 1; + } + a = t; + b = 1.0; + /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * Hence, if n*(log(2n/x)) > ... + * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = nf * log(fabs(w)); + if tmp < 7.09782712893383973096e+02 { + i = nm1; + while i > 0 { + temp = b; + b = b * (2.0 * (i as f64)) / x - a; + a = temp; i -= 1; } - a = t; - b = 1.0; - /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) - * Hence, if n*(log(2n/x)) > ... 
- * single 8.8722839355e+01 - * double 7.09782712893383973096e+02 - * long double 1.1356523406294143949491931077970765006170e+04 - * then recurrent value may overflow and the result is - * likely underflow to zero - */ - tmp = nf * log(fabs(w)); - if tmp < 7.09782712893383973096e+02 { - i = nm1; - while i > 0 { - temp = b; - b = b * (2.0 * (i as f64)) / x - a; - a = temp; - i -= 1; - } - } else { - i = nm1; - while i > 0 { - temp = b; - b = b * (2.0 * (i as f64)) / x - a; - a = temp; - /* scale b to avoid spurious overflow */ - let x1p500 = f64::from_bits(0x5f30000000000000); // 0x1p500 == 2^500 - if b > x1p500 { - a /= b; - t /= b; - b = 1.0; - } - i -= 1; + } else { + i = nm1; + while i > 0 { + temp = b; + b = b * (2.0 * (i as f64)) / x - a; + a = temp; + /* scale b to avoid spurious overflow */ + let x1p500 = f64::from_bits(0x5f30000000000000); // 0x1p500 == 2^500 + if b > x1p500 { + a /= b; + t /= b; + b = 1.0; } + i -= 1; } - z = j0(x); - w = j1(x); - if fabs(z) >= fabs(w) { - b = t * z / b; - } else { - b = t * w / a; - } + } + z = j0(x); + w = j1(x); + if fabs(z) >= fabs(w) { + b = t * z / b; + } else { + b = t * w / a; } } @@ -315,7 +314,8 @@ pub fn yn(n: i32, x: f64) -> f64 { 0 => -sin(x) - cos(x), 1 => -sin(x) + cos(x), 2 => sin(x) + cos(x), - 3 | _ => sin(x) - cos(x), + // 3 + _ => sin(x) - cos(x), }; b = INVSQRTPI * temp / sqrt(x); } else { diff --git a/libm/src/math/jnf.rs b/libm/src/math/jnf.rs index e5afda448..754f8f33b 100644 --- a/libm/src/math/jnf.rs +++ b/libm/src/math/jnf.rs @@ -64,128 +64,126 @@ pub fn jnf(n: i32, mut x: f32) -> f32 { b = b * (2.0 * (i as f32) / x) - a; a = temp; } + } else if ix < 0x35800000 { + /* x < 2**-20 */ + /* x is tiny, return the first Taylor expansion of J(n,x) + * J(n,x) = 1/n!*(x/2)^n - ... + */ + if nm1 > 8 { + /* underflow */ + nm1 = 8; + } + temp = 0.5 * x; + b = temp; + a = 1.0; + i = 2; + while i <= nm1 + 1 { + a *= i as f32; /* a = n! 
*/ + b *= temp; /* b = (x/2)^n */ + i += 1; + } + b = b / a; } else { - if ix < 0x35800000 { - /* x < 2**-20 */ - /* x is tiny, return the first Taylor expansion of J(n,x) - * J(n,x) = 1/n!*(x/2)^n - ... - */ - if nm1 > 8 { - /* underflow */ - nm1 = 8; - } - temp = 0.5 * x; - b = temp; - a = 1.0; - i = 2; - while i <= nm1 + 1 { - a *= i as f32; /* a = n! */ - b *= temp; /* b = (x/2)^n */ - i += 1; - } - b = b / a; - } else { - /* use backward recurrence */ - /* x x^2 x^2 - * J(n,x)/J(n-1,x) = ---- ------ ------ ..... - * 2n - 2(n+1) - 2(n+2) - * - * 1 1 1 - * (for large x) = ---- ------ ------ ..... - * 2n 2(n+1) 2(n+2) - * -- - ------ - ------ - - * x x x - * - * Let w = 2n/x and h=2/x, then the above quotient - * is equal to the continued fraction: - * 1 - * = ----------------------- - * 1 - * w - ----------------- - * 1 - * w+h - --------- - * w+2h - ... - * - * To determine how many terms needed, let - * Q(0) = w, Q(1) = w(w+h) - 1, - * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), - * When Q(k) > 1e4 good for single - * When Q(k) > 1e9 good for double - * When Q(k) > 1e17 good for quadruple - */ - /* determine k */ - let mut t: f32; - let mut q0: f32; - let mut q1: f32; - let mut w: f32; - let h: f32; - let mut z: f32; - let mut tmp: f32; - let nf: f32; - let mut k: i32; + /* use backward recurrence */ + /* x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h=2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... 
+ * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k */ + let mut t: f32; + let mut q0: f32; + let mut q1: f32; + let mut w: f32; + let h: f32; + let mut z: f32; + let mut tmp: f32; + let nf: f32; + let mut k: i32; - nf = (nm1 as f32) + 1.0; - w = 2.0 * (nf as f32) / x; - h = 2.0 / x; - z = w + h; - q0 = w; - q1 = w * z - 1.0; - k = 1; - while q1 < 1.0e4 { - k += 1; - z += h; - tmp = z * q1 - q0; - q0 = q1; - q1 = tmp; - } - t = 0.0; - i = k; - while i >= 0 { - t = 1.0 / (2.0 * ((i as f32) + nf) / x - t); + nf = (nm1 as f32) + 1.0; + w = 2.0 * nf / x; + h = 2.0 / x; + z = w + h; + q0 = w; + q1 = w * z - 1.0; + k = 1; + while q1 < 1.0e4 { + k += 1; + z += h; + tmp = z * q1 - q0; + q0 = q1; + q1 = tmp; + } + t = 0.0; + i = k; + while i >= 0 { + t = 1.0 / (2.0 * ((i as f32) + nf) / x - t); + i -= 1; + } + a = t; + b = 1.0; + /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * Hence, if n*(log(2n/x)) > ... + * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = nf * logf(fabsf(w)); + if tmp < 88.721679688 { + i = nm1; + while i > 0 { + temp = b; + b = 2.0 * (i as f32) * b / x - a; + a = temp; i -= 1; } - a = t; - b = 1.0; - /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) - * Hence, if n*(log(2n/x)) > ... 
- * single 8.8722839355e+01 - * double 7.09782712893383973096e+02 - * long double 1.1356523406294143949491931077970765006170e+04 - * then recurrent value may overflow and the result is - * likely underflow to zero - */ - tmp = nf * logf(fabsf(w)); - if tmp < 88.721679688 { - i = nm1; - while i > 0 { - temp = b; - b = 2.0 * (i as f32) * b / x - a; - a = temp; - i -= 1; - } - } else { - i = nm1; - while i > 0 { - temp = b; - b = 2.0 * (i as f32) * b / x - a; - a = temp; - /* scale b to avoid spurious overflow */ - let x1p60 = f32::from_bits(0x5d800000); // 0x1p60 == 2^60 - if b > x1p60 { - a /= b; - t /= b; - b = 1.0; - } - i -= 1; + } else { + i = nm1; + while i > 0 { + temp = b; + b = 2.0 * (i as f32) * b / x - a; + a = temp; + /* scale b to avoid spurious overflow */ + let x1p60 = f32::from_bits(0x5d800000); // 0x1p60 == 2^60 + if b > x1p60 { + a /= b; + t /= b; + b = 1.0; } + i -= 1; } - z = j0f(x); - w = j1f(x); - if fabsf(z) >= fabsf(w) { - b = t * z / b; - } else { - b = t * w / a; - } + } + z = j0f(x); + w = j1f(x); + if fabsf(z) >= fabsf(w) { + b = t * z / b; + } else { + b = t * w / a; } } diff --git a/libm/src/math/lgamma_r.rs b/libm/src/math/lgamma_r.rs index b26177e6e..6becaad2c 100644 --- a/libm/src/math/lgamma_r.rs +++ b/libm/src/math/lgamma_r.rs @@ -160,7 +160,8 @@ fn sin_pi(mut x: f64) -> f64 { 1 => k_cos(x, 0.0), 2 => k_sin(-x, 0.0, 0), 3 => -k_cos(x, 0.0), - 0 | _ => k_sin(x, 0.0, 0), + // 0 + _ => k_sin(x, 0.0, 0), } } diff --git a/libm/src/math/lgammaf_r.rs b/libm/src/math/lgammaf_r.rs index 723c90daf..10cecee54 100644 --- a/libm/src/math/lgammaf_r.rs +++ b/libm/src/math/lgammaf_r.rs @@ -95,7 +95,8 @@ fn sin_pi(mut x: f32) -> f32 { 1 => k_cosf(y), 2 => k_sinf(-y), 3 => -k_cosf(y), - 0 | _ => k_sinf(y), + // 0 + _ => k_sinf(y), } } diff --git a/libm/src/math/nextafter.rs b/libm/src/math/nextafter.rs index 057626191..422bd7496 100644 --- a/libm/src/math/nextafter.rs +++ b/libm/src/math/nextafter.rs @@ -10,8 +10,8 @@ pub fn nextafter(x: f64, y: f64) 
-> f64 { return y; } - let ax = ux_i & !1_u64 / 2; - let ay = uy_i & !1_u64 / 2; + let ax = ux_i & (!1_u64 / 2); + let ay = uy_i & (!1_u64 / 2); if ax == 0 { if ay == 0 { return y; diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 7ecad291d..736465cd1 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -98,8 +98,8 @@ pub fn pow(x: f64, y: f64) -> f64 { let (hx, lx): (i32, u32) = ((x.to_bits() >> 32) as i32, x.to_bits() as u32); let (hy, ly): (i32, u32) = ((y.to_bits() >> 32) as i32, y.to_bits() as u32); - let mut ix: i32 = (hx & 0x7fffffff) as i32; - let iy: i32 = (hy & 0x7fffffff) as i32; + let mut ix: i32 = hx & 0x7fffffff_i32; + let iy: i32 = hy & 0x7fffffff_i32; /* x**0 = 1, even if x is NaN */ if ((iy as u32) | ly) == 0 { @@ -355,7 +355,7 @@ pub fn pow(x: f64, y: f64) -> f64 { } /* compute 2**(p_h+p_l) */ - let i: i32 = j & (0x7fffffff as i32); + let i: i32 = j & 0x7fffffff_i32; k = (i >> 20) - 0x3ff; let mut n: i32 = 0; diff --git a/libm/src/math/powf.rs b/libm/src/math/powf.rs index 2d9d1e4bb..839c6c23d 100644 --- a/libm/src/math/powf.rs +++ b/libm/src/math/powf.rs @@ -13,6 +13,8 @@ * ==================================================== */ +use core::cmp::Ordering; + use super::{fabsf, scalbnf, sqrtf}; const BP: [f32; 2] = [1.0, 1.5]; @@ -115,15 +117,13 @@ pub fn powf(x: f32, y: f32) -> f32 { /* special value of y */ if iy == 0x7f800000 { /* y is +-inf */ - if ix == 0x3f800000 { + match ix.cmp(&0x3f800000) { /* (-1)**+-inf is 1 */ - return 1.0; - } else if ix > 0x3f800000 { + Ordering::Equal => return 1.0, /* (|x|>1)**+-inf = inf,0 */ - return if hy >= 0 { y } else { 0.0 }; - } else { + Ordering::Greater => return if hy >= 0 { y } else { 0.0 }, /* (|x|<1)**+-inf = 0,inf */ - return if hy >= 0 { 0.0 } else { -y }; + Ordering::Less => return if hy >= 0 { 0.0 } else { -y }, } } if iy == 0x3f800000 { diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 4dfb8c658..917e90819 100644 --- a/libm/src/math/rem_pio2.rs +++ 
b/libm/src/math/rem_pio2.rs @@ -50,7 +50,7 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { fn medium(x: f64, ix: u32) -> (i32, f64, f64) { /* rint(x/(pi/2)), Assume round-to-nearest. */ - let tmp = x as f64 * INV_PIO2 + TO_INT; + let tmp = x * INV_PIO2 + TO_INT; // force rounding of tmp to it's storage format on x87 to avoid // excess precision issues. #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index 1dfbba3b1..ec8397f4b 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -425,8 +425,6 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> for i in (0..=jz).rev() { fw += i!(fq, i); } - // TODO: drop excess precision here once double_t is used - fw = fw as f64; i!(y, 0, =, if ih == 0 { fw } else { -fw }); fw = i!(fq, 0) - fw; for i in 1..=jz { diff --git a/libm/src/math/sincosf.rs b/libm/src/math/sincosf.rs index 423845e44..f33607676 100644 --- a/libm/src/math/sincosf.rs +++ b/libm/src/math/sincosf.rs @@ -67,14 +67,12 @@ pub fn sincosf(x: f32) -> (f32, f32) { } } /* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */ - else { - if sign { - s = -k_sinf(x as f64 + S2PIO2); - c = -k_cosf(x as f64 + S2PIO2); - } else { - s = -k_sinf(x as f64 - S2PIO2); - c = -k_cosf(x as f64 - S2PIO2); - } + else if sign { + s = -k_sinf(x as f64 + S2PIO2); + c = -k_cosf(x as f64 + S2PIO2); + } else { + s = -k_sinf(x as f64 - S2PIO2); + c = -k_cosf(x as f64 - S2PIO2); } return (s, c); @@ -91,14 +89,12 @@ pub fn sincosf(x: f32) -> (f32, f32) { s = -k_cosf(x as f64 - S3PIO2); c = k_sinf(x as f64 - S3PIO2); } + } else if sign { + s = k_sinf(x as f64 + S4PIO2); + c = k_cosf(x as f64 + S4PIO2); } else { - if sign { - s = k_sinf(x as f64 + S4PIO2); - c = k_cosf(x as f64 + S4PIO2); - } else { - s = k_sinf(x as f64 - S4PIO2); - c = k_cosf(x as f64 - S4PIO2); - } + s = k_sinf(x as f64 - S4PIO2); + c = k_cosf(x as f64 - S4PIO2); 
} return (s, c); diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index a443b7e4c..3eaf52cda 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -144,13 +144,15 @@ pub fn sqrt(x: f64) -> f64 { ix0 = (ix0 & 0x000fffff) | 0x00100000; if (m & 1) == 1 { /* odd m, double x to make it even */ - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix0 *= 2; + ix0 += ((ix1 & sign) >> 31).0 as i32; ix1 += ix1; } m >>= 1; /* m = [m/2] */ /* generate sqrt(x) bit by bit */ - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix0 *= 2; + ix0 += ((ix1 & sign) >> 31).0 as i32; ix1 += ix1; q = 0; /* [q,q1] = sqrt(x) */ q1 = Wrapping(0); @@ -165,7 +167,8 @@ pub fn sqrt(x: f64) -> f64 { ix0 -= t; q += r.0 as i32; } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix0 *= 2; + ix0 += ((ix1 & sign) >> 31).0 as i32; ix1 += ix1; r >>= 1; } @@ -186,7 +189,8 @@ pub fn sqrt(x: f64) -> f64 { ix1 -= t1; q1 += r; } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix0 *= 2; + ix0 += ((ix1 & sign) >> 31).0 as i32; ix1 += ix1; r >>= 1; } diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index c5feef8d7..b08907aa5 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -136,6 +136,9 @@ macro_rules! int_impl_common { } fn ilog2(self) -> u32 { + // On our older MSRV, this resolves to the trait method. Which won't actually work, + // but this is only called behind other gates. 
+ #[allow(clippy::incompatible_msrv)] ::ilog2(self) } }; diff --git a/libm/src/math/tgamma.rs b/libm/src/math/tgamma.rs index 3f38c0b1d..60451416a 100644 --- a/libm/src/math/tgamma.rs +++ b/libm/src/math/tgamma.rs @@ -45,7 +45,8 @@ fn sinpi(mut x: f64) -> f64 { 1 => k_cos(x, 0.0), 2 => k_sin(-x, 0.0, 0), 3 => -k_cos(x, 0.0), - 0 | _ => k_sin(x, 0.0, 0), + // 0 + _ => k_sin(x, 0.0, 0), } } @@ -143,7 +144,7 @@ pub fn tgamma(mut x: f64) -> f64 { /* special cases */ if ix >= 0x7ff00000 { /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */ - return x + core::f64::INFINITY; + return x + f64::INFINITY; } if ix < ((0x3ff - 54) << 20) { /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */ From fca5e57f649204fd08b0c17348d093f69018ca7b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 1 Nov 2024 06:32:06 -0500 Subject: [PATCH 0950/1459] Enable clippy for `libm` in CI --- libm/.github/workflows/main.yml | 10 ++++++++-- libm/ci/run.sh | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index bfd86497b..14e557884 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -82,8 +82,8 @@ jobs: [ -n "${{ matrix.channel }}" ] && channel="${{ matrix.channel }}" rustup update "$channel" --no-self-update rustup default "$channel" - rustup target add ${{ matrix.target }} - rustup component add llvm-tools-preview + rustup target add "${{ matrix.target }}" + rustup component add clippy llvm-tools-preview - uses: Swatinem/rust-cache@v2 with: key: ${{ matrix.target }} @@ -105,6 +105,12 @@ jobs: rustup target add x86_64-unknown-linux-musl cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} + - name: Clippy + run: | + # Run clippy on `libm` + cargo clippy --target "${{ matrix.target }}" --package libm + + builtins: name: Check use with compiler-builtins runs-on: ubuntu-latest diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 
a211bc98c..32453663e 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -88,3 +88,4 @@ else $cmd --benches $cmd --benches --release fi + From 86ccf1b0777b5486f6a6e9a7f1e0451d063a0fe4 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 31 Oct 2024 03:05:45 -0500 Subject: [PATCH 0951/1459] Introduce `hf32!` and `hf64!` macros for hex float support Rust does not have any native way to parse hex floats, but they are heavily used in the C algorithms that we derive from. Introduce a const function that can parse these, as well as macros `hf32!` and `hf64!` that ensure the string literals get handled at compiler time. These are currently not used but making everything available now will ease future development. Co-authored-by: quaternic <57393910+quaternic@users.noreply.github.com> --- libm/CONTRIBUTING.md | 34 +-- libm/src/math/support/hex_float.rs | 399 +++++++++++++++++++++++++++++ libm/src/math/support/macros.rs | 20 ++ libm/src/math/support/mod.rs | 3 + 4 files changed, 430 insertions(+), 26 deletions(-) create mode 100644 libm/src/math/support/hex_float.rs diff --git a/libm/CONTRIBUTING.md b/libm/CONTRIBUTING.md index a39623696..0a1741631 100644 --- a/libm/CONTRIBUTING.md +++ b/libm/CONTRIBUTING.md @@ -44,37 +44,19 @@ Check [PR #65] for an example. `mod.rs`. - You may encounter weird literals like `0x1p127f` in the MUSL code. These are hexadecimal floating - point literals. Rust (the language) doesn't support these kind of literals. The best way I have - found to deal with these literals is to turn them into their integer representation using the - [`hexf!`] macro and then turn them back into floats. See below: + point literals. Rust (the language) doesn't support these kind of literals. This crate provides + two macros, `hf32!` and `hf64!`, which convert string literals to floats at compile time. 
-[`hexf!`]: https://crates.io/crates/hexf - -``` rust -// Step 1: write a program to convert the float into its integer representation -#[macro_use] -extern crate hexf; - -fn main() { - println!("{:#x}", hexf32!("0x1.0p127").to_bits()); -} -``` - -``` console -$ # Step 2: run the program -$ cargo run -0x7f000000 -``` - -``` rust -// Step 3: copy paste the output into libm -let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 12 -``` + ```rust + assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000); + assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000); + ``` - Rust code panics on arithmetic overflows when not optimized. You may need to use the [`Wrapping`] - newtype to avoid this problem. + newtype to avoid this problem, or individual methods like [`wrapping_add`]. [`Wrapping`]: https://doc.rust-lang.org/std/num/struct.Wrapping.html +[`wrapping_add`]: https://doc.rust-lang.org/std/primitive.u32.html#method.wrapping_add ## Testing diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs new file mode 100644 index 000000000..80434a5ec --- /dev/null +++ b/libm/src/math/support/hex_float.rs @@ -0,0 +1,399 @@ +//! Utilities for working with hex float formats. 
+ +#![allow(dead_code)] // FIXME: remove once this gets used + +/// Construct a 32-bit float from hex float representation (C-style) +pub const fn hf32(s: &str) -> f32 { + f32_from_bits(parse_any(s, 32, 23) as u32) +} + +/// Construct a 64-bit float from hex float representation (C-style) +pub const fn hf64(s: &str) -> f64 { + f64_from_bits(parse_any(s, 64, 52) as u64) +} + +const fn parse_any(s: &str, bits: u32, sig_bits: u32) -> u128 { + let exp_bits: u32 = bits - sig_bits - 1; + let max_msb: i32 = (1 << (exp_bits - 1)) - 1; + // The exponent of one ULP in the subnormals + let min_lsb: i32 = 1 - max_msb - sig_bits as i32; + + let (neg, mut sig, exp) = parse_hex(s.as_bytes()); + + if sig == 0 { + return (neg as u128) << (bits - 1); + } + + // exponents of the least and most significant bits in the value + let lsb = sig.trailing_zeros() as i32; + let msb = u128_ilog2(sig) as i32; + let sig_bits = sig_bits as i32; + + assert!(msb - lsb <= sig_bits, "the value is too precise"); + assert!(msb + exp <= max_msb, "the value is too huge"); + assert!(lsb + exp >= min_lsb, "the value is too tiny"); + + // The parsed value is X = sig * 2^exp + // Expressed as a multiple U of the smallest subnormal value: + // X = U * 2^min_lsb, so U = sig * 2^(exp-min_lsb) + let mut uexp = exp - min_lsb; + + let shift = if uexp + msb >= sig_bits { + // normal, shift msb to position sig_bits + sig_bits - msb + } else { + // subnormal, shift so that uexp becomes 0 + uexp + }; + + if shift >= 0 { + sig <<= shift; + } else { + sig >>= -shift; + } + uexp -= shift; + + // the most significant bit is like having 1 in the exponent bits + // add any leftover exponent to that + assert!(uexp >= 0 && uexp < (1 << exp_bits) - 2); + sig += (uexp as u128) << sig_bits; + + // finally, set the sign bit if necessary + sig | ((neg as u128) << (bits - 1)) +} + +/// Parse a hexadecimal float x +/// returns (s,n,e): +/// s == x.is_sign_negative() +/// n * 2^e == x.abs() +const fn parse_hex(mut b: &[u8]) -> (bool, 
u128, i32) { + let mut neg = false; + let mut sig: u128 = 0; + let mut exp: i32 = 0; + + if let &[c @ (b'-' | b'+'), ref rest @ ..] = b { + b = rest; + neg = c == b'-'; + } + + if let &[b'0', b'x' | b'X', ref rest @ ..] = b { + b = rest; + } else { + panic!("no hex indicator"); + } + + let mut seen_point = false; + let mut some_digits = false; + + while let &[c, ref rest @ ..] = b { + b = rest; + + match c { + b'.' => { + assert!(!seen_point); + seen_point = true; + continue; + } + b'p' | b'P' => break, + c => { + let digit = hex_digit(c); + some_digits = true; + let of; + (sig, of) = sig.overflowing_mul(16); + assert!(!of, "too many digits"); + sig |= digit as u128; + // up until the fractional point, the value grows + // with more digits, but after it the exponent is + // compensated to match. + if seen_point { + exp -= 4; + } + } + } + } + assert!(some_digits, "at least one digit is required"); + some_digits = false; + + let mut negate_exp = false; + if let &[c @ (b'-' | b'+'), ref rest @ ..] = b { + b = rest; + negate_exp = c == b'-'; + } + + let mut pexp: i32 = 0; + while let &[c, ref rest @ ..] 
= b { + b = rest; + let digit = dec_digit(c); + some_digits = true; + let of; + (pexp, of) = pexp.overflowing_mul(10); + assert!(!of, "too many exponent digits"); + pexp += digit as i32; + } + assert!(some_digits, "at least one exponent digit is required"); + + if negate_exp { + exp -= pexp; + } else { + exp += pexp; + } + + (neg, sig, exp) +} + +const fn dec_digit(c: u8) -> u8 { + match c { + b'0'..=b'9' => c - b'0', + _ => panic!("bad char"), + } +} + +const fn hex_digit(c: u8) -> u8 { + match c { + b'0'..=b'9' => c - b'0', + b'a'..=b'f' => c - b'a' + 10, + b'A'..=b'F' => c - b'A' + 10, + _ => panic!("bad char"), + } +} + +/* FIXME(msrv): vendor some things that are not const stable at our MSRV */ + +/// `f32::from_bits` +const fn f32_from_bits(v: u32) -> f32 { + unsafe { core::mem::transmute(v) } +} + +/// `f64::from_bits` +const fn f64_from_bits(v: u64) -> f64 { + unsafe { core::mem::transmute(v) } +} + +/// `u128::ilog2` +const fn u128_ilog2(v: u128) -> u32 { + assert!(v != 0); + u128::BITS - 1 - v.leading_zeros() +} + +#[cfg(test)] +mod tests { + extern crate std; + use std::{format, println}; + + use super::*; + + #[test] + fn test_parse_any() { + for k in -149..=127 { + let s = format!("0x1p{k}"); + let x = hf32(&s); + let y = if k < 0 { 0.5f32.powi(-k) } else { 2.0f32.powi(k) }; + assert_eq!(x, y); + } + + let mut s = *b"0x.0000000p-121"; + for e in 0..40 { + for k in 0..(1 << 15) { + let expected = f32::from_bits(k) * 2.0f32.powi(e); + let x = hf32(std::str::from_utf8(&s).unwrap()); + assert_eq!( + x.to_bits(), + expected.to_bits(), + "\ + e={e}\n\ + k={k}\n\ + x={x}\n\ + expected={expected}\n\ + s={}\n\ + f32::from_bits(k)={}\n\ + 2.0f32.powi(e)={}\ + ", + std::str::from_utf8(&s).unwrap(), + f32::from_bits(k), + 2.0f32.powi(e), + ); + for i in (3..10).rev() { + if s[i] == b'f' { + s[i] = b'0'; + } else if s[i] == b'9' { + s[i] = b'a'; + break; + } else { + s[i] += 1; + break; + } + } + } + for i in (12..15).rev() { + if s[i] == b'0' { + s[i] = b'9'; + } 
else { + s[i] -= 1; + break; + } + } + for i in (3..10).rev() { + s[i] = b'0'; + } + } + } + + #[test] + fn test_f32() { + let checks = [ + ("0x.1234p+16", (0x1234 as f32).to_bits()), + ("0x1.234p+12", (0x1234 as f32).to_bits()), + ("0x12.34p+8", (0x1234 as f32).to_bits()), + ("0x123.4p+4", (0x1234 as f32).to_bits()), + ("0x1234p+0", (0x1234 as f32).to_bits()), + ("0x1234.p+0", (0x1234 as f32).to_bits()), + ("0x1234.0p+0", (0x1234 as f32).to_bits()), + ("0x1.fffffep+127", f32::MAX.to_bits()), + ("0x1.0p+1", 2.0f32.to_bits()), + ("0x1.0p+0", 1.0f32.to_bits()), + ("0x1.ffep+8", 0x43fff000), + ("+0x1.ffep+8", 0x43fff000), + ("0x1p+0", 0x3f800000), + ("0x1.99999ap-4", 0x3dcccccd), + ("0x1.9p+6", 0x42c80000), + ("0x1.2d5ed2p+20", 0x4996af69), + ("-0x1.348eb8p+10", 0xc49a475c), + ("-0x1.33dcfep-33", 0xaf19ee7f), + ("0x0.0p0", 0.0f32.to_bits()), + ("-0x0.0p0", (-0.0f32).to_bits()), + ("0x1.0p0", 1.0f32.to_bits()), + ("0x1.99999ap-4", (0.1f32).to_bits()), + ("-0x1.99999ap-4", (-0.1f32).to_bits()), + ("0x1.111114p-127", 0x00444445), + ("0x1.23456p-130", 0x00091a2b), + ("0x1p-149", 0x00000001), + ]; + for (s, exp) in checks { + println!("parsing {s}"); + let act = hf32(s).to_bits(); + assert_eq!( + act, exp, + "parsing {s}: {act:#010x} != {exp:#010x}\nact: {act:#034b}\nexp: {exp:#034b}" + ); + } + } + + #[test] + fn test_f64() { + let checks = [ + ("0x.1234p+16", (0x1234 as f64).to_bits()), + ("0x1.234p+12", (0x1234 as f64).to_bits()), + ("0x12.34p+8", (0x1234 as f64).to_bits()), + ("0x123.4p+4", (0x1234 as f64).to_bits()), + ("0x1234p+0", (0x1234 as f64).to_bits()), + ("0x1234.p+0", (0x1234 as f64).to_bits()), + ("0x1234.0p+0", (0x1234 as f64).to_bits()), + ("0x1.ffep+8", 0x407ffe0000000000), + ("0x1p+0", 0x3ff0000000000000), + ("0x1.999999999999ap-4", 0x3fb999999999999a), + ("0x1.9p+6", 0x4059000000000000), + ("0x1.2d5ed1fe1da7bp+20", 0x4132d5ed1fe1da7b), + ("-0x1.348eb851eb852p+10", 0xc09348eb851eb852), + ("-0x1.33dcfe54a3803p-33", 0xbde33dcfe54a3803), + ("0x1.0p0", 
1.0f64.to_bits()), + ("0x0.0p0", 0.0f64.to_bits()), + ("-0x0.0p0", (-0.0f64).to_bits()), + ("0x1.999999999999ap-4", 0.1f64.to_bits()), + ("0x1.999999999998ap-4", (0.1f64 - f64::EPSILON).to_bits()), + ("-0x1.999999999999ap-4", (-0.1f64).to_bits()), + ("-0x1.999999999998ap-4", (-0.1f64 + f64::EPSILON).to_bits()), + ("0x0.8000000000001p-1022", 0x0008000000000001), + ("0x0.123456789abcdp-1022", 0x000123456789abcd), + ("0x0.0000000000002p-1022", 0x0000000000000002), + ]; + for (s, exp) in checks { + println!("parsing {s}"); + let act = hf64(s).to_bits(); + assert_eq!( + act, exp, + "parsing {s}: {act:#018x} != {exp:#018x}\nact: {act:#066b}\nexp: {exp:#066b}" + ); + } + } + + #[test] + fn test_f32_almost_extra_precision() { + // Exact maximum precision allowed + hf32("0x1.abcdeep+0"); + } + + #[test] + fn test_macros() { + assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000u32); + assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000u64); + } +} + +#[cfg(test)] +// FIXME(ppc): something with `should_panic` tests cause a SIGILL with ppc64le +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +mod tests_panicking { + extern crate std; + use super::*; + + #[test] + #[should_panic] + fn test_f32_extra_precision2() { + // One bit more than the above. + hf32("0x1.ffffffp+127"); + } + + #[test] + #[should_panic(expected = "the value is too huge")] + fn test_f32_overflow() { + // One bit more than the above. + hf32("0x1p+128"); + } + + #[test] + #[should_panic(expected = "the value is too precise")] + fn test_f32_extra_precision() { + // One bit more than the above. 
+ hf32("0x1.abcdefp+0"); + } + + #[test] + fn test_f32_tiniest() { + let x = hf32("0x1.p-149"); + let y = hf32("0x0.0000000000000001p-85"); + let z = hf32("0x0.8p-148"); + assert_eq!(x, y); + assert_eq!(x, z); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f32_too_tiny() { + hf32("0x1.p-150"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f32_also_too_tiny() { + hf32("0x0.8p-149"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f32_again_too_tiny() { + hf32("0x0.0000000000000001p-86"); + } + + #[test] + fn test_f64_almost_extra_precision() { + // Exact maximum precision allowed + hf64("0x1.abcdabcdabcdfp+0"); + } + + #[test] + #[should_panic(expected = "the value is too precise")] + fn test_f64_extra_precision() { + // One bit more than the above. + hf64("0x1.abcdabcdabcdf8p+0"); + } +} diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs index b14bbec38..9441eace5 100644 --- a/libm/src/math/support/macros.rs +++ b/libm/src/math/support/macros.rs @@ -105,3 +105,23 @@ macro_rules! select_implementation { (@cfg ; $ex:expr) => { }; (@cfg $provided:meta; $ex:expr) => { #[cfg($provided)] $ex }; } + +/// Construct a 32-bit float from hex float representation (C-style), guaranteed to +/// evaluate at compile time. +#[allow(unused_macros)] +macro_rules! hf32 { + ($s:literal) => {{ + const X: f32 = $crate::math::support::hf32($s); + X + }}; +} + +/// Construct a 64-bit float from hex float representation (C-style), guaranteed to +/// evaluate at compile time. +#[allow(unused_macros)] +macro_rules! 
hf64 { + ($s:literal) => {{ + const X: f64 = $crate::math::support::hf64($s); + X + }}; +} diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index f054df6cd..04a313abc 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -1,7 +1,10 @@ #[macro_use] pub mod macros; mod float_traits; +mod hex_float; mod int_traits; pub use float_traits::Float; +#[allow(unused_imports)] +pub use hex_float::{hf32, hf64}; pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; From dbb3c58f6abad4ca0fd5420557d428fd9284e5ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johanna=20S=C3=B6rng=C3=A5rd?= <44257381+JSorngard@users.noreply.github.com> Date: Fri, 1 Nov 2024 13:29:03 +0100 Subject: [PATCH 0952/1459] Add some more basic docstrings (#352) * Add docstrings to the tgamma functions * Add docstrings to the lgamma functions * Add docstrings to trunc * Add docstrings to exp10 functions --- libm/src/math/exp10.rs | 1 + libm/src/math/exp10f.rs | 1 + libm/src/math/lgamma.rs | 2 ++ libm/src/math/lgammaf.rs | 2 ++ libm/src/math/tgamma.rs | 1 + libm/src/math/tgammaf.rs | 1 + libm/src/math/trunc.rs | 3 +++ libm/src/math/truncf.rs | 3 +++ 8 files changed, 14 insertions(+) diff --git a/libm/src/math/exp10.rs b/libm/src/math/exp10.rs index 559930e10..2c3df0173 100644 --- a/libm/src/math/exp10.rs +++ b/libm/src/math/exp10.rs @@ -6,6 +6,7 @@ const P10: &[f64] = &[ 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, ]; +/// Calculates 10 raised to the power of `x` (f64). 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp10(x: f64) -> f64 { let (mut y, n) = modf(x); diff --git a/libm/src/math/exp10f.rs b/libm/src/math/exp10f.rs index 786305481..e81d18380 100644 --- a/libm/src/math/exp10f.rs +++ b/libm/src/math/exp10f.rs @@ -5,6 +5,7 @@ const LN10_F64: f64 = 3.32192809488736234787031942948939; const P10: &[f32] = &[1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7]; +/// Calculates 10 raised to the power of `x` (f32). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp10f(x: f32) -> f32 { let (mut y, n) = modff(x); diff --git a/libm/src/math/lgamma.rs b/libm/src/math/lgamma.rs index a08bc5b64..8312dc186 100644 --- a/libm/src/math/lgamma.rs +++ b/libm/src/math/lgamma.rs @@ -1,5 +1,7 @@ use super::lgamma_r; +/// The natural logarithm of the +/// [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f64). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn lgamma(x: f64) -> f64 { lgamma_r(x).0 diff --git a/libm/src/math/lgammaf.rs b/libm/src/math/lgammaf.rs index a9c2da75b..d37512397 100644 --- a/libm/src/math/lgammaf.rs +++ b/libm/src/math/lgammaf.rs @@ -1,5 +1,7 @@ use super::lgammaf_r; +/// The natural logarithm of the +/// [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f32). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn lgammaf(x: f32) -> f32 { lgammaf_r(x).0 diff --git a/libm/src/math/tgamma.rs b/libm/src/math/tgamma.rs index 60451416a..305986064 100644 --- a/libm/src/math/tgamma.rs +++ b/libm/src/math/tgamma.rs @@ -130,6 +130,7 @@ fn s(x: f64) -> f64 { return num / den; } +/// The [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f64). 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tgamma(mut x: f64) -> f64 { let u: u64 = x.to_bits(); diff --git a/libm/src/math/tgammaf.rs b/libm/src/math/tgammaf.rs index 23e3814f9..fe178f7a3 100644 --- a/libm/src/math/tgammaf.rs +++ b/libm/src/math/tgammaf.rs @@ -1,5 +1,6 @@ use super::tgamma; +/// The [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f32). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tgammaf(x: f32) -> f32 { tgamma(x as f64) as f32 diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs index 6961bb950..34bc2fdfa 100644 --- a/libm/src/math/trunc.rs +++ b/libm/src/math/trunc.rs @@ -1,5 +1,8 @@ use core::f64; +/// Rounds the number toward 0 to the closest integral value (f64). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn trunc(x: f64) -> f64 { select_implementation! { diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs index 8270c8eb3..a74f78987 100644 --- a/libm/src/math/truncf.rs +++ b/libm/src/math/truncf.rs @@ -1,5 +1,8 @@ use core::f32; +/// Rounds the number toward 0 to the closest integral value (f32). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn truncf(x: f32) -> f32 { select_implementation! 
{ From 4d65f13469314e34d0e3c0e6d2be8d93c9313b24 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 1 Nov 2024 07:45:19 -0500 Subject: [PATCH 0953/1459] Resolve clippy errors in `libm` tests and check this in CI --- libm/.github/workflows/main.yml | 5 +- libm/src/math/ceil.rs | 6 +-- libm/src/math/ceilf.rs | 6 +-- libm/src/math/fabs.rs | 8 ++-- libm/src/math/fabsf.rs | 8 ++-- libm/src/math/floor.rs | 6 +-- libm/src/math/floorf.rs | 6 +-- libm/src/math/pow.rs | 85 ++++++++++++++++----------------- libm/src/math/sqrt.rs | 9 ++-- libm/src/math/sqrtf.rs | 9 ++-- 10 files changed, 65 insertions(+), 83 deletions(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 14e557884..c79d637ec 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -106,9 +106,10 @@ jobs: cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} - name: Clippy - run: | + # Tests and utilities can't build on no_std targets + if: "!contains(matrix.target, 'thumb')" # Run clippy on `libm` - cargo clippy --target "${{ matrix.target }}" --package libm + run: cargo clippy --target "${{ matrix.target }}" --package libm --all-targets builtins: diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index c7e857dbb..c198ebcfe 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -34,8 +34,6 @@ pub fn ceil(x: f64) -> f64 { #[cfg(test)] mod tests { - use core::f64::*; - use super::*; #[test] @@ -48,8 +46,8 @@ mod tests { #[test] fn spec_tests() { // Not Asserted: that the current rounding mode has no effect. 
- assert!(ceil(NAN).is_nan()); - for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { + assert!(ceil(f64::NAN).is_nan()); + for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY].iter().copied() { assert_eq!(ceil(f), f); } } diff --git a/libm/src/math/ceilf.rs b/libm/src/math/ceilf.rs index 0da384350..9eb2ec07a 100644 --- a/libm/src/math/ceilf.rs +++ b/libm/src/math/ceilf.rs @@ -42,8 +42,6 @@ pub fn ceilf(x: f32) -> f32 { #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { - use core::f32::*; - use super::*; #[test] @@ -56,8 +54,8 @@ mod tests { #[test] fn spec_tests() { // Not Asserted: that the current rounding mode has no effect. - assert!(ceilf(NAN).is_nan()); - for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { + assert!(ceilf(f32::NAN).is_nan()); + for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY].iter().copied() { assert_eq!(ceilf(f), f); } } diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs index d7980eb65..d083053e1 100644 --- a/libm/src/math/fabs.rs +++ b/libm/src/math/fabs.rs @@ -14,8 +14,6 @@ pub fn fabs(x: f64) -> f64 { #[cfg(test)] mod tests { - use core::f64::*; - use super::*; #[test] @@ -27,12 +25,12 @@ mod tests { /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs #[test] fn spec_tests() { - assert!(fabs(NAN).is_nan()); + assert!(fabs(f64::NAN).is_nan()); for f in [0.0, -0.0].iter().copied() { assert_eq!(fabs(f), 0.0); } - for f in [INFINITY, NEG_INFINITY].iter().copied() { - assert_eq!(fabs(f), INFINITY); + for f in [f64::INFINITY, f64::NEG_INFINITY].iter().copied() { + assert_eq!(fabs(f), f64::INFINITY); } } } diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs index 1dac6389d..eabe87254 100644 --- a/libm/src/math/fabsf.rs +++ b/libm/src/math/fabsf.rs @@ -16,8 +16,6 @@ pub fn fabsf(x: f32) -> f32 { #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { - use core::f32::*; - use super::*; #[test] @@ -29,12 +27,12 @@ mod tests { /// The spec: 
https://en.cppreference.com/w/cpp/numeric/math/fabs #[test] fn spec_tests() { - assert!(fabsf(NAN).is_nan()); + assert!(fabsf(f32::NAN).is_nan()); for f in [0.0, -0.0].iter().copied() { assert_eq!(fabsf(f), 0.0); } - for f in [INFINITY, NEG_INFINITY].iter().copied() { - assert_eq!(fabsf(f), INFINITY); + for f in [f32::INFINITY, f32::NEG_INFINITY].iter().copied() { + assert_eq!(fabsf(f), f32::INFINITY); } } } diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index 532226b9f..e478f6d54 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -33,8 +33,6 @@ pub fn floor(x: f64) -> f64 { #[cfg(test)] mod tests { - use core::f64::*; - use super::*; #[test] @@ -47,8 +45,8 @@ mod tests { #[test] fn spec_tests() { // Not Asserted: that the current rounding mode has no effect. - assert!(floor(NAN).is_nan()); - for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { + assert!(floor(f64::NAN).is_nan()); + for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY].iter().copied() { assert_eq!(floor(f), f); } } diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs index 4f38cb15b..bd1570c86 100644 --- a/libm/src/math/floorf.rs +++ b/libm/src/math/floorf.rs @@ -42,8 +42,6 @@ pub fn floorf(x: f32) -> f32 { #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { - use core::f32::*; - use super::*; #[test] @@ -57,8 +55,8 @@ mod tests { #[test] fn spec_tests() { // Not Asserted: that the current rounding mode has no effect. 
- assert!(floorf(NAN).is_nan()); - for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { + assert!(floorf(f32::NAN).is_nan()); + for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY].iter().copied() { assert_eq!(floorf(f), f); } } diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 736465cd1..80b2a2499 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -398,7 +398,6 @@ mod tests { extern crate core; use self::core::f64::consts::{E, PI}; - use self::core::f64::{EPSILON, INFINITY, MAX, MIN, MIN_POSITIVE, NAN, NEG_INFINITY}; use super::pow; const POS_ZERO: &[f64] = &[0.0]; @@ -407,15 +406,15 @@ mod tests { const NEG_ONE: &[f64] = &[-1.0]; const POS_FLOATS: &[f64] = &[99.0 / 70.0, E, PI]; const NEG_FLOATS: &[f64] = &[-99.0 / 70.0, -E, -PI]; - const POS_SMALL_FLOATS: &[f64] = &[(1.0 / 2.0), MIN_POSITIVE, EPSILON]; - const NEG_SMALL_FLOATS: &[f64] = &[-(1.0 / 2.0), -MIN_POSITIVE, -EPSILON]; - const POS_EVENS: &[f64] = &[2.0, 6.0, 8.0, 10.0, 22.0, 100.0, MAX]; - const NEG_EVENS: &[f64] = &[MIN, -100.0, -22.0, -10.0, -8.0, -6.0, -2.0]; + const POS_SMALL_FLOATS: &[f64] = &[(1.0 / 2.0), f64::MIN_POSITIVE, f64::EPSILON]; + const NEG_SMALL_FLOATS: &[f64] = &[-(1.0 / 2.0), -f64::MIN_POSITIVE, -f64::EPSILON]; + const POS_EVENS: &[f64] = &[2.0, 6.0, 8.0, 10.0, 22.0, 100.0, f64::MAX]; + const NEG_EVENS: &[f64] = &[f64::MIN, -100.0, -22.0, -10.0, -8.0, -6.0, -2.0]; const POS_ODDS: &[f64] = &[3.0, 7.0]; const NEG_ODDS: &[f64] = &[-7.0, -3.0]; - const NANS: &[f64] = &[NAN]; - const POS_INF: &[f64] = &[INFINITY]; - const NEG_INF: &[f64] = &[NEG_INFINITY]; + const NANS: &[f64] = &[f64::NAN]; + const POS_INF: &[f64] = &[f64::INFINITY]; + const NEG_INF: &[f64] = &[f64::NEG_INFINITY]; const ALL: &[&[f64]] = &[ POS_ZERO, @@ -492,83 +491,83 @@ mod tests { #[test] fn nan_inputs() { // NAN as the base: - // (NAN ^ anything *but 0* should be NAN) - test_sets_as_exponent(NAN, &ALL[2..], NAN); + // (f64::NAN ^ anything *but 0* should be f64::NAN) + 
test_sets_as_exponent(f64::NAN, &ALL[2..], f64::NAN); - // NAN as the exponent: - // (anything *but 1* ^ NAN should be NAN) - test_sets_as_base(&ALL[..(ALL.len() - 2)], NAN, NAN); + // f64::NAN as the exponent: + // (anything *but 1* ^ f64::NAN should be f64::NAN) + test_sets_as_base(&ALL[..(ALL.len() - 2)], f64::NAN, f64::NAN); } #[test] fn infinity_as_base() { // Positive Infinity as the base: - // (+Infinity ^ positive anything but 0 and NAN should be +Infinity) - test_sets_as_exponent(INFINITY, &POS[1..], INFINITY); + // (+Infinity ^ positive anything but 0 and f64::NAN should be +Infinity) + test_sets_as_exponent(f64::INFINITY, &POS[1..], f64::INFINITY); - // (+Infinity ^ negative anything except 0 and NAN should be 0.0) - test_sets_as_exponent(INFINITY, &NEG[1..], 0.0); + // (+Infinity ^ negative anything except 0 and f64::NAN should be 0.0) + test_sets_as_exponent(f64::INFINITY, &NEG[1..], 0.0); // Negative Infinity as the base: // (-Infinity ^ positive odd ints should be -Infinity) - test_sets_as_exponent(NEG_INFINITY, &[POS_ODDS], NEG_INFINITY); + test_sets_as_exponent(f64::NEG_INFINITY, &[POS_ODDS], f64::NEG_INFINITY); // (-Infinity ^ anything but odd ints should be == -0 ^ (-anything)) // We can lump in pos/neg odd ints here because they don't seem to // cause panics (div by zero) in release mode (I think). 
- test_sets(ALL, &|v: f64| pow(NEG_INFINITY, v), &|v: f64| pow(-0.0, -v)); + test_sets(ALL, &|v: f64| pow(f64::NEG_INFINITY, v), &|v: f64| pow(-0.0, -v)); } #[test] fn infinity_as_exponent() { // Positive/Negative base greater than 1: - // (pos/neg > 1 ^ Infinity should be Infinity - note this excludes NAN as the base) - test_sets_as_base(&ALL[5..(ALL.len() - 2)], INFINITY, INFINITY); + // (pos/neg > 1 ^ Infinity should be Infinity - note this excludes f64::NAN as the base) + test_sets_as_base(&ALL[5..(ALL.len() - 2)], f64::INFINITY, f64::INFINITY); // (pos/neg > 1 ^ -Infinity should be 0.0) - test_sets_as_base(&ALL[5..ALL.len() - 2], NEG_INFINITY, 0.0); + test_sets_as_base(&ALL[5..ALL.len() - 2], f64::NEG_INFINITY, 0.0); // Positive/Negative base less than 1: let base_below_one = &[POS_ZERO, NEG_ZERO, NEG_SMALL_FLOATS, POS_SMALL_FLOATS]; - // (pos/neg < 1 ^ Infinity should be 0.0 - this also excludes NAN as the base) - test_sets_as_base(base_below_one, INFINITY, 0.0); + // (pos/neg < 1 ^ Infinity should be 0.0 - this also excludes f64::NAN as the base) + test_sets_as_base(base_below_one, f64::INFINITY, 0.0); // (pos/neg < 1 ^ -Infinity should be Infinity) - test_sets_as_base(base_below_one, NEG_INFINITY, INFINITY); + test_sets_as_base(base_below_one, f64::NEG_INFINITY, f64::INFINITY); // Positive/Negative 1 as the base: // (pos/neg 1 ^ Infinity should be 1) - test_sets_as_base(&[NEG_ONE, POS_ONE], INFINITY, 1.0); + test_sets_as_base(&[NEG_ONE, POS_ONE], f64::INFINITY, 1.0); // (pos/neg 1 ^ -Infinity should be 1) - test_sets_as_base(&[NEG_ONE, POS_ONE], NEG_INFINITY, 1.0); + test_sets_as_base(&[NEG_ONE, POS_ONE], f64::NEG_INFINITY, 1.0); } #[test] fn zero_as_base() { // Positive Zero as the base: - // (+0 ^ anything positive but 0 and NAN should be +0) + // (+0 ^ anything positive but 0 and f64::NAN should be +0) test_sets_as_exponent(0.0, &POS[1..], 0.0); - // (+0 ^ anything negative but 0 and NAN should be Infinity) + // (+0 ^ anything negative but 0 and f64::NAN 
should be Infinity) // (this should panic because we're dividing by zero) - test_sets_as_exponent(0.0, &NEG[1..], INFINITY); + test_sets_as_exponent(0.0, &NEG[1..], f64::INFINITY); // Negative Zero as the base: - // (-0 ^ anything positive but 0, NAN, and odd ints should be +0) + // (-0 ^ anything positive but 0, f64::NAN, and odd ints should be +0) test_sets_as_exponent(-0.0, &POS[3..], 0.0); - // (-0 ^ anything negative but 0, NAN, and odd ints should be Infinity) + // (-0 ^ anything negative but 0, f64::NAN, and odd ints should be Infinity) // (should panic because of divide by zero) - test_sets_as_exponent(-0.0, &NEG[3..], INFINITY); + test_sets_as_exponent(-0.0, &NEG[3..], f64::INFINITY); // (-0 ^ positive odd ints should be -0) test_sets_as_exponent(-0.0, &[POS_ODDS], -0.0); // (-0 ^ negative odd ints should be -Infinity) // (should panic because of divide by zero) - test_sets_as_exponent(-0.0, &[NEG_ODDS], NEG_INFINITY); + test_sets_as_exponent(-0.0, &[NEG_ODDS], f64::NEG_INFINITY); } #[test] @@ -583,21 +582,17 @@ mod tests { // Factoring -1 out: // (negative anything ^ integer should be (-1 ^ integer) * (positive anything ^ integer)) - (&[POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS]).iter().for_each( - |int_set| { - int_set.iter().for_each(|int| { - test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| { - pow(-1.0, *int) * pow(v, *int) - }); - }) - }, - ); + [POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS].iter().for_each(|int_set| { + int_set.iter().for_each(|int| { + test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| pow(-1.0, *int) * pow(v, *int)); + }) + }); // Negative base (imaginary results): // (-anything except 0 and Infinity ^ non-integer should be NAN) - (&NEG[1..(NEG.len() - 1)]).iter().for_each(|set| { + NEG[1..(NEG.len() - 1)].iter().for_each(|set| { set.iter().for_each(|val| { - test_sets(&ALL[3..7], &|v: f64| pow(*val, v), &|_| NAN); + test_sets(&ALL[3..7], &|v: f64| pow(*val, v), &|_| f64::NAN); }) }); } diff --git 
a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 3eaf52cda..d9a8f184c 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -224,8 +224,6 @@ pub fn sqrt(x: f64) -> f64 { #[cfg(test)] mod tests { - use core::f64::*; - use super::*; #[test] @@ -239,15 +237,16 @@ mod tests { fn spec_tests() { // Not Asserted: FE_INVALID exception is raised if argument is negative. assert!(sqrt(-1.0).is_nan()); - assert!(sqrt(NAN).is_nan()); - for f in [0.0, -0.0, INFINITY].iter().copied() { + assert!(sqrt(f64::NAN).is_nan()); + for f in [0.0, -0.0, f64::INFINITY].iter().copied() { assert_eq!(sqrt(f), f); } } #[test] + #[allow(clippy::approx_constant)] fn conformance_tests() { - let values = [3.14159265359, 10000.0, f64::from_bits(0x0000000f), INFINITY]; + let values = [3.14159265359, 10000.0, f64::from_bits(0x0000000f), f64::INFINITY]; let results = [ 4610661241675116657u64, 4636737291354636288u64, diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index d2f7ae703..23f9a8443 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -110,8 +110,6 @@ pub fn sqrtf(x: f32) -> f32 { #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { - use core::f32::*; - use super::*; #[test] @@ -125,15 +123,16 @@ mod tests { fn spec_tests() { // Not Asserted: FE_INVALID exception is raised if argument is negative. 
assert!(sqrtf(-1.0).is_nan()); - assert!(sqrtf(NAN).is_nan()); - for f in [0.0, -0.0, INFINITY].iter().copied() { + assert!(sqrtf(f32::NAN).is_nan()); + for f in [0.0, -0.0, f32::INFINITY].iter().copied() { assert_eq!(sqrtf(f), f); } } #[test] + #[allow(clippy::approx_constant)] fn conformance_tests() { - let values = [3.14159265359f32, 10000.0f32, f32::from_bits(0x0000000f), INFINITY]; + let values = [3.14159265359f32, 10000.0f32, f32::from_bits(0x0000000f), f32::INFINITY]; let results = [1071833029u32, 1120403456u32, 456082799u32, 2139095040u32]; for i in 0..values.len() { From 8b9fb33131f8bbe36a5fc4f595f1bf5100be5346 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 1 Nov 2024 06:40:17 -0500 Subject: [PATCH 0954/1459] Fix clippy lints in `crates/` and enable this on CI --- libm/.github/workflows/main.yml | 24 ++++++++++++++++++------ libm/crates/libm-macros/src/lib.rs | 2 +- libm/crates/libm-test/src/gen/random.rs | 2 +- libm/crates/libm-test/src/precision.rs | 2 +- libm/crates/libm-test/src/test_traits.rs | 4 ++-- libm/crates/musl-math-sys/build.rs | 2 +- libm/crates/musl-math-sys/src/lib.rs | 7 +++++++ 7 files changed, 31 insertions(+), 12 deletions(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index c79d637ec..866f0de9e 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -105,12 +105,24 @@ jobs: rustup target add x86_64-unknown-linux-musl cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} - - name: Clippy - # Tests and utilities can't build on no_std targets - if: "!contains(matrix.target, 'thumb')" - # Run clippy on `libm` - run: cargo clippy --target "${{ matrix.target }}" --package libm --all-targets - + clippy: + name: Clippy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - name: Install Rust + run: | + rustup update nightly --no-self-update + rustup default nightly + rustup component add clippy + - uses: Swatinem/rust-cache@v2 + - name: 
Download musl source + run: ./ci/download-musl.sh + - run: | + cargo clippy --all \ + --exclude cb \ + --features libm-test/build-musl,libm-test/test-multiprecision \ + --all-targets builtins: name: Check use with compiler-builtins diff --git a/libm/crates/libm-macros/src/lib.rs b/libm/crates/libm-macros/src/lib.rs index dc78598ca..41d13035c 100644 --- a/libm/crates/libm-macros/src/lib.rs +++ b/libm/crates/libm-macros/src/lib.rs @@ -353,7 +353,7 @@ fn validate(input: &StructuredInput) -> syn::Result> if !input.skip.is_empty() && input.only.is_some() { let e = syn::Error::new( input.only_span.unwrap(), - format!("only one of `skip` or `only` may be specified"), + "only one of `skip` or `only` may be specified", ); return Err(e); } diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs index d03d1ff79..e347b3c63 100644 --- a/libm/crates/libm-test/src/gen/random.rs +++ b/libm/crates/libm-test/src/gen/random.rs @@ -37,7 +37,7 @@ static TEST_CASES: LazyLock = LazyLock::new(|| make_test_cases(NTES /// value so tests don't run forever. static TEST_CASES_JN: LazyLock = LazyLock::new(|| { // Start with regular test cases - let mut cases = (&*TEST_CASES).clone(); + let mut cases = (*TEST_CASES).clone(); // These functions are extremely slow, limit them let ntests_jn = (NTESTS / 1000).max(80); diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 9ef0e818d..5b021e946 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -238,7 +238,7 @@ fn maybe_check_nan_bits(actual: F, expected: F, ctx: &CheckCtx) -> Opt // abs and copysign require signaling NaNs to be propagated, so verify bit equality. 
if actual.to_bits() == expected.to_bits() { - return SKIP; + SKIP } else { Some(Err(anyhow::anyhow!("NaNs have different bitpatterns"))) } diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index 67df83fb4..e69e16d24 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -214,7 +214,7 @@ macro_rules! impl_int { }; } -fn validate_int<'a, I, Input>(actual: I, expected: I, input: Input, ctx: &CheckCtx) -> TestResult +fn validate_int(actual: I, expected: I, input: Input, ctx: &CheckCtx) -> TestResult where I: Int + Hex, Input: Hex + fmt::Debug, @@ -274,7 +274,7 @@ macro_rules! impl_float { }; } -fn validate_float<'a, F, Input>(actual: F, expected: F, input: Input, ctx: &CheckCtx) -> TestResult +fn validate_float(actual: F, expected: F, input: Input, ctx: &CheckCtx) -> TestResult where F: Float + Hex, Input: Hex + fmt::Debug, diff --git a/libm/crates/musl-math-sys/build.rs b/libm/crates/musl-math-sys/build.rs index 03df06c79..03deb4ff0 100644 --- a/libm/crates/musl-math-sys/build.rs +++ b/libm/crates/musl-math-sys/build.rs @@ -124,7 +124,7 @@ fn build_musl_math(cfg: &Config) { // Run configuration steps. Usually done as part of the musl `Makefile`. let obj_include = cfg.out_dir.join("musl_obj/include"); fs::create_dir_all(&obj_include).unwrap(); - fs::create_dir_all(&obj_include.join("bits")).unwrap(); + fs::create_dir_all(obj_include.join("bits")).unwrap(); let sed_stat = Command::new("sed") .arg("-f") .arg(musl_dir.join("tools/mkalltypes.sed")) diff --git a/libm/crates/musl-math-sys/src/lib.rs b/libm/crates/musl-math-sys/src/lib.rs index fe3c89229..db352fab8 100644 --- a/libm/crates/musl-math-sys/src/lib.rs +++ b/libm/crates/musl-math-sys/src/lib.rs @@ -7,6 +7,7 @@ use std::ffi::{c_char, c_int, c_long}; /// unsound. macro_rules! 
functions { ( $( + $( #[$meta:meta] )* $pfx_name:ident: $name:ident( $($arg:ident: $aty:ty),+ ) -> $rty:ty; )* ) => { extern "C" { @@ -15,6 +16,7 @@ macro_rules! functions { $( // Expose a safe version + $( #[$meta] )* pub fn $name( $($arg: $aty),+ ) -> $rty { // SAFETY: FFI calls with no preconditions unsafe { $pfx_name( $($arg),+ ) } @@ -231,8 +233,13 @@ functions! { musl_logf: logf(a: f32) -> f32; musl_modf: modf(a: f64, b: &mut f64) -> f64; musl_modff: modff(a: f32, b: &mut f32) -> f32; + + // FIXME: these need to be unsafe + #[allow(clippy::not_unsafe_ptr_arg_deref)] musl_nan: nan(a: *const c_char) -> f64; + #[allow(clippy::not_unsafe_ptr_arg_deref)] musl_nanf: nanf(a: *const c_char) -> f32; + musl_nearbyint: nearbyint(a: f64) -> f64; musl_nearbyintf: nearbyintf(a: f32) -> f32; musl_nextafter: nextafter(a: f64, b: f64) -> f64; From fc9760317b2fc3ccefe2675e4e847843a22fe181 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 31 Oct 2024 02:44:00 -0500 Subject: [PATCH 0955/1459] Adjust how the proc macro emits types and add an enum Currently the macro always provides `CFn`, `RustFn`, `RustArgs`, etc. Change this so that: 1. This information must be explicily requested in the invocation. 2. There is a new `FTy` field available that emits a single float type, rather than a tuple or signature. Additionally, add two new macros that create enums representing function names. 
--- libm/crates/libm-macros/Cargo.toml | 1 + libm/crates/libm-macros/src/enums.rs | 132 +++++++++++++++++ libm/crates/libm-macros/src/lib.rs | 191 +++++++++++++++++++------ libm/crates/libm-macros/src/parse.rs | 24 +++- libm/crates/libm-macros/tests/basic.rs | 50 ++++--- libm/crates/libm-macros/tests/enum.rs | 19 +++ 6 files changed, 354 insertions(+), 63 deletions(-) create mode 100644 libm/crates/libm-macros/src/enums.rs create mode 100644 libm/crates/libm-macros/tests/enum.rs diff --git a/libm/crates/libm-macros/Cargo.toml b/libm/crates/libm-macros/Cargo.toml index 3da9d45a2..c9defb1c5 100644 --- a/libm/crates/libm-macros/Cargo.toml +++ b/libm/crates/libm-macros/Cargo.toml @@ -8,6 +8,7 @@ publish = false proc-macro = true [dependencies] +heck = "0.5.0" proc-macro2 = "1.0.88" quote = "1.0.37" syn = { version = "2.0.79", features = ["full", "extra-traits", "visit-mut"] } diff --git a/libm/crates/libm-macros/src/enums.rs b/libm/crates/libm-macros/src/enums.rs new file mode 100644 index 000000000..d9017dff7 --- /dev/null +++ b/libm/crates/libm-macros/src/enums.rs @@ -0,0 +1,132 @@ +use heck::ToUpperCamelCase; +use proc_macro2 as pm2; +use proc_macro2::{Ident, Span}; +use quote::quote; +use syn::spanned::Spanned; +use syn::{Fields, ItemEnum, Variant}; + +use crate::{ALL_FUNCTIONS_FLAT, base_name}; + +/// Implement `#[function_enum]`, see documentation in `lib.rs`. +pub fn function_enum( + mut item: ItemEnum, + attributes: pm2::TokenStream, +) -> syn::Result { + expect_empty_enum(&item)?; + let attr_span = attributes.span(); + let mut attr = attributes.into_iter(); + + // Attribute should be the identifier of the `BaseName` enum. 
+ let Some(tt) = attr.next() else { + return Err(syn::Error::new(attr_span, "expected one attribute")); + }; + + let pm2::TokenTree::Ident(base_enum) = tt else { + return Err(syn::Error::new(tt.span(), "expected an identifier")); + }; + + if let Some(tt) = attr.next() { + return Err(syn::Error::new(tt.span(), "unexpected token after identifier")); + } + + let enum_name = &item.ident; + let mut as_str_arms = Vec::new(); + let mut base_arms = Vec::new(); + + for func in ALL_FUNCTIONS_FLAT.iter() { + let fn_name = func.name; + let ident = Ident::new(&fn_name.to_upper_camel_case(), Span::call_site()); + let bname_ident = Ident::new(&base_name(fn_name).to_upper_camel_case(), Span::call_site()); + + // Match arm for `fn as_str(self)` matcher + as_str_arms.push(quote! { Self::#ident => #fn_name }); + + // Match arm for `fn base_name(self)` matcher + base_arms.push(quote! { Self::#ident => #base_enum::#bname_ident }); + + let variant = + Variant { attrs: Vec::new(), ident, fields: Fields::Unit, discriminant: None }; + + item.variants.push(variant); + } + + let res = quote! { + // Instantiate the enum + #item + + impl #enum_name { + /// The stringified version of this function name. + const fn as_str(self) -> &'static str { + match self { + #( #as_str_arms , )* + } + } + + /// The base name enum for this function. + const fn base_name(self) -> #base_enum { + match self { + #( #base_arms, )* + } + } + } + }; + + Ok(res) +} + +/// Implement `#[base_name_enum]`, see documentation in `lib.rs`. 
+pub fn base_name_enum( + mut item: ItemEnum, + attributes: pm2::TokenStream, +) -> syn::Result { + expect_empty_enum(&item)?; + if !attributes.is_empty() { + let sp = attributes.span(); + return Err(syn::Error::new(sp.span(), "no attributes expected")); + } + + let mut base_names: Vec<_> = + ALL_FUNCTIONS_FLAT.iter().map(|func| base_name(func.name)).collect(); + base_names.sort_unstable(); + base_names.dedup(); + + let item_name = &item.ident; + let mut as_str_arms = Vec::new(); + + for base_name in base_names { + let ident = Ident::new(&base_name.to_upper_camel_case(), Span::call_site()); + + // Match arm for `fn as_str(self)` matcher + as_str_arms.push(quote! { Self::#ident => #base_name }); + + let variant = + Variant { attrs: Vec::new(), ident, fields: Fields::Unit, discriminant: None }; + + item.variants.push(variant); + } + + let res = quote! { + // Instantiate the enum + #item + + impl #item_name { + /// The stringified version of this base name. + const fn as_str(self) -> &'static str { + match self { + #( #as_str_arms ),* + } + } + } + }; + + Ok(res) +} + +/// Verify that an enum is empty, otherwise return an error +fn expect_empty_enum(item: &ItemEnum) -> syn::Result<()> { + if !item.variants.is_empty() { + Err(syn::Error::new(item.variants.span(), "expected an empty enum")) + } else { + Ok(()) + } +} diff --git a/libm/crates/libm-macros/src/lib.rs b/libm/crates/libm-macros/src/lib.rs index 41d13035c..2db412e79 100644 --- a/libm/crates/libm-macros/src/lib.rs +++ b/libm/crates/libm-macros/src/lib.rs @@ -1,16 +1,20 @@ +mod enums; mod parse; + use std::sync::LazyLock; use parse::{Invocation, StructuredInput}; use proc_macro as pm; use proc_macro2::{self as pm2, Span}; use quote::{ToTokens, quote}; -use syn::Ident; +use syn::spanned::Spanned; use syn::visit_mut::VisitMut; +use syn::{Ident, ItemEnum}; -const ALL_FUNCTIONS: &[(Signature, Option, &[&str])] = &[ +const ALL_FUNCTIONS: &[(Ty, Signature, Option, &[&str])] = &[ ( // `fn(f32) -> f32` + Ty::F32, 
Signature { args: &[Ty::F32], returns: &[Ty::F32] }, None, &[ @@ -22,6 +26,7 @@ const ALL_FUNCTIONS: &[(Signature, Option, &[&str])] = &[ ), ( // `(f64) -> f64` + Ty::F64, Signature { args: &[Ty::F64], returns: &[Ty::F64] }, None, &[ @@ -33,6 +38,7 @@ const ALL_FUNCTIONS: &[(Signature, Option, &[&str])] = &[ ), ( // `(f32, f32) -> f32` + Ty::F32, Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32] }, None, &[ @@ -50,6 +56,7 @@ const ALL_FUNCTIONS: &[(Signature, Option, &[&str])] = &[ ), ( // `(f64, f64) -> f64` + Ty::F64, Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64] }, None, &[ @@ -67,102 +74,120 @@ const ALL_FUNCTIONS: &[(Signature, Option, &[&str])] = &[ ), ( // `(f32, f32, f32) -> f32` + Ty::F32, Signature { args: &[Ty::F32, Ty::F32, Ty::F32], returns: &[Ty::F32] }, None, &["fmaf"], ), ( // `(f64, f64, f64) -> f64` + Ty::F64, Signature { args: &[Ty::F64, Ty::F64, Ty::F64], returns: &[Ty::F64] }, None, &["fma"], ), ( // `(f32) -> i32` + Ty::F32, Signature { args: &[Ty::F32], returns: &[Ty::I32] }, None, &["ilogbf"], ), ( // `(f64) -> i32` + Ty::F64, Signature { args: &[Ty::F64], returns: &[Ty::I32] }, None, &["ilogb"], ), ( // `(i32, f32) -> f32` + Ty::F32, Signature { args: &[Ty::I32, Ty::F32], returns: &[Ty::F32] }, None, &["jnf"], ), ( // `(i32, f64) -> f64` + Ty::F64, Signature { args: &[Ty::I32, Ty::F64], returns: &[Ty::F64] }, None, &["jn"], ), ( // `(f32, i32) -> f32` + Ty::F32, Signature { args: &[Ty::F32, Ty::I32], returns: &[Ty::F32] }, None, &["scalbnf", "ldexpf"], ), ( // `(f64, i64) -> f64` + Ty::F64, Signature { args: &[Ty::F64, Ty::I32], returns: &[Ty::F64] }, None, &["scalbn", "ldexp"], ), ( // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)` + Ty::F32, Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] }, Some(Signature { args: &[Ty::F32, Ty::MutF32], returns: &[Ty::F32] }), &["modff"], ), ( // `(f64, &mut f64) -> f64` as `(f64) -> (f64, f64)` + Ty::F64, Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] }, 
Some(Signature { args: &[Ty::F64, Ty::MutF64], returns: &[Ty::F64] }), &["modf"], ), ( // `(f32, &mut c_int) -> f32` as `(f32) -> (f32, i32)` + Ty::F32, Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::I32] }, Some(Signature { args: &[Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }), &["frexpf", "lgammaf_r"], ), ( // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)` + Ty::F64, Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::I32] }, Some(Signature { args: &[Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }), &["frexp", "lgamma_r"], ), ( // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)` + Ty::F32, Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32, Ty::I32] }, Some(Signature { args: &[Ty::F32, Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }), &["remquof"], ), ( // `(f64, f64, &mut c_int) -> f64` as `(f64, f64) -> (f64, i32)` + Ty::F64, Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64, Ty::I32] }, Some(Signature { args: &[Ty::F64, Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }), &["remquo"], ), ( // `(f32, &mut f32, &mut f32)` as `(f32) -> (f32, f32)` + Ty::F32, Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] }, Some(Signature { args: &[Ty::F32, Ty::MutF32, Ty::MutF32], returns: &[] }), &["sincosf"], ), ( // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)` + Ty::F64, Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] }, Some(Signature { args: &[Ty::F64, Ty::MutF64, Ty::MutF64], returns: &[] }), &["sincos"], ), ]; +const KNOWN_TYPES: &[&str] = &["FTy", "CFn", "CArgs", "CRet", "RustFn", "RustArgs", "RustRet"]; + /// A type used in a function signature. #[allow(dead_code)] #[derive(Debug, Clone, Copy)] @@ -190,12 +215,12 @@ impl ToTokens for Ty { Ty::F128 => quote! { f128 }, Ty::I32 => quote! { i32 }, Ty::CInt => quote! { ::core::ffi::c_int }, - Ty::MutF16 => quote! { &mut f16 }, - Ty::MutF32 => quote! { &mut f32 }, - Ty::MutF64 => quote! { &mut f64 }, - Ty::MutF128 => quote! { &mut f128 }, - Ty::MutI32 => quote! 
{ &mut i32 }, - Ty::MutCInt => quote! { &mut core::ffi::c_int }, + Ty::MutF16 => quote! { &'a mut f16 }, + Ty::MutF32 => quote! { &'a mut f32 }, + Ty::MutF64 => quote! { &'a mut f64 }, + Ty::MutF128 => quote! { &'a mut f128 }, + Ty::MutI32 => quote! { &'a mut i32 }, + Ty::MutCInt => quote! { &'a mut core::ffi::c_int }, }; tokens.extend(ts); @@ -213,6 +238,7 @@ struct Signature { #[derive(Debug, Clone)] struct FunctionInfo { name: &'static str, + base_fty: Ty, /// Function signature for C implementations c_sig: Signature, /// Function signature for Rust implementations @@ -223,10 +249,11 @@ struct FunctionInfo { static ALL_FUNCTIONS_FLAT: LazyLock> = LazyLock::new(|| { let mut ret = Vec::new(); - for (rust_sig, c_sig, names) in ALL_FUNCTIONS { + for (base_fty, rust_sig, c_sig, names) in ALL_FUNCTIONS { for name in *names { let api = FunctionInfo { name, + base_fty: *base_fty, rust_sig: rust_sig.clone(), c_sig: c_sig.clone().unwrap_or_else(|| rust_sig.clone()), }; @@ -238,6 +265,37 @@ static ALL_FUNCTIONS_FLAT: LazyLock> = LazyLock::new(|| { ret }); +/// Populate an enum with a variant representing function. Names are in upper camel case. +/// +/// Applied to an empty enum. Expects one attribute `#[function_enum(BaseName)]` that provides +/// the name of the `BaseName` enum. +#[proc_macro_attribute] +pub fn function_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> pm::TokenStream { + let item = syn::parse_macro_input!(tokens as ItemEnum); + let res = enums::function_enum(item, attributes.into()); + + match res { + Ok(ts) => ts, + Err(e) => e.into_compile_error(), + } + .into() +} + +/// Create an enum representing all possible base names, with names in upper camel case. +/// +/// Applied to an empty enum. 
+#[proc_macro_attribute] +pub fn base_name_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> pm::TokenStream { + let item = syn::parse_macro_input!(tokens as ItemEnum); + let res = enums::base_name_enum(item, attributes.into()); + + match res { + Ok(ts) => ts, + Err(e) => e.into_compile_error(), + } + .into() +} + /// Do something for each function present in this crate. /// /// Takes a callback macro and invokes it multiple times, once for each function that @@ -258,6 +316,8 @@ static ALL_FUNCTIONS_FLAT: LazyLock> = LazyLock::new(|| { /// ( /// // Name of that function /// fn_name: $fn_name:ident, +/// // The basic float type for this function (e.g. `f32`, `f64`) +/// FTy: $FTy:ty, /// // Function signature of the C version (e.g. `fn(f32, &mut f32) -> f32`) /// CFn: $CFn:ty, /// // A tuple representing the C version's arguments (e.g. `(f32, &mut f32)`) @@ -279,17 +339,16 @@ static ALL_FUNCTIONS_FLAT: LazyLock> = LazyLock::new(|| { /// ) => { }; /// } /// +/// // All fields except for `callback` are optional. /// libm_macros::for_each_function! { /// // The macro to invoke as a callback /// callback: callback_macro, +/// // Which types to include either as a list (`[CFn, RustFn, RustArgs]`) or "all" +/// emit_types: all, /// // Functions to skip, i.e. `callback` shouldn't be called at all for these. -/// // -/// // This is an optional field. /// skip: [sin, cos], /// // Attributes passed as `attrs` for specific functions. For example, here the invocation /// // with `sinf` and that with `cosf` will both get `meta1` and `meta2`, but no others will. -/// // -/// // This is an optional field. /// attributes: [ /// #[meta1] /// #[meta2] @@ -297,8 +356,6 @@ static ALL_FUNCTIONS_FLAT: LazyLock> = LazyLock::new(|| { /// ], /// // Any tokens that should be passed directly to all invocations of the callback. This can /// // be used to pass local variables or other things the macro needs access to. -/// // -/// // This is an optional field. 
/// extra: [foo], /// // Similar to `extra`, but allow providing a pattern for only specific functions. Uses /// // a simplified match-like syntax. @@ -313,7 +370,7 @@ pub fn for_each_function(tokens: pm::TokenStream) -> pm::TokenStream { let input = syn::parse_macro_input!(tokens as Invocation); let res = StructuredInput::from_fields(input) - .and_then(|s_in| validate(&s_in).map(|fn_list| (s_in, fn_list))) + .and_then(|mut s_in| validate(&mut s_in).map(|fn_list| (s_in, fn_list))) .and_then(|(s_in, fn_list)| expand(s_in, &fn_list)); match res { @@ -325,7 +382,7 @@ pub fn for_each_function(tokens: pm::TokenStream) -> pm::TokenStream { /// Check for any input that is structurally correct but has other problems. /// /// Returns the list of function names that we should expand for. -fn validate(input: &StructuredInput) -> syn::Result> { +fn validate(input: &mut StructuredInput) -> syn::Result> { // Collect lists of all functions that are provied as macro inputs in various fields (only, // skip, attributes). 
let attr_mentions = input @@ -376,6 +433,43 @@ fn validate(input: &StructuredInput) -> syn::Result> fn_list.push(func); } + // Types that the user would like us to provide in the macro + let mut add_all_types = false; + for ty in &input.emit_types { + let ty_name = ty.to_string(); + if ty_name == "all" { + add_all_types = true; + continue; + } + + // Check that all requested types are valid + if !KNOWN_TYPES.contains(&ty_name.as_str()) { + let e = syn::Error::new( + ty_name.span(), + format!("unrecognized type identifier `{ty_name}`"), + ); + return Err(e); + } + } + + if add_all_types { + // Ensure that if `all` was specified that nothing else was + if input.emit_types.len() > 1 { + let e = syn::Error::new( + input.emit_types_span.unwrap(), + "if `all` is specified, no other type identifiers may be given", + ); + return Err(e); + } + + // ...and then add all types + input.emit_types.clear(); + for ty in KNOWN_TYPES { + let ident = Ident::new(ty, Span::call_site()); + input.emit_types.push(ident); + } + } + if let Some(map) = &input.fn_extra { if !map.keys().any(|key| key == "_") { // No default provided; make sure every expected function is covered @@ -451,20 +545,31 @@ fn expand(input: StructuredInput, fn_list: &[&FunctionInfo]) -> syn::Result pm2::TokenStream::new(), }; + let base_fty = func.base_fty; let c_args = &func.c_sig.args; let c_ret = &func.c_sig.returns; let rust_args = &func.rust_sig.args; let rust_ret = &func.rust_sig.returns; + let mut ty_fields = Vec::new(); + for ty in &input.emit_types { + let field = match ty.to_string().as_str() { + "FTy" => quote! { FTy: #base_fty, }, + "CFn" => quote! { CFn: fn( #(#c_args),* ,) -> ( #(#c_ret),* ), }, + "CArgs" => quote! { CArgs: ( #(#c_args),* ,), }, + "CRet" => quote! { CRet: ( #(#c_ret),* ), }, + "RustFn" => quote! { RustFn: fn( #(#rust_args),* ,) -> ( #(#rust_ret),* ), }, + "RustArgs" => quote! { RustArgs: ( #(#rust_args),* ,), }, + "RustRet" => quote! 
{ RustRet: ( #(#rust_ret),* ), }, + _ => unreachable!("checked in validation"), + }; + ty_fields.push(field); + } + let new = quote! { #callback! { fn_name: #fn_name, - CFn: fn( #(#c_args),* ,) -> ( #(#c_ret),* ), - CArgs: ( #(#c_args),* ,), - CRet: ( #(#c_ret),* ), - RustFn: fn( #(#rust_args),* ,) -> ( #(#rust_ret),* ), - RustArgs: ( #(#rust_args),* ,), - RustRet: ( #(#rust_ret),* ), + #( #ty_fields )* #meta_field #extra_field #fn_extra_field @@ -488,24 +593,7 @@ struct MacroReplace { impl MacroReplace { fn new(name: &'static str) -> Self { - // Keep this in sync with `libm_test::canonical_name` - let known_mappings = &[ - ("erff", "erf"), - ("erf", "erf"), - ("lgammaf_r", "lgamma_r"), - ("modff", "modf"), - ("modf", "modf"), - ]; - - let norm_name = match known_mappings.iter().find(|known| known.0 == name) { - Some(found) => found.1, - None => name - .strip_suffix("f") - .or_else(|| name.strip_suffix("f16")) - .or_else(|| name.strip_suffix("f128")) - .unwrap_or(name), - }; - + let norm_name = base_name(name); Self { fn_name: name, norm_name: norm_name.to_owned(), error: None } } @@ -539,3 +627,24 @@ impl VisitMut for MacroReplace { syn::visit_mut::visit_ident_mut(self, i); } } + +/// Return the unsuffixed name of a function. 
+fn base_name(name: &str) -> &str { + // Keep this in sync with `libm_test::base_name` + let known_mappings = &[ + ("erff", "erf"), + ("erf", "erf"), + ("lgammaf_r", "lgamma_r"), + ("modff", "modf"), + ("modf", "modf"), + ]; + + match known_mappings.iter().find(|known| known.0 == name) { + Some(found) => found.1, + None => name + .strip_suffix("f") + .or_else(|| name.strip_suffix("f16")) + .or_else(|| name.strip_suffix("f128")) + .unwrap_or(name), + } +} diff --git a/libm/crates/libm-macros/src/parse.rs b/libm/crates/libm-macros/src/parse.rs index ee9bd524b..369bbae2f 100644 --- a/libm/crates/libm-macros/src/parse.rs +++ b/libm/crates/libm-macros/src/parse.rs @@ -5,7 +5,7 @@ use quote::ToTokens; use syn::parse::{Parse, ParseStream, Parser}; use syn::punctuated::Punctuated; use syn::spanned::Spanned; -use syn::token::Comma; +use syn::token::{self, Comma}; use syn::{Arm, Attribute, Expr, ExprMatch, Ident, Meta, Token, bracketed}; /// The input to our macro; just a list of `field: value` items. @@ -39,6 +39,9 @@ impl Parse for Mapping { pub struct StructuredInput { /// Macro to invoke once per function pub callback: Ident, + /// Whether or not to provide `CFn` `CArgs` `RustFn` etc. This is really only needed + /// once for crate to set up the main trait. 
+ pub emit_types: Vec, /// Skip these functions pub skip: Vec, /// Invoke only for these functions @@ -50,6 +53,7 @@ pub struct StructuredInput { /// Per-function extra expressions to pass to the macro pub fn_extra: Option>, // For diagnostics + pub emit_types_span: Option, pub only_span: Option, pub fn_extra_span: Option, } @@ -58,6 +62,7 @@ impl StructuredInput { pub fn from_fields(input: Invocation) -> syn::Result { let mut map: Vec<_> = input.fields.into_iter().collect(); let cb_expr = expect_field(&mut map, "callback")?; + let emit_types_expr = expect_field(&mut map, "emit_types").ok(); let skip_expr = expect_field(&mut map, "skip").ok(); let only_expr = expect_field(&mut map, "only").ok(); let attr_expr = expect_field(&mut map, "attributes").ok(); @@ -71,6 +76,12 @@ impl StructuredInput { ))?; } + let emit_types_span = emit_types_expr.as_ref().map(|expr| expr.span()); + let emit_types = match emit_types_expr { + Some(expr) => Parser::parse2(parse_ident_or_array, expr.into_token_stream())?, + None => Vec::new(), + }; + let skip = match skip_expr { Some(expr) => Parser::parse2(parse_ident_array, expr.into_token_stream())?, None => Vec::new(), @@ -103,6 +114,7 @@ impl StructuredInput { Ok(Self { callback: expect_ident(cb_expr)?, + emit_types, skip, only, only_span, @@ -110,6 +122,7 @@ impl StructuredInput { extra, fn_extra, fn_extra_span, + emit_types_span, }) } } @@ -183,6 +196,15 @@ fn expect_ident(expr: Expr) -> syn::Result { syn::parse2(expr.into_token_stream()) } +/// Parse either a single identifier (`foo`) or an array of identifiers (`[foo, bar, baz]`). +fn parse_ident_or_array(input: ParseStream) -> syn::Result> { + if !input.peek(token::Bracket) { + return Ok(vec![input.parse()?]); + } + + parse_ident_array(input) +} + /// Parse an array of expressions. 
fn parse_expr_array(input: ParseStream) -> syn::Result> { let content; diff --git a/libm/crates/libm-macros/tests/basic.rs b/libm/crates/libm-macros/tests/basic.rs index 8f8c09f1b..2eaba04f4 100644 --- a/libm/crates/libm-macros/tests/basic.rs +++ b/libm/crates/libm-macros/tests/basic.rs @@ -4,6 +4,7 @@ macro_rules! basic { ( fn_name: $fn_name:ident, + FTy: $FTy:ty, CFn: $CFn:ty, CArgs: $CArgs:ty, CRet: $CRet:ty, @@ -17,9 +18,9 @@ macro_rules! basic { $(#[$meta])* mod $fn_name { #[allow(unused)] - type CFnTy = $CFn; - // type CArgsTy<'_> = $CArgs; - // type CRetTy<'_> = $CRet; + type FTy= $FTy; + #[allow(unused)] + type CFnTy<'a> = $CFn; #[allow(unused)] type RustFnTy = $RustFn; #[allow(unused)] @@ -39,6 +40,7 @@ macro_rules! basic { mod test_basic { libm_macros::for_each_function! { callback: basic, + emit_types: all, skip: [sin, cos], attributes: [ // just some random attributes @@ -58,25 +60,8 @@ mod test_basic { macro_rules! basic_no_extra { ( fn_name: $fn_name:ident, - CFn: $CFn:ty, - CArgs: $CArgs:ty, - CRet: $CRet:ty, - RustFn: $RustFn:ty, - RustArgs: $RustArgs:ty, - RustRet: $RustRet:ty, ) => { - mod $fn_name { - #[allow(unused)] - type CFnTy = $CFn; - // type CArgsTy<'_> = $CArgs; - // type CRetTy<'_> = $CRet; - #[allow(unused)] - type RustFnTy = $RustFn; - #[allow(unused)] - type RustArgsTy = $RustArgs; - #[allow(unused)] - type RustRetTy = $RustRet; - } + mod $fn_name {} }; } @@ -94,3 +79,26 @@ mod test_only { only: [sin, sinf], } } + +macro_rules! specified_types { + ( + fn_name: $fn_name:ident, + RustFn: $RustFn:ty, + RustArgs: $RustArgs:ty, + ) => { + mod $fn_name { + #[allow(unused)] + type RustFnTy = $RustFn; + #[allow(unused)] + type RustArgsTy = $RustArgs; + } + }; +} + +mod test_emit_types { + // Test that we can specify a couple types to emit + libm_macros::for_each_function! 
{ + callback: specified_types, + emit_types: [RustFn, RustArgs], + } +} diff --git a/libm/crates/libm-macros/tests/enum.rs b/libm/crates/libm-macros/tests/enum.rs new file mode 100644 index 000000000..884b8d8d6 --- /dev/null +++ b/libm/crates/libm-macros/tests/enum.rs @@ -0,0 +1,19 @@ +#[libm_macros::function_enum(BaseName)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum Function {} + +#[libm_macros::base_name_enum] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum BaseName {} + +#[test] +fn as_str() { + assert_eq!(Function::Sin.as_str(), "sin"); + assert_eq!(Function::Sinf.as_str(), "sinf"); +} + +#[test] +fn basename() { + assert_eq!(Function::Sin.base_name(), BaseName::Sin); + assert_eq!(Function::Sinf.base_name(), BaseName::Sin); +} From 0d188ee58212fb542e344df27dd571ebc1efeb12 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 31 Oct 2024 02:45:37 -0500 Subject: [PATCH 0956/1459] Introduce a `op` module with struct representations of each routine This contains: 1. Per-function and per-operation enums created by the proc macro 2. The `MathOp` trait which is implemented once per struct representing a function 3. 
Submodules for each function, each containing a `Routine` struct that implements `MathOp` --- libm/crates/libm-test/src/lib.rs | 2 + libm/crates/libm-test/src/op.rs | 111 +++++++++++++++++++++++ libm/crates/libm-test/src/test_traits.rs | 4 +- 3 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 libm/crates/libm-test/src/op.rs diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 56a872779..e64ad6264 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -1,10 +1,12 @@ pub mod gen; #[cfg(feature = "test-multiprecision")] pub mod mpfloat; +pub mod op; mod precision; mod test_traits; pub use libm::support::{Float, Int}; +pub use op::{BaseName, MathOp, Name}; pub use precision::{MaybeOverride, SpecialCase, multiprec_allowed_ulp, musl_allowed_ulp}; pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall}; diff --git a/libm/crates/libm-test/src/op.rs b/libm/crates/libm-test/src/op.rs new file mode 100644 index 000000000..fe0a08a28 --- /dev/null +++ b/libm/crates/libm-test/src/op.rs @@ -0,0 +1,111 @@ +//! Types representing individual functions. +//! +//! Each routine gets a module with its name, e.g. `mod sinf { /* ... */ }`. The module +//! contains a unit struct `Routine` which implements `MathOp`. +//! +//! Basically everything could be called a "function" here, so we loosely use the following +//! terminology: +//! +//! - "Function": the math operation that does not have an associated precision. E.g. `f(x) = e^x`, +//! `f(x) = log(x)`. +//! - "Routine": A code implementation of a math operation with a specific precision. E.g. `exp`, +//! `expf`, `expl`, `log`, `logf`. +//! - "Operation" / "Op": Something that relates a routine to a function or is otherwise higher +//! level. `Op` is also used as the name for generic parameters since it is terse. + +use crate::{CheckOutput, Float, TupleCall}; + +/// An enum representing each possible routine name. 
+#[libm_macros::function_enum(BaseName)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum Name {} + +/// The name without any type specifier, e.g. `sin` and `sinf` both become `sin`. +#[libm_macros::base_name_enum] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum BaseName {} + +/// Attributes ascribed to a `libm` routine including signature, type information, +/// and naming. +pub trait MathOp { + /// The float type used for this operation. + type FTy: Float; + + /// The function type representing the signature in a C library. + type CFn: Copy; + + /// Arguments passed to the C library function as a tuple. These may include `&mut` return + /// values. + type CArgs<'a> + where + Self: 'a; + + /// The type returned by C implementations. + type CRet; + + /// The signature of the Rust function as a `fn(...) -> ...` type. + type RustFn: Copy; + + /// Arguments passed to the Rust library function as a tuple. + /// + /// The required `TupleCall` bounds ensure this type can be passed either to the C function or + /// to the Rust function. + type RustArgs: Copy + + TupleCall + + TupleCall; + + /// Type returned from the Rust function. + type RustRet: CheckOutput; + + /// The name of this function, including suffix (e.g. `sin`, `sinf`). + const NAME: Name; + + /// The name as a string. + const NAME_STR: &'static str = Self::NAME.as_str(); + + /// The name of the function excluding the type suffix, e.g. `sin` and `sinf` are both `sin`. + const BASE_NAME: BaseName = Self::NAME.base_name(); + + /// The function in `libm` which can be called. + const ROUTINE: Self::RustFn; +} + +macro_rules! do_thing { + // Matcher for unary functions + ( + fn_name: $fn_name:ident, + FTy: $FTy:ty, + CFn: $CFn:ty, + CArgs: $CArgs:ty, + CRet: $CRet:ty, + RustFn: $RustFn:ty, + RustArgs: $RustArgs:ty, + RustRet: $RustRet:ty, + ) => { + paste::paste! 
{ + pub mod $fn_name { + use super::*; + pub struct Routine; + + impl MathOp for Routine { + type FTy = $FTy; + type CFn = for<'a> $CFn; + type CArgs<'a> = $CArgs where Self: 'a; + type CRet = $CRet; + type RustFn = $RustFn; + type RustArgs = $RustArgs; + type RustRet = $RustRet; + + const NAME: Name = Name::[< $fn_name:camel >]; + const ROUTINE: Self::RustFn = libm::$fn_name; + } + } + + } + }; +} + +libm_macros::for_each_function! { + callback: do_thing, + emit_types: all, +} diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index e69e16d24..b9bec9a44 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -137,7 +137,7 @@ where } } -impl TupleCall for (T1,) +impl TupleCall fn(T1, &'a mut T2, &'a mut T3)> for (T1,) where T1: fmt::Debug, T2: fmt::Debug + Default, @@ -145,7 +145,7 @@ where { type Output = (T2, T3); - fn call(self, f: fn(T1, &mut T2, &mut T3)) -> Self::Output { + fn call(self, f: for<'a> fn(T1, &'a mut T2, &'a mut T3)) -> Self::Output { let mut t2 = T2::default(); let mut t3 = T3::default(); f(self.0, &mut t2, &mut t3); From 5178920956b936a41c2e3b2155067961fc30d65d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 31 Oct 2024 02:46:21 -0500 Subject: [PATCH 0957/1459] Rework tests to make use of the new `MathOp` trait --- libm/crates/libm-test/benches/random.rs | 129 ++++--- libm/crates/libm-test/src/mpfloat.rs | 355 +++++++----------- libm/crates/libm-test/tests/check_coverage.rs | 6 - .../libm-test/tests/compare_built_musl.rs | 54 +-- libm/crates/libm-test/tests/multiprecision.rs | 49 +-- 5 files changed, 278 insertions(+), 315 deletions(-) diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 6c9047c3c..6f2305dd2 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -2,72 +2,103 @@ use std::hint::black_box; use std::time::Duration; use 
criterion::{Criterion, criterion_main}; -use libm_test::gen::random; -use libm_test::{CheckBasis, CheckCtx, TupleCall}; +use libm_test::gen::{CachedInput, random}; +use libm_test::{CheckBasis, CheckCtx, GenerateInput, MathOp, TupleCall}; /// Benchmark with this many items to get a variety const BENCH_ITER_ITEMS: usize = if cfg!(feature = "short-benchmarks") { 50 } else { 500 }; +/// Extra parameters we only care about if we are benchmarking against musl. +#[allow(dead_code)] +struct MuslExtra { + musl_fn: Option, + skip_on_i586: bool, +} + macro_rules! musl_rand_benches { ( fn_name: $fn_name:ident, - CFn: $CFn:ty, - CArgs: $CArgs:ty, - CRet: $CRet:ty, - RustFn: $RustFn:ty, - RustArgs: $RustArgs:ty, - RustRet: $RustRet:ty, fn_extra: $skip_on_i586:expr, ) => { paste::paste! { fn [< musl_bench_ $fn_name >](c: &mut Criterion) { - let fn_name = stringify!($fn_name); - - let ulp = libm_test::musl_allowed_ulp(fn_name); - let ctx = CheckCtx::new(ulp, fn_name, CheckBasis::Musl); - let benchvec: Vec<_> = random::get_test_cases::<$RustArgs>(&ctx) - .take(BENCH_ITER_ITEMS) - .collect(); + type Op = libm_test::op::$fn_name::Routine; - // Perform a sanity check that we are benchmarking the same thing - // Don't test against musl if it is not available #[cfg(feature = "build-musl")] - for input in benchvec.iter().copied() { - use anyhow::Context; - use libm_test::{CheckBasis, CheckCtx, CheckOutput}; + let musl_extra = MuslExtra { + musl_fn: Some(musl_math_sys::$fn_name as ::CFn), + skip_on_i586: $skip_on_i586 + }; + + #[cfg(not(feature = "build-musl"))] + let musl_extra = MuslExtra { + musl_fn: None, + skip_on_i586: $skip_on_i586 + }; + + bench_one::(c, musl_extra); + } + } + }; +} - if cfg!(x86_no_sse) && $skip_on_i586 { - break; - } +fn bench_one(c: &mut Criterion, musl_extra: MuslExtra) +where + Op: MathOp, + CachedInput: GenerateInput, +{ + let name = Op::NAME_STR; + + let ulp = libm_test::musl_allowed_ulp(name); + let ctx = CheckCtx::new(ulp, name, CheckBasis::Musl); + let 
benchvec: Vec<_> = + random::get_test_cases::(&ctx).take(BENCH_ITER_ITEMS).collect(); + + // Perform a sanity check that we are benchmarking the same thing + // Don't test against musl if it is not available + #[cfg(feature = "build-musl")] + for input in benchvec.iter().copied() { + use anyhow::Context; + use libm_test::CheckOutput; + + if cfg!(x86_no_sse) && musl_extra.skip_on_i586 { + break; + } - let musl_res = input.call(musl_math_sys::$fn_name as $CFn); - let crate_res = input.call(libm::$fn_name as $RustFn); + let musl_res = input.call(musl_extra.musl_fn.unwrap()); + let crate_res = input.call(Op::ROUTINE); - let ctx = CheckCtx::new(ulp, fn_name, CheckBasis::Musl); - crate_res.validate(musl_res, input, &ctx).context(fn_name).unwrap(); - } + crate_res.validate(musl_res, input, &ctx).context(name).unwrap(); + } - /* Function pointers are black boxed to avoid inlining in the benchmark loop */ + #[cfg(not(feature = "build-musl"))] + let _ = musl_extra; // silence unused warnings - let mut group = c.benchmark_group(fn_name); - group.bench_function("crate", |b| b.iter(|| { - let f = black_box(libm::$fn_name as $RustFn); - for input in benchvec.iter().copied() { - input.call(f); - } - })); + /* Option pointers are black boxed to avoid inlining in the benchmark loop */ - // Don't test against musl if it is not available - #[cfg(feature = "build-musl")] - group.bench_function("musl", |b| b.iter(|| { - let f = black_box(musl_math_sys::$fn_name as $CFn); - for input in benchvec.iter().copied() { - input.call(f); - } - })); + let mut group = c.benchmark_group(name); + group.bench_function("crate", |b| { + b.iter(|| { + let f = black_box(Op::ROUTINE); + for input in benchvec.iter().copied() { + input.call(f); } - } - }; + }) + }); + + // Don't test against musl if it is not available + #[cfg(feature = "build-musl")] + { + let musl_fn = musl_extra.musl_fn.unwrap(); + group.bench_function("musl", |b| { + b.iter(|| { + let f = black_box(musl_fn); + for input in 
benchvec.iter().copied() { + input.call(f); + } + }) + }); + } } libm_macros::for_each_function! { @@ -83,12 +114,6 @@ libm_macros::for_each_function! { macro_rules! run_callback { ( fn_name: $fn_name:ident, - CFn: $_CFn:ty, - CArgs: $_CArgs:ty, - CRet: $_CRet:ty, - RustFn: $_RustFn:ty, - RustArgs: $_RustArgs:ty, - RustRet: $_RustRet:ty, extra: [$criterion:ident], ) => { paste::paste! { diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 2e6fdae7f..507b077b3 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -11,7 +11,7 @@ pub use rug::Float as MpFloat; use rug::float::Round::Nearest; use rug::ops::{PowAssignRound, RemAssignRound}; -use crate::Float; +use crate::{Float, MathOp}; /// Create a multiple-precision float with the correct number of bits for a concrete float type. fn new_mpfloat() -> MpFloat { @@ -29,23 +29,19 @@ where /// Structures that represent a float operation. /// -/// The struct itself should hold any context that can be reused among calls to `run` (allocated -/// `MpFloat`s). -pub trait MpOp { - /// Inputs to the operation (concrete float types). - type Input; - - /// Outputs from the operation (concrete float types). - type Output; +pub trait MpOp: MathOp { + /// The struct itself should hold any context that can be reused among calls to `run` (allocated + /// `MpFloat`s). + type MpTy; /// Create a new instance. - fn new() -> Self; + fn new_mp() -> Self::MpTy; /// Perform the operation. /// /// Usually this means assigning inputs to cached floats, performing the operation, applying /// subnormal approximation, and converting the result back to concrete values. - fn run(&mut self, input: Self::Input) -> Self::Output; + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet; } /// Implement `MpOp` for functions with a single return value. @@ -53,32 +49,21 @@ macro_rules! 
impl_mp_op { // Matcher for unary functions ( fn_name: $fn_name:ident, - CFn: $CFn:ty, - CArgs: $CArgs:ty, - CRet: $CRet:ty, - RustFn: fn($fty:ty,) -> $_ret:ty, - RustArgs: $RustArgs:ty, - RustRet: $RustRet:ty, + RustFn: fn($_fty:ty,) -> $_ret:ty, fn_extra: $fn_name_normalized:expr, ) => { paste::paste! { - pub mod $fn_name { - use super::*; - pub struct Operation(MpFloat); - - impl MpOp for Operation { - type Input = $RustArgs; - type Output = $RustRet; - - fn new() -> Self { - Self(new_mpfloat::<$fty>()) - } - - fn run(&mut self, input: Self::Input) -> Self::Output { - self.0.assign(input.0); - let ord = self.0.[< $fn_name_normalized _round >](Nearest); - prep_retval::(&mut self.0, ord) - } + impl MpOp for crate::op::$fn_name::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + let ord = this.[< $fn_name_normalized _round >](Nearest); + prep_retval::(this, ord) } } } @@ -86,33 +71,22 @@ macro_rules! impl_mp_op { // Matcher for binary functions ( fn_name: $fn_name:ident, - CFn: $CFn:ty, - CArgs: $CArgs:ty, - CRet: $CRet:ty, - RustFn: fn($fty:ty, $_fty2:ty,) -> $_ret:ty, - RustArgs: $RustArgs:ty, - RustRet: $RustRet:ty, + RustFn: fn($_fty:ty, $_fty2:ty,) -> $_ret:ty, fn_extra: $fn_name_normalized:expr, ) => { paste::paste! 
{ - pub mod $fn_name { - use super::*; - pub struct Operation(MpFloat, MpFloat); - - impl MpOp for Operation { - type Input = $RustArgs; - type Output = $RustRet; - - fn new() -> Self { - Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>()) - } - - fn run(&mut self, input: Self::Input) -> Self::Output { - self.0.assign(input.0); - self.1.assign(input.1); - let ord = self.0.[< $fn_name_normalized _round >](&self.1, Nearest); - prep_retval::(&mut self.0, ord) - } + impl MpOp for crate::op::$fn_name::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = this.0.[< $fn_name_normalized _round >](&this.1, Nearest); + prep_retval::(&mut this.0, ord) } } } @@ -120,34 +94,27 @@ macro_rules! impl_mp_op { // Matcher for ternary functions ( fn_name: $fn_name:ident, - CFn: $CFn:ty, - CArgs: $CArgs:ty, - CRet: $CRet:ty, - RustFn: fn($fty:ty, $_fty2:ty, $_fty3:ty,) -> $_ret:ty, - RustArgs: $RustArgs:ty, - RustRet: $RustRet:ty, + RustFn: fn($_fty:ty, $_fty2:ty, $_fty3:ty,) -> $_ret:ty, fn_extra: $fn_name_normalized:expr, ) => { paste::paste! 
{ - pub mod $fn_name { - use super::*; - pub struct Operation(MpFloat, MpFloat, MpFloat); - - impl MpOp for Operation { - type Input = $RustArgs; - type Output = $RustRet; - - fn new() -> Self { - Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>(), new_mpfloat::<$fty>()) - } - - fn run(&mut self, input: Self::Input) -> Self::Output { - self.0.assign(input.0); - self.1.assign(input.1); - self.2.assign(input.2); - let ord = self.0.[< $fn_name_normalized _round >](&self.1, &self.2, Nearest); - prep_retval::(&mut self.0, ord) - } + impl MpOp for crate::op::$fn_name::Routine { + type MpTy = (MpFloat, MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + ( + new_mpfloat::(), + new_mpfloat::(), + new_mpfloat::(), + ) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + this.2.assign(input.2); + let ord = this.0.[< $fn_name_normalized _round >](&this.1, &this.2, Nearest); + prep_retval::(&mut this.0, ord) } } } @@ -156,6 +123,7 @@ macro_rules! impl_mp_op { libm_macros::for_each_function! { callback: impl_mp_op, + emit_types: [RustFn], skip: [ // Most of these need a manual implementation fabs, ceil, copysign, floor, rint, round, trunc, @@ -186,29 +154,23 @@ macro_rules! impl_no_round { ($($fn_name:ident, $rug_name:ident;)*) => { paste::paste! 
{ // Implement for both f32 and f64 - $( impl_no_round!{ @inner_unary [< $fn_name f >], (f32,), $rug_name } )* - $( impl_no_round!{ @inner_unary $fn_name, (f64,), $rug_name } )* + $( impl_no_round!{ @inner_unary [< $fn_name f >], $rug_name } )* + $( impl_no_round!{ @inner_unary $fn_name, $rug_name } )* } }; - (@inner_unary $fn_name:ident, ($fty:ty,), $rug_name:ident) => { - pub mod $fn_name { - use super::*; - pub struct Operation(MpFloat); + (@inner_unary $fn_name:ident, $rug_name:ident) => { + impl MpOp for crate::op::$fn_name::Routine { + type MpTy = MpFloat; - impl MpOp for Operation { - type Input = ($fty,); - type Output = $fty; - - fn new() -> Self { - Self(new_mpfloat::<$fty>()) - } + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } - fn run(&mut self, input: Self::Input) -> Self::Output { - self.0.assign(input.0); - self.0.$rug_name(); - prep_retval::(&mut self.0, Ordering::Equal) - } + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + this.$rug_name(); + prep_retval::(this, Ordering::Equal) } } }; @@ -227,132 +189,81 @@ impl_no_round! { macro_rules! impl_op_for_ty { ($fty:ty, $suffix:literal) => { paste::paste! 
{ - pub mod [] { - use super::*; - pub struct Operation(MpFloat, MpFloat); - - impl MpOp for Operation { - type Input = ($fty, $fty); - type Output = $fty; - - fn new() -> Self { - Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>()) - } - - fn run(&mut self, input: Self::Input) -> Self::Output { - self.0.assign(input.0); - self.1.assign(input.1); - self.0.copysign_mut(&self.1); - prep_retval::(&mut self.0, Ordering::Equal) - } + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) } - } - pub mod [] { - use super::*; - pub struct Operation(MpFloat, MpFloat); + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + this.0.copysign_mut(&this.1); + prep_retval::(&mut this.0, Ordering::Equal) + } + } - impl MpOp for Operation { - type Input = ($fty, $fty); - type Output = $fty; + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); - fn new() -> Self { - Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>()) - } + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } - fn run(&mut self, input: Self::Input) -> Self::Output { - self.0.assign(input.0); - self.1.assign(input.1); - let ord = self.0.pow_assign_round(&self.1, Nearest); - prep_retval::(&mut self.0, ord) - } + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = this.0.pow_assign_round(&this.1, Nearest); + prep_retval::(&mut this.0, ord) } } - pub mod [] { - use super::*; - pub struct Operation(MpFloat, MpFloat); + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); - impl MpOp for Operation { - type Input = ($fty, $fty); - type Output = $fty; - - fn new() -> Self { - Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>()) - } + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } - fn run(&mut self, input: 
Self::Input) -> Self::Output { - self.0.assign(input.0); - self.1.assign(input.1); - let ord = self.0.rem_assign_round(&self.1, Nearest); - prep_retval::(&mut self.0, ord) - } + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = this.0.rem_assign_round(&this.1, Nearest); + prep_retval::(&mut this.0, ord) } } - pub mod [] { - use super::*; - pub struct Operation(MpFloat); - - impl MpOp for Operation { - type Input = ($fty,); - type Output = ($fty, i32); + impl MpOp for crate::op::[]::Routine { + type MpTy = (i32, MpFloat); - fn new() -> Self { - Self(new_mpfloat::<$fty>()) - } + fn new_mp() -> Self::MpTy { + (0, new_mpfloat::()) + } - fn run(&mut self, input: Self::Input) -> Self::Output { - self.0.assign(input.0); - let (sign, ord) = self.0.ln_abs_gamma_round(Nearest); - let ret = prep_retval::<$fty>(&mut self.0, ord); - (ret, sign as i32) - } + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0 = input.0; + this.1.assign(input.1); + let ord = this.1.jn_round(this.0, Nearest); + prep_retval::(&mut this.1, ord) } } - pub mod [] { - use super::*; - pub struct Operation(i32, MpFloat); - - impl MpOp for Operation { - type Input = (i32, $fty); - type Output = $fty; + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); - fn new() -> Self { - Self(0, new_mpfloat::<$fty>()) - } - - fn run(&mut self, input: Self::Input) -> Self::Output { - self.0 = input.0; - self.1.assign(input.1); - let ord = self.1.jn_round(self.0, Nearest); - prep_retval::<$fty>(&mut self.1, ord) - } + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) } - } - pub mod [] { - use super::*; - pub struct Operation(MpFloat, MpFloat); - - impl MpOp for Operation { - type Input = ($fty,); - type Output = ($fty, $fty); - - fn new() -> Self { - Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>()) - } - - fn run(&mut self, input: Self::Input) -> Self::Output { - 
self.0.assign(input.0); - self.1.assign(0.0); - let (sord, cord) = self.0.sin_cos_round(&mut self.1, Nearest); - ( - prep_retval::<$fty>(&mut self.0, sord), - prep_retval::<$fty>(&mut self.1, cord) - ) - } + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(0.0); + let (sord, cord) = this.0.sin_cos_round(&mut this.1, Nearest); + ( + prep_retval::(&mut this.0, sord), + prep_retval::(&mut this.1, cord) + ) } } } @@ -362,7 +273,33 @@ macro_rules! impl_op_for_ty { impl_op_for_ty!(f32, "f"); impl_op_for_ty!(f64, ""); -// Account for `lgamma_r` not having a simple `f` suffix -pub mod lgammaf_r { - pub use super::lgamma_rf::*; +// `lgamma_r` is not a simple suffix so we can't use the above macro. +impl MpOp for crate::op::lgamma_r::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + let (sign, ord) = this.ln_abs_gamma_round(Nearest); + let ret = prep_retval::(this, ord); + (ret, sign as i32) + } +} + +impl MpOp for crate::op::lgammaf_r::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + let (sign, ord) = this.ln_abs_gamma_round(Nearest); + let ret = prep_retval::(this, ord); + (ret, sign as i32) + } } diff --git a/libm/crates/libm-test/tests/check_coverage.rs b/libm/crates/libm-test/tests/check_coverage.rs index ef6d21fdb..b7988660e 100644 --- a/libm/crates/libm-test/tests/check_coverage.rs +++ b/libm/crates/libm-test/tests/check_coverage.rs @@ -22,12 +22,6 @@ const ALLOWED_SKIPS: &[&str] = &[ macro_rules! 
callback { ( fn_name: $name:ident, - CFn: $_CFn:ty, - CArgs: $_CArgs:ty, - CRet: $_CRet:ty, - RustFn: $_RustFn:ty, - RustArgs: $_RustArgs:ty, - RustRet: $_RustRet:ty, extra: [$push_to:ident], ) => { $push_to.push(stringify!($name)); diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 5a118f7c2..d4ba9e900 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -9,42 +9,46 @@ // There are some targets we can't build musl for #![cfg(feature = "build-musl")] -use libm_test::gen::random; -use libm_test::{CheckBasis, CheckCtx, CheckOutput, TupleCall, musl_allowed_ulp}; -use musl_math_sys as musl; +use libm_test::gen::{CachedInput, random}; +use libm_test::{ + CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall, musl_allowed_ulp, +}; macro_rules! musl_rand_tests { ( fn_name: $fn_name:ident, - CFn: $CFn:ty, - CArgs: $CArgs:ty, - CRet: $CRet:ty, - RustFn: $RustFn:ty, - RustArgs: $RustArgs:ty, - RustRet: $RustRet:ty, attrs: [$($meta:meta)*] - ) => { paste::paste! { - #[test] - $(#[$meta])* - fn [< musl_random_ $fn_name >]() { - let fname = stringify!($fn_name); - let ulp = musl_allowed_ulp(fname); - let ctx = CheckCtx::new(ulp, fname, CheckBasis::Musl); - let cases = random::get_test_cases::<$RustArgs>(&ctx); - - for input in cases { - let musl_res = input.call(musl::$fn_name as $CFn); - let crate_res = input.call(libm::$fn_name as $RustFn); - - crate_res.validate(musl_res, input, &ctx).unwrap(); + ) => { + paste::paste! 
{ + #[test] + $(#[$meta])* + fn [< musl_random_ $fn_name >]() { + test_one::(musl_math_sys::$fn_name); } } - } }; + }; +} + +fn test_one(musl_fn: Op::CFn) +where + Op: MathOp, + CachedInput: GenerateInput, +{ + let name = Op::NAME_STR; + let ulp = musl_allowed_ulp(name); + let ctx = CheckCtx::new(ulp, name, CheckBasis::Musl); + let cases = random::get_test_cases::(&ctx); + + for input in cases { + let musl_res = input.call(musl_fn); + let crate_res = input.call(Op::ROUTINE); + + crate_res.validate(musl_res, input, &ctx).unwrap(); + } } libm_macros::for_each_function! { callback: musl_rand_tests, - skip: [], attributes: [ #[cfg_attr(x86_no_sse, ignore)] // FIXME(correctness): wrong result on i586 [exp10, exp10f, exp2, exp2f, rint] diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index f8d94a160..676ee86a0 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -2,45 +2,48 @@ #![cfg(feature = "test-multiprecision")] -use libm_test::gen::random; -use libm_test::mpfloat::{self, MpOp}; -use libm_test::{CheckBasis, CheckCtx, CheckOutput, TupleCall, multiprec_allowed_ulp}; +use libm_test::gen::{CachedInput, random}; +use libm_test::mpfloat::MpOp; +use libm_test::{ + CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall, multiprec_allowed_ulp, +}; /// Implement a test against MPFR with random inputs. macro_rules! multiprec_rand_tests { ( fn_name: $fn_name:ident, - CFn: $CFn:ty, - CArgs: $CArgs:ty, - CRet: $CRet:ty, - RustFn: $RustFn:ty, - RustArgs: $RustArgs:ty, - RustRet: $RustRet:ty, attrs: [$($meta:meta)*] ) => { paste::paste! 
{ #[test] $(#[$meta])* fn [< multiprec_random_ $fn_name >]() { - type MpOpTy = mpfloat::$fn_name::Operation; - - let fname = stringify!($fn_name); - let ulp = multiprec_allowed_ulp(fname); - let mut mp_vals = MpOpTy::new(); - let ctx = CheckCtx::new(ulp, fname, CheckBasis::Mpfr); - let cases = random::get_test_cases::<$RustArgs>(&ctx); - - for input in cases { - let mp_res = mp_vals.run(input); - let crate_res = input.call(libm::$fn_name as $RustFn); - - crate_res.validate(mp_res, input, &ctx).unwrap(); - } + test_one::(); } } }; } +fn test_one() +where + Op: MathOp + MpOp, + CachedInput: GenerateInput, +{ + let name = Op::NAME_STR; + + let ulp = multiprec_allowed_ulp(name); + let mut mp_vals = Op::new_mp(); + let ctx = CheckCtx::new(ulp, name, CheckBasis::Mpfr); + let cases = random::get_test_cases::(&ctx); + + for input in cases { + let mp_res = Op::run(&mut mp_vals, input); + let crate_res = input.call(Op::ROUTINE); + + crate_res.validate(mp_res, input, &ctx).unwrap(); + } +} + libm_macros::for_each_function! { callback: multiprec_rand_tests, attributes: [ From 87c2087db59c514ec11856c8f61096ee28c10d3f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 2 Nov 2024 16:52:28 -0500 Subject: [PATCH 0958/1459] Correct the proc macro to emit `pub` functions --- libm/crates/libm-macros/src/enums.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libm/crates/libm-macros/src/enums.rs b/libm/crates/libm-macros/src/enums.rs index d9017dff7..1f9fca2ef 100644 --- a/libm/crates/libm-macros/src/enums.rs +++ b/libm/crates/libm-macros/src/enums.rs @@ -56,14 +56,14 @@ pub fn function_enum( impl #enum_name { /// The stringified version of this function name. - const fn as_str(self) -> &'static str { + pub const fn as_str(self) -> &'static str { match self { #( #as_str_arms , )* } } /// The base name enum for this function. 
- const fn base_name(self) -> #base_enum { + pub const fn base_name(self) -> #base_enum { match self { #( #base_arms, )* } @@ -111,7 +111,7 @@ pub fn base_name_enum( impl #item_name { /// The stringified version of this base name. - const fn as_str(self) -> &'static str { + pub const fn as_str(self) -> &'static str { match self { #( #as_str_arms ),* } From 475f57c145317ac66b4d4e546e9c9ef942fd6e47 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 2 Nov 2024 22:35:30 -0500 Subject: [PATCH 0959/1459] Change the `CheckCtx` constructor to take a `Name` enum This prepares to eliminate some reliance on string matching but does not yet make those changes. --- libm/crates/libm-macros/src/lib.rs | 4 +-- libm/crates/libm-test/benches/random.rs | 2 +- libm/crates/libm-test/src/gen/random.rs | 7 ++-- libm/crates/libm-test/src/lib.rs | 21 ------------ libm/crates/libm-test/src/precision.rs | 32 +++++++++---------- libm/crates/libm-test/src/test_traits.rs | 20 ++++++++---- .../libm-test/tests/compare_built_musl.rs | 2 +- libm/crates/libm-test/tests/multiprecision.rs | 2 +- 8 files changed, 40 insertions(+), 50 deletions(-) diff --git a/libm/crates/libm-macros/src/lib.rs b/libm/crates/libm-macros/src/lib.rs index 2db412e79..1e7cd08b9 100644 --- a/libm/crates/libm-macros/src/lib.rs +++ b/libm/crates/libm-macros/src/lib.rs @@ -628,9 +628,9 @@ impl VisitMut for MacroReplace { } } -/// Return the unsuffixed name of a function. +/// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`, +/// `lgamma_r` and `lgammaf_r` both return `lgamma_r`. 
fn base_name(name: &str) -> &str { - // Keep this in sync with `libm_test::base_name` let known_mappings = &[ ("erff", "erf"), ("erf", "erf"), diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 6f2305dd2..72ace5d53 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -50,7 +50,7 @@ where let name = Op::NAME_STR; let ulp = libm_test::musl_allowed_ulp(name); - let ctx = CheckCtx::new(ulp, name, CheckBasis::Musl); + let ctx = CheckCtx::new(ulp, Op::NAME, CheckBasis::Musl); let benchvec: Vec<_> = random::get_test_cases::(&ctx).take(BENCH_ITER_ITEMS).collect(); diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs index e347b3c63..b72247a4e 100644 --- a/libm/crates/libm-test/src/gen/random.rs +++ b/libm/crates/libm-test/src/gen/random.rs @@ -110,7 +110,10 @@ pub fn get_test_cases(ctx: &CheckCtx) -> impl Iterator, { - let inputs = - if ctx.fn_name == "jn" || ctx.fn_name == "jnf" { &TEST_CASES_JN } else { &TEST_CASES }; + let inputs = if ctx.fn_name_str == "jn" || ctx.fn_name_str == "jnf" { + &TEST_CASES_JN + } else { + &TEST_CASES + }; inputs.get_cases() } diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index e64ad6264..af6ad6da5 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -17,27 +17,6 @@ pub type TestResult = Result; // List of all files present in libm's source include!(concat!(env!("OUT_DIR"), "/all_files.rs")); -/// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`, -/// `lgamma_r` and `lgammaf_r` both return `lgamma_r`. 
-pub fn base_name(name: &str) -> &str { - let known_mappings = &[ - ("erff", "erf"), - ("erf", "erf"), - ("lgammaf_r", "lgamma_r"), - ("modff", "modf"), - ("modf", "modf"), - ]; - - match known_mappings.iter().find(|known| known.0 == name) { - Some(found) => found.1, - None => name - .strip_suffix("f") - .or_else(|| name.strip_suffix("f16")) - .or_else(|| name.strip_suffix("f128")) - .unwrap_or(name), - } -} - /// True if `EMULATED` is set and nonempty. Used to determine how many iterations to run. pub const fn emulated() -> bool { match option_env!("EMULATED") { diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 5b021e946..5b5743020 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -111,25 +111,25 @@ impl MaybeOverride<(f32,)> for SpecialCase { ctx: &CheckCtx, ) -> Option { if ctx.basis == CheckBasis::Musl { - if ctx.fn_name == "expm1f" && input.0 > 80.0 && actual.is_infinite() { + if ctx.fn_name_str == "expm1f" && input.0 > 80.0 && actual.is_infinite() { // we return infinity but the number is representable return XFAIL; } - if ctx.fn_name == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() { + if ctx.fn_name_str == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() { // we return some NaN that should be real values or infinite // doesn't seem to happen on x86 return XFAIL; } } - if ctx.fn_name == "acoshf" && input.0 < -1.0 { + if ctx.fn_name_str == "acoshf" && input.0 < -1.0 { // acoshf is undefined for x <= 1.0, but we return a random result at lower // values. 
return XFAIL; } - if ctx.fn_name == "lgammaf" || ctx.fn_name == "lgammaf_r" && input.0 < 0.0 { + if ctx.fn_name_str == "lgammaf" || ctx.fn_name_str == "lgammaf_r" && input.0 < 0.0 { // loggamma should not be defined for x < 0, yet we both return results return XFAIL; } @@ -146,7 +146,7 @@ impl MaybeOverride<(f32,)> for SpecialCase { // On MPFR for lgammaf_r, we set -1 as the integer result for negative infinity but MPFR // sets +1 if ctx.basis == CheckBasis::Mpfr - && ctx.fn_name == "lgammaf_r" + && ctx.fn_name_str == "lgammaf_r" && input.0 == f32::NEG_INFINITY && actual.abs() == expected.abs() { @@ -166,13 +166,13 @@ impl MaybeOverride<(f64,)> for SpecialCase { ctx: &CheckCtx, ) -> Option { if ctx.basis == CheckBasis::Musl { - if cfg!(target_arch = "x86") && ctx.fn_name == "acosh" && input.0 < 1.0 { + if cfg!(target_arch = "x86") && ctx.fn_name_str == "acosh" && input.0 < 1.0 { // The function is undefined, both implementations return random results return SKIP; } if cfg!(x86_no_sse) - && ctx.fn_name == "ceil" + && ctx.fn_name_str == "ceil" && input.0 < 0.0 && input.0 > -1.0 && expected == F::ZERO @@ -183,13 +183,13 @@ impl MaybeOverride<(f64,)> for SpecialCase { } } - if ctx.fn_name == "acosh" && input.0 < 1.0 { + if ctx.fn_name_str == "acosh" && input.0 < 1.0 { // The function is undefined for the inputs, musl and our libm both return // random results. 
return XFAIL; } - if ctx.fn_name == "lgamma" || ctx.fn_name == "lgamma_r" && input.0 < 0.0 { + if ctx.fn_name_str == "lgamma" || ctx.fn_name_str == "lgamma_r" && input.0 < 0.0 { // loggamma should not be defined for x < 0, yet we both return results return XFAIL; } @@ -206,7 +206,7 @@ impl MaybeOverride<(f64,)> for SpecialCase { // On MPFR for lgamma_r, we set -1 as the integer result for negative infinity but MPFR // sets +1 if ctx.basis == CheckBasis::Mpfr - && ctx.fn_name == "lgamma_r" + && ctx.fn_name_str == "lgamma_r" && input.0 == f64::NEG_INFINITY && actual.abs() == expected.abs() { @@ -219,7 +219,7 @@ impl MaybeOverride<(f64,)> for SpecialCase { /// Check NaN bits if the function requires it fn maybe_check_nan_bits(actual: F, expected: F, ctx: &CheckCtx) -> Option { - if !(ctx.base_name == "fabs" || ctx.base_name == "copysign") { + if !(ctx.base_name_str == "fabs" || ctx.base_name_str == "copysign") { return None; } @@ -277,7 +277,7 @@ fn maybe_skip_binop_nan( ) -> Option { match ctx.basis { CheckBasis::Musl => { - if (ctx.base_name == "fmax" || ctx.base_name == "fmin") + if (ctx.base_name_str == "fmax" || ctx.base_name_str == "fmin") && (input.0.is_nan() || input.1.is_nan()) && expected.is_nan() { @@ -287,7 +287,7 @@ fn maybe_skip_binop_nan( } } CheckBasis::Mpfr => { - if ctx.base_name == "copysign" && input.1.is_nan() { + if ctx.base_name_str == "copysign" && input.1.is_nan() { SKIP } else { None @@ -308,7 +308,7 @@ impl MaybeOverride<(i32, f32)> for SpecialCase { CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx), CheckBasis::Mpfr => { // We return +0.0, MPFR returns -0.0 - if ctx.fn_name == "jnf" + if ctx.fn_name_str == "jnf" && input.1 == f32::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO @@ -333,7 +333,7 @@ impl MaybeOverride<(i32, f64)> for SpecialCase { CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx), CheckBasis::Mpfr => { // We return +0.0, MPFR returns -0.0 - if ctx.fn_name == "jn" + if ctx.fn_name_str == "jn" && input.1 
== f64::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO @@ -353,7 +353,7 @@ fn bessel_prec_dropoff( ulp: &mut u32, ctx: &CheckCtx, ) -> Option { - if ctx.base_name == "jn" { + if ctx.base_name_str == "jn" { if input.0 > 4000 { return XFAIL; } else if input.0 > 2000 { diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index b9bec9a44..65faefd33 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -11,25 +11,33 @@ use std::fmt; use anyhow::{Context, bail, ensure}; -use crate::{Float, Int, MaybeOverride, SpecialCase, TestResult}; +use crate::{BaseName, Float, Int, MaybeOverride, Name, SpecialCase, TestResult}; /// Context passed to [`CheckOutput`]. #[derive(Clone, Debug, PartialEq, Eq)] pub struct CheckCtx { /// Allowed ULP deviation pub ulp: u32, + pub fn_name: Name, + pub base_name: BaseName, /// Function name. - pub fn_name: &'static str, + pub fn_name_str: &'static str, /// Return the unsuffixed version of the function name. - pub base_name: &'static str, + pub base_name_str: &'static str, /// Source of truth for tests. 
pub basis: CheckBasis, } impl CheckCtx { - pub fn new(ulp: u32, fname: &'static str, basis: CheckBasis) -> Self { - let base_name = crate::base_name(fname); - Self { ulp, fn_name: fname, base_name, basis } + pub fn new(ulp: u32, fn_name: Name, basis: CheckBasis) -> Self { + Self { + ulp, + fn_name, + fn_name_str: fn_name.as_str(), + base_name: fn_name.base_name(), + base_name_str: fn_name.base_name().as_str(), + basis, + } } } diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index d4ba9e900..f4c827fc9 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -36,7 +36,7 @@ where { let name = Op::NAME_STR; let ulp = musl_allowed_ulp(name); - let ctx = CheckCtx::new(ulp, name, CheckBasis::Musl); + let ctx = CheckCtx::new(ulp, Op::NAME, CheckBasis::Musl); let cases = random::get_test_cases::(&ctx); for input in cases { diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 676ee86a0..5f38d8462 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -33,7 +33,7 @@ where let ulp = multiprec_allowed_ulp(name); let mut mp_vals = Op::new_mp(); - let ctx = CheckCtx::new(ulp, name, CheckBasis::Mpfr); + let ctx = CheckCtx::new(ulp, Op::NAME, CheckBasis::Mpfr); let cases = random::get_test_cases::(&ctx); for input in cases { From e73ee4ab56e5dc04eb8bc93f28ac029ad90e9965 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 2 Nov 2024 22:40:09 -0500 Subject: [PATCH 0960/1459] Rename `Name` to `Identifier` to avoid some ambiguity of "name" --- libm/crates/libm-test/benches/random.rs | 4 ++-- libm/crates/libm-test/src/gen/random.rs | 7 ++---- libm/crates/libm-test/src/lib.rs | 2 +- libm/crates/libm-test/src/op.rs | 12 +++++----- libm/crates/libm-test/src/precision.rs | 24 +++++++++---------- libm/crates/libm-test/src/test_traits.rs | 16 
++++++------- .../libm-test/tests/compare_built_musl.rs | 4 ++-- libm/crates/libm-test/tests/multiprecision.rs | 4 ++-- 8 files changed, 35 insertions(+), 38 deletions(-) diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 72ace5d53..5eea43319 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -47,10 +47,10 @@ where Op: MathOp, CachedInput: GenerateInput, { - let name = Op::NAME_STR; + let name = Op::NAME; let ulp = libm_test::musl_allowed_ulp(name); - let ctx = CheckCtx::new(ulp, Op::NAME, CheckBasis::Musl); + let ctx = CheckCtx::new(ulp, Op::IDENTIFIER, CheckBasis::Musl); let benchvec: Vec<_> = random::get_test_cases::(&ctx).take(BENCH_ITER_ITEMS).collect(); diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs index b72247a4e..e347b3c63 100644 --- a/libm/crates/libm-test/src/gen/random.rs +++ b/libm/crates/libm-test/src/gen/random.rs @@ -110,10 +110,7 @@ pub fn get_test_cases(ctx: &CheckCtx) -> impl Iterator, { - let inputs = if ctx.fn_name_str == "jn" || ctx.fn_name_str == "jnf" { - &TEST_CASES_JN - } else { - &TEST_CASES - }; + let inputs = + if ctx.fn_name == "jn" || ctx.fn_name == "jnf" { &TEST_CASES_JN } else { &TEST_CASES }; inputs.get_cases() } diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index af6ad6da5..914e58244 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -6,7 +6,7 @@ mod precision; mod test_traits; pub use libm::support::{Float, Int}; -pub use op::{BaseName, MathOp, Name}; +pub use op::{BaseName, Identifier, MathOp}; pub use precision::{MaybeOverride, SpecialCase, multiprec_allowed_ulp, musl_allowed_ulp}; pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall}; diff --git a/libm/crates/libm-test/src/op.rs b/libm/crates/libm-test/src/op.rs index fe0a08a28..50b455d3a 100644 --- 
a/libm/crates/libm-test/src/op.rs +++ b/libm/crates/libm-test/src/op.rs @@ -15,10 +15,10 @@ use crate::{CheckOutput, Float, TupleCall}; -/// An enum representing each possible routine name. +/// An enum representing each possible symbol name (`sin`, `sinf`, `sinl`, etc). #[libm_macros::function_enum(BaseName)] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub enum Name {} +pub enum Identifier {} /// The name without any type specifier, e.g. `sin` and `sinf` both become `sin`. #[libm_macros::base_name_enum] @@ -58,13 +58,13 @@ pub trait MathOp { type RustRet: CheckOutput; /// The name of this function, including suffix (e.g. `sin`, `sinf`). - const NAME: Name; + const IDENTIFIER: Identifier; /// The name as a string. - const NAME_STR: &'static str = Self::NAME.as_str(); + const NAME: &'static str = Self::IDENTIFIER.as_str(); /// The name of the function excluding the type suffix, e.g. `sin` and `sinf` are both `sin`. - const BASE_NAME: BaseName = Self::NAME.base_name(); + const BASE_NAME: BaseName = Self::IDENTIFIER.base_name(); /// The function in `libm` which can be called. const ROUTINE: Self::RustFn; @@ -96,7 +96,7 @@ macro_rules! 
do_thing { type RustArgs = $RustArgs; type RustRet = $RustRet; - const NAME: Name = Name::[< $fn_name:camel >]; + const IDENTIFIER: Identifier = Identifier::[< $fn_name:camel >]; const ROUTINE: Self::RustFn = libm::$fn_name; } } diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 5b5743020..afe8c1fb7 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -111,25 +111,25 @@ impl MaybeOverride<(f32,)> for SpecialCase { ctx: &CheckCtx, ) -> Option { if ctx.basis == CheckBasis::Musl { - if ctx.fn_name_str == "expm1f" && input.0 > 80.0 && actual.is_infinite() { + if ctx.fn_name == "expm1f" && input.0 > 80.0 && actual.is_infinite() { // we return infinity but the number is representable return XFAIL; } - if ctx.fn_name_str == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() { + if ctx.fn_name == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() { // we return some NaN that should be real values or infinite // doesn't seem to happen on x86 return XFAIL; } } - if ctx.fn_name_str == "acoshf" && input.0 < -1.0 { + if ctx.fn_name == "acoshf" && input.0 < -1.0 { // acoshf is undefined for x <= 1.0, but we return a random result at lower // values. 
return XFAIL; } - if ctx.fn_name_str == "lgammaf" || ctx.fn_name_str == "lgammaf_r" && input.0 < 0.0 { + if ctx.fn_name == "lgammaf" || ctx.fn_name == "lgammaf_r" && input.0 < 0.0 { // loggamma should not be defined for x < 0, yet we both return results return XFAIL; } @@ -146,7 +146,7 @@ impl MaybeOverride<(f32,)> for SpecialCase { // On MPFR for lgammaf_r, we set -1 as the integer result for negative infinity but MPFR // sets +1 if ctx.basis == CheckBasis::Mpfr - && ctx.fn_name_str == "lgammaf_r" + && ctx.fn_name == "lgammaf_r" && input.0 == f32::NEG_INFINITY && actual.abs() == expected.abs() { @@ -166,13 +166,13 @@ impl MaybeOverride<(f64,)> for SpecialCase { ctx: &CheckCtx, ) -> Option { if ctx.basis == CheckBasis::Musl { - if cfg!(target_arch = "x86") && ctx.fn_name_str == "acosh" && input.0 < 1.0 { + if cfg!(target_arch = "x86") && ctx.fn_name == "acosh" && input.0 < 1.0 { // The function is undefined, both implementations return random results return SKIP; } if cfg!(x86_no_sse) - && ctx.fn_name_str == "ceil" + && ctx.fn_name == "ceil" && input.0 < 0.0 && input.0 > -1.0 && expected == F::ZERO @@ -183,13 +183,13 @@ impl MaybeOverride<(f64,)> for SpecialCase { } } - if ctx.fn_name_str == "acosh" && input.0 < 1.0 { + if ctx.fn_name == "acosh" && input.0 < 1.0 { // The function is undefined for the inputs, musl and our libm both return // random results. 
return XFAIL; } - if ctx.fn_name_str == "lgamma" || ctx.fn_name_str == "lgamma_r" && input.0 < 0.0 { + if ctx.fn_name == "lgamma" || ctx.fn_name == "lgamma_r" && input.0 < 0.0 { // loggamma should not be defined for x < 0, yet we both return results return XFAIL; } @@ -206,7 +206,7 @@ impl MaybeOverride<(f64,)> for SpecialCase { // On MPFR for lgamma_r, we set -1 as the integer result for negative infinity but MPFR // sets +1 if ctx.basis == CheckBasis::Mpfr - && ctx.fn_name_str == "lgamma_r" + && ctx.fn_name == "lgamma_r" && input.0 == f64::NEG_INFINITY && actual.abs() == expected.abs() { @@ -308,7 +308,7 @@ impl MaybeOverride<(i32, f32)> for SpecialCase { CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx), CheckBasis::Mpfr => { // We return +0.0, MPFR returns -0.0 - if ctx.fn_name_str == "jnf" + if ctx.fn_name == "jnf" && input.1 == f32::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO @@ -333,7 +333,7 @@ impl MaybeOverride<(i32, f64)> for SpecialCase { CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx), CheckBasis::Mpfr => { // We return +0.0, MPFR returns -0.0 - if ctx.fn_name_str == "jn" + if ctx.fn_name == "jn" && input.1 == f64::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index 65faefd33..180330058 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -11,17 +11,17 @@ use std::fmt; use anyhow::{Context, bail, ensure}; -use crate::{BaseName, Float, Int, MaybeOverride, Name, SpecialCase, TestResult}; +use crate::{BaseName, Float, Identifier, Int, MaybeOverride, SpecialCase, TestResult}; /// Context passed to [`CheckOutput`]. #[derive(Clone, Debug, PartialEq, Eq)] pub struct CheckCtx { /// Allowed ULP deviation pub ulp: u32, - pub fn_name: Name, + pub fn_ident: Identifier, pub base_name: BaseName, /// Function name. 
- pub fn_name_str: &'static str, + pub fn_name: &'static str, /// Return the unsuffixed version of the function name. pub base_name_str: &'static str, /// Source of truth for tests. @@ -29,13 +29,13 @@ pub struct CheckCtx { } impl CheckCtx { - pub fn new(ulp: u32, fn_name: Name, basis: CheckBasis) -> Self { + pub fn new(ulp: u32, fn_ident: Identifier, basis: CheckBasis) -> Self { Self { ulp, - fn_name, - fn_name_str: fn_name.as_str(), - base_name: fn_name.base_name(), - base_name_str: fn_name.base_name().as_str(), + fn_ident, + fn_name: fn_ident.as_str(), + base_name: fn_ident.base_name(), + base_name_str: fn_ident.base_name().as_str(), basis, } } diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index f4c827fc9..c029a5d97 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -34,9 +34,9 @@ where Op: MathOp, CachedInput: GenerateInput, { - let name = Op::NAME_STR; + let name = Op::NAME; let ulp = musl_allowed_ulp(name); - let ctx = CheckCtx::new(ulp, Op::NAME, CheckBasis::Musl); + let ctx = CheckCtx::new(ulp, Op::IDENTIFIER, CheckBasis::Musl); let cases = random::get_test_cases::(&ctx); for input in cases { diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 5f38d8462..00c6278f6 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -29,11 +29,11 @@ where Op: MathOp + MpOp, CachedInput: GenerateInput, { - let name = Op::NAME_STR; + let name = Op::NAME; let ulp = multiprec_allowed_ulp(name); let mut mp_vals = Op::new_mp(); - let ctx = CheckCtx::new(ulp, Op::NAME, CheckBasis::Mpfr); + let ctx = CheckCtx::new(ulp, Op::IDENTIFIER, CheckBasis::Mpfr); let cases = random::get_test_cases::(&ctx); for input in cases { From 45fdf46ccf65ef151153b79bf5a47ca994f94a89 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 2 Nov 
2024 23:03:41 -0500 Subject: [PATCH 0961/1459] Change default ULP to use enum matching Migrate from string to enum matching and tie this to `CheckCtx::new`, so no tests need to explicitly set ULP. --- libm/crates/libm-test/benches/random.rs | 3 +- libm/crates/libm-test/src/lib.rs | 2 +- libm/crates/libm-test/src/precision.rs | 73 +++++++++---------- libm/crates/libm-test/src/test_traits.rs | 11 ++- .../libm-test/tests/compare_built_musl.rs | 8 +- libm/crates/libm-test/tests/multiprecision.rs | 9 +-- 6 files changed, 47 insertions(+), 59 deletions(-) diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 5eea43319..d77d57908 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -49,8 +49,7 @@ where { let name = Op::NAME; - let ulp = libm_test::musl_allowed_ulp(name); - let ctx = CheckCtx::new(ulp, Op::IDENTIFIER, CheckBasis::Musl); + let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl); let benchvec: Vec<_> = random::get_test_cases::(&ctx).take(BENCH_ITER_ITEMS).collect(); diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 914e58244..7f0d9aa75 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -7,7 +7,7 @@ mod test_traits; pub use libm::support::{Float, Int}; pub use op::{BaseName, Identifier, MathOp}; -pub use precision::{MaybeOverride, SpecialCase, multiprec_allowed_ulp, musl_allowed_ulp}; +pub use precision::{MaybeOverride, SpecialCase, default_ulp}; pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall}; /// Result type for tests is usually from `anyhow`. 
Most times there is no success value to diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index afe8c1fb7..c761709b8 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -3,7 +3,10 @@ use core::f32; -use crate::{CheckBasis, CheckCtx, Float, Int, TestResult}; +use CheckBasis::{Mpfr, Musl}; +use Identifier as Id; + +use crate::{CheckBasis, CheckCtx, Float, Identifier, Int, TestResult}; /// Type implementing [`IgnoreCase`]. pub struct SpecialCase; @@ -14,50 +17,42 @@ const MUSL_DEFAULT_ULP: u32 = 2; /// Default ULP allowed to differ from multiprecision (i.e. infinite) results. const MULTIPREC_DEFAULT_ULP: u32 = 1; -/// ULP allowed to differ from muls results. +/// ULP allowed to differ from the results returned by a test basis. /// -/// Note that these results were obtained using 400,000,000 rounds of random inputs, which +/// Note that these results were obtained using 400M rounds of random inputs, which /// is not a value used by default. -pub fn musl_allowed_ulp(name: &str) -> u32 { - // Consider overrides xfail - match name { - #[cfg(x86_no_sse)] - "asinh" | "asinhf" => 6, - "lgamma" | "lgamma_r" | "lgammaf" | "lgammaf_r" => 400, - "tanh" | "tanhf" => 4, - "tgamma" => 20, - "j0" | "j0f" | "j1" | "j1f" => { +pub fn default_ulp(ctx: &CheckCtx) -> u32 { + match (&ctx.basis, ctx.fn_ident) { + // Overrides that apply to either basis + (_, Id::J0 | Id::J0f | Id::J1 | Id::J1f) => { // Results seem very target-dependent if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 } } - "jn" | "jnf" => 1000, - "sincosf" => 500, - #[cfg(not(target_pointer_width = "64"))] - "exp10" => 4, - #[cfg(not(target_pointer_width = "64"))] - "exp10f" => 4, - _ => MUSL_DEFAULT_ULP, - } -} + (_, Id::Jn | Id::Jnf) => 1000, -/// ULP allowed to differ from multiprecision results. 
-pub fn multiprec_allowed_ulp(name: &str) -> u32 { - // Consider overrides xfail - match name { - "asinh" | "asinhf" => 2, - "acoshf" => 4, - "atanh" | "atanhf" => 2, - "exp10" | "exp10f" => 3, - "j0" | "j0f" | "j1" | "j1f" => { - // Results seem very target-dependent - if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 } - } - "jn" | "jnf" => 1000, - "lgamma" | "lgammaf" | "lgamma_r" | "lgammaf_r" => 16, - "sinh" | "sinhf" => 2, - "tanh" | "tanhf" => 2, - "tgamma" => 20, - _ => MULTIPREC_DEFAULT_ULP, + // Overrides for musl + #[cfg(x86_no_sse)] + (Musl, Id::Asinh | Id::Asinhf) => 6, + #[cfg(not(target_pointer_width = "64"))] + (Musl, Id::Exp10 | Id::Exp10f) => 4, + (Musl, Id::Lgamma | Id::LgammaR | Id::Lgammaf | Id::LgammafR) => 400, + (Musl, Id::Sincosf) => 500, + (Musl, Id::Tanh | Id::Tanhf) => 4, + (Musl, Id::Tgamma) => 20, + + // Overrides for MPFR + (Mpfr, Id::Acoshf) => 4, + (Mpfr, Id::Asinh | Id::Asinhf) => 2, + (Mpfr, Id::Atanh | Id::Atanhf) => 2, + (Mpfr, Id::Exp10 | Id::Exp10f) => 3, + (Mpfr, Id::Lgamma | Id::LgammaR | Id::Lgammaf | Id::LgammafR) => 16, + (Mpfr, Id::Sinh | Id::Sinhf) => 2, + (Mpfr, Id::Tanh | Id::Tanhf) => 2, + (Mpfr, Id::Tgamma) => 20, + + // Defaults + (Musl, _) => MUSL_DEFAULT_ULP, + (Mpfr, _) => MULTIPREC_DEFAULT_ULP, } } diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index 180330058..ec14a8cf2 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -29,15 +29,18 @@ pub struct CheckCtx { } impl CheckCtx { - pub fn new(ulp: u32, fn_ident: Identifier, basis: CheckBasis) -> Self { - Self { - ulp, + /// Create a new check context, using the default ULP for the function. 
+ pub fn new(fn_ident: Identifier, basis: CheckBasis) -> Self { + let mut ret = Self { + ulp: 0, fn_ident, fn_name: fn_ident.as_str(), base_name: fn_ident.base_name(), base_name_str: fn_ident.base_name().as_str(), basis, - } + }; + ret.ulp = crate::default_ulp(&ret); + ret } } diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index c029a5d97..0022ee03c 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -10,9 +10,7 @@ #![cfg(feature = "build-musl")] use libm_test::gen::{CachedInput, random}; -use libm_test::{ - CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall, musl_allowed_ulp, -}; +use libm_test::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall}; macro_rules! musl_rand_tests { ( @@ -34,9 +32,7 @@ where Op: MathOp, CachedInput: GenerateInput, { - let name = Op::NAME; - let ulp = musl_allowed_ulp(name); - let ctx = CheckCtx::new(ulp, Op::IDENTIFIER, CheckBasis::Musl); + let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl); let cases = random::get_test_cases::(&ctx); for input in cases { diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 00c6278f6..47a85bdb3 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -4,9 +4,7 @@ use libm_test::gen::{CachedInput, random}; use libm_test::mpfloat::MpOp; -use libm_test::{ - CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall, multiprec_allowed_ulp, -}; +use libm_test::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall}; /// Implement a test against MPFR with random inputs. macro_rules! 
multiprec_rand_tests { @@ -29,11 +27,8 @@ where Op: MathOp + MpOp, CachedInput: GenerateInput, { - let name = Op::NAME; - - let ulp = multiprec_allowed_ulp(name); let mut mp_vals = Op::new_mp(); - let ctx = CheckCtx::new(ulp, Op::IDENTIFIER, CheckBasis::Mpfr); + let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr); let cases = random::get_test_cases::(&ctx); for input in cases { From 80b2b98675f960f8f4e0f5d60978d34f740093e9 Mon Sep 17 00:00:00 2001 From: beetrees Date: Sun, 3 Nov 2024 22:54:01 +0000 Subject: [PATCH 0962/1459] Remove incorrect `sparcv9` match pattern from `configure_f16_f128` --- configure.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.rs b/configure.rs index 68b4d68e6..f8aafbe70 100644 --- a/configure.rs +++ b/configure.rs @@ -71,7 +71,7 @@ pub fn configure_f16_f128(target: &Target) { // `f128` crashes "powerpc64" if &target.os == "aix" => (true, false), // `f128` crashes - "sparc" | "sparcv9" => (true, false), + "sparc" => (true, false), // `f16` miscompiles "wasm32" | "wasm64" => (false, true), // Most everything else works as of LLVM 19 From 05d7fec0ff954a3a5e6b4af0b7dbdef20be681b1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 3 Nov 2024 23:08:41 +0000 Subject: [PATCH 0963/1459] chore: release v0.1.139 --- CHANGELOG.md | 6 ++++++ Cargo.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f43b63b2b..5c65aa871 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.139](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.138...compiler_builtins-v0.1.139) - 2024-11-03 + +### Other + +- Remove incorrect `sparcv9` match pattern from `configure_f16_f128` + ## 
[0.1.138](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.137...compiler_builtins-v0.1.138) - 2024-11-01 ### Other diff --git a/Cargo.toml b/Cargo.toml index 3d8a1f255..33f1c9a3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.138" +version = "0.1.139" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From d15d64abdebb68f377cc5e69c41f43d468d96d3d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 3 Nov 2024 20:24:26 -0600 Subject: [PATCH 0964/1459] Change the `multiprec_` prefix to `mp_` Currently there is a combination of names starting with `multiprecision_`, `mp_` and `multiprec_`. Update so `multiprecision_` is always used when a long form makes sense, `mp_` otherwise (eliminating `multiprec_`). --- libm/crates/libm-test/src/precision.rs | 4 ++-- libm/crates/libm-test/tests/multiprecision.rs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index c761709b8..cf9115430 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -15,7 +15,7 @@ pub struct SpecialCase; const MUSL_DEFAULT_ULP: u32 = 2; /// Default ULP allowed to differ from multiprecision (i.e. infinite) results. -const MULTIPREC_DEFAULT_ULP: u32 = 1; +const MP_DEFAULT_ULP: u32 = 1; /// ULP allowed to differ from the results returned by a test basis. 
/// @@ -52,7 +52,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { // Defaults (Musl, _) => MUSL_DEFAULT_ULP, - (Mpfr, _) => MULTIPREC_DEFAULT_ULP, + (Mpfr, _) => MP_DEFAULT_ULP, } } diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 47a85bdb3..0b41fba82 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -7,7 +7,7 @@ use libm_test::mpfloat::MpOp; use libm_test::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall}; /// Implement a test against MPFR with random inputs. -macro_rules! multiprec_rand_tests { +macro_rules! mp_rand_tests { ( fn_name: $fn_name:ident, attrs: [$($meta:meta)*] @@ -15,7 +15,7 @@ macro_rules! multiprec_rand_tests { paste::paste! { #[test] $(#[$meta])* - fn [< multiprec_random_ $fn_name >]() { + fn [< mp_random_ $fn_name >]() { test_one::(); } } @@ -40,7 +40,7 @@ where } libm_macros::for_each_function! { - callback: multiprec_rand_tests, + callback: mp_rand_tests, attributes: [ // Also an assertion failure on i686: at `MPFR_ASSERTN (! mpfr_erangeflag_p ())` #[ignore = "large values are infeasible in MPFR"] From e86d47f6ae90e17e0ba33fdc3f6a4a33ac1a1164 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 3 Nov 2024 20:52:28 -0600 Subject: [PATCH 0965/1459] Move some numeric trait logic to default implementations There are a handful of functions we can move out of the macro and to the numeric traits as default implementations; do that here. Additionally, add some bounds that make sense for completeness. 
--- libm/crates/libm-test/src/test_traits.rs | 2 +- libm/src/math/support/float_traits.rs | 106 +++++++++++++---------- libm/src/math/support/int_traits.rs | 7 +- 3 files changed, 67 insertions(+), 48 deletions(-) diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index ec14a8cf2..b8e0aa108 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -314,7 +314,7 @@ where // Make sure that the signs are the same before checing ULP to avoid wraparound let act_sig = actual.signum(); let exp_sig = expected.signum(); - ensure!(act_sig == exp_sig, "mismatched signs {act_sig} {exp_sig}"); + ensure!(act_sig == exp_sig, "mismatched signs {act_sig:?} {exp_sig:?}"); if actual.is_infinite() ^ expected.is_infinite() { bail!("mismatched infinities"); diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index a1d84faf2..0047ba368 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -1,4 +1,4 @@ -use core::{fmt, ops}; +use core::{fmt, mem, ops}; use super::int_traits::{Int, MinInt}; @@ -7,15 +7,17 @@ use super::int_traits::{Int, MinInt}; pub trait Float: Copy + fmt::Debug - + fmt::Display + PartialEq + PartialOrd + ops::AddAssign + ops::MulAssign + ops::Add + ops::Sub + + ops::Mul + ops::Div + ops::Rem + + ops::Neg + + 'static { /// A uint of the same width as the float type Int: Int; @@ -27,11 +29,16 @@ pub trait Float: type ExpInt: Int; const ZERO: Self; + const NEG_ZERO: Self; const ONE: Self; const NEG_ONE: Self; const INFINITY: Self; const NEG_INFINITY: Self; const NAN: Self; + const MAX: Self; + const MIN: Self; + const PI: Self; + const FRAC_PI_2: Self; /// The bitwidth of the float type const BITS: u32; @@ -69,7 +76,19 @@ pub trait Float: /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be /// represented in multiple different ways. 
This method returns `true` if two NaNs are /// compared. - fn eq_repr(self, rhs: Self) -> bool; + fn eq_repr(self, rhs: Self) -> bool { + let is_nan = |x: Self| -> bool { + // } + // fn is_nan(x: Self) -> bool { + // When using mangled-names, the "real" compiler-builtins might not have the + // necessary builtin (__unordtf2) to test whether `f128` is NaN. + // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin + // x is NaN if all the bits of the exponent are set and the significand is non-0 + x.to_bits() & Self::EXP_MASK == Self::EXP_MASK + && x.to_bits() & Self::SIG_MASK != Self::Int::ZERO + }; + if is_nan(self) && is_nan(rhs) { true } else { self.to_bits() == rhs.to_bits() } + } /// Returns true if the value is NaN. fn is_nan(self) -> bool; @@ -81,22 +100,35 @@ pub trait Float: fn is_sign_negative(self) -> bool; /// Returns if `self` is subnormal - fn is_subnormal(self) -> bool; + fn is_subnormal(self) -> bool { + (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO + } /// Returns the exponent, not adjusting for bias. fn exp(self) -> Self::ExpInt; /// Returns the significand with no implicit bit (or the "fractional" part) - fn frac(self) -> Self::Int; + fn frac(self) -> Self::Int { + self.to_bits() & Self::SIG_MASK + } /// Returns the significand with implicit bit - fn imp_frac(self) -> Self::Int; + fn imp_frac(self) -> Self::Int { + self.frac() | Self::IMPLICIT_BIT + } /// Returns a `Self::Int` transmuted back to `Self` fn from_bits(a: Self::Int) -> Self; /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. 
- fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self; + fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self { + let sign = if negative { Self::Int::ONE } else { Self::Int::ZERO }; + Self::from_bits( + (sign << (Self::BITS - 1)) + | ((exponent << Self::SIG_BITS) & Self::EXP_MASK) + | (significand & Self::SIG_MASK), + ) + } fn abs(self) -> Self { let abs_mask = !Self::SIGN_MASK; @@ -107,10 +139,18 @@ pub trait Float: fn normalize(significand: Self::Int) -> (i32, Self::Int); /// Returns a number composed of the magnitude of self and the sign of sign. - fn copysign(self, other: Self) -> Self; + fn copysign(self, other: Self) -> Self { + let mut x = self.to_bits(); + let y = other.to_bits(); + x &= !Self::SIGN_MASK; + x |= y & Self::SIGN_MASK; + Self::from_bits(x) + } /// Returns a number that represents the sign of self. - fn signum(self) -> Self; + fn signum(self) -> Self { + if self.is_nan() { self } else { Self::ONE.copysign(self) } + } } macro_rules! float_impl { @@ -121,11 +161,22 @@ macro_rules! float_impl { type ExpInt = $expty; const ZERO: Self = 0.0; + const NEG_ZERO: Self = -0.0; const ONE: Self = 1.0; const NEG_ONE: Self = -1.0; const INFINITY: Self = Self::INFINITY; const NEG_INFINITY: Self = Self::NEG_INFINITY; const NAN: Self = Self::NAN; + const MAX: Self = -Self::MIN; + // Sign bit set, saturated mantissa, saturated exponent with last bit zeroed + // FIXME(msrv): just use `from_bits` when available + // SAFETY: POD cast with no preconditions + const MIN: Self = unsafe { + mem::transmute::(Self::Int::MAX & !(1 << Self::SIG_BITS)) + }; + + const PI: Self = core::$ty::consts::PI; + const FRAC_PI_2: Self = core::$ty::consts::FRAC_PI_2; const BITS: u32 = $bits; const SIG_BITS: u32 = $significand_bits; @@ -141,16 +192,6 @@ macro_rules! 
float_impl { fn to_bits_signed(self) -> Self::SignedInt { self.to_bits() as Self::SignedInt } - fn eq_repr(self, rhs: Self) -> bool { - fn is_nan(x: $ty) -> bool { - // When using mangled-names, the "real" compiler-builtins might not have the - // necessary builtin (__unordtf2) to test whether `f128` is NaN. - // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin - // x is NaN if all the bits of the exponent are set and the significand is non-0 - x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0 - } - if is_nan(self) && is_nan(rhs) { true } else { self.to_bits() == rhs.to_bits() } - } fn is_nan(self) -> bool { self.is_nan() } @@ -160,43 +201,16 @@ macro_rules! float_impl { fn is_sign_negative(self) -> bool { self.is_sign_negative() } - fn is_subnormal(self) -> bool { - (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO - } fn exp(self) -> Self::ExpInt { ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt } - fn frac(self) -> Self::Int { - self.to_bits() & Self::SIG_MASK - } - fn imp_frac(self) -> Self::Int { - self.frac() | Self::IMPLICIT_BIT - } fn from_bits(a: Self::Int) -> Self { Self::from_bits(a) } - fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self { - Self::from_bits( - ((negative as Self::Int) << (Self::BITS - 1)) - | ((exponent << Self::SIG_BITS) & Self::EXP_MASK) - | (significand & Self::SIG_MASK), - ) - } fn normalize(significand: Self::Int) -> (i32, Self::Int) { let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int) } - fn copysign(self, other: Self) -> Self { - let mut x = self.to_bits(); - let y = other.to_bits(); - x &= !Self::SIGN_MASK; - x |= y & Self::SIGN_MASK; - Self::from_bits(x) - } - - fn signum(self) -> Self { - if self.is_nan() { self } else { Self::ONE.copysign(self) } - } } }; } diff --git a/libm/src/math/support/int_traits.rs 
b/libm/src/math/support/int_traits.rs index b08907aa5..c72c1d5cb 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -1,4 +1,4 @@ -use core::{fmt, ops}; +use core::{cmp, fmt, ops}; /// Minimal integer implementations needed on all integer types, including wide integers. #[allow(dead_code)] @@ -31,6 +31,8 @@ pub trait MinInt: pub trait Int: MinInt + fmt::Display + + fmt::Binary + + fmt::LowerHex + PartialEq + PartialOrd + ops::AddAssign @@ -47,6 +49,9 @@ pub trait Int: + ops::Shr + ops::BitXor + ops::BitAnd + + cmp::Ord + + CastInto + + CastFrom { fn signed(self) -> ::OtherSign; fn unsigned(self) -> Self::Unsigned; From 077f0181e24459ffad1e6c8c917edc5985f2695d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 5 Nov 2024 02:43:33 -0600 Subject: [PATCH 0966/1459] Disable `f16` on platforms that have recursion problems CI in [1] seems to indicate that there are cases where the `f16` infinite recursion bug ([2], [3]) can make its way into what gets called during tests, even though this doesn't seem to be the usual case. In order to make sure that we avoid these completely, just unset `f16_enabled` on any platforms that have the recursion problem. This also refactors the `match` statement to be more in line with `library/std/build.rs`. [1]: https://github.com/rust-lang/compiler-builtins/pull/729 [2]: https://github.com/llvm/llvm-project/issues/97981 [3]: https://github.com/rust-lang/compiler-builtins/issues/651 --- configure.rs | 55 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/configure.rs b/configure.rs index f8aafbe70..0a0bd503d 100644 --- a/configure.rs +++ b/configure.rs @@ -51,31 +51,46 @@ impl Target { /// Configure whether or not `f16` and `f128` support should be enabled. pub fn configure_f16_f128(target: &Target) { // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. 
This only means - // that the backend will not crash when using these types. This does not mean that the - // backend does the right thing, or that the platform doesn't have ABI bugs. + // that the backend will not crash when using these types and generates code that can be called + // without crashing (no infinite recursion). This does not mean that the platform doesn't have + // ABI or other bugs. // // We do this here rather than in `rust-lang/rust` because configuring via cargo features is // not straightforward. // // Original source of this list: // - let (f16_ok, f128_ok) = match target.arch.as_str() { - // `f16` and `f128` both crash - "arm64ec" => (false, false), - // `f16` crashes - "s390x" => (false, true), - // FIXME(llvm): `f16` test failures fixed by - "loongarch64" => (false, true), - // `f128` crashes - "mips64" | "mips64r6" => (true, false), - // `f128` crashes - "powerpc64" if &target.os == "aix" => (true, false), - // `f128` crashes - "sparc" => (true, false), - // `f16` miscompiles - "wasm32" | "wasm64" => (false, true), + let f16_enabled = match target.arch.as_str() { + // Unsupported + "arm64ec" => false, + // Selection failure + "s390x" => false, + // Infinite recursion + // FIXME(llvm): loongarch fixed by + "csky" => false, + "hexagon" => false, + "loongarch64" => false, + "mips" | "mips64" | "mips32r6" | "mips64r6" => false, + "powerpc" | "powerpc64" => false, + "sparc" | "sparc64" => false, + "wasm32" | "wasm64" => false, // Most everything else works as of LLVM 19 - _ => (true, true), + _ => true, + }; + + let f128_enabled = match target.arch.as_str() { + // Unsupported + "arm64ec" => false, + // Selection failure + "mips64" | "mips64r6" => false, + // Selection failure + "nvptx64" => false, + // Selection failure + "powerpc64" if &target.os == "aix" => false, + // Selection failure + "sparc" => false, + // Most everything else works as of LLVM 19 + _ => true, }; // If the feature is set, disable these types. 
@@ -84,11 +99,11 @@ pub fn configure_f16_f128(target: &Target) { println!("cargo::rustc-check-cfg=cfg(f16_enabled)"); println!("cargo::rustc-check-cfg=cfg(f128_enabled)"); - if f16_ok && !disable_both { + if f16_enabled && !disable_both { println!("cargo::rustc-cfg=f16_enabled"); } - if f128_ok && !disable_both { + if f128_enabled && !disable_both { println!("cargo::rustc-cfg=f128_enabled"); } } From 0806b79a42a827f4a7fac6fc03d483cc6d8e1ade Mon Sep 17 00:00:00 2001 From: beetrees Date: Thu, 14 Nov 2024 18:13:10 +0000 Subject: [PATCH 0967/1459] Use `https:` links in `README.md` --- libm/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libm/README.md b/libm/README.md index e5d64bd2d..52d760a4f 100644 --- a/libm/README.md +++ b/libm/README.md @@ -41,14 +41,14 @@ This crate supports rustc 1.63 and newer. ## License Usage is licensed under the MIT license ([LICENSE-MIT](LICENSE-MIT) or -http://opensource.org/licenses/MIT). +https://opensource.org/licenses/MIT). ### Contribution Contributions are licensed under both the MIT license and the Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or -http://www.apache.org/licenses/LICENSE-2.0). Unless you explicitly state +https://www.apache.org/licenses/LICENSE-2.0). Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as mentioned, without any additional terms or conditions. From f105e349e5f6d877fa4156ca93630c1a7aba6560 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 22 Dec 2024 05:03:21 +0000 Subject: [PATCH 0968/1459] Remove tests against system musl We now have tests against our custom-built musl as well as tests against MPFR. The tests against system musl covers less than those against custom-built musl, and are less portable; there isn't much benefit to keeping them around so just remove them. 
--- libm/CONTRIBUTING.md | 20 +- libm/ci/run.sh | 5 - libm/crates/libm-test/Cargo.toml | 1 - libm/crates/libm-test/build.rs | 456 ---------------------- libm/crates/libm-test/tests/musl_biteq.rs | 6 - libm/src/lib.rs | 25 -- 6 files changed, 12 insertions(+), 501 deletions(-) delete mode 100644 libm/crates/libm-test/tests/musl_biteq.rs diff --git a/libm/CONTRIBUTING.md b/libm/CONTRIBUTING.md index 0a1741631..aadcdf036 100644 --- a/libm/CONTRIBUTING.md +++ b/libm/CONTRIBUTING.md @@ -6,9 +6,8 @@ `src/math/mod.rs` accordingly. Also, uncomment the corresponding trait method in `src/lib.rs`. - Write some simple tests in your module (using `#[test]`) -- Run `cargo test` to make sure it works -- Run `cargo test --features libm-test/test-musl-serialized` to compare your - implementation against musl's +- Run `cargo test` to make sure it works. Full tests are only run when enabling + features, see [Testing](#testing) below. - Send us a pull request! Make sure to run `cargo fmt` on your code before sending the PR. Also include "closes #42" in the PR description to close the corresponding issue. @@ -66,12 +65,17 @@ Normal tests can be executed with: cargo test ``` -If you'd like to run tests with randomized inputs that get compared against musl -itself, you'll need to be on a Linux system and then you can execute: +If you'd like to run tests with randomized inputs that get compared against +infinite-precision results, run: ```sh -cargo test --features libm-test/test-musl-serialized +cargo test --features libm-test/test-multiprecision,libm-test/build-musl --release ``` -Note that you may need to pass `--release` to Cargo if there are errors related -to integer overflow. +The multiprecision tests use the [`rug`] crate for bindings to MPFR. MPFR can +be difficult to build on non-Unix systems, refer to [`gmp_mpfr_sys`] for help. + +`build-musl` does not build with MSVC, Wasm, or Thumb. 
+ +[`rug`]: https://docs.rs/rug/latest/rug/ +[`gmp_mpfr_sys`]: https://docs.rs/gmp-mpfr-sys/1.6.4/gmp_mpfr_sys/ diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 32453663e..d89c8bdf0 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -57,11 +57,6 @@ case "$target" in *windows-gnu) extra_flags="$extra_flags --exclude libm-macros" ;; esac -if [ "$(uname -a)" = "Linux" ]; then - # also run the reference tests when we can. requires a Linux host. - extra_flags="$extra_flags --features libm-test/test-musl-serialized" -fi - # Make sure we can build with overriding features. We test the indibidual # features it controls separately. cargo check --no-default-features diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 3587b44e6..4d75b25f8 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -9,7 +9,6 @@ default = [] # Generate tests which are random inputs and the outputs are calculated with # musl libc. -test-musl-serialized = ["rand"] test-multiprecision = ["dep:az", "dep:rug"] # Build our own musl for testing and benchmarks diff --git a/libm/crates/libm-test/build.rs b/libm/crates/libm-test/build.rs index 40b3e56c0..dc3126dbb 100644 --- a/libm/crates/libm-test/build.rs +++ b/libm/crates/libm-test/build.rs @@ -8,9 +8,6 @@ fn main() { emit_optimization_cfg(&cfg); emit_cfg_shorthands(&cfg); list_all_tests(&cfg); - - #[cfg(feature = "test-musl-serialized")] - musl_serialized_tests::generate(); } #[allow(dead_code)] @@ -93,456 +90,3 @@ fn list_all_tests(cfg: &Config) { let outfile = cfg.out_dir.join("all_files.rs"); fs::write(outfile, s).unwrap(); } - -/// At build time, generate the output of what the corresponding `*musl` target does with a range -/// of inputs. -/// -/// Serialize that target's output, run the same thing with our symbols, then load and compare -/// the resulting values. 
-#[cfg(feature = "test-musl-serialized")] -mod musl_serialized_tests { - use std::path::PathBuf; - use std::process::Command; - use std::{env, fs}; - - use rand::Rng; - use rand::seq::SliceRandom; - - // Number of tests to generate for each function - const NTESTS: usize = 500; - - // These files are all internal functions or otherwise miscellaneous, not - // defining a function we want to test. - const IGNORED_FILES: &[&str] = &[ - "fenv.rs", - // These are giving slightly different results compared to musl - "lgamma.rs", - "lgammaf.rs", - "tgamma.rs", - "j0.rs", - "j0f.rs", - "jn.rs", - "jnf.rs", - "j1.rs", - "j1f.rs", - ]; - - struct Function { - name: String, - args: Vec, - ret: Vec, - tests: Vec, - } - - enum Ty { - F32, - F64, - I32, - Bool, - } - - struct Test { - inputs: Vec, - outputs: Vec, - } - - pub fn generate() { - // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 - let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); - let libm_test = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); - let math_src = libm_test.join("../../src/math"); - - if target_arch == "powerpc64" { - return; - } - - let files = fs::read_dir(math_src) - .unwrap() - .map(|f| f.unwrap().path()) - .filter(file_needs_test) - .collect::>(); - - let mut math = Vec::new(); - for file in files { - if IGNORED_FILES.iter().any(|f| file.ends_with(f)) { - continue; - } - - println!("generating musl reference tests in {:?}", file); - - let contents = fs::read_to_string(file).unwrap(); - let mut functions = contents.lines().filter(|f| f.starts_with("pub fn")); - while let Some(function_to_test) = functions.next() { - math.push(parse(function_to_test)); - } - } - - // Generate a bunch of random inputs for each function. This will - // attempt to generate a good set of uniform test cases for exercising - // all the various functionality. 
- generate_random_tests(&mut math, &mut rand::thread_rng()); - - // After we have all our inputs, use the x86_64-unknown-linux-musl - // target to generate the expected output. - generate_test_outputs(&mut math); - //panic!("Boo"); - // ... and now that we have both inputs and expected outputs, do a bunch - // of codegen to create the unit tests which we'll actually execute. - generate_unit_tests(&math); - } - - /// Check whether a path within `src/math` should get tests generated. - fn file_needs_test(path: &PathBuf) -> bool { - // Skip directories - if path.is_dir() { - return false; - } - - let fname = path.file_name().unwrap().to_str().unwrap(); - - // Musl doesn't support `f16` or `f128` - !(fname.contains("f16") || fname.contains("f128")) - } - - /// A "poor man's" parser for the signature of a function - fn parse(s: &str) -> Function { - let s = eat(s, "pub fn "); - let pos = s.find('(').unwrap(); - let name = &s[..pos]; - let s = &s[pos + 1..]; - let end = s.find(')').unwrap(); - let args = s[..end] - .split(',') - .map(|arg| { - let colon = arg.find(':').unwrap(); - parse_ty(arg[colon + 1..].trim()) - }) - .collect::>(); - let tail = &s[end + 1..]; - let tail = eat(tail, " -> "); - let ret = parse_retty(tail.replace("{", "").trim()); - - return Function { name: name.to_string(), args, ret, tests: Vec::new() }; - - fn parse_ty(s: &str) -> Ty { - match s { - "f32" => Ty::F32, - "f64" => Ty::F64, - "i32" => Ty::I32, - "bool" => Ty::Bool, - other => panic!("unknown type `{}`", other), - } - } - - fn parse_retty(s: &str) -> Vec { - match s { - "(f32, f32)" => vec![Ty::F32, Ty::F32], - "(f32, i32)" => vec![Ty::F32, Ty::I32], - "(f64, f64)" => vec![Ty::F64, Ty::F64], - "(f64, i32)" => vec![Ty::F64, Ty::I32], - other => vec![parse_ty(other)], - } - } - - fn eat<'a>(s: &'a str, prefix: &str) -> &'a str { - if s.starts_with(prefix) { - &s[prefix.len()..] 
- } else { - panic!("{:?} didn't start with {:?}", s, prefix) - } - } - } - - fn generate_random_tests(functions: &mut [Function], rng: &mut R) { - for function in functions { - for _ in 0..NTESTS { - function.tests.push(generate_test(function, rng)); - } - } - - fn generate_test(function: &Function, rng: &mut R) -> Test { - let mut inputs = function.args.iter().map(|ty| ty.gen_i64(rng)).collect::>(); - - // First argument to this function appears to be a number of - // iterations, so passing in massive random numbers causes it to - // take forever to execute, so make sure we're not running random - // math code until the heat death of the universe. - if function.name == "jn" || function.name == "jnf" { - inputs[0] &= 0xffff; - } - - Test { - inputs, - // zero output for now since we'll generate it later - outputs: vec![], - } - } - } - - impl Ty { - fn gen_i64(&self, r: &mut R) -> i64 { - use std::{f32, f64}; - - return match self { - Ty::F32 => { - if r.gen_range(0..20) < 1 { - let i = *[f32::NAN, f32::INFINITY, f32::NEG_INFINITY].choose(r).unwrap(); - i.to_bits().into() - } else { - r.gen::().to_bits().into() - } - } - Ty::F64 => { - if r.gen_range(0..20) < 1 { - let i = *[f64::NAN, f64::INFINITY, f64::NEG_INFINITY].choose(r).unwrap(); - i.to_bits() as i64 - } else { - r.gen::().to_bits() as i64 - } - } - Ty::I32 => { - if r.gen_range(0..10) < 1 { - let i = *[i32::max_value(), 0, i32::min_value()].choose(r).unwrap(); - i.into() - } else { - r.gen::().into() - } - } - Ty::Bool => r.gen::() as i64, - }; - } - - fn libc_ty(&self) -> &'static str { - match self { - Ty::F32 => "f32", - Ty::F64 => "f64", - Ty::I32 => "i32", - Ty::Bool => "i32", - } - } - - fn libc_pty(&self) -> &'static str { - match self { - Ty::F32 => "*mut f32", - Ty::F64 => "*mut f64", - Ty::I32 => "*mut i32", - Ty::Bool => "*mut i32", - } - } - - fn default(&self) -> &'static str { - match self { - Ty::F32 => "0_f32", - Ty::F64 => "0_f64", - Ty::I32 => "0_i32", - Ty::Bool => "false", - } - } - - 
fn to_i64(&self) -> &'static str { - match self { - Ty::F32 => ".to_bits() as i64", - Ty::F64 => ".to_bits() as i64", - Ty::I32 => " as i64", - Ty::Bool => " as i64", - } - } - } - - fn generate_test_outputs(functions: &mut [Function]) { - let mut src = String::new(); - let dst = std::env::var("OUT_DIR").unwrap(); - - // Generate a program which will run all tests with all inputs in - // `functions`. This program will write all outputs to stdout (in a - // binary format). - src.push_str("use std::io::Write;"); - src.push_str("fn main() {"); - src.push_str("let mut result = Vec::new();"); - for function in functions.iter_mut() { - src.push_str("unsafe {"); - src.push_str("extern { fn "); - src.push_str(&function.name); - src.push_str("("); - - let (ret, retptr) = match function.name.as_str() { - "sincos" | "sincosf" => (None, &function.ret[..]), - _ => (Some(&function.ret[0]), &function.ret[1..]), - }; - for (i, arg) in function.args.iter().enumerate() { - src.push_str(&format!("arg{}: {},", i, arg.libc_ty())); - } - for (i, ret) in retptr.iter().enumerate() { - src.push_str(&format!("argret{}: {},", i, ret.libc_pty())); - } - src.push_str(")"); - if let Some(ty) = ret { - src.push_str(" -> "); - src.push_str(ty.libc_ty()); - } - src.push_str("; }"); - - src.push_str(&format!("static TESTS: &[[i64; {}]]", function.args.len())); - src.push_str(" = &["); - for test in function.tests.iter() { - src.push_str("["); - for val in test.inputs.iter() { - src.push_str(&val.to_string()); - src.push_str(","); - } - src.push_str("],"); - } - src.push_str("];"); - - src.push_str("for test in TESTS {"); - for (i, arg) in retptr.iter().enumerate() { - src.push_str(&format!("let mut argret{} = {};", i, arg.default())); - } - src.push_str("let output = "); - src.push_str(&function.name); - src.push_str("("); - for (i, arg) in function.args.iter().enumerate() { - src.push_str(&match arg { - Ty::F32 => format!("f32::from_bits(test[{}] as u32)", i), - Ty::F64 => 
format!("f64::from_bits(test[{}] as u64)", i), - Ty::I32 => format!("test[{}] as i32", i), - Ty::Bool => format!("test[{}] as i32", i), - }); - src.push_str(","); - } - for (i, _) in retptr.iter().enumerate() { - src.push_str(&format!("&mut argret{},", i)); - } - src.push_str(");"); - if let Some(ty) = &ret { - src.push_str(&format!("let output = output{};", ty.to_i64())); - src.push_str("result.extend_from_slice(&output.to_le_bytes());"); - } - - for (i, ret) in retptr.iter().enumerate() { - src.push_str(&format!( - "result.extend_from_slice(&(argret{}{}).to_le_bytes());", - i, - ret.to_i64(), - )); - } - src.push_str("}"); - - src.push_str("}"); - } - - src.push_str("std::io::stdout().write_all(&result).unwrap();"); - - src.push_str("}"); - - let path = format!("{}/gen.rs", dst); - fs::write(&path, src).unwrap(); - - // Make it somewhat pretty if something goes wrong - drop(Command::new("rustfmt").arg(&path).status()); - - // Compile and execute this tests for the musl target, assuming we're an - // x86_64 host effectively. - let status = Command::new("rustc") - .current_dir(&dst) - .arg(&path) - .arg("--target=x86_64-unknown-linux-musl") - .status() - .unwrap(); - assert!(status.success()); - let output = Command::new("./gen").current_dir(&dst).output().unwrap(); - assert!(output.status.success()); - assert!(output.stderr.is_empty()); - - // Map all the output bytes back to an `i64` and then shove it all into - // the expected results. - let mut results = output.stdout.chunks_exact(8).map(|buf| { - let mut exact = [0; 8]; - exact.copy_from_slice(buf); - i64::from_le_bytes(exact) - }); - - for f in functions.iter_mut() { - for test in f.tests.iter_mut() { - test.outputs = (0..f.ret.len()).map(|_| results.next().unwrap()).collect(); - } - } - assert!(results.next().is_none()); - } - - /// Codegens a file which has a ton of `#[test]` annotations for all the - /// tests that we generated above. 
- fn generate_unit_tests(functions: &[Function]) { - let mut src = String::new(); - let dst = std::env::var("OUT_DIR").unwrap(); - - for function in functions { - src.push_str("#[test]"); - src.push_str("fn "); - src.push_str(&function.name); - src.push_str("_matches_musl() {"); - src.push_str(&format!( - "static TESTS: &[([i64; {}], [i64; {}])]", - function.args.len(), - function.ret.len(), - )); - src.push_str(" = &["); - for test in function.tests.iter() { - src.push_str("(["); - for val in test.inputs.iter() { - src.push_str(&val.to_string()); - src.push_str(","); - } - src.push_str("],"); - src.push_str("["); - for val in test.outputs.iter() { - src.push_str(&val.to_string()); - src.push_str(","); - } - src.push_str("],"); - src.push_str("),"); - } - src.push_str("];"); - - src.push_str("for (test, expected) in TESTS {"); - src.push_str("let output = libm::"); - src.push_str(&function.name); - src.push_str("("); - for (i, arg) in function.args.iter().enumerate() { - src.push_str(&match arg { - Ty::F32 => format!("f32::from_bits(test[{}] as u32)", i), - Ty::F64 => format!("f64::from_bits(test[{}] as u64)", i), - Ty::I32 => format!("test[{}] as i32", i), - Ty::Bool => format!("test[{}] as i32", i), - }); - src.push_str(","); - } - src.push_str(");"); - - for (i, ret) in function.ret.iter().enumerate() { - let get = if function.ret.len() == 1 { String::new() } else { format!(".{}", i) }; - src.push_str(&(match ret { - Ty::F32 => format!("if libm::_eqf(output{}, f32::from_bits(expected[{}] as u32)).is_ok() {{ continue }}", get, i), - Ty::F64 => format!("if libm::_eq(output{}, f64::from_bits(expected[{}] as u64)).is_ok() {{ continue }}", get, i), - Ty::I32 => format!("if output{} as i64 == expected[{}] {{ continue }}", get, i), - Ty::Bool => unreachable!(), - })); - } - - src.push_str( - r#" - panic!("INPUT: {:?} EXPECTED: {:?} ACTUAL {:?}", test, expected, output); - "#, - ); - src.push_str("}"); - - src.push_str("}"); - } - - let path = 
format!("{}/musl-tests.rs", dst); - fs::write(&path, src).unwrap(); - - // Try to make it somewhat pretty - drop(Command::new("rustfmt").arg(&path).status()); - } -} diff --git a/libm/crates/libm-test/tests/musl_biteq.rs b/libm/crates/libm-test/tests/musl_biteq.rs deleted file mode 100644 index f586fd03d..000000000 --- a/libm/crates/libm-test/tests/musl_biteq.rs +++ /dev/null @@ -1,6 +0,0 @@ -//! compare - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(all(test, feature = "test-musl-serialized"))] -include!(concat!(env!("OUT_DIR"), "/musl-tests.rs")); diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 511ab598d..6bb06b5b8 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -23,28 +23,3 @@ use core::{f32, f64}; pub use libm_helper::*; pub use self::math::*; - -/// Approximate equality with 1 ULP of tolerance -#[doc(hidden)] -#[inline] -pub fn _eqf(a: f32, b: f32) -> Result<(), u32> { - if a.is_nan() && b.is_nan() { - Ok(()) - } else { - let err = (a.to_bits() as i32).wrapping_sub(b.to_bits() as i32).abs(); - - if err <= 1 { Ok(()) } else { Err(err as u32) } - } -} - -#[doc(hidden)] -#[inline] -pub fn _eq(a: f64, b: f64) -> Result<(), u64> { - if a.is_nan() && b.is_nan() { - Ok(()) - } else { - let err = (a.to_bits() as i64).wrapping_sub(b.to_bits() as i64).abs(); - - if err <= 1 { Ok(()) } else { Err(err as u64) } - } -} From 36deaed0975bcca3a8ed4c88733caef91fb3c673 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 22 Dec 2024 21:49:37 +0000 Subject: [PATCH 0969/1459] Fix a bug in `abs_diff` These implementations of `abs_diff` were added in c2ff1b3119 ("Completely overhaul fuzz testing"), but the signed implementation is wrong when |x| + |y| exceeds the integer's limits (e.g. `(-128).abs_diff(1)` should be 129 but currently these return 127). Resolve this by just using `std`'s implementation since that is stable now. 
This isn't used anywhere critical, we probably just weren't hitting the edge case. --- src/int/mod.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/int/mod.rs b/src/int/mod.rs index 0d3b0ce40..c0d5a6715 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -240,11 +240,7 @@ macro_rules! int_impl { } fn abs_diff(self, other: Self) -> Self { - if self < other { - other.wrapping_sub(self) - } else { - self.wrapping_sub(other) - } + self.abs_diff(other) } int_impl_common!($uty); @@ -277,7 +273,7 @@ macro_rules! int_impl { } fn abs_diff(self, other: Self) -> $uty { - self.wrapping_sub(other).wrapping_abs() as $uty + self.abs_diff(other) } int_impl_common!($ity); From 658a32d26f784f618f773f4ca52136d016e7d84a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 22 Dec 2024 22:24:38 +0000 Subject: [PATCH 0970/1459] Fix a bug in `abs_diff` These were taken from `compiler-builtins` but the implementation has a bug near the integer limits. Fixed in `compiler-builtins` by using `core`'s implementation at [1], this is the corresponding fix for `libm`. [1]: https://github.com/rust-lang/compiler-builtins/pull/736 --- libm/src/math/support/int_traits.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index c72c1d5cb..ded990bdf 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -184,7 +184,7 @@ macro_rules! int_impl { } fn abs_diff(self, other: Self) -> Self { - if self < other { other.wrapping_sub(self) } else { self.wrapping_sub(other) } + self.abs_diff(other) } int_impl_common!($uty); @@ -221,7 +221,7 @@ macro_rules! 
int_impl { } fn abs_diff(self, other: Self) -> $uty { - self.wrapping_sub(other).wrapping_abs() as $uty + self.abs_diff(other) } int_impl_common!($ity); From c07e157e8c09cac57df133aadec243f1f20d6617 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 22 Dec 2024 23:15:16 +0000 Subject: [PATCH 0971/1459] Introduce helper types for accessing trait items The ambiguous associated types error sometimes fires in cases where it shouldn't be ambiguous ([1]), which can make things clunky when working with chained associated types (e.g. `Op::FTy::Int::*` does not work). Add helper types that we can use instead of the full syntax. There aren't too many cases in-crate now but this is relevant for some open PRs. [1]: https://github.com/rust-lang/rust/issues/38078 --- libm/crates/libm-test/benches/random.rs | 2 +- libm/crates/libm-test/src/lib.rs | 4 ++-- libm/crates/libm-test/src/op.rs | 9 +++++++++ libm/src/math/support/float_traits.rs | 4 ++++ libm/src/math/support/int_traits.rs | 6 +++++- libm/src/math/support/mod.rs | 3 ++- 6 files changed, 23 insertions(+), 5 deletions(-) diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index d77d57908..9ccc38fe3 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -26,7 +26,7 @@ macro_rules! 
musl_rand_benches { #[cfg(feature = "build-musl")] let musl_extra = MuslExtra { - musl_fn: Some(musl_math_sys::$fn_name as ::CFn), + musl_fn: Some(musl_math_sys::$fn_name as libm_test::CFn), skip_on_i586: $skip_on_i586 }; diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 7f0d9aa75..bc96b466b 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -5,8 +5,8 @@ pub mod op; mod precision; mod test_traits; -pub use libm::support::{Float, Int}; -pub use op::{BaseName, Identifier, MathOp}; +pub use libm::support::{Float, Int, IntTy}; +pub use op::{BaseName, CFn, FTy, Identifier, MathOp, RustFn, RustRet}; pub use precision::{MaybeOverride, SpecialCase, default_ulp}; pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall}; diff --git a/libm/crates/libm-test/src/op.rs b/libm/crates/libm-test/src/op.rs index 50b455d3a..0faeceb09 100644 --- a/libm/crates/libm-test/src/op.rs +++ b/libm/crates/libm-test/src/op.rs @@ -70,6 +70,15 @@ pub trait MathOp { const ROUTINE: Self::RustFn; } +/// Access the associated `FTy` type from an op (helper to avoid ambiguous associated types). +pub type FTy = ::FTy; +/// Access the associated `CFn` type from an op (helper to avoid ambiguous associated types). +pub type CFn = ::CFn; +/// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types). +pub type RustFn = ::RustFn; +/// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types). +pub type RustRet = ::RustRet; + macro_rules! do_thing { // Matcher for unary functions ( diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 0047ba368..5808aeebc 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -153,6 +153,10 @@ pub trait Float: } } +/// Access the associated `Int` type from a float (helper to avoid ambiguous associated types). 
+#[allow(dead_code)] +pub type IntTy = ::Int; + macro_rules! float_impl { ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { impl Float for $ty { diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index ded990bdf..380313c1e 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -26,6 +26,10 @@ pub trait MinInt: const MAX: Self; } +/// Access the associated `OtherSign` type from an int (helper to avoid ambiguous associated +/// types). +pub type OtherSign = ::OtherSign; + /// Trait for some basic operations on integers #[allow(dead_code)] pub trait Int: @@ -53,7 +57,7 @@ pub trait Int: + CastInto + CastFrom { - fn signed(self) -> ::OtherSign; + fn signed(self) -> OtherSign; fn unsigned(self) -> Self::Unsigned; fn from_unsigned(unsigned: Self::Unsigned) -> Self; fn abs(self) -> Self; diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index 04a313abc..25681c307 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -4,7 +4,8 @@ mod float_traits; mod hex_float; mod int_traits; -pub use float_traits::Float; +#[allow(unused_imports)] +pub use float_traits::{Float, IntTy}; #[allow(unused_imports)] pub use hex_float::{hf32, hf64}; pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; From 59d01e11272436031deef21c2c3192aa62075eea Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 22 Dec 2024 23:56:45 +0000 Subject: [PATCH 0972/1459] Rename associated type helpers, add `OpITy` Change the names to make them less ambiguous. Additionally add `OpITy` for accessing the same-sized integer of an operation's float type. 
--- libm/crates/libm-test/benches/random.rs | 2 +- libm/crates/libm-test/src/lib.rs | 2 +- libm/crates/libm-test/src/op.rs | 10 ++++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 9ccc38fe3..b9c39334c 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -26,7 +26,7 @@ macro_rules! musl_rand_benches { #[cfg(feature = "build-musl")] let musl_extra = MuslExtra { - musl_fn: Some(musl_math_sys::$fn_name as libm_test::CFn), + musl_fn: Some(musl_math_sys::$fn_name as libm_test::OpCFn), skip_on_i586: $skip_on_i586 }; diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index bc96b466b..17a06b3be 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -6,7 +6,7 @@ mod precision; mod test_traits; pub use libm::support::{Float, Int, IntTy}; -pub use op::{BaseName, CFn, FTy, Identifier, MathOp, RustFn, RustRet}; +pub use op::{BaseName, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet}; pub use precision::{MaybeOverride, SpecialCase, default_ulp}; pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall}; diff --git a/libm/crates/libm-test/src/op.rs b/libm/crates/libm-test/src/op.rs index 0faeceb09..bcea31c22 100644 --- a/libm/crates/libm-test/src/op.rs +++ b/libm/crates/libm-test/src/op.rs @@ -71,13 +71,15 @@ pub trait MathOp { } /// Access the associated `FTy` type from an op (helper to avoid ambiguous associated types). -pub type FTy = ::FTy; +pub type OpFTy = ::FTy; +/// Access the associated `FTy::Int` type from an op (helper to avoid ambiguous associated types). +pub type OpITy = <::FTy as Float>::Int; /// Access the associated `CFn` type from an op (helper to avoid ambiguous associated types). 
-pub type CFn = ::CFn; +pub type OpCFn = ::CFn; /// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types). -pub type RustFn = ::RustFn; +pub type OpRustFn = ::RustFn; /// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types). -pub type RustRet = ::RustRet; +pub type OpRustRet = ::RustRet; macro_rules! do_thing { // Matcher for unary functions From fe6b09397c03f621f0be27287d9c5934d70f3fd2 Mon Sep 17 00:00:00 2001 From: Sebastian Neubauer Date: Thu, 26 Dec 2024 06:01:14 +0100 Subject: [PATCH 0973/1459] Disable f128 for amdgpu (#737) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `compiler_builtins` fails to compile to amdgpu if f128 is enabled. The reason seems to be that compiler_builtins uses libcalls in the implementation. I’m not really familiar with what libcalls are, but the LLVM amdgpu backend explicitly does not support them. Error message: ``` LLVM ERROR: unsupported libcall legalization ``` --- configure.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configure.rs b/configure.rs index 0a0bd503d..e20c717ec 100644 --- a/configure.rs +++ b/configure.rs @@ -79,6 +79,8 @@ pub fn configure_f16_f128(target: &Target) { }; let f128_enabled = match target.arch.as_str() { + // Unsupported (libcall is not supported) + "amdgpu" => false, // Unsupported "arm64ec" => false, // Selection failure From ff0ca1f39bd86258fcd9b4d930d139a054f7e82f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 26 Dec 2024 05:01:38 +0000 Subject: [PATCH 0974/1459] chore: release v0.1.140 --- CHANGELOG.md | 8 ++++++++ Cargo.toml | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c65aa871..f88859143 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## 
[Unreleased] +## [0.1.140](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.139...compiler_builtins-v0.1.140) - 2024-12-26 + +### Other + +- Disable f128 for amdgpu ([#737](https://github.com/rust-lang/compiler-builtins/pull/737)) +- Fix a bug in `abs_diff` +- Disable `f16` on platforms that have recursion problems + ## [0.1.139](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.138...compiler_builtins-v0.1.139) - 2024-11-03 ### Other diff --git a/Cargo.toml b/Cargo.toml index 33f1c9a3a..33d5c06b7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.139" +version = "0.1.140" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 8a9cc719089ffaf722690ac51047ba2cb8b486eb Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 26 Dec 2024 09:13:58 +0000 Subject: [PATCH 0975/1459] Replace string function name matching with enums where possible --- libm/crates/libm-test/src/gen/random.rs | 5 +- libm/crates/libm-test/src/precision.rs | 100 ++++++++++------------- libm/crates/libm-test/src/test_traits.rs | 3 - 3 files changed, 46 insertions(+), 62 deletions(-) diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs index e347b3c63..527cd1351 100644 --- a/libm/crates/libm-test/src/gen/random.rs +++ b/libm/crates/libm-test/src/gen/random.rs @@ -7,7 +7,7 @@ use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; use super::CachedInput; -use crate::{CheckCtx, GenerateInput}; +use crate::{BaseName, CheckCtx, GenerateInput}; const SEED: [u8; 32] = *b"3.141592653589793238462643383279"; @@ -110,7 +110,6 @@ pub fn get_test_cases(ctx: &CheckCtx) -> impl Iterator, { - let inputs = - if ctx.fn_name == "jn" || ctx.fn_name == "jnf" { &TEST_CASES_JN } else { &TEST_CASES }; + let inputs = if 
ctx.base_name == BaseName::Jn { &TEST_CASES_JN } else { &TEST_CASES }; inputs.get_cases() } diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index cf9115430..c7f9d9e30 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -6,7 +6,7 @@ use core::f32; use CheckBasis::{Mpfr, Musl}; use Identifier as Id; -use crate::{CheckBasis, CheckCtx, Float, Identifier, Int, TestResult}; +use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult}; /// Type implementing [`IgnoreCase`]. pub struct SpecialCase; @@ -106,25 +106,26 @@ impl MaybeOverride<(f32,)> for SpecialCase { ctx: &CheckCtx, ) -> Option { if ctx.basis == CheckBasis::Musl { - if ctx.fn_name == "expm1f" && input.0 > 80.0 && actual.is_infinite() { + if ctx.base_name == BaseName::Expm1 && input.0 > 80.0 && actual.is_infinite() { // we return infinity but the number is representable return XFAIL; } - if ctx.fn_name == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() { + if ctx.base_name == BaseName::Sinh && input.0.abs() > 80.0 && actual.is_nan() { // we return some NaN that should be real values or infinite // doesn't seem to happen on x86 return XFAIL; } } - if ctx.fn_name == "acoshf" && input.0 < -1.0 { + if ctx.base_name == BaseName::Acosh && input.0 < -1.0 { // acoshf is undefined for x <= 1.0, but we return a random result at lower // values. 
return XFAIL; } - if ctx.fn_name == "lgammaf" || ctx.fn_name == "lgammaf_r" && input.0 < 0.0 { + if ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR && input.0 < 0.0 + { // loggamma should not be defined for x < 0, yet we both return results return XFAIL; } @@ -141,7 +142,7 @@ impl MaybeOverride<(f32,)> for SpecialCase { // On MPFR for lgammaf_r, we set -1 as the integer result for negative infinity but MPFR // sets +1 if ctx.basis == CheckBasis::Mpfr - && ctx.fn_name == "lgammaf_r" + && ctx.base_name == BaseName::LgammaR && input.0 == f32::NEG_INFINITY && actual.abs() == expected.abs() { @@ -161,13 +162,13 @@ impl MaybeOverride<(f64,)> for SpecialCase { ctx: &CheckCtx, ) -> Option { if ctx.basis == CheckBasis::Musl { - if cfg!(target_arch = "x86") && ctx.fn_name == "acosh" && input.0 < 1.0 { + if cfg!(target_arch = "x86") && ctx.base_name == BaseName::Acosh && input.0 < 1.0 { // The function is undefined, both implementations return random results return SKIP; } if cfg!(x86_no_sse) - && ctx.fn_name == "ceil" + && ctx.base_name == BaseName::Ceil && input.0 < 0.0 && input.0 > -1.0 && expected == F::ZERO @@ -178,13 +179,14 @@ impl MaybeOverride<(f64,)> for SpecialCase { } } - if ctx.fn_name == "acosh" && input.0 < 1.0 { + if ctx.base_name == BaseName::Acosh && input.0 < 1.0 { // The function is undefined for the inputs, musl and our libm both return // random results. 
return XFAIL; } - if ctx.fn_name == "lgamma" || ctx.fn_name == "lgamma_r" && input.0 < 0.0 { + if ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR && input.0 < 0.0 + { // loggamma should not be defined for x < 0, yet we both return results return XFAIL; } @@ -201,7 +203,7 @@ impl MaybeOverride<(f64,)> for SpecialCase { // On MPFR for lgamma_r, we set -1 as the integer result for negative infinity but MPFR // sets +1 if ctx.basis == CheckBasis::Mpfr - && ctx.fn_name == "lgamma_r" + && ctx.base_name == BaseName::LgammaR && input.0 == f64::NEG_INFINITY && actual.abs() == expected.abs() { @@ -214,7 +216,7 @@ impl MaybeOverride<(f64,)> for SpecialCase { /// Check NaN bits if the function requires it fn maybe_check_nan_bits(actual: F, expected: F, ctx: &CheckCtx) -> Option { - if !(ctx.base_name_str == "fabs" || ctx.base_name_str == "copysign") { + if !(ctx.base_name == BaseName::Fabs || ctx.base_name == BaseName::Copysign) { return None; } @@ -270,24 +272,16 @@ fn maybe_skip_binop_nan( expected: F2, ctx: &CheckCtx, ) -> Option { - match ctx.basis { - CheckBasis::Musl => { - if (ctx.base_name_str == "fmax" || ctx.base_name_str == "fmin") - && (input.0.is_nan() || input.1.is_nan()) - && expected.is_nan() - { - XFAIL - } else { - None - } - } - CheckBasis::Mpfr => { - if ctx.base_name_str == "copysign" && input.1.is_nan() { - SKIP - } else { - None - } + match (&ctx.basis, ctx.base_name) { + (Musl, BaseName::Fmin | BaseName::Fmax) + if (input.0.is_nan() || input.1.is_nan()) && expected.is_nan() => + { + XFAIL } + + (Mpfr, BaseName::Copysign) if input.1.is_nan() => SKIP, + + _ => None, } } @@ -299,20 +293,17 @@ impl MaybeOverride<(i32, f32)> for SpecialCase { ulp: &mut u32, ctx: &CheckCtx, ) -> Option { - match ctx.basis { - CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx), - CheckBasis::Mpfr => { - // We return +0.0, MPFR returns -0.0 - if ctx.fn_name == "jnf" - && input.1 == f32::NEG_INFINITY - && actual == F::ZERO - && expected == F::ZERO - 
{ - XFAIL - } else { - None - } + match (&ctx.basis, ctx.base_name) { + (Musl, _) => bessel_prec_dropoff(input, ulp, ctx), + + // We return +0.0, MPFR returns -0.0 + (Mpfr, BaseName::Jn) + if input.1 == f32::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO => + { + XFAIL } + + _ => None, } } } @@ -324,20 +315,17 @@ impl MaybeOverride<(i32, f64)> for SpecialCase { ulp: &mut u32, ctx: &CheckCtx, ) -> Option { - match ctx.basis { - CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx), - CheckBasis::Mpfr => { - // We return +0.0, MPFR returns -0.0 - if ctx.fn_name == "jn" - && input.1 == f64::NEG_INFINITY - && actual == F::ZERO - && expected == F::ZERO - { - XFAIL - } else { - bessel_prec_dropoff(input, ulp, ctx) - } + match (&ctx.basis, ctx.base_name) { + (Musl, _) => bessel_prec_dropoff(input, ulp, ctx), + + // We return +0.0, MPFR returns -0.0 + (Mpfr, BaseName::Jn) + if input.1 == f64::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO => + { + XFAIL } + + _ => None, } } } @@ -348,7 +336,7 @@ fn bessel_prec_dropoff( ulp: &mut u32, ctx: &CheckCtx, ) -> Option { - if ctx.base_name_str == "jn" { + if ctx.base_name == BaseName::Jn { if input.0 > 4000 { return XFAIL; } else if input.0 > 2000 { diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index b8e0aa108..ca933bbda 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -22,8 +22,6 @@ pub struct CheckCtx { pub base_name: BaseName, /// Function name. pub fn_name: &'static str, - /// Return the unsuffixed version of the function name. - pub base_name_str: &'static str, /// Source of truth for tests. 
pub basis: CheckBasis, } @@ -36,7 +34,6 @@ impl CheckCtx { fn_ident, fn_name: fn_ident.as_str(), base_name: fn_ident.base_name(), - base_name_str: fn_ident.base_name().as_str(), basis, }; ret.ulp = crate::default_ulp(&ret); From 0d8d363b50a44103b46abb9c189f7db27a6d82ab Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 27 Dec 2024 10:58:36 +0000 Subject: [PATCH 0976/1459] Fix new `clippy::precedence` lints [1] extends Clippy's `precedence` lint to cover `&`, `|`, and bitshifts. Update cases that are flagged by this in the most recent nightly. [1]: https://github.com/rust-lang/rust-clippy/pull/13743 --- libm/src/math/ceil.rs | 2 +- libm/src/math/exp10.rs | 2 +- libm/src/math/exp10f.rs | 2 +- libm/src/math/exp2.rs | 2 +- libm/src/math/fma.rs | 12 ++++++------ libm/src/math/fmod.rs | 4 ++-- libm/src/math/fmodf.rs | 4 ++-- libm/src/math/j1.rs | 4 ++-- libm/src/math/jn.rs | 4 ++-- libm/src/math/log10.rs | 2 +- libm/src/math/log1p.rs | 2 +- libm/src/math/log2.rs | 2 +- libm/src/math/mod.rs | 2 +- libm/src/math/modf.rs | 2 +- libm/src/math/modff.rs | 2 +- libm/src/math/nextafter.rs | 6 +++--- libm/src/math/rint.rs | 2 +- libm/src/math/rintf.rs | 2 +- libm/src/math/sqrt.rs | 2 +- libm/src/math/trunc.rs | 2 +- libm/src/math/truncf.rs | 2 +- 21 files changed, 32 insertions(+), 32 deletions(-) diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index c198ebcfe..b0576f3dc 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -16,7 +16,7 @@ pub fn ceil(x: f64) -> f64 { } let u: u64 = x.to_bits(); - let e: i64 = (u >> 52 & 0x7ff) as i64; + let e: i64 = ((u >> 52) & 0x7ff) as i64; let y: f64; if e >= 0x3ff + 52 || x == 0. 
{ diff --git a/libm/src/math/exp10.rs b/libm/src/math/exp10.rs index 2c3df0173..7c33c92b6 100644 --- a/libm/src/math/exp10.rs +++ b/libm/src/math/exp10.rs @@ -12,7 +12,7 @@ pub fn exp10(x: f64) -> f64 { let (mut y, n) = modf(x); let u: u64 = n.to_bits(); /* fabs(n) < 16 without raising invalid on nan */ - if (u >> 52 & 0x7ff) < 0x3ff + 4 { + if ((u >> 52) & 0x7ff) < 0x3ff + 4 { if y == 0.0 { return i!(P10, ((n as isize) + 15) as usize); } diff --git a/libm/src/math/exp10f.rs b/libm/src/math/exp10f.rs index e81d18380..0520a41f2 100644 --- a/libm/src/math/exp10f.rs +++ b/libm/src/math/exp10f.rs @@ -11,7 +11,7 @@ pub fn exp10f(x: f32) -> f32 { let (mut y, n) = modff(x); let u = n.to_bits(); /* fabsf(n) < 8 without raising invalid on nan */ - if (u >> 23 & 0xff) < 0x7f + 3 { + if ((u >> 23) & 0xff) < 0x7f + 3 { if y == 0.0 { return i!(P10, ((n as isize) + 7) as usize); } diff --git a/libm/src/math/exp2.rs b/libm/src/math/exp2.rs index dce2ab4df..6e98d066c 100644 --- a/libm/src/math/exp2.rs +++ b/libm/src/math/exp2.rs @@ -341,7 +341,7 @@ pub fn exp2(mut x: f64) -> f64 { /* Filter out exceptional cases. 
*/ let ui = f64::to_bits(x); - let ix = ui >> 32 & 0x7fffffff; + let ix = (ui >> 32) & 0x7fffffff; if ix >= 0x408ff000 { /* |x| >= 1022 or nan */ if ix >= 0x40900000 && ui >> 63 == 0 { diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index bb2028fa7..826143d5a 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -82,7 +82,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { d -= 64; if d == 0 { } else if d < 64 { - rlo = rhi << (64 - d) | rlo >> d | ((rlo << (64 - d)) != 0) as u64; + rlo = (rhi << (64 - d)) | (rlo >> d) | ((rlo << (64 - d)) != 0) as u64; rhi = rhi >> d; } else { rlo = 1; @@ -95,7 +95,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { if d == 0 { zlo = nz.m; } else if d < 64 { - zlo = nz.m >> d | ((nz.m << (64 - d)) != 0) as u64; + zlo = (nz.m >> d) | ((nz.m << (64 - d)) != 0) as u64; } else { zlo = 1; } @@ -127,11 +127,11 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { e += 64; d = rhi.leading_zeros() as i32 - 1; /* note: d > 0 */ - rhi = rhi << d | rlo >> (64 - d) | ((rlo << d) != 0) as u64; + rhi = (rhi << d) | (rlo >> (64 - d)) | ((rlo << d) != 0) as u64; } else if rlo != 0 { d = rlo.leading_zeros() as i32 - 1; if d < 0 { - rhi = rlo >> 1 | (rlo & 1); + rhi = (rlo >> 1) | (rlo & 1); } else { rhi = rlo << d; } @@ -165,7 +165,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { /* one bit is lost when scaled, add another top bit to only round once at conversion if it is inexact */ if (rhi << 53) != 0 { - i = (rhi >> 1 | (rhi & 1) | 1 << 62) as i64; + i = ((rhi >> 1) | (rhi & 1) | (1 << 62)) as i64; if sign != 0 { i = -i; } @@ -182,7 +182,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { } else { /* only round once when scaled */ d = 10; - i = ((rhi >> d | ((rhi << (64 - d)) != 0) as u64) << d) as i64; + i = (((rhi >> d) | ((rhi << (64 - d)) != 0) as u64) << d) as i64; if sign != 0 { i = -i; } diff --git a/libm/src/math/fmod.rs b/libm/src/math/fmod.rs index df16162bc..b68e6b0ea 100644 --- a/libm/src/math/fmod.rs +++ b/libm/src/math/fmod.rs 
@@ -2,8 +2,8 @@ pub fn fmod(x: f64, y: f64) -> f64 { let mut uxi = x.to_bits(); let mut uyi = y.to_bits(); - let mut ex = (uxi >> 52 & 0x7ff) as i64; - let mut ey = (uyi >> 52 & 0x7ff) as i64; + let mut ex = ((uxi >> 52) & 0x7ff) as i64; + let mut ey = ((uyi >> 52) & 0x7ff) as i64; let sx = uxi >> 63; let mut i; diff --git a/libm/src/math/fmodf.rs b/libm/src/math/fmodf.rs index 671af8580..4de181957 100644 --- a/libm/src/math/fmodf.rs +++ b/libm/src/math/fmodf.rs @@ -4,8 +4,8 @@ use core::f32; pub fn fmodf(x: f32, y: f32) -> f32 { let mut uxi = x.to_bits(); let mut uyi = y.to_bits(); - let mut ex = (uxi >> 23 & 0xff) as i32; - let mut ey = (uyi >> 23 & 0xff) as i32; + let mut ex = ((uxi >> 23) & 0xff) as i32; + let mut ey = ((uyi >> 23) & 0xff) as i32; let sx = uxi & 0x80000000; let mut i; diff --git a/libm/src/math/j1.rs b/libm/src/math/j1.rs index cef17a63e..578ae59d3 100644 --- a/libm/src/math/j1.rs +++ b/libm/src/math/j1.rs @@ -171,10 +171,10 @@ pub fn y1(x: f64) -> f64 { lx = get_low_word(x); /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */ - if (ix << 1 | lx) == 0 { + if (ix << 1) | lx == 0 { return -1.0 / 0.0; } - if (ix >> 31) != 0 { + if ix >> 31 != 0 { return 0.0 / 0.0; } if ix >= 0x7ff00000 { diff --git a/libm/src/math/jn.rs b/libm/src/math/jn.rs index 7f98ddc05..d228781d1 100644 --- a/libm/src/math/jn.rs +++ b/libm/src/math/jn.rs @@ -55,7 +55,7 @@ pub fn jn(n: i32, mut x: f64) -> f64 { ix &= 0x7fffffff; // -lx == !lx + 1 - if (ix | (lx | ((!lx).wrapping_add(1))) >> 31) > 0x7ff00000 { + if ix | ((lx | (!lx).wrapping_add(1)) >> 31) > 0x7ff00000 { /* nan */ return x; } @@ -265,7 +265,7 @@ pub fn yn(n: i32, x: f64) -> f64 { ix &= 0x7fffffff; // -lx == !lx + 1 - if (ix | (lx | ((!lx).wrapping_add(1))) >> 31) > 0x7ff00000 { + if ix | ((lx | (!lx).wrapping_add(1)) >> 31) > 0x7ff00000 { /* nan */ return x; } diff --git a/libm/src/math/log10.rs b/libm/src/math/log10.rs index f9d118f12..8c9d68c49 100644 --- a/libm/src/math/log10.rs +++ b/libm/src/math/log10.rs 
@@ -78,7 +78,7 @@ pub fn log10(mut x: f64) -> f64 { hx += 0x3ff00000 - 0x3fe6a09e; k += (hx >> 20) as i32 - 0x3ff; hx = (hx & 0x000fffff) + 0x3fe6a09e; - ui = (hx as u64) << 32 | (ui & 0xffffffff); + ui = ((hx as u64) << 32) | (ui & 0xffffffff); x = f64::from_bits(ui); f = x - 1.0; diff --git a/libm/src/math/log1p.rs b/libm/src/math/log1p.rs index 80561ec74..b7f3fb09e 100644 --- a/libm/src/math/log1p.rs +++ b/libm/src/math/log1p.rs @@ -125,7 +125,7 @@ pub fn log1p(x: f64) -> f64 { } /* reduce u into [sqrt(2)/2, sqrt(2)] */ hu = (hu & 0x000fffff) + 0x3fe6a09e; - ui = (hu as u64) << 32 | (ui & 0xffffffff); + ui = ((hu as u64) << 32) | (ui & 0xffffffff); f = f64::from_bits(ui) - 1.; } hfsq = 0.5 * f * f; diff --git a/libm/src/math/log2.rs b/libm/src/math/log2.rs index 59533340b..701f63c25 100644 --- a/libm/src/math/log2.rs +++ b/libm/src/math/log2.rs @@ -75,7 +75,7 @@ pub fn log2(mut x: f64) -> f64 { hx += 0x3ff00000 - 0x3fe6a09e; k += (hx >> 20) as i32 - 0x3ff; hx = (hx & 0x000fffff) + 0x3fe6a09e; - ui = (hx as u64) << 32 | (ui & 0xffffffff); + ui = ((hx as u64) << 32) | (ui & 0xffffffff); x = f64::from_bits(ui); f = x - 1.0; diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index afebdf586..3852c774e 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -359,5 +359,5 @@ fn with_set_low_word(f: f64, lo: u32) -> f64 { #[inline] fn combine_words(hi: u32, lo: u32) -> f64 { - f64::from_bits((hi as u64) << 32 | lo as u64) + f64::from_bits(((hi as u64) << 32) | lo as u64) } diff --git a/libm/src/math/modf.rs b/libm/src/math/modf.rs index bcab33a81..e29e80ccf 100644 --- a/libm/src/math/modf.rs +++ b/libm/src/math/modf.rs @@ -2,7 +2,7 @@ pub fn modf(x: f64) -> (f64, f64) { let rv2: f64; let mut u = x.to_bits(); let mask: u64; - let e = ((u >> 52 & 0x7ff) as i32) - 0x3ff; + let e = (((u >> 52) & 0x7ff) as i32) - 0x3ff; /* no fractional part */ if e >= 52 { diff --git a/libm/src/math/modff.rs b/libm/src/math/modff.rs index 56ece12e3..fac60abaa 100644 --- 
a/libm/src/math/modff.rs +++ b/libm/src/math/modff.rs @@ -2,7 +2,7 @@ pub fn modff(x: f32) -> (f32, f32) { let rv2: f32; let mut u: u32 = x.to_bits(); let mask: u32; - let e = ((u >> 23 & 0xff) as i32) - 0x7f; + let e = (((u >> 23) & 0xff) as i32) - 0x7f; /* no fractional part */ if e >= 23 { diff --git a/libm/src/math/nextafter.rs b/libm/src/math/nextafter.rs index 422bd7496..c991ff6f2 100644 --- a/libm/src/math/nextafter.rs +++ b/libm/src/math/nextafter.rs @@ -16,14 +16,14 @@ pub fn nextafter(x: f64, y: f64) -> f64 { if ay == 0 { return y; } - ux_i = (uy_i & 1_u64 << 63) | 1; - } else if ax > ay || ((ux_i ^ uy_i) & 1_u64 << 63) != 0 { + ux_i = (uy_i & (1_u64 << 63)) | 1; + } else if ax > ay || ((ux_i ^ uy_i) & (1_u64 << 63)) != 0 { ux_i -= 1; } else { ux_i += 1; } - let e = ux_i >> 52 & 0x7ff; + let e = (ux_i >> 52) & 0x7ff; // raise overflow if ux.f is infinite and x is finite if e == 0x7ff { force_eval!(x + x); diff --git a/libm/src/math/rint.rs b/libm/src/math/rint.rs index 618b26e54..cbdc3c2b9 100644 --- a/libm/src/math/rint.rs +++ b/libm/src/math/rint.rs @@ -2,7 +2,7 @@ pub fn rint(x: f64) -> f64 { let one_over_e = 1.0 / f64::EPSILON; let as_u64: u64 = x.to_bits(); - let exponent: u64 = as_u64 >> 52 & 0x7ff; + let exponent: u64 = (as_u64 >> 52) & 0x7ff; let is_positive = (as_u64 >> 63) == 0; if exponent >= 0x3ff + 52 { x diff --git a/libm/src/math/rintf.rs b/libm/src/math/rintf.rs index 0726d83ba..2d22c9393 100644 --- a/libm/src/math/rintf.rs +++ b/libm/src/math/rintf.rs @@ -2,7 +2,7 @@ pub fn rintf(x: f32) -> f32 { let one_over_e = 1.0 / f32::EPSILON; let as_u32: u32 = x.to_bits(); - let exponent: u32 = as_u32 >> 23 & 0xff; + let exponent: u32 = (as_u32 >> 23) & 0xff; let is_positive = (as_u32 >> 31) == 0; if exponent >= 0x7f + 23 { x diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index d9a8f184c..3f1a10fdd 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -219,7 +219,7 @@ pub fn sqrt(x: f64) -> f64 { ix1 |= sign; } ix0 += m << 
20; - f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) + f64::from_bits(((ix0 as u64) << 32) | ix1.0 as u64) } #[cfg(test)] diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs index 34bc2fdfa..d85bffb40 100644 --- a/libm/src/math/trunc.rs +++ b/libm/src/math/trunc.rs @@ -14,7 +14,7 @@ pub fn trunc(x: f64) -> f64 { let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 let mut i: u64 = x.to_bits(); - let mut e: i64 = (i >> 52 & 0x7ff) as i64 - 0x3ff + 12; + let mut e: i64 = ((i >> 52) & 0x7ff) as i64 - 0x3ff + 12; let m: u64; if e >= 52 + 12 { diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs index a74f78987..82017b87b 100644 --- a/libm/src/math/truncf.rs +++ b/libm/src/math/truncf.rs @@ -14,7 +14,7 @@ pub fn truncf(x: f32) -> f32 { let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 let mut i: u32 = x.to_bits(); - let mut e: i32 = (i >> 23 & 0xff) as i32 - 0x7f + 9; + let mut e: i32 = ((i >> 23) & 0xff) as i32 - 0x7f + 9; let m: u32; if e >= 23 + 9 { From 4f7e69c11c19818b807d3fff9506d9dca25c10fd Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 27 Dec 2024 11:02:40 +0000 Subject: [PATCH 0977/1459] Allow Clippy lints in `compiler-builtins-smoke-test` Rather than always needing to exclude `cb` when running `cargo clippy`, just disable Clippy for the included module. 
--- libm/.github/workflows/main.yml | 1 - libm/crates/compiler-builtins-smoke-test/src/lib.rs | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 866f0de9e..d290d09a5 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -120,7 +120,6 @@ jobs: run: ./ci/download-musl.sh - run: | cargo clippy --all \ - --exclude cb \ --features libm-test/build-musl,libm-test/test-multiprecision \ --all-targets diff --git a/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/libm/crates/compiler-builtins-smoke-test/src/lib.rs index e65cb8da3..e3a51a575 100644 --- a/libm/crates/compiler-builtins-smoke-test/src/lib.rs +++ b/libm/crates/compiler-builtins-smoke-test/src/lib.rs @@ -4,8 +4,9 @@ #![feature(core_intrinsics)] #![allow(internal_features)] -#![allow(dead_code)] #![no_std] +#[allow(dead_code)] +#[allow(clippy::all)] // We don't get `libm`'s list of `allow`s, so just ignore Clippy. #[path = "../../../src/math/mod.rs"] pub mod libm; From 1b0fce7ab3aa94e967c95450e645862d7625bb24 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 29 Dec 2024 06:46:22 +0000 Subject: [PATCH 0978/1459] Change from `-latest` to named CI images GitHub will be upgrading the `-latest` tags of these images in the near future. Change all images to specify the latest version. 
--- libm/.github/workflows/main.yml | 62 ++++++++++++++++----------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index d290d09a5..93cd541f8 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -16,56 +16,56 @@ jobs: matrix: include: - target: aarch64-apple-darwin - os: macos-latest + os: macos-15 - target: aarch64-unknown-linux-gnu - os: ubuntu-latest + os: ubuntu-24.04 - target: aarch64-pc-windows-msvc - os: windows-latest + os: windows-2025 build_only: 1 # Can't run on x86 hosts - target: arm-unknown-linux-gnueabi - os: ubuntu-latest + os: ubuntu-24.04 - target: arm-unknown-linux-gnueabihf - os: ubuntu-latest + os: ubuntu-24.04 - target: armv7-unknown-linux-gnueabihf - os: ubuntu-latest + os: ubuntu-24.04 - target: i586-unknown-linux-gnu - os: ubuntu-latest + os: ubuntu-24.04 - target: i686-unknown-linux-gnu - os: ubuntu-latest + os: ubuntu-24.04 - target: loongarch64-unknown-linux-gnu - os: ubuntu-latest + os: ubuntu-24.04 - target: powerpc-unknown-linux-gnu - os: ubuntu-latest + os: ubuntu-24.04 - target: powerpc64-unknown-linux-gnu - os: ubuntu-latest + os: ubuntu-24.04 - target: powerpc64le-unknown-linux-gnu - os: ubuntu-latest + os: ubuntu-24.04 - target: riscv64gc-unknown-linux-gnu - os: ubuntu-latest + os: ubuntu-24.04 - target: thumbv6m-none-eabi - os: ubuntu-latest + os: ubuntu-24.04 - target: thumbv7em-none-eabi - os: ubuntu-latest + os: ubuntu-24.04 - target: thumbv7em-none-eabihf - os: ubuntu-latest + os: ubuntu-24.04 - target: thumbv7m-none-eabi - os: ubuntu-latest + os: ubuntu-24.04 - target: x86_64-unknown-linux-gnu - os: ubuntu-latest + os: ubuntu-24.04 - target: x86_64-apple-darwin os: macos-13 - target: wasm32-unknown-unknown - os: ubuntu-latest + os: ubuntu-24.04 build_only: 1 - target: i686-pc-windows-msvc - os: windows-latest + os: windows-2025 - target: x86_64-pc-windows-msvc - os: windows-latest + os: windows-2025 - 
target: i686-pc-windows-gnu - os: windows-latest + os: windows-2025 channel: nightly-i686-gnu - target: x86_64-pc-windows-gnu - os: windows-latest + os: windows-2025 channel: nightly-x86_64-gnu runs-on: ${{ matrix.os }} env: @@ -94,20 +94,20 @@ jobs: # Non-linux tests just use our raw script - name: Run locally - if: matrix.os != 'ubuntu-latest' || contains(matrix.target, 'wasm') + if: matrix.os != 'ubuntu-24.04' || contains(matrix.target, 'wasm') shell: bash run: ./ci/run.sh ${{ matrix.target }} # Otherwise we use our docker containers to run builds - name: Run in Docker - if: matrix.os == 'ubuntu-latest' && !contains(matrix.target, 'wasm') + if: matrix.os == 'ubuntu-24.04' && !contains(matrix.target, 'wasm') run: | rustup target add x86_64-unknown-linux-musl cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} clippy: name: Clippy - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@master - name: Install Rust @@ -125,7 +125,7 @@ jobs: builtins: name: Check use with compiler-builtins - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@master - name: Install Rust @@ -135,7 +135,7 @@ jobs: benchmarks: name: Benchmarks - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@master - name: Install Rust @@ -147,7 +147,7 @@ jobs: msrv: name: Check MSRV - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 env: RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings` steps: @@ -163,7 +163,7 @@ jobs: rustfmt: name: Rustfmt - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@master - name: Install Rust @@ -180,7 +180,7 @@ jobs: - benchmarks - msrv - rustfmt - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency # failed" as success. So we have to do some contortions to ensure the job fails if any of its # dependencies fails. 
From 6ebb553f7e276347095ada2b03603bf0b15bc543 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Oct 2024 03:07:58 -0500 Subject: [PATCH 0979/1459] Introduce generic `abs` and `copysign` Add generic versions of `abs` and `copysign`, which will provide an entrypoint for adding `f16` and `f128`. Since this implementation is identical to the existing type-specific implementations, make use of it for `f32` and `f64`. --- libm/src/math/copysign.rs | 6 +----- libm/src/math/copysignf.rs | 6 +----- libm/src/math/fabs.rs | 2 +- libm/src/math/fabsf.rs | 2 +- libm/src/math/generic/abs.rs | 6 ++++++ libm/src/math/generic/copysign.rs | 10 ++++++++++ libm/src/math/generic/mod.rs | 5 +++++ libm/src/math/mod.rs | 1 + 8 files changed, 26 insertions(+), 12 deletions(-) create mode 100644 libm/src/math/generic/abs.rs create mode 100644 libm/src/math/generic/copysign.rs create mode 100644 libm/src/math/generic/mod.rs diff --git a/libm/src/math/copysign.rs b/libm/src/math/copysign.rs index 1f4a35a33..552bf3975 100644 --- a/libm/src/math/copysign.rs +++ b/libm/src/math/copysign.rs @@ -4,9 +4,5 @@ /// first argument, `x`, and the sign of its second argument, `y`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn copysign(x: f64, y: f64) -> f64 { - let mut ux = x.to_bits(); - let uy = y.to_bits(); - ux &= (!0) >> 1; - ux |= uy & (1 << 63); - f64::from_bits(ux) + super::generic::copysign(x, y) } diff --git a/libm/src/math/copysignf.rs b/libm/src/math/copysignf.rs index 6c346e3a5..8b9bed4c0 100644 --- a/libm/src/math/copysignf.rs +++ b/libm/src/math/copysignf.rs @@ -4,9 +4,5 @@ /// first argument, `x`, and the sign of its second argument, `y`. 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn copysignf(x: f32, y: f32) -> f32 { - let mut ux = x.to_bits(); - let uy = y.to_bits(); - ux &= 0x7fffffff; - ux |= uy & 0x80000000; - f32::from_bits(ux) + super::generic::copysign(x, y) } diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs index d083053e1..2163637e7 100644 --- a/libm/src/math/fabs.rs +++ b/libm/src/math/fabs.rs @@ -9,7 +9,7 @@ pub fn fabs(x: f64) -> f64 { args: x, } - f64::from_bits(x.to_bits() & (u64::MAX / 2)) + super::generic::abs(x) } #[cfg(test)] diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs index eabe87254..ac77c9201 100644 --- a/libm/src/math/fabsf.rs +++ b/libm/src/math/fabsf.rs @@ -9,7 +9,7 @@ pub fn fabsf(x: f32) -> f32 { args: x, } - f32::from_bits(x.to_bits() & 0x7fffffff) + super::generic::abs(x) } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 diff --git a/libm/src/math/generic/abs.rs b/libm/src/math/generic/abs.rs new file mode 100644 index 000000000..2c9a43c12 --- /dev/null +++ b/libm/src/math/generic/abs.rs @@ -0,0 +1,6 @@ +use super::super::Float; + +/// Absolute value. +pub fn abs<F: Float>(x: F) -> F { + x.abs() +} diff --git a/libm/src/math/generic/copysign.rs b/libm/src/math/generic/copysign.rs new file mode 100644 index 000000000..d6b814891 --- /dev/null +++ b/libm/src/math/generic/copysign.rs @@ -0,0 +1,10 @@ +use super::super::Float; + +/// Copy the sign of `y` to `x`.
+pub fn copysign<F: Float>(x: F, y: F) -> F { + let mut ux = x.to_bits(); + let uy = y.to_bits(); + ux &= !F::SIGN_MASK; + ux |= uy & (F::SIGN_MASK); + F::from_bits(ux) +} diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs new file mode 100644 index 000000000..1ddd08f0e --- /dev/null +++ b/libm/src/math/generic/mod.rs @@ -0,0 +1,5 @@ +mod abs; +mod copysign; + +pub use abs::abs; +pub use copysign::copysign; diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 3852c774e..ba1995228 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -87,6 +87,7 @@ mod support; mod arch; mod expo2; mod fenv; +mod generic; mod k_cos; mod k_cosf; mod k_expo2; From f051214ce189ecd3bc5966e78a8c291d2efdb062 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Oct 2024 00:44:50 -0500 Subject: [PATCH 0980/1459] Add `f16` and `f128` configuration from `compiler-builtins` In preparation of adding routines from these two types, duplicate the `compiler-builtins` configuration here. --- libm/Cargo.toml | 5 +- libm/build.rs | 30 +--- libm/configure.rs | 168 ++++++++++++++++++ .../compiler-builtins-smoke-test/Cargo.toml | 2 + libm/crates/libm-macros/Cargo.toml | 7 + libm/crates/libm-test/Cargo.toml | 11 +- libm/crates/libm-test/build.rs | 62 +------ libm/src/lib.rs | 2 + libm/src/math/support/float_traits.rs | 4 + 9 files changed, 208 insertions(+), 83 deletions(-) create mode 100644 libm/configure.rs diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 98a60bfe3..bfc11509e 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -21,7 +21,7 @@ arch = [] # This tells the compiler to assume that a Nightly toolchain is being used and # that it should activate any useful Nightly things accordingly. -unstable = ["unstable-intrinsics"] +unstable = ["unstable-intrinsics", "unstable-float"] # Enable calls to functions in `core::intrinsics` unstable-intrinsics = [] @@ -29,6 +29,9 @@ unstable-intrinsics = [] # Make some internal things public for testing.
unstable-test-support = [] +# Enable the nightly-only `f16` and `f128`. +unstable-float = [] + # Used to prevent using any intrinsics or arch-specific code. # # HACK: this is a negative feature which is generally a bad idea in Cargo, but diff --git a/libm/build.rs b/libm/build.rs index 001029236..9c9e0e723 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -1,6 +1,10 @@ use std::env; +mod configure; + fn main() { + let cfg = configure::Config::from_env(); + println!("cargo:rerun-if-changed=build.rs"); println!("cargo:rustc-check-cfg=cfg(assert_no_panic)"); @@ -14,29 +18,5 @@ fn main() { } } - configure_intrinsics(); - configure_arch(); -} - -/// Simplify the feature logic for enabling intrinsics so code only needs to use -/// `cfg(intrinsics_enabled)`. -fn configure_intrinsics() { - println!("cargo:rustc-check-cfg=cfg(intrinsics_enabled)"); - - // Disabled by default; `unstable-intrinsics` enables again; `force-soft-floats` overrides - // to disable. - if cfg!(feature = "unstable-intrinsics") && !cfg!(feature = "force-soft-floats") { - println!("cargo:rustc-cfg=intrinsics_enabled"); - } -} - -/// Simplify the feature logic for enabling arch-specific features so code only needs to use -/// `cfg(arch_enabled)`. -fn configure_arch() { - println!("cargo:rustc-check-cfg=cfg(arch_enabled)"); - - // Enabled by default via the "arch" feature, `force-soft-floats` overrides to disable. 
- if cfg!(feature = "arch") && !cfg!(feature = "force-soft-floats") { - println!("cargo:rustc-cfg=arch_enabled"); - } + configure::emit_libm_config(&cfg); } diff --git a/libm/configure.rs b/libm/configure.rs new file mode 100644 index 000000000..389e86c33 --- /dev/null +++ b/libm/configure.rs @@ -0,0 +1,168 @@ +// Configuration shared with both libm and libm-test + +use std::env; +use std::path::PathBuf; + +#[allow(dead_code)] +pub struct Config { + pub manifest_dir: PathBuf, + pub out_dir: PathBuf, + pub opt_level: u8, + pub target_arch: String, + pub target_env: String, + pub target_family: Option<String>, + pub target_os: String, + pub target_string: String, + pub target_vendor: String, + pub target_features: Vec<String>, +} + +impl Config { + pub fn from_env() -> Self { + let target_features = env::var("CARGO_CFG_TARGET_FEATURE") + .map(|feats| feats.split(',').map(ToOwned::to_owned).collect()) + .unwrap_or_default(); + + Self { + manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()), + out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()), + opt_level: env::var("OPT_LEVEL").unwrap().parse().unwrap(), + target_arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(), + target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(), + target_family: env::var("CARGO_CFG_TARGET_FAMILY").ok(), + target_os: env::var("CARGO_CFG_TARGET_OS").unwrap(), + target_string: env::var("TARGET").unwrap(), + target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), + target_features, + } + } +} + +/// Libm gets most config options made available. +#[allow(dead_code)] +pub fn emit_libm_config(cfg: &Config) { + emit_intrinsics_cfg(); + emit_arch_cfg(); + emit_optimization_cfg(cfg); + emit_cfg_shorthands(cfg); + emit_f16_f128_cfg(cfg); +} + +/// Tests don't need most feature-related config.
+#[allow(dead_code)] +pub fn emit_test_config(cfg: &Config) { + emit_optimization_cfg(cfg); + emit_cfg_shorthands(cfg); + emit_f16_f128_cfg(cfg); +} + +/// Simplify the feature logic for enabling intrinsics so code only needs to use +/// `cfg(intrinsics_enabled)`. +fn emit_intrinsics_cfg() { + println!("cargo:rustc-check-cfg=cfg(intrinsics_enabled)"); + + // Disabled by default; `unstable-intrinsics` enables again; `force-soft-floats` overrides + // to disable. + if cfg!(feature = "unstable-intrinsics") && !cfg!(feature = "force-soft-floats") { + println!("cargo:rustc-cfg=intrinsics_enabled"); + } +} + +/// Simplify the feature logic for enabling arch-specific features so code only needs to use +/// `cfg(arch_enabled)`. +fn emit_arch_cfg() { + println!("cargo:rustc-check-cfg=cfg(arch_enabled)"); + + // Enabled by default via the "arch" feature, `force-soft-floats` overrides to disable. + if cfg!(feature = "arch") && !cfg!(feature = "force-soft-floats") { + println!("cargo:rustc-cfg=arch_enabled"); + } +} + +/// Some tests are extremely slow. Emit a config option based on optimization level. +fn emit_optimization_cfg(cfg: &Config) { + println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)"); + + if cfg.opt_level >= 2 { + println!("cargo:rustc-cfg=optimizations_enabled"); + } +} + +/// Provide an alias for common longer config combinations. +fn emit_cfg_shorthands(cfg: &Config) { + println!("cargo:rustc-check-cfg=cfg(x86_no_sse)"); + if cfg.target_arch == "x86" && !cfg.target_features.iter().any(|f| f == "sse") { + // Shorthand to detect i586 targets + println!("cargo:rustc-cfg=x86_no_sse"); + } +} + +/// Configure whether or not `f16` and `f128` support should be enabled. +fn emit_f16_f128_cfg(cfg: &Config) { + println!("cargo:rustc-check-cfg=cfg(f16_enabled)"); + println!("cargo:rustc-check-cfg=cfg(f128_enabled)"); + + // `unstable-float` enables these features. 
+ if !cfg!(feature = "unstable-float") { + return; + } + + // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means + // that the backend will not crash when using these types and generates code that can be called + // without crashing (no infinite recursion). This does not mean that the platform doesn't have + // ABI or other bugs. + // + // We do this here rather than in `rust-lang/rust` because configuring via cargo features is + // not straightforward. + // + // Original source of this list: + // + let f16_enabled = match cfg.target_arch.as_str() { + // Unsupported + "arm64ec" => false, + // Selection failure + "s390x" => false, + // Infinite recursion + // FIXME(llvm): loongarch fixed by + "csky" => false, + "hexagon" => false, + "loongarch64" => false, + "mips" | "mips64" | "mips32r6" | "mips64r6" => false, + "powerpc" | "powerpc64" => false, + "sparc" | "sparc64" => false, + "wasm32" | "wasm64" => false, + // Most everything else works as of LLVM 19 + _ => true, + }; + + let f128_enabled = match cfg.target_arch.as_str() { + // Unsupported (libcall is not supported) + "amdgpu" => false, + // Unsupported + "arm64ec" => false, + // Selection failure + "mips64" | "mips64r6" => false, + // Selection failure + "nvptx64" => false, + // Selection failure + "powerpc64" if &cfg.target_os == "aix" => false, + // Selection failure + "sparc" => false, + // Most everything else works as of LLVM 19 + _ => true, + }; + + // If the feature is set, disable these types. 
+ let disable_both = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some(); + + println!("cargo:rustc-check-cfg=cfg(f16_enabled)"); + println!("cargo:rustc-check-cfg=cfg(f128_enabled)"); + + if f16_enabled && !disable_both { + println!("cargo:rustc-cfg=f16_enabled"); + } + + if f128_enabled && !disable_both { + println!("cargo:rustc-cfg=f128_enabled"); + } +} diff --git a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml index e75c4f42b..82cfeecb9 100644 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -21,5 +21,7 @@ force-soft-floats = [] unexpected_cfgs = { level = "warn", check-cfg = [ "cfg(arch_enabled)", "cfg(assert_no_panic)", + "cfg(f128_enabled)", + "cfg(f16_enabled)", "cfg(intrinsics_enabled)", ] } diff --git a/libm/crates/libm-macros/Cargo.toml b/libm/crates/libm-macros/Cargo.toml index c9defb1c5..9194232b2 100644 --- a/libm/crates/libm-macros/Cargo.toml +++ b/libm/crates/libm-macros/Cargo.toml @@ -12,3 +12,10 @@ heck = "0.5.0" proc-macro2 = "1.0.88" quote = "1.0.37" syn = { version = "2.0.79", features = ["full", "extra-traits", "visit-mut"] } + +[lints.rust] +# Values used during testing +unexpected_cfgs = { level = "warn", check-cfg = [ + 'cfg(f16_enabled)', + 'cfg(f128_enabled)', +] } diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 4d75b25f8..f2dd88fa1 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -5,7 +5,10 @@ edition = "2021" publish = false [features] -default = [] +default = ["unstable-float"] + +# Propagated from libm because this affects which functions we test. +unstable-float = ["libm/unstable-float"] # Generate tests which are random inputs and the outputs are calculated with # musl libc. 
@@ -44,3 +47,9 @@ criterion = { version = "0.5.1", default-features = false, features = ["cargo_be [[bench]] name = "random" harness = false + +[lints.rust] +# Values from the shared configure.rs used by `libm` but not the test crate +unexpected_cfgs = { level = "warn", check-cfg = [ + 'cfg(feature, values("arch", "force-soft-floats", "unstable-intrinsics"))', +] } diff --git a/libm/crates/libm-test/build.rs b/libm/crates/libm-test/build.rs index dc3126dbb..f2cd298ba 100644 --- a/libm/crates/libm-test/build.rs +++ b/libm/crates/libm-test/build.rs @@ -1,66 +1,16 @@ use std::fmt::Write; -use std::path::PathBuf; -use std::{env, fs}; +use std::fs; + +#[path = "../../configure.rs"] +mod configure; +use configure::Config; fn main() { let cfg = Config::from_env(); - emit_optimization_cfg(&cfg); - emit_cfg_shorthands(&cfg); list_all_tests(&cfg); -} - -#[allow(dead_code)] -struct Config { - manifest_dir: PathBuf, - out_dir: PathBuf, - opt_level: u8, - target_arch: String, - target_env: String, - target_family: Option<String>, - target_os: String, - target_string: String, - target_vendor: String, - target_features: Vec<String>, -} - -impl Config { - fn from_env() -> Self { - let target_features = env::var("CARGO_CFG_TARGET_FEATURE") - .map(|feats| feats.split(',').map(ToOwned::to_owned).collect()) - .unwrap_or_default(); - - Self { - manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()), - out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()), - opt_level: env::var("OPT_LEVEL").unwrap().parse().unwrap(), - target_arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(), - target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(), - target_family: env::var("CARGO_CFG_TARGET_FAMILY").ok(), - target_os: env::var("CARGO_CFG_TARGET_OS").unwrap(), - target_string: env::var("TARGET").unwrap(), - target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), - target_features, - } - } -} -/// Some tests are extremely slow. Emit a config option based on optimization level.
-fn emit_optimization_cfg(cfg: &Config) { - println!("cargo::rustc-check-cfg=cfg(optimizations_enabled)"); - - if cfg.opt_level >= 2 { - println!("cargo::rustc-cfg=optimizations_enabled"); - } -} - -/// Provide an alias for common longer config combinations. -fn emit_cfg_shorthands(cfg: &Config) { - println!("cargo::rustc-check-cfg=cfg(x86_no_sse)"); - if cfg.target_arch == "x86" && !cfg.target_features.iter().any(|f| f == "sse") { - // Shorthand to detect i586 targets - println!("cargo::rustc-cfg=x86_no_sse"); - } + configure::emit_test_config(&cfg); } /// Create a list of all source files in an array. This can be used for making sure that diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 6bb06b5b8..327e3d6e6 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -2,6 +2,8 @@ #![no_std] #![cfg_attr(intrinsics_enabled, allow(internal_features))] #![cfg_attr(intrinsics_enabled, feature(core_intrinsics))] +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg_attr(f16_enabled, feature(f16))] #![allow(clippy::assign_op_pattern)] #![allow(clippy::deprecated_cfg_attr)] #![allow(clippy::eq_op)] diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 5808aeebc..7b3f6904b 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -219,5 +219,9 @@ macro_rules! float_impl { }; } +#[cfg(f16_enabled)] +float_impl!(f16, u16, i16, i8, 16, 10); float_impl!(f32, u32, i32, i16, 32, 23); float_impl!(f64, u64, i64, i16, 64, 52); +#[cfg(f128_enabled)] +float_impl!(f128, u128, i128, i16, 128, 112); From f00f440a57c3718da68b29623546eb8d5b22f187 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 29 Dec 2024 07:37:01 +0000 Subject: [PATCH 0981/1459] Always enable `unstable-float` in CI Since these add new API but do not affect runtime, we can enable it for all tests that run with nightly. 
--- libm/ci/run.sh | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/libm/ci/run.sh b/libm/ci/run.sh index d89c8bdf0..7e514a1cd 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -62,22 +62,26 @@ esac cargo check --no-default-features cargo check --features "force-soft-floats" +# Always enable `unstable-float` since it expands available API but does not +# change any implementations. +extra_flags="$extra_flags --features unstable-float" + if [ "${BUILD_ONLY:-}" = "1" ]; then cmd="cargo build --target $target --package libm" $cmd - $cmd --features "unstable-intrinsics" + $cmd --features unstable-intrinsics echo "can't run tests on $target; skipping" else cmd="cargo test --all --target $target $extra_flags" - # stable by default + # Test without intrinsics $cmd $cmd --release - # unstable with a feature - $cmd --features "unstable-intrinsics" - $cmd --release --features "unstable-intrinsics" + # Test with intrinsic use + $cmd --features unstable-intrinsics + $cmd --release --features unstable-intrinsics # Make sure benchmarks have correct results $cmd --benches From 592cabfa0673616b9f4fa1f6d4ec028415b41d52 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 19 Dec 2024 11:47:04 +0000 Subject: [PATCH 0982/1459] Update and slightly refactor some of the `Float` trait Add a constant for negative pi and provide a standalone const `from_bits`, which can be combined with what we already had in `hex_float`. Also provide another default method to reduce what needs to be provided by the macro. 
--- libm/src/math/support/float_traits.rs | 47 +++++++++++++++++++-------- libm/src/math/support/hex_float.rs | 12 ++----- libm/src/math/support/mod.rs | 1 + 3 files changed, 36 insertions(+), 24 deletions(-) diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 7b3f6904b..68ba60030 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -38,6 +38,7 @@ pub trait Float: const MAX: Self; const MIN: Self; const PI: Self; + const NEG_PI: Self; const FRAC_PI_2: Self; /// The bitwidth of the float type @@ -71,7 +72,9 @@ pub trait Float: fn to_bits(self) -> Self::Int; /// Returns `self` transmuted to `Self::SignedInt` - fn to_bits_signed(self) -> Self::SignedInt; + fn to_bits_signed(self) -> Self::SignedInt { + self.to_bits().signed() + } /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be /// represented in multiple different ways. This method returns `true` if two NaNs are @@ -158,7 +161,15 @@ pub trait Float: pub type IntTy<F> = <F as Float>::Int; macro_rules! float_impl { - ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { + ( + $ty:ident, + $ity:ident, + $sity:ident, + $expty:ident, + $bits:expr, + $significand_bits:expr, + $from_bits:path + ) => { impl Float for $ty { type Int = $ity; type SignedInt = $sity; @@ -173,13 +184,10 @@ macro_rules!
float_impl { const NAN: Self = Self::NAN; const MAX: Self = -Self::MIN; // Sign bit set, saturated mantissa, saturated exponent with last bit zeroed - // FIXME(msrv): just use `from_bits` when available - // SAFETY: POD cast with no preconditions - const MIN: Self = unsafe { - mem::transmute::<Self::Int, Self>(Self::Int::MAX & !(1 << Self::SIG_BITS)) - }; + const MIN: Self = $from_bits(Self::Int::MAX & !(1 << Self::SIG_BITS)); const PI: Self = core::$ty::consts::PI; + const NEG_PI: Self = -Self::PI; const FRAC_PI_2: Self = core::$ty::consts::FRAC_PI_2; const BITS: u32 = $bits; @@ -193,9 +201,6 @@ macro_rules! float_impl { fn to_bits(self) -> Self::Int { self.to_bits() } - fn to_bits_signed(self) -> Self::SignedInt { - self.to_bits() as Self::SignedInt - } fn is_nan(self) -> bool { self.is_nan() } @@ -220,8 +225,22 @@ macro_rules! float_impl { } #[cfg(f16_enabled)] -float_impl!(f16, u16, i16, i8, 16, 10); -float_impl!(f32, u32, i32, i16, 32, 23); -float_impl!(f64, u64, i64, i16, 64, 52); +float_impl!(f16, u16, i16, i8, 16, 10, f16::from_bits); +float_impl!(f32, u32, i32, i16, 32, 23, f32_from_bits); +float_impl!(f64, u64, i64, i16, 64, 52, f64_from_bits); #[cfg(f128_enabled)] -float_impl!(f128, u128, i128, i16, 128, 112); +float_impl!(f128, u128, i128, i16, 128, 112, f128::from_bits); + +/* FIXME(msrv): vendor some things that are not const stable at our MSRV */ + +/// `f32::from_bits` +pub const fn f32_from_bits(bits: u32) -> f32 { + // SAFETY: POD cast with no preconditions + unsafe { mem::transmute::<u32, f32>(bits) } +} + +/// `f64::from_bits` +pub const fn f64_from_bits(bits: u64) -> f64 { + // SAFETY: POD cast with no preconditions + unsafe { mem::transmute::<u64, f64>(bits) } +} diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs index 80434a5ec..1666c6153 100644 --- a/libm/src/math/support/hex_float.rs +++ b/libm/src/math/support/hex_float.rs @@ -2,6 +2,8 @@ #![allow(dead_code)] // FIXME: remove once this gets used +use super::{f32_from_bits, f64_from_bits}; +
/// Construct a 32-bit float from hex float representation (C-style) pub const fn hf32(s: &str) -> f32 { f32_from_bits(parse_any(s, 32, 23) as u32) @@ -159,16 +161,6 @@ const fn hex_digit(c: u8) -> u8 { /* FIXME(msrv): vendor some things that are not const stable at our MSRV */ -/// `f32::from_bits` -const fn f32_from_bits(v: u32) -> f32 { - unsafe { core::mem::transmute(v) } -} - -/// `f64::from_bits` -const fn f64_from_bits(v: u64) -> f64 { - unsafe { core::mem::transmute(v) } -} - /// `u128::ilog2` const fn u128_ilog2(v: u128) -> u32 { assert!(v != 0); diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index 25681c307..e2f4e0e98 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -6,6 +6,7 @@ mod int_traits; #[allow(unused_imports)] pub use float_traits::{Float, IntTy}; +pub(crate) use float_traits::{f32_from_bits, f64_from_bits}; #[allow(unused_imports)] pub use hex_float::{hf32, hf64}; pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; From be772a1e6e8d4f6dd018f158c81acaebd0a210c4 Mon Sep 17 00:00:00 2001 From: beetrees Date: Thu, 19 Dec 2024 11:59:09 +0000 Subject: [PATCH 0983/1459] Remove an `is_nan` workaround that is no longer needed --- libm/src/math/support/float_traits.rs | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 68ba60030..e64640a0d 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -80,17 +80,7 @@ pub trait Float: /// represented in multiple different ways. This method returns `true` if two NaNs are /// compared. fn eq_repr(self, rhs: Self) -> bool { - let is_nan = |x: Self| -> bool { - // } - // fn is_nan(x: Self) -> bool { - // When using mangled-names, the "real" compiler-builtins might not have the - // necessary builtin (__unordtf2) to test whether `f128` is NaN. 
- // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin - // x is NaN if all the bits of the exponent are set and the significand is non-0 - x.to_bits() & Self::EXP_MASK == Self::EXP_MASK - && x.to_bits() & Self::SIG_MASK != Self::Int::ZERO - }; - if is_nan(self) && is_nan(rhs) { true } else { self.to_bits() == rhs.to_bits() } + if self.is_nan() && rhs.is_nan() { true } else { self.to_bits() == rhs.to_bits() } } /// Returns true if the value is NaN. From c6c500c8e18001147c4f8c8ad908d05913a0890c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 19 Dec 2024 11:18:33 +0000 Subject: [PATCH 0984/1459] Add an 8-bit float type for testing purposes Introduce `f8`, which is an 8-bit float compliant with IEEE-754. This type is useful for testing since it is easily possible to enumerate all values. --- libm/crates/libm-test/src/f8_impl.rs | 487 +++++++++++++++++++++++++++ libm/crates/libm-test/src/lib.rs | 4 + 2 files changed, 491 insertions(+) create mode 100644 libm/crates/libm-test/src/f8_impl.rs diff --git a/libm/crates/libm-test/src/f8_impl.rs b/libm/crates/libm-test/src/f8_impl.rs new file mode 100644 index 000000000..babcc6357 --- /dev/null +++ b/libm/crates/libm-test/src/f8_impl.rs @@ -0,0 +1,487 @@ +//! An IEEE-compliant 8-bit float type for testing purposes. + +use std::cmp::{self, Ordering}; +use std::{fmt, ops}; + +use crate::Float; + +/// Sometimes verifying float logic is easiest when all values can quickly be checked exhaustively +/// or by hand. +/// +/// IEEE-754 compliant type that includes a 1 bit sign, 4 bit exponent, and 3 bit significand. +/// Bias is -7. +/// +/// Based on . 
+#[derive(Clone, Copy)] +#[repr(transparent)] +#[allow(non_camel_case_types)] +pub struct f8(u8); + +impl Float for f8 { + type Int = u8; + type SignedInt = i8; + type ExpInt = i8; + + const ZERO: Self = Self(0b0_0000_000); + const NEG_ZERO: Self = Self(0b1_0000_000); + const ONE: Self = Self(0b0_0111_000); + const NEG_ONE: Self = Self(0b1_0111_000); + const MAX: Self = Self(0b0_1110_111); + const MIN: Self = Self(0b1_1110_111); + const INFINITY: Self = Self(0b0_1111_000); + const NEG_INFINITY: Self = Self(0b1_1111_000); + const NAN: Self = Self(0b0_1111_100); + const PI: Self = Self::ZERO; + const NEG_PI: Self = Self::ZERO; + const FRAC_PI_2: Self = Self::ZERO; + + const BITS: u32 = 8; + const SIG_BITS: u32 = 3; + const SIGN_MASK: Self::Int = 0b1_0000_000; + const SIG_MASK: Self::Int = 0b0_0000_111; + const EXP_MASK: Self::Int = 0b0_1111_000; + const IMPLICIT_BIT: Self::Int = 0b0_0001_000; + + fn to_bits(self) -> Self::Int { + self.0 + } + + fn to_bits_signed(self) -> Self::SignedInt { + self.0 as i8 + } + + fn is_nan(self) -> bool { + self.0 & Self::EXP_MASK == Self::EXP_MASK && self.0 & Self::SIG_MASK != 0 + } + + fn is_infinite(self) -> bool { + self.0 & Self::EXP_MASK == Self::EXP_MASK && self.0 & Self::SIG_MASK == 0 + } + + fn is_sign_negative(self) -> bool { + self.0 & Self::SIGN_MASK != 0 + } + + fn exp(self) -> Self::ExpInt { + unimplemented!() + } + + fn from_bits(a: Self::Int) -> Self { + Self(a) + } + + fn normalize(_significand: Self::Int) -> (i32, Self::Int) { + unimplemented!() + } +} + +impl f8 { + pub const ALL_LEN: usize = 240; + + /// All non-infinite non-NaN values of `f8` + pub const ALL: [Self; Self::ALL_LEN] = [ + // -m*2^7 + Self(0b1_1110_111), // -240 + Self(0b1_1110_110), + Self(0b1_1110_101), + Self(0b1_1110_100), + Self(0b1_1110_011), + Self(0b1_1110_010), + Self(0b1_1110_001), + Self(0b1_1110_000), // -128 + // -m*2^6 + Self(0b1_1101_111), // -120 + Self(0b1_1101_110), + Self(0b1_1101_101), + Self(0b1_1101_100), + Self(0b1_1101_011), + 
Self(0b1_1101_010), + Self(0b1_1101_001), + Self(0b1_1101_000), // -64 + // -m*2^5 + Self(0b1_1100_111), // -60 + Self(0b1_1100_110), + Self(0b1_1100_101), + Self(0b1_1100_100), + Self(0b1_1100_011), + Self(0b1_1100_010), + Self(0b1_1100_001), + Self(0b1_1100_000), // -32 + // -m*2^4 + Self(0b1_1011_111), // -30 + Self(0b1_1011_110), + Self(0b1_1011_101), + Self(0b1_1011_100), + Self(0b1_1011_011), + Self(0b1_1011_010), + Self(0b1_1011_001), + Self(0b1_1011_000), // -16 + // -m*2^3 + Self(0b1_1010_111), // -15 + Self(0b1_1010_110), + Self(0b1_1010_101), + Self(0b1_1010_100), + Self(0b1_1010_011), + Self(0b1_1010_010), + Self(0b1_1010_001), + Self(0b1_1010_000), // -8 + // -m*2^2 + Self(0b1_1001_111), // -7.5 + Self(0b1_1001_110), + Self(0b1_1001_101), + Self(0b1_1001_100), + Self(0b1_1001_011), + Self(0b1_1001_010), + Self(0b1_1001_001), + Self(0b1_1001_000), // -4 + // -m*2^1 + Self(0b1_1000_111), // -3.75 + Self(0b1_1000_110), + Self(0b1_1000_101), + Self(0b1_1000_100), + Self(0b1_1000_011), + Self(0b1_1000_010), + Self(0b1_1000_001), + Self(0b1_1000_000), // -2 + // -m*2^0 + Self(0b1_0111_111), // -1.875 + Self(0b1_0111_110), + Self(0b1_0111_101), + Self(0b1_0111_100), + Self(0b1_0111_011), + Self(0b1_0111_010), + Self(0b1_0111_001), + Self(0b1_0111_000), // -1 + // -m*2^-1 + Self(0b1_0110_111), // −0.9375 + Self(0b1_0110_110), + Self(0b1_0110_101), + Self(0b1_0110_100), + Self(0b1_0110_011), + Self(0b1_0110_010), + Self(0b1_0110_001), + Self(0b1_0110_000), // -0.5 + // -m*2^-2 + Self(0b1_0101_111), // −0.46875 + Self(0b1_0101_110), + Self(0b1_0101_101), + Self(0b1_0101_100), + Self(0b1_0101_011), + Self(0b1_0101_010), + Self(0b1_0101_001), + Self(0b1_0101_000), // -0.25 + // -m*2^-3 + Self(0b1_0100_111), // −0.234375 + Self(0b1_0100_110), + Self(0b1_0100_101), + Self(0b1_0100_100), + Self(0b1_0100_011), + Self(0b1_0100_010), + Self(0b1_0100_001), + Self(0b1_0100_000), // -0.125 + // -m*2^-4 + Self(0b1_0011_111), // −0.1171875 + Self(0b1_0011_110), + 
Self(0b1_0011_101), + Self(0b1_0011_100), + Self(0b1_0011_011), + Self(0b1_0011_010), + Self(0b1_0011_001), + Self(0b1_0011_000), // −0.0625 + // -m*2^-5 + Self(0b1_0010_111), // −0.05859375 + Self(0b1_0010_110), + Self(0b1_0010_101), + Self(0b1_0010_100), + Self(0b1_0010_011), + Self(0b1_0010_010), + Self(0b1_0010_001), + Self(0b1_0010_000), // −0.03125 + // -m*2^-6 + Self(0b1_0001_111), // −0.029296875 + Self(0b1_0001_110), + Self(0b1_0001_101), + Self(0b1_0001_100), + Self(0b1_0001_011), + Self(0b1_0001_010), + Self(0b1_0001_001), + Self(0b1_0001_000), // −0.015625 + // -m*2^-7 subnormal numbers + Self(0b1_0000_111), // −0.013671875 + Self(0b1_0000_110), + Self(0b1_0000_101), + Self(0b1_0000_100), + Self(0b1_0000_011), + Self(0b1_0000_010), + Self(0b1_0000_001), // −0.001953125 + // Zeroes + Self(0b1_0000_000), // -0.0 + Self(0b0_0000_000), // 0.0 + // m*2^-7 // subnormal numbers + Self(0b0_0000_001), + Self(0b0_0000_010), + Self(0b0_0000_011), + Self(0b0_0000_100), + Self(0b0_0000_101), + Self(0b0_0000_110), + Self(0b0_0000_111), // 0.013671875 + // m*2^-6 + Self(0b0_0001_000), // 0.015625 + Self(0b0_0001_001), + Self(0b0_0001_010), + Self(0b0_0001_011), + Self(0b0_0001_100), + Self(0b0_0001_101), + Self(0b0_0001_110), + Self(0b0_0001_111), // 0.029296875 + // m*2^-5 + Self(0b0_0010_000), // 0.03125 + Self(0b0_0010_001), + Self(0b0_0010_010), + Self(0b0_0010_011), + Self(0b0_0010_100), + Self(0b0_0010_101), + Self(0b0_0010_110), + Self(0b0_0010_111), // 0.05859375 + // m*2^-4 + Self(0b0_0011_000), // 0.0625 + Self(0b0_0011_001), + Self(0b0_0011_010), + Self(0b0_0011_011), + Self(0b0_0011_100), + Self(0b0_0011_101), + Self(0b0_0011_110), + Self(0b0_0011_111), // 0.1171875 + // m*2^-3 + Self(0b0_0100_000), // 0.125 + Self(0b0_0100_001), + Self(0b0_0100_010), + Self(0b0_0100_011), + Self(0b0_0100_100), + Self(0b0_0100_101), + Self(0b0_0100_110), + Self(0b0_0100_111), // 0.234375 + // m*2^-2 + Self(0b0_0101_000), // 0.25 + Self(0b0_0101_001), + Self(0b0_0101_010), 
+ Self(0b0_0101_011), + Self(0b0_0101_100), + Self(0b0_0101_101), + Self(0b0_0101_110), + Self(0b0_0101_111), // 0.46875 + // m*2^-1 + Self(0b0_0110_000), // 0.5 + Self(0b0_0110_001), + Self(0b0_0110_010), + Self(0b0_0110_011), + Self(0b0_0110_100), + Self(0b0_0110_101), + Self(0b0_0110_110), + Self(0b0_0110_111), // 0.9375 + // m*2^0 + Self(0b0_0111_000), // 1 + Self(0b0_0111_001), + Self(0b0_0111_010), + Self(0b0_0111_011), + Self(0b0_0111_100), + Self(0b0_0111_101), + Self(0b0_0111_110), + Self(0b0_0111_111), // 1.875 + // m*2^1 + Self(0b0_1000_000), // 2 + Self(0b0_1000_001), + Self(0b0_1000_010), + Self(0b0_1000_011), + Self(0b0_1000_100), + Self(0b0_1000_101), + Self(0b0_1000_110), + Self(0b0_1000_111), // 3.75 + // m*2^2 + Self(0b0_1001_000), // 4 + Self(0b0_1001_001), + Self(0b0_1001_010), + Self(0b0_1001_011), + Self(0b0_1001_100), + Self(0b0_1001_101), + Self(0b0_1001_110), + Self(0b0_1001_111), // 7.5 + // m*2^3 + Self(0b0_1010_000), // 8 + Self(0b0_1010_001), + Self(0b0_1010_010), + Self(0b0_1010_011), + Self(0b0_1010_100), + Self(0b0_1010_101), + Self(0b0_1010_110), + Self(0b0_1010_111), // 15 + // m*2^4 + Self(0b0_1011_000), // 16 + Self(0b0_1011_001), + Self(0b0_1011_010), + Self(0b0_1011_011), + Self(0b0_1011_100), + Self(0b0_1011_101), + Self(0b0_1011_110), + Self(0b0_1011_111), // 30 + // m*2^5 + Self(0b0_1100_000), // 32 + Self(0b0_1100_001), + Self(0b0_1100_010), + Self(0b0_1100_011), + Self(0b0_1100_100), + Self(0b0_1100_101), + Self(0b0_1100_110), + Self(0b0_1100_111), // 60 + // m*2^6 + Self(0b0_1101_000), // 64 + Self(0b0_1101_001), + Self(0b0_1101_010), + Self(0b0_1101_011), + Self(0b0_1101_100), + Self(0b0_1101_101), + Self(0b0_1101_110), + Self(0b0_1101_111), // 120 + // m*2^7 + Self(0b0_1110_000), // 128 + Self(0b0_1110_001), + Self(0b0_1110_010), + Self(0b0_1110_011), + Self(0b0_1110_100), + Self(0b0_1110_101), + Self(0b0_1110_110), + Self(0b0_1110_111), // 240 + ]; +} + +impl ops::Add for f8 { + type Output = Self; + fn add(self, _rhs: 
Self) -> Self::Output { + unimplemented!() + } +} + +impl ops::Sub for f8 { + type Output = Self; + fn sub(self, _rhs: Self) -> Self::Output { + unimplemented!() + } +} +impl ops::Mul for f8 { + type Output = Self; + fn mul(self, _rhs: Self) -> Self::Output { + unimplemented!() + } +} +impl ops::Div for f8 { + type Output = Self; + fn div(self, _rhs: Self) -> Self::Output { + unimplemented!() + } +} + +impl ops::Neg for f8 { + type Output = Self; + fn neg(self) -> Self::Output { + Self(self.0 ^ Self::SIGN_MASK) + } +} + +impl ops::Rem for f8 { + type Output = Self; + fn rem(self, _rhs: Self) -> Self::Output { + unimplemented!() + } +} + +impl ops::AddAssign for f8 { + fn add_assign(&mut self, _rhs: Self) { + unimplemented!() + } +} + +impl ops::SubAssign for f8 { + fn sub_assign(&mut self, _rhs: Self) { + unimplemented!() + } +} + +impl ops::MulAssign for f8 { + fn mul_assign(&mut self, _rhs: Self) { + unimplemented!() + } +} + +impl cmp::PartialEq for f8 { + fn eq(&self, other: &Self) -> bool { + if self.is_nan() || other.is_nan() { + false + } else if self.abs().to_bits() | other.abs().to_bits() == 0 { + true + } else { + self.0 == other.0 + } + } +} +impl cmp::PartialOrd for f8 { + fn partial_cmp(&self, other: &Self) -> Option { + let inf_rep = f8::EXP_MASK; + + let a_abs = self.abs().to_bits(); + let b_abs = other.abs().to_bits(); + + // If either a or b is NaN, they are unordered. + if a_abs > inf_rep || b_abs > inf_rep { + return None; + } + + // If a and b are both zeros, they are equal. + if a_abs | b_abs == 0 { + return Some(Ordering::Equal); + } + + let a_srep = self.to_bits_signed(); + let b_srep = other.to_bits_signed(); + let res = a_srep.cmp(&b_srep); + + if a_srep & b_srep >= 0 { + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a fp_ting-point compare. 
+ Some(res) + } else { + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. + Some(res.reverse()) + } + } +} +impl fmt::Display for f8 { + fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result { + unimplemented!() + } +} + +impl fmt::Debug for f8 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Binary::fmt(self, f) + } +} + +impl fmt::Binary for f8 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let v = self.0; + write!( + f, + "0b{:b}_{:04b}_{:03b}", + v >> 7, + (v & Self::EXP_MASK) >> Self::SIG_BITS, + v & Self::SIG_MASK + ) + } +} + +impl fmt::LowerHex for f8 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 17a06b3be..ed7131713 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -1,3 +1,6 @@ +#![allow(clippy::unusual_byte_groupings)] // sometimes we group by sign_exp_sig + +mod f8_impl; pub mod gen; #[cfg(feature = "test-multiprecision")] pub mod mpfloat; @@ -5,6 +8,7 @@ pub mod op; mod precision; mod test_traits; +pub use f8_impl::f8; pub use libm::support::{Float, Int, IntTy}; pub use op::{BaseName, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet}; pub use precision::{MaybeOverride, SpecialCase, default_ulp}; From 2d3c68dacd739ab85c2979d92a70dccdc6370d5f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 19 Dec 2024 15:10:35 +0000 Subject: [PATCH 0985/1459] Introduce a float extension trait and some numerical routines --- libm/crates/libm-test/src/lib.rs | 4 +- libm/crates/libm-test/src/num.rs | 458 +++++++++++++++++++++++++++++++ 2 files changed, 461 insertions(+), 1 deletion(-) create mode 100644 libm/crates/libm-test/src/num.rs diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index ed7131713..48b382d20 100644 --- a/libm/crates/libm-test/src/lib.rs +++ 
b/libm/crates/libm-test/src/lib.rs @@ -4,12 +4,14 @@ mod f8_impl; pub mod gen; #[cfg(feature = "test-multiprecision")] pub mod mpfloat; +mod num; pub mod op; mod precision; mod test_traits; pub use f8_impl::f8; -pub use libm::support::{Float, Int, IntTy}; +pub use libm::support::{Float, Int, IntTy, MinInt}; +pub use num::{FloatExt, logspace}; pub use op::{BaseName, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet}; pub use precision::{MaybeOverride, SpecialCase, default_ulp}; pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall}; diff --git a/libm/crates/libm-test/src/num.rs b/libm/crates/libm-test/src/num.rs new file mode 100644 index 000000000..4aa7f61b0 --- /dev/null +++ b/libm/crates/libm-test/src/num.rs @@ -0,0 +1,458 @@ +//! Helpful numeric operations. + +use std::cmp::min; + +use libm::support::{CastInto, Float}; + +use crate::{Int, MinInt}; + +/// Extension to `libm`'s `Float` trait with methods that are useful for tests but not +/// needed in `libm` itself. +pub trait FloatExt: Float { + /// The minimum subnormal number. + const TINY_BITS: Self::Int = Self::Int::ONE; + + /// Retrieve additional constants for this float type. + fn consts() -> Consts { + Consts::new() + } + + /// Increment by one ULP, saturating at infinity. + fn next_up(self) -> Self { + let bits = self.to_bits(); + if self.is_nan() || bits == Self::INFINITY.to_bits() { + return self; + } + + let abs = self.abs().to_bits(); + let next_bits = if abs == Self::Int::ZERO { + // Next up from 0 is the smallest subnormal + Self::TINY_BITS + } else if bits == abs { + // Positive: counting up is more positive + bits + Self::Int::ONE + } else { + // Negative: counting down is more positive + bits - Self::Int::ONE + }; + Self::from_bits(next_bits) + } + + /// A faster way to effectively call `next_up` `n` times. 
+ fn n_up(self, n: Self::Int) -> Self { + let bits = self.to_bits(); + if self.is_nan() || bits == Self::INFINITY.to_bits() || n == Self::Int::ZERO { + return self; + } + + let abs = self.abs().to_bits(); + let is_positive = bits == abs; + let crosses_zero = !is_positive && n > abs; + let inf_bits = Self::INFINITY.to_bits(); + + let next_bits = if abs == Self::Int::ZERO { + min(n, inf_bits) + } else if crosses_zero { + min(n - abs, inf_bits) + } else if is_positive { + // Positive, counting up is more positive but this may overflow + match bits.checked_add(n) { + Some(v) if v >= inf_bits => inf_bits, + Some(v) => v, + None => inf_bits, + } + } else { + // Negative, counting down is more positive + bits - n + }; + Self::from_bits(next_bits) + } + + /// Decrement by one ULP, saturating at negative infinity. + fn next_down(self) -> Self { + let bits = self.to_bits(); + if self.is_nan() || bits == Self::NEG_INFINITY.to_bits() { + return self; + } + + let abs = self.abs().to_bits(); + let next_bits = if abs == Self::Int::ZERO { + // Next up from 0 is the smallest negative subnormal + Self::TINY_BITS | Self::SIGN_MASK + } else if bits == abs { + // Positive: counting down is more negative + bits - Self::Int::ONE + } else { + // Negative: counting up is more negative + bits + Self::Int::ONE + }; + Self::from_bits(next_bits) + } + + /// A faster way to effectively call `next_down` `n` times. 
+ fn n_down(self, n: Self::Int) -> Self { + let bits = self.to_bits(); + if self.is_nan() || bits == Self::NEG_INFINITY.to_bits() || n == Self::Int::ZERO { + return self; + } + + let abs = self.abs().to_bits(); + let is_positive = bits == abs; + let crosses_zero = is_positive && n > abs; + let inf_bits = Self::INFINITY.to_bits(); + let ninf_bits = Self::NEG_INFINITY.to_bits(); + + let next_bits = if abs == Self::Int::ZERO { + min(n, inf_bits) | Self::SIGN_MASK + } else if crosses_zero { + min(n - abs, inf_bits) | Self::SIGN_MASK + } else if is_positive { + // Positive, counting down is more negative + bits - n + } else { + // Negative, counting up is more negative but this may overflow + match bits.checked_add(n) { + Some(v) if v > ninf_bits => ninf_bits, + Some(v) => v, + None => ninf_bits, + } + }; + Self::from_bits(next_bits) + } +} + +impl FloatExt for F where F: Float {} + +/// Extra constants that are useful for tests. +#[derive(Debug, Clone, Copy)] +pub struct Consts { + /// The default quiet NaN, which is also the minimum quiet NaN. + pub pos_nan: F, + /// The default quiet NaN with negative sign. + pub neg_nan: F, + /// NaN with maximum (unsigned) significand to be a quiet NaN. The significand is saturated. + pub max_qnan: F, + /// NaN with minimum (unsigned) significand to be a signaling NaN. + pub min_snan: F, + /// NaN with maximum (unsigned) significand to be a signaling NaN. 
+ pub max_snan: F, + pub neg_max_qnan: F, + pub neg_min_snan: F, + pub neg_max_snan: F, +} + +impl Consts { + fn new() -> Self { + let top_sigbit_mask = F::Int::ONE << (F::SIG_BITS - 1); + let pos_nan = F::EXP_MASK | top_sigbit_mask; + let max_qnan = F::EXP_MASK | F::SIG_MASK; + let min_snan = F::EXP_MASK | F::Int::ONE; + let max_snan = (F::EXP_MASK | F::SIG_MASK) ^ top_sigbit_mask; + + let neg_nan = pos_nan | F::SIGN_MASK; + let neg_max_qnan = max_qnan | F::SIGN_MASK; + let neg_min_snan = min_snan | F::SIGN_MASK; + let neg_max_snan = max_snan | F::SIGN_MASK; + + Self { + pos_nan: F::from_bits(pos_nan), + neg_nan: F::from_bits(neg_nan), + max_qnan: F::from_bits(max_qnan), + min_snan: F::from_bits(min_snan), + max_snan: F::from_bits(max_snan), + neg_max_qnan: F::from_bits(neg_max_qnan), + neg_min_snan: F::from_bits(neg_min_snan), + neg_max_snan: F::from_bits(neg_max_snan), + } + } + + pub fn iter(self) -> impl Iterator { + // Destructure so we get unused warnings if we forget a list entry. + let Self { + pos_nan, + neg_nan, + max_qnan, + min_snan, + max_snan, + neg_max_qnan, + neg_min_snan, + neg_max_snan, + } = self; + + [pos_nan, neg_nan, max_qnan, min_snan, max_snan, neg_max_qnan, neg_min_snan, neg_max_snan] + .into_iter() + } +} + +/// Return the number of steps between two floats, returning `None` if either input is NaN. +/// +/// This is the number of steps needed for `n_up` or `n_down` to go between values. Infinities +/// are treated the same as those functions (will return the nearest finite value), and only one +/// of `-0` or `+0` is counted. It does not matter which value is greater. +pub fn ulp_between(x: F, y: F) -> Option { + let a = as_ulp_steps(x)?; + let b = as_ulp_steps(y)?; + Some(a.abs_diff(b)) +} + +/// Return the (signed) number of steps from zero to `x`. 
+fn as_ulp_steps(x: F) -> Option { + let s = x.to_bits_signed(); + let val = if s >= F::SignedInt::ZERO { + // each increment from `s = 0` is one step up from `x = 0.0` + s + } else { + // each increment from `s = F::SignedInt::MIN` is one step down from `x = -0.0` + F::SignedInt::MIN - s + }; + + // If `x` is NaN, return `None` + (!x.is_nan()).then_some(val) +} + +/// An iterator that returns floats with linearly spaced integer representations, which translates +/// to logarithmic spacing of their values. +/// +/// Note that this tends to skip negative zero, so that needs to be checked explicitly. +pub fn logspace(start: F, end: F, steps: F::Int) -> impl Iterator { + assert!(!start.is_nan()); + assert!(!end.is_nan()); + assert!(end >= start); + + let mut steps = steps.checked_sub(F::Int::ONE).expect("`steps` must be at least 2"); + let between = ulp_between(start, end).expect("`start` or `end` is NaN"); + let spacing = (between / steps).max(F::Int::ONE); + steps = steps.min(between); // At maximum, one step per ULP + + let mut x = start; + (0..=steps.cast()).map(move |_| { + let ret = x; + x = x.n_up(spacing); + ret + }) +} + +#[cfg(test)] +mod tests { + use std::cmp::max; + + use super::*; + use crate::f8; + + #[test] + fn test_next_up_down() { + for (i, v) in f8::ALL.into_iter().enumerate() { + let down = v.next_down().to_bits(); + let up = v.next_up().to_bits(); + + if i == 0 { + assert_eq!(down, f8::NEG_INFINITY.to_bits(), "{i} next_down({v:#010b})"); + } else { + let expected = + if v == f8::ZERO { 1 | f8::SIGN_MASK } else { f8::ALL[i - 1].to_bits() }; + assert_eq!(down, expected, "{i} next_down({v:#010b})"); + } + + if i == f8::ALL_LEN - 1 { + assert_eq!(up, f8::INFINITY.to_bits(), "{i} next_up({v:#010b})"); + } else { + let expected = if v == f8::NEG_ZERO { 1 } else { f8::ALL[i + 1].to_bits() }; + assert_eq!(up, expected, "{i} next_up({v:#010b})"); + } + } + } + + #[test] + fn test_next_up_down_inf_nan() { + assert_eq!(f8::NEG_INFINITY.next_up().to_bits(), 
f8::ALL[0].to_bits(),); + assert_eq!(f8::NEG_INFINITY.next_down().to_bits(), f8::NEG_INFINITY.to_bits(),); + assert_eq!(f8::INFINITY.next_down().to_bits(), f8::ALL[f8::ALL_LEN - 1].to_bits(),); + assert_eq!(f8::INFINITY.next_up().to_bits(), f8::INFINITY.to_bits(),); + assert_eq!(f8::NAN.next_up().to_bits(), f8::NAN.to_bits(),); + assert_eq!(f8::NAN.next_down().to_bits(), f8::NAN.to_bits(),); + } + + #[test] + fn test_n_up_down_quick() { + assert_eq!(f8::ALL[0].n_up(4).to_bits(), f8::ALL[4].to_bits(),); + assert_eq!( + f8::ALL[f8::ALL_LEN - 1].n_down(4).to_bits(), + f8::ALL[f8::ALL_LEN - 5].to_bits(), + ); + + // Check around zero + assert_eq!(f8::from_bits(0b0).n_up(7).to_bits(), 0b0_0000_111); + assert_eq!(f8::from_bits(0b0).n_down(7).to_bits(), 0b1_0000_111); + + // Check across zero + assert_eq!(f8::from_bits(0b1_0000_111).n_up(8).to_bits(), 0b0_0000_001); + assert_eq!(f8::from_bits(0b0_0000_111).n_down(8).to_bits(), 0b1_0000_001); + } + + #[test] + fn test_n_up_down_one() { + // Verify that `n_up(1)` and `n_down(1)` are the same as `next_up()` and next_down()`.` + for i in 0..u8::MAX { + let v = f8::from_bits(i); + assert_eq!(v.next_up().to_bits(), v.n_up(1).to_bits()); + assert_eq!(v.next_down().to_bits(), v.n_down(1).to_bits()); + } + } + + #[test] + fn test_n_up_down_inf_nan_zero() { + assert_eq!(f8::NEG_INFINITY.n_up(1).to_bits(), f8::ALL[0].to_bits()); + assert_eq!(f8::NEG_INFINITY.n_up(239).to_bits(), f8::ALL[f8::ALL_LEN - 1].to_bits()); + assert_eq!(f8::NEG_INFINITY.n_up(240).to_bits(), f8::INFINITY.to_bits()); + assert_eq!(f8::NEG_INFINITY.n_down(u8::MAX).to_bits(), f8::NEG_INFINITY.to_bits()); + + assert_eq!(f8::INFINITY.n_down(1).to_bits(), f8::ALL[f8::ALL_LEN - 1].to_bits()); + assert_eq!(f8::INFINITY.n_down(239).to_bits(), f8::ALL[0].to_bits()); + assert_eq!(f8::INFINITY.n_down(240).to_bits(), f8::NEG_INFINITY.to_bits()); + assert_eq!(f8::INFINITY.n_up(u8::MAX).to_bits(), f8::INFINITY.to_bits()); + + assert_eq!(f8::NAN.n_up(u8::MAX).to_bits(), 
f8::NAN.to_bits()); + assert_eq!(f8::NAN.n_down(u8::MAX).to_bits(), f8::NAN.to_bits()); + + assert_eq!(f8::ZERO.n_down(1).to_bits(), f8::TINY_BITS | f8::SIGN_MASK); + assert_eq!(f8::NEG_ZERO.n_up(1).to_bits(), f8::TINY_BITS); + } + + /// True if the specified range of `f8::ALL` includes both +0 and -0 + fn crossed_zero(start: usize, end: usize) -> bool { + let crossed = &f8::ALL[start..=end]; + crossed.iter().any(|f| f8::eq_repr(*f, f8::ZERO)) + && crossed.iter().any(|f| f8::eq_repr(*f, f8::NEG_ZERO)) + } + + #[test] + fn test_n_up_down() { + for (i, v) in f8::ALL.into_iter().enumerate() { + for n in 0..f8::ALL_LEN { + let down = v.n_down(n as u8).to_bits(); + let up = v.n_up(n as u8).to_bits(); + + if let Some(down_exp_idx) = i.checked_sub(n) { + // No overflow + let mut expected = f8::ALL[down_exp_idx].to_bits(); + if n >= 1 && crossed_zero(down_exp_idx, i) { + // If both -0 and +0 are included, we need to adjust our expected value + match down_exp_idx.checked_sub(1) { + Some(v) => expected = f8::ALL[v].to_bits(), + // Saturate to -inf if we are out of values + None => expected = f8::NEG_INFINITY.to_bits(), + } + } + assert_eq!(down, expected, "{i} {n} n_down({v:#010b})"); + } else { + // Overflow to -inf + assert_eq!(down, f8::NEG_INFINITY.to_bits(), "{i} {n} n_down({v:#010b})"); + } + + let mut up_exp_idx = i + n; + if up_exp_idx < f8::ALL_LEN { + // No overflow + if n >= 1 && up_exp_idx < f8::ALL_LEN && crossed_zero(i, up_exp_idx) { + // If both -0 and +0 are included, we need to adjust our expected value + up_exp_idx += 1; + } + + let expected = if up_exp_idx >= f8::ALL_LEN { + f8::INFINITY.to_bits() + } else { + f8::ALL[up_exp_idx].to_bits() + }; + + assert_eq!(up, expected, "{i} {n} n_up({v:#010b})"); + } else { + // Overflow to +inf + assert_eq!(up, f8::INFINITY.to_bits(), "{i} {n} n_up({v:#010b})"); + } + } + } + } + + #[test] + fn test_ulp_between() { + for (i, x) in f8::ALL.into_iter().enumerate() { + for (j, y) in f8::ALL.into_iter().enumerate() { + 
let ulp = ulp_between(x, y).unwrap(); + let make_msg = || format!("i: {i} j: {j} x: {x:b} y: {y:b} ulp {ulp}"); + + let i_low = min(i, j); + let i_hi = max(i, j); + let mut expected = u8::try_from(i_hi - i_low).unwrap(); + if crossed_zero(i_low, i_hi) { + expected -= 1; + } + + assert_eq!(ulp, expected, "{}", make_msg()); + + // Skip if either are zero since `next_{up,down}` will count over it + let either_zero = x == f8::ZERO || y == f8::ZERO; + if x < y && !either_zero { + assert_eq!(x.n_up(ulp).to_bits(), y.to_bits(), "{}", make_msg()); + assert_eq!(y.n_down(ulp).to_bits(), x.to_bits(), "{}", make_msg()); + } else if !either_zero { + assert_eq!(y.n_up(ulp).to_bits(), x.to_bits(), "{}", make_msg()); + assert_eq!(x.n_down(ulp).to_bits(), y.to_bits(), "{}", make_msg()); + } + } + } + } + + #[test] + fn test_ulp_between_inf_nan_zero() { + assert_eq!(ulp_between(f8::NEG_INFINITY, f8::INFINITY).unwrap(), f8::ALL_LEN as u8); + assert_eq!(ulp_between(f8::INFINITY, f8::NEG_INFINITY).unwrap(), f8::ALL_LEN as u8); + assert_eq!( + ulp_between(f8::NEG_INFINITY, f8::ALL[f8::ALL_LEN - 1]).unwrap(), + f8::ALL_LEN as u8 - 1 + ); + assert_eq!(ulp_between(f8::INFINITY, f8::ALL[0]).unwrap(), f8::ALL_LEN as u8 - 1); + + assert_eq!(ulp_between(f8::ZERO, f8::NEG_ZERO).unwrap(), 0); + assert_eq!(ulp_between(f8::NAN, f8::ZERO), None); + assert_eq!(ulp_between(f8::ZERO, f8::NAN), None); + } + + #[test] + fn test_logspace() { + let ls: Vec<_> = logspace(f8::from_bits(0x0), f8::from_bits(0x4), 2).collect(); + let exp = [f8::from_bits(0x0), f8::from_bits(0x4)]; + assert_eq!(ls, exp); + + let ls: Vec<_> = logspace(f8::from_bits(0x0), f8::from_bits(0x4), 3).collect(); + let exp = [f8::from_bits(0x0), f8::from_bits(0x2), f8::from_bits(0x4)]; + assert_eq!(ls, exp); + + // Check that we include all values with no repeats if `steps` exceeds the maximum number + // of steps. 
+ let ls: Vec<_> = logspace(f8::from_bits(0x0), f8::from_bits(0x3), 10).collect(); + let exp = [f8::from_bits(0x0), f8::from_bits(0x1), f8::from_bits(0x2), f8::from_bits(0x3)]; + assert_eq!(ls, exp); + } + + #[test] + fn test_consts() { + let Consts { + pos_nan, + neg_nan, + max_qnan, + min_snan, + max_snan, + neg_max_qnan, + neg_min_snan, + neg_max_snan, + } = f8::consts(); + + assert_eq!(pos_nan.to_bits(), 0b0_1111_100); + assert_eq!(neg_nan.to_bits(), 0b1_1111_100); + assert_eq!(max_qnan.to_bits(), 0b0_1111_111); + assert_eq!(min_snan.to_bits(), 0b0_1111_001); + assert_eq!(max_snan.to_bits(), 0b0_1111_011); + assert_eq!(neg_max_qnan.to_bits(), 0b1_1111_111); + assert_eq!(neg_min_snan.to_bits(), 0b1_1111_001); + assert_eq!(neg_max_snan.to_bits(), 0b1_1111_011); + } +} From 5f713b11aec427f93cc6b4f2ed997d4f9dbee399 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 19 Dec 2024 11:19:01 +0000 Subject: [PATCH 0986/1459] Add interfaces and tests based on function domains Create a type representing a function's domain and a test that does a logarithmic sweep of points within the domain. --- libm/crates/libm-test/src/domain.rs | 186 ++++++++++++++++++ libm/crates/libm-test/src/gen.rs | 1 + .../libm-test/src/gen/domain_logspace.rs | 43 ++++ libm/crates/libm-test/src/lib.rs | 1 + libm/crates/libm-test/tests/multiprecision.rs | 101 +++++++++- 5 files changed, 327 insertions(+), 5 deletions(-) create mode 100644 libm/crates/libm-test/src/domain.rs create mode 100644 libm/crates/libm-test/src/gen/domain_logspace.rs diff --git a/libm/crates/libm-test/src/domain.rs b/libm/crates/libm-test/src/domain.rs new file mode 100644 index 000000000..43ba21974 --- /dev/null +++ b/libm/crates/libm-test/src/domain.rs @@ -0,0 +1,186 @@ +//! Traits and operations related to bounds of a function. + +use std::fmt; +use std::ops::{self, Bound}; + +use crate::Float; + +/// Representation of a function's domain. 
+#[derive(Clone, Debug)] +pub struct Domain { + /// Start of the region for which a function is defined (ignoring poles). + pub start: Bound, + /// Endof the region for which a function is defined (ignoring poles). + pub end: Bound, + /// Additional points to check closer around. These can be e.g. undefined asymptotes or + /// inflection points. + pub check_points: Option BoxIter>, +} + +type BoxIter = Box>; + +impl Domain { + /// The start of this domain, saturating at negative infinity. + pub fn range_start(&self) -> F { + match self.start { + Bound::Included(v) => v, + Bound::Excluded(v) => v.next_up(), + Bound::Unbounded => F::NEG_INFINITY, + } + } + + /// The end of this domain, saturating at infinity. + pub fn range_end(&self) -> F { + match self.end { + Bound::Included(v) => v, + Bound::Excluded(v) => v.next_down(), + Bound::Unbounded => F::INFINITY, + } + } +} + +impl Domain { + /// x ∈ ℝ + pub const UNBOUNDED: Self = + Self { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None }; + + /// x ∈ ℝ >= 0 + pub const POSITIVE: Self = + Self { start: Bound::Included(F::ZERO), end: Bound::Unbounded, check_points: None }; + + /// x ∈ ℝ > 0 + pub const STRICTLY_POSITIVE: Self = + Self { start: Bound::Excluded(F::ZERO), end: Bound::Unbounded, check_points: None }; + + /// Used for versions of `asin` and `acos`. 
+ pub const INVERSE_TRIG_PERIODIC: Self = Self { + start: Bound::Included(F::NEG_ONE), + end: Bound::Included(F::ONE), + check_points: None, + }; + + /// Domain for `acosh` + pub const ACOSH: Self = + Self { start: Bound::Included(F::ONE), end: Bound::Unbounded, check_points: None }; + + /// Domain for `atanh` + pub const ATANH: Self = Self { + start: Bound::Excluded(F::NEG_ONE), + end: Bound::Excluded(F::ONE), + check_points: None, + }; + + /// Domain for `sin`, `cos`, and `tan` + pub const TRIG: Self = Self { + // TODO + check_points: Some(|| Box::new([-F::PI, -F::FRAC_PI_2, F::FRAC_PI_2, F::PI].into_iter())), + ..Self::UNBOUNDED + }; + + /// Domain for `log` in various bases + pub const LOG: Self = Self::STRICTLY_POSITIVE; + + /// Domain for `log1p` i.e. `log(1 + x)` + pub const LOG1P: Self = + Self { start: Bound::Excluded(F::NEG_ONE), end: Bound::Unbounded, check_points: None }; + + /// Domain for `sqrt` + pub const SQRT: Self = Self::POSITIVE; + + /// Domain for `gamma` + pub const GAMMA: Self = Self { + check_points: Some(|| { + // Negative integers are asymptotes + Box::new((0..u8::MAX).map(|scale| { + let mut base = F::ZERO; + for _ in 0..scale { + base = base - F::ONE; + } + base + })) + }), + // Whether or not gamma is defined for negative numbers is implementation dependent + ..Self::UNBOUNDED + }; + + /// Domain for `loggamma` + pub const LGAMMA: Self = Self::STRICTLY_POSITIVE; +} + +/// Implement on `op::*` types to indicate how they are bounded. +pub trait HasDomain +where + T: Copy + fmt::Debug + ops::Add + ops::Sub + PartialOrd + 'static, +{ + const DOMAIN: Domain; +} + +/// Implement [`HasDomain`] for both the `f32` and `f64` variants of a function. +macro_rules! impl_has_domain { + ($($fn_name:ident => $domain:expr;)*) => { + paste::paste! 
{ + $( + // Implement for f64 functions + impl HasDomain for $crate::op::$fn_name::Routine { + const DOMAIN: Domain = Domain::::$domain; + } + + // Implement for f32 functions + impl HasDomain for $crate::op::[< $fn_name f >]::Routine { + const DOMAIN: Domain = Domain::::$domain; + } + )* + } + }; +} + +// Tie functions together with their domains. +impl_has_domain! { + acos => INVERSE_TRIG_PERIODIC; + acosh => ACOSH; + asin => INVERSE_TRIG_PERIODIC; + asinh => UNBOUNDED; + atan => UNBOUNDED; + atanh => ATANH; + cbrt => UNBOUNDED; + ceil => UNBOUNDED; + cos => TRIG; + cosh => UNBOUNDED; + erf => UNBOUNDED; + exp => UNBOUNDED; + exp10 => UNBOUNDED; + exp2 => UNBOUNDED; + expm1 => UNBOUNDED; + fabs => UNBOUNDED; + floor => UNBOUNDED; + frexp => UNBOUNDED; + ilogb => UNBOUNDED; + j0 => UNBOUNDED; + j1 => UNBOUNDED; + lgamma => LGAMMA; + log => LOG; + log10 => LOG; + log1p => LOG1P; + log2 => LOG; + modf => UNBOUNDED; + rint => UNBOUNDED; + round => UNBOUNDED; + sin => TRIG; + sincos => TRIG; + sinh => UNBOUNDED; + sqrt => SQRT; + tan => TRIG; + tanh => UNBOUNDED; + tgamma => GAMMA; + trunc => UNBOUNDED; +} + +/* Manual implementations, these functions don't follow `foo`->`foof` naming */ + +impl HasDomain for crate::op::lgammaf_r::Routine { + const DOMAIN: Domain = Domain::::LGAMMA; +} + +impl HasDomain for crate::op::lgamma_r::Routine { + const DOMAIN: Domain = Domain::::LGAMMA; +} diff --git a/libm/crates/libm-test/src/gen.rs b/libm/crates/libm-test/src/gen.rs index 3e9eca37a..e3c88c44a 100644 --- a/libm/crates/libm-test/src/gen.rs +++ b/libm/crates/libm-test/src/gen.rs @@ -1,6 +1,7 @@ //! Different generators that can create random or systematic bit patterns. use crate::GenerateInput; +pub mod domain_logspace; pub mod random; /// Helper type to turn any reusable input into a generator. 
diff --git a/libm/crates/libm-test/src/gen/domain_logspace.rs b/libm/crates/libm-test/src/gen/domain_logspace.rs new file mode 100644 index 000000000..e8cdb9d2b --- /dev/null +++ b/libm/crates/libm-test/src/gen/domain_logspace.rs @@ -0,0 +1,43 @@ +//! A generator that produces logarithmically spaced values within domain bounds. + +use libm::support::{IntTy, MinInt}; + +use crate::domain::HasDomain; +use crate::op::OpITy; +use crate::{MathOp, logspace}; + +/// Number of tests to run. +// FIXME(ntests): replace this with a more logical algorithm +const NTESTS: usize = { + if cfg!(optimizations_enabled) { + if crate::emulated() + || !cfg!(target_pointer_width = "64") + || cfg!(all(target_arch = "x86_64", target_vendor = "apple")) + { + // Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run + // in QEMU. + 100_000 + } else { + 5_000_000 + } + } else { + // Without optimizations just run a quick check + 800 + } +}; + +/// Create a range of logarithmically spaced inputs within a function's domain. +/// +/// This allows us to get reasonably thorough coverage without wasting time on values that are +/// NaN or out of range. Random tests will still cover values that are excluded here. 
+pub fn get_test_cases() -> impl Iterator +where + Op: MathOp + HasDomain, + IntTy: TryFrom, +{ + let domain = Op::DOMAIN; + let start = domain.range_start(); + let end = domain.range_end(); + let steps = OpITy::::try_from(NTESTS).unwrap_or(OpITy::::MAX); + logspace(start, end, steps).map(|v| (v,)) +} diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 48b382d20..622b2dec9 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -1,5 +1,6 @@ #![allow(clippy::unusual_byte_groupings)] // sometimes we group by sign_exp_sig +pub mod domain; mod f8_impl; pub mod gen; #[cfg(feature = "test-multiprecision")] diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 0b41fba82..e643f3c9c 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -2,11 +2,14 @@ #![cfg(feature = "test-multiprecision")] -use libm_test::gen::{CachedInput, random}; +use libm_test::domain::HasDomain; +use libm_test::gen::{CachedInput, domain_logspace, random}; use libm_test::mpfloat::MpOp; -use libm_test::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall}; +use libm_test::{ + CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, OpFTy, OpRustFn, OpRustRet, TupleCall, +}; -/// Implement a test against MPFR with random inputs. +/// Test against MPFR with random inputs. macro_rules! mp_rand_tests { ( fn_name: $fn_name:ident, @@ -16,13 +19,14 @@ macro_rules! mp_rand_tests { #[test] $(#[$meta])* fn [< mp_random_ $fn_name >]() { - test_one::(); + test_one_random::(); } } }; } -fn test_one() +/// Test a single routine with random inputs +fn test_one_random() where Op: MathOp + MpOp, CachedInput: GenerateInput, @@ -67,3 +71,90 @@ libm_macros::for_each_function! { nextafterf, ], } + +/// Test against MPFR with generators from a domain. +macro_rules! 
mp_domain_tests { + ( + fn_name: $fn_name:ident, + attrs: [$($meta:meta)*] + ) => { + paste::paste! { + #[test] + $(#[$meta])* + fn [< mp_logspace_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + domain_test_runner::(domain_logspace::get_test_cases::()); + } + } + }; +} + +/// Test a single routine against domaine-aware inputs. +fn domain_test_runner(cases: impl Iterator) +where + // Complicated generics... + // The operation must take a single float argument (unary only) + Op: MathOp::FTy,)>, + // It must also support multiprecision operations + Op: MpOp, + // And it must have a domain specified + Op: HasDomain, + // The single float argument tuple must be able to call the `RustFn` and return `RustRet` + (OpFTy,): TupleCall, Output = OpRustRet>, +{ + let mut mp_vals = Op::new_mp(); + let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr); + + for input in cases { + let mp_res = Op::run(&mut mp_vals, input); + let crate_res = input.call(Op::ROUTINE); + + crate_res.validate(mp_res, input, &ctx).unwrap(); + } +} + +libm_macros::for_each_function! { + callback: mp_domain_tests, + attributes: [], + skip: [ + // Functions with multiple inputs + atan2, + atan2f, + copysign, + copysignf, + fdim, + fdimf, + fma, + fmaf, + fmax, + fmaxf, + fmin, + fminf, + fmod, + fmodf, + hypot, + hypotf, + jn, + jnf, + ldexp, + ldexpf, + nextafter, + nextafterf, + pow, + powf, + remainder, + remainderf, + remquo, + remquof, + scalbn, + scalbnf, + + // FIXME: MPFR tests needed + frexp, + frexpf, + ilogb, + ilogbf, + modf, + modff, + ], +} From a109046b6bf46f0a1dca5dc0b6191bb3bbc5a36e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 19 Dec 2024 11:22:02 +0000 Subject: [PATCH 0987/1459] Add tests for edge cases Introduce a generator that will tests various points of interest including zeros, infinities, and NaNs. 
--- libm/crates/libm-test/src/domain.rs | 4 +- libm/crates/libm-test/src/gen.rs | 1 + libm/crates/libm-test/src/gen/edge_cases.rs | 90 +++++++++++++++++++ libm/crates/libm-test/tests/multiprecision.rs | 9 +- 4 files changed, 101 insertions(+), 3 deletions(-) create mode 100644 libm/crates/libm-test/src/gen/edge_cases.rs diff --git a/libm/crates/libm-test/src/domain.rs b/libm/crates/libm-test/src/domain.rs index 43ba21974..9ee8a19b9 100644 --- a/libm/crates/libm-test/src/domain.rs +++ b/libm/crates/libm-test/src/domain.rs @@ -3,7 +3,7 @@ use std::fmt; use std::ops::{self, Bound}; -use crate::Float; +use crate::{Float, FloatExt}; /// Representation of a function's domain. #[derive(Clone, Debug)] @@ -19,7 +19,7 @@ pub struct Domain { type BoxIter = Box>; -impl Domain { +impl Domain { /// The start of this domain, saturating at negative infinity. pub fn range_start(&self) -> F { match self.start { diff --git a/libm/crates/libm-test/src/gen.rs b/libm/crates/libm-test/src/gen.rs index e3c88c44a..2d15915d9 100644 --- a/libm/crates/libm-test/src/gen.rs +++ b/libm/crates/libm-test/src/gen.rs @@ -2,6 +2,7 @@ use crate::GenerateInput; pub mod domain_logspace; +pub mod edge_cases; pub mod random; /// Helper type to turn any reusable input into a generator. diff --git a/libm/crates/libm-test/src/gen/edge_cases.rs b/libm/crates/libm-test/src/gen/edge_cases.rs new file mode 100644 index 000000000..625e18bc7 --- /dev/null +++ b/libm/crates/libm-test/src/gen/edge_cases.rs @@ -0,0 +1,90 @@ +//! A generator that checks a handful of cases near infinities, zeros, asymptotes, and NaNs. + +use libm::support::Float; + +use crate::domain::HasDomain; +use crate::{FloatExt, MathOp}; + +/// Number of values near an interesting point to check. +// FIXME(ntests): replace this with a more logical algorithm +const AROUND: usize = 100; + +/// Functions have infinite asymptotes, limit how many we check. 
+// FIXME(ntests): replace this with a more logical algorithm +const MAX_CHECK_POINTS: usize = 10; + +/// Create a list of values around interesting points (infinities, zeroes, NaNs). +pub fn get_test_cases() -> impl Iterator +where + Op: MathOp + HasDomain, + F: Float, +{ + let mut ret = Vec::new(); + let values = &mut ret; + let domain = Op::DOMAIN; + let domain_start = domain.range_start(); + let domain_end = domain.range_end(); + + // Check near some notable constants + count_up(F::ONE, values); + count_up(F::ZERO, values); + count_up(F::NEG_ONE, values); + count_down(F::ONE, values); + count_down(F::ZERO, values); + count_down(F::NEG_ONE, values); + values.push(F::NEG_ZERO); + + // Check values near the extremes + count_up(F::NEG_INFINITY, values); + count_down(F::INFINITY, values); + count_down(domain_end, values); + count_up(domain_start, values); + count_down(domain_start, values); + count_up(domain_end, values); + count_down(domain_end, values); + + // Check some special values that aren't included in the above ranges + values.push(F::NAN); + values.extend(F::consts().iter()); + + // Check around asymptotes + if let Some(f) = domain.check_points { + let iter = f(); + for x in iter.take(MAX_CHECK_POINTS) { + count_up(x, values); + count_down(x, values); + } + } + + // Some results may overlap so deduplicate the vector to save test cycles. + values.sort_by_key(|x| x.to_bits()); + values.dedup_by_key(|x| x.to_bits()); + + ret.into_iter().map(|v| (v,)) +} + +/// Add `AROUND` values starting at and including `x` and counting up. Uses the smallest possible +/// increments (1 ULP). +fn count_up(mut x: F, values: &mut Vec) { + assert!(!x.is_nan()); + + let mut count = 0; + while x < F::INFINITY && count < AROUND { + values.push(x); + x = x.next_up(); + count += 1; + } +} + +/// Add `AROUND` values starting at and including `x` and counting down. Uses the smallest possible +/// increments (1 ULP). 
+fn count_down(mut x: F, values: &mut Vec) { + assert!(!x.is_nan()); + + let mut count = 0; + while x > F::NEG_INFINITY && count < AROUND { + values.push(x); + x = x.next_down(); + count += 1; + } +} diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index e643f3c9c..5255dc1cf 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -3,7 +3,7 @@ #![cfg(feature = "test-multiprecision")] use libm_test::domain::HasDomain; -use libm_test::gen::{CachedInput, domain_logspace, random}; +use libm_test::gen::{CachedInput, domain_logspace, edge_cases, random}; use libm_test::mpfloat::MpOp; use libm_test::{ CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, OpFTy, OpRustFn, OpRustRet, TupleCall, @@ -79,6 +79,13 @@ macro_rules! mp_domain_tests { attrs: [$($meta:meta)*] ) => { paste::paste! { + #[test] + $(#[$meta])* + fn [< mp_edge_case_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + domain_test_runner::(edge_cases::get_test_cases::()); + } + #[test] $(#[$meta])* fn [< mp_logspace_ $fn_name >]() { From d278097f558e124ff80da4e5a30966225ef7d925 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 19 Dec 2024 14:23:05 +0000 Subject: [PATCH 0988/1459] Update allowed precision to account for new tests --- libm/crates/libm-test/src/precision.rs | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index c7f9d9e30..b878212fa 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -41,10 +41,11 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { (Musl, Id::Tgamma) => 20, // Overrides for MPFR + (Mpfr, Id::Acosh) => 4, (Mpfr, Id::Acoshf) => 4, (Mpfr, Id::Asinh | Id::Asinhf) => 2, (Mpfr, Id::Atanh | Id::Atanhf) => 2, - (Mpfr, Id::Exp10 | Id::Exp10f) => 3, + (Mpfr, Id::Exp10 | Id::Exp10f) => 6, (Mpfr, 
Id::Lgamma | Id::LgammaR | Id::Lgammaf | Id::LgammafR) => 16, (Mpfr, Id::Sinh | Id::Sinhf) => 2, (Mpfr, Id::Tanh | Id::Tanhf) => 2, @@ -105,17 +106,14 @@ impl MaybeOverride<(f32,)> for SpecialCase { _ulp: &mut u32, ctx: &CheckCtx, ) -> Option { - if ctx.basis == CheckBasis::Musl { - if ctx.base_name == BaseName::Expm1 && input.0 > 80.0 && actual.is_infinite() { - // we return infinity but the number is representable - return XFAIL; - } + if ctx.base_name == BaseName::Expm1 && input.0 > 80.0 && actual.is_infinite() { + // we return infinity but the number is representable + return XFAIL; + } - if ctx.base_name == BaseName::Sinh && input.0.abs() > 80.0 && actual.is_nan() { - // we return some NaN that should be real values or infinite - // doesn't seem to happen on x86 - return XFAIL; - } + if ctx.base_name == BaseName::Sinh && input.0.abs() > 80.0 && actual.is_nan() { + // we return some NaN that should be real values or infinite + return XFAIL; } if ctx.base_name == BaseName::Acosh && input.0 < -1.0 { From 2e3a78b3ffeb40b3a3d6f5c08b01e9d371cadddc Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 19 Dec 2024 11:22:22 +0000 Subject: [PATCH 0989/1459] Add a way to plot the output from generators For visualization, add a simple script for generating scatter plots and a binary (via examples) to plot the inputs given various domains. --- .../crates/libm-test/examples/plot_domains.rs | 105 ++++++++++++ libm/crates/libm-test/examples/plot_file.jl | 157 ++++++++++++++++++ 2 files changed, 262 insertions(+) create mode 100644 libm/crates/libm-test/examples/plot_domains.rs create mode 100644 libm/crates/libm-test/examples/plot_file.jl diff --git a/libm/crates/libm-test/examples/plot_domains.rs b/libm/crates/libm-test/examples/plot_domains.rs new file mode 100644 index 000000000..630a0c233 --- /dev/null +++ b/libm/crates/libm-test/examples/plot_domains.rs @@ -0,0 +1,105 @@ +//! Program to write all inputs from a generator to a file, then invoke a Julia script to plot +//! 
them. Output is in `target/plots`. +//! +//! Requires Julia with the `CairoMakie` dependency. +//! +//! Note that running in release mode by default generates a _lot_ more datapoints, which +//! causes plotting to be extremely slow (some simplification to be done in the script). + +use std::fmt::Write as _; +use std::io::{BufWriter, Write}; +use std::path::Path; +use std::process::Command; +use std::{env, fs}; + +use libm_test::domain::HasDomain; +use libm_test::gen::{domain_logspace, edge_cases}; +use libm_test::{MathOp, op}; + +const JL_PLOT: &str = "examples/plot_file.jl"; + +fn main() { + let manifest_env = env::var("CARGO_MANIFEST_DIR").unwrap(); + let manifest_dir = Path::new(&manifest_env); + let out_dir = manifest_dir.join("../../target/plots"); + if !out_dir.exists() { + fs::create_dir(&out_dir).unwrap(); + } + + let jl_script = manifest_dir.join(JL_PLOT); + let mut config = format!(r#"out_dir = "{}""#, out_dir.display()); + config.write_str("\n\n").unwrap(); + + // Plot a few domains with some functions that use them. + plot_one_operator::(&out_dir, &mut config); + plot_one_operator::(&out_dir, &mut config); + plot_one_operator::(&out_dir, &mut config); + + let config_path = out_dir.join("config.toml"); + fs::write(&config_path, config).unwrap(); + + // The script expects a path to `config.toml` to be passed as its only argument + let mut cmd = Command::new("julia"); + if cfg!(optimizations_enabled) { + cmd.arg("-O3"); + } + cmd.arg(jl_script).arg(config_path); + + println!("launching script... {cmd:?}"); + cmd.status().unwrap(); +} + +/// Run multiple generators for a single operator. 
+fn plot_one_operator(out_dir: &Path, config: &mut String) +where + Op: MathOp + HasDomain, +{ + plot_one_generator( + out_dir, + Op::BASE_NAME.as_str(), + "logspace", + config, + domain_logspace::get_test_cases::(), + ); + plot_one_generator( + out_dir, + Op::BASE_NAME.as_str(), + "edge_cases", + config, + edge_cases::get_test_cases::(), + ); +} + +/// Plot the output of a single generator. +fn plot_one_generator( + out_dir: &Path, + fn_name: &str, + gen_name: &str, + config: &mut String, + gen: impl Iterator, +) { + let text_file = out_dir.join(format!("input-{fn_name}-{gen_name}.txt")); + + let f = fs::File::create(&text_file).unwrap(); + let mut w = BufWriter::new(f); + let mut count = 0u64; + + for input in gen { + writeln!(w, "{:e}", input.0).unwrap(); + count += 1; + } + + w.flush().unwrap(); + println!("generated {count} inputs for {fn_name}-{gen_name}"); + + writeln!( + config, + r#"[[input]] +function = "{fn_name}" +generator = "{gen_name}" +input_file = "{}" +"#, + text_file.to_str().unwrap() + ) + .unwrap() +} diff --git a/libm/crates/libm-test/examples/plot_file.jl b/libm/crates/libm-test/examples/plot_file.jl new file mode 100644 index 000000000..14a128303 --- /dev/null +++ b/libm/crates/libm-test/examples/plot_file.jl @@ -0,0 +1,157 @@ +"A quick script for plotting a list of floats. + +Takes a path to a TOML file (Julia has builtin TOML support but not JSON) which +specifies a list of source files to plot. Plots are done with both a linear and +a log scale. + +Requires [Makie] (specifically CairoMakie) for plotting. 
+ +[Makie]: https://docs.makie.org/stable/ +" + +using CairoMakie +using TOML + +function main()::Nothing + CairoMakie.activate!(px_per_unit=10) + config_path = ARGS[1] + + cfg = Dict() + open(config_path, "r") do f + cfg = TOML.parse(f) + end + + out_dir = cfg["out_dir"] + for input in cfg["input"] + fn_name = input["function"] + gen_name = input["generator"] + input_file = input["input_file"] + + plot_one(input_file, out_dir, fn_name, gen_name) + end +end + +"Read inputs from a file, create both linear and log plots for one function" +function plot_one( + input_file::String, + out_dir::String, + fn_name::String, + gen_name::String, +)::Nothing + fig = Figure() + + lin_out_file = joinpath(out_dir, "plot-$fn_name-$gen_name.png") + log_out_file = joinpath(out_dir, "plot-$fn_name-$gen_name-log.png") + + # Map string function names to callable functions + if fn_name == "cos" + orig_func = cos + xlims = (-6.0, 6.0) + xlims_log = (-pi * 10, pi * 10) + elseif fn_name == "cbrt" + orig_func = cbrt + xlims = (-2.0, 2.0) + xlims_log = (-1000.0, 1000.0) + elseif fn_name == "sqrt" + orig_func = sqrt + xlims = (-1.1, 6.0) + xlims_log = (-1.1, 5000.0) + else + println("unrecognized function name `$fn_name`; update plot_file.jl") + exit(1) + end + + # Edge cases don't do much beyond +/-1, except for infinity. 
+ if gen_name == "edge_cases" + xlims = (-1.1, 1.1) + xlims_log = (-1.1, 1.1) + end + + # Turn domain errors into NaN + func(x) = map_or(x, orig_func, NaN) + + # Parse a series of X values produced by the generator + inputs = readlines(input_file) + gen_x = map((v) -> parse(Float32, v), inputs) + + do_plot( + fig, gen_x, func, xlims[1], xlims[2], + "$fn_name $gen_name (linear scale)", + lin_out_file, false, + ) + + do_plot( + fig, gen_x, func, xlims_log[1], xlims_log[2], + "$fn_name $gen_name (log scale)", + log_out_file, true, + ) +end + +"Create a single plot" +function do_plot( + fig::Figure, + gen_x::Vector{F}, + func::Function, + xmin::AbstractFloat, + xmax::AbstractFloat, + title::String, + out_file::String, + logscale::Bool, +)::Nothing where F<:AbstractFloat + println("plotting $title") + + # `gen_x` is the values the generator produces. `actual_x` is for plotting a + # continuous function. + input_min = xmin - 1.0 + input_max = xmax + 1.0 + gen_x = filter((v) -> v >= input_min && v <= input_max, gen_x) + markersize = length(gen_x) < 10_000 ? 6.0 : 4.0 + + steps = 10_000 + if logscale + r = LinRange(symlog10(input_min), symlog10(input_max), steps) + actual_x = sympow10.(r) + xscale = Makie.pseudolog10 + else + actual_x = LinRange(input_min, input_max, steps) + xscale = identity + end + + gen_y = @. func(gen_x) + actual_y = @. 
func(actual_x) + + ax = Axis(fig[1, 1], xscale=xscale, title=title) + + lines!( + ax, actual_x, actual_y, color=(:lightblue, 0.6), + linewidth=6.0, label="true function", + ) + scatter!( + ax, gen_x, gen_y, color=(:darkblue, 0.9), + markersize=markersize, label="checked inputs", + ) + axislegend(ax, position=:rb, framevisible=false) + + save(out_file, fig) + delete!(ax) +end + +"Apply a function, returning the default if there is a domain error" +function map_or( + input::AbstractFloat, + f::Function, + default::Any +)::Union{AbstractFloat,Any} + try + return f(input) + catch + return default + end +end + +# Operations for logarithms that are symmetric about 0 +C = 10 +symlog10(x::Number) = sign(x) * (log10(1 + abs(x)/(10^C))) +sympow10(x::Number) = (10^C) * (10^x - 1) + +main() From eb8e715d03f48f2367ce64a9dd70d3d83878a9a0 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 22 Dec 2024 11:22:02 +0000 Subject: [PATCH 0990/1459] Move the macro's input function list to a new module `shared` This will enable us to `include!` the file to access these types in `libm-test`, rather than somehow reproducing the types as part of the macro. Ideally `libm-test` would just `use` the types from `libm-macros` but proc macro crates cannot currently export anything else. This also adjusts naming to closer match the scheme described in `libm_test::op`. 
--- libm/crates/libm-macros/src/enums.rs | 7 +- libm/crates/libm-macros/src/lib.rs | 299 ++++---------------------- libm/crates/libm-macros/src/shared.rs | 277 ++++++++++++++++++++++++ 3 files changed, 320 insertions(+), 263 deletions(-) create mode 100644 libm/crates/libm-macros/src/shared.rs diff --git a/libm/crates/libm-macros/src/enums.rs b/libm/crates/libm-macros/src/enums.rs index 1f9fca2ef..82dedc66e 100644 --- a/libm/crates/libm-macros/src/enums.rs +++ b/libm/crates/libm-macros/src/enums.rs @@ -5,7 +5,7 @@ use quote::quote; use syn::spanned::Spanned; use syn::{Fields, ItemEnum, Variant}; -use crate::{ALL_FUNCTIONS_FLAT, base_name}; +use crate::{ALL_OPERATIONS, base_name}; /// Implement `#[function_enum]`, see documentation in `lib.rs`. pub fn function_enum( @@ -33,7 +33,7 @@ pub fn function_enum( let mut as_str_arms = Vec::new(); let mut base_arms = Vec::new(); - for func in ALL_FUNCTIONS_FLAT.iter() { + for func in ALL_OPERATIONS.iter() { let fn_name = func.name; let ident = Ident::new(&fn_name.to_upper_camel_case(), Span::call_site()); let bname_ident = Ident::new(&base_name(fn_name).to_upper_camel_case(), Span::call_site()); @@ -85,8 +85,7 @@ pub fn base_name_enum( return Err(syn::Error::new(sp.span(), "no attributes expected")); } - let mut base_names: Vec<_> = - ALL_FUNCTIONS_FLAT.iter().map(|func| base_name(func.name)).collect(); + let mut base_names: Vec<_> = ALL_OPERATIONS.iter().map(|func| base_name(func.name)).collect(); base_names.sort_unstable(); base_names.dedup(); diff --git a/libm/crates/libm-macros/src/lib.rs b/libm/crates/libm-macros/src/lib.rs index 1e7cd08b9..916b539ed 100644 --- a/libm/crates/libm-macros/src/lib.rs +++ b/libm/crates/libm-macros/src/lib.rs @@ -1,270 +1,18 @@ mod enums; mod parse; - -use std::sync::LazyLock; +mod shared; use parse::{Invocation, StructuredInput}; use proc_macro as pm; use proc_macro2::{self as pm2, Span}; use quote::{ToTokens, quote}; +pub(crate) use shared::{ALL_OPERATIONS, FloatTy, MathOpInfo, Ty}; use 
syn::spanned::Spanned; use syn::visit_mut::VisitMut; use syn::{Ident, ItemEnum}; -const ALL_FUNCTIONS: &[(Ty, Signature, Option, &[&str])] = &[ - ( - // `fn(f32) -> f32` - Ty::F32, - Signature { args: &[Ty::F32], returns: &[Ty::F32] }, - None, - &[ - "acosf", "acoshf", "asinf", "asinhf", "atanf", "atanhf", "cbrtf", "ceilf", "cosf", - "coshf", "erff", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf", "j0f", "j1f", - "lgammaf", "log10f", "log1pf", "log2f", "logf", "rintf", "roundf", "sinf", "sinhf", - "sqrtf", "tanf", "tanhf", "tgammaf", "truncf", - ], - ), - ( - // `(f64) -> f64` - Ty::F64, - Signature { args: &[Ty::F64], returns: &[Ty::F64] }, - None, - &[ - "acos", "acosh", "asin", "asinh", "atan", "atanh", "cbrt", "ceil", "cos", "cosh", - "erf", "exp10", "exp2", "exp", "expm1", "fabs", "floor", "j0", "j1", "lgamma", "log10", - "log1p", "log2", "log", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh", - "tgamma", "trunc", - ], - ), - ( - // `(f32, f32) -> f32` - Ty::F32, - Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32] }, - None, - &[ - "atan2f", - "copysignf", - "fdimf", - "fmaxf", - "fminf", - "fmodf", - "hypotf", - "nextafterf", - "powf", - "remainderf", - ], - ), - ( - // `(f64, f64) -> f64` - Ty::F64, - Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64] }, - None, - &[ - "atan2", - "copysign", - "fdim", - "fmax", - "fmin", - "fmod", - "hypot", - "nextafter", - "pow", - "remainder", - ], - ), - ( - // `(f32, f32, f32) -> f32` - Ty::F32, - Signature { args: &[Ty::F32, Ty::F32, Ty::F32], returns: &[Ty::F32] }, - None, - &["fmaf"], - ), - ( - // `(f64, f64, f64) -> f64` - Ty::F64, - Signature { args: &[Ty::F64, Ty::F64, Ty::F64], returns: &[Ty::F64] }, - None, - &["fma"], - ), - ( - // `(f32) -> i32` - Ty::F32, - Signature { args: &[Ty::F32], returns: &[Ty::I32] }, - None, - &["ilogbf"], - ), - ( - // `(f64) -> i32` - Ty::F64, - Signature { args: &[Ty::F64], returns: &[Ty::I32] }, - None, - &["ilogb"], - ), - ( - // `(i32, f32) -> 
f32` - Ty::F32, - Signature { args: &[Ty::I32, Ty::F32], returns: &[Ty::F32] }, - None, - &["jnf"], - ), - ( - // `(i32, f64) -> f64` - Ty::F64, - Signature { args: &[Ty::I32, Ty::F64], returns: &[Ty::F64] }, - None, - &["jn"], - ), - ( - // `(f32, i32) -> f32` - Ty::F32, - Signature { args: &[Ty::F32, Ty::I32], returns: &[Ty::F32] }, - None, - &["scalbnf", "ldexpf"], - ), - ( - // `(f64, i64) -> f64` - Ty::F64, - Signature { args: &[Ty::F64, Ty::I32], returns: &[Ty::F64] }, - None, - &["scalbn", "ldexp"], - ), - ( - // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)` - Ty::F32, - Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] }, - Some(Signature { args: &[Ty::F32, Ty::MutF32], returns: &[Ty::F32] }), - &["modff"], - ), - ( - // `(f64, &mut f64) -> f64` as `(f64) -> (f64, f64)` - Ty::F64, - Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] }, - Some(Signature { args: &[Ty::F64, Ty::MutF64], returns: &[Ty::F64] }), - &["modf"], - ), - ( - // `(f32, &mut c_int) -> f32` as `(f32) -> (f32, i32)` - Ty::F32, - Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::I32] }, - Some(Signature { args: &[Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }), - &["frexpf", "lgammaf_r"], - ), - ( - // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)` - Ty::F64, - Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::I32] }, - Some(Signature { args: &[Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }), - &["frexp", "lgamma_r"], - ), - ( - // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)` - Ty::F32, - Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32, Ty::I32] }, - Some(Signature { args: &[Ty::F32, Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }), - &["remquof"], - ), - ( - // `(f64, f64, &mut c_int) -> f64` as `(f64, f64) -> (f64, i32)` - Ty::F64, - Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64, Ty::I32] }, - Some(Signature { args: &[Ty::F64, Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }), - &["remquo"], - ), - ( - // `(f32, &mut f32, &mut 
f32)` as `(f32) -> (f32, f32)` - Ty::F32, - Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] }, - Some(Signature { args: &[Ty::F32, Ty::MutF32, Ty::MutF32], returns: &[] }), - &["sincosf"], - ), - ( - // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)` - Ty::F64, - Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] }, - Some(Signature { args: &[Ty::F64, Ty::MutF64, Ty::MutF64], returns: &[] }), - &["sincos"], - ), -]; - const KNOWN_TYPES: &[&str] = &["FTy", "CFn", "CArgs", "CRet", "RustFn", "RustArgs", "RustRet"]; -/// A type used in a function signature. -#[allow(dead_code)] -#[derive(Debug, Clone, Copy)] -enum Ty { - F16, - F32, - F64, - F128, - I32, - CInt, - MutF16, - MutF32, - MutF64, - MutF128, - MutI32, - MutCInt, -} - -impl ToTokens for Ty { - fn to_tokens(&self, tokens: &mut pm2::TokenStream) { - let ts = match self { - Ty::F16 => quote! { f16 }, - Ty::F32 => quote! { f32 }, - Ty::F64 => quote! { f64 }, - Ty::F128 => quote! { f128 }, - Ty::I32 => quote! { i32 }, - Ty::CInt => quote! { ::core::ffi::c_int }, - Ty::MutF16 => quote! { &'a mut f16 }, - Ty::MutF32 => quote! { &'a mut f32 }, - Ty::MutF64 => quote! { &'a mut f64 }, - Ty::MutF128 => quote! { &'a mut f128 }, - Ty::MutI32 => quote! { &'a mut i32 }, - Ty::MutCInt => quote! { &'a mut core::ffi::c_int }, - }; - - tokens.extend(ts); - } -} - -/// Representation of e.g. `(f32, f32) -> f32` -#[derive(Debug, Clone)] -struct Signature { - args: &'static [Ty], - returns: &'static [Ty], -} - -/// Combined information about a function implementation. -#[derive(Debug, Clone)] -struct FunctionInfo { - name: &'static str, - base_fty: Ty, - /// Function signature for C implementations - c_sig: Signature, - /// Function signature for Rust implementations - rust_sig: Signature, -} - -/// A flat representation of `ALL_FUNCTIONS`. 
-static ALL_FUNCTIONS_FLAT: LazyLock> = LazyLock::new(|| { - let mut ret = Vec::new(); - - for (base_fty, rust_sig, c_sig, names) in ALL_FUNCTIONS { - for name in *names { - let api = FunctionInfo { - name, - base_fty: *base_fty, - rust_sig: rust_sig.clone(), - c_sig: c_sig.clone().unwrap_or_else(|| rust_sig.clone()), - }; - ret.push(api); - } - } - - ret.sort_by_key(|item| item.name); - ret -}); - /// Populate an enum with a variant representing function. Names are in upper camel case. /// /// Applied to an empty enum. Expects one attribute `#[function_enum(BaseName)]` that provides @@ -382,7 +130,7 @@ pub fn for_each_function(tokens: pm::TokenStream) -> pm::TokenStream { /// Check for any input that is structurally correct but has other problems. /// /// Returns the list of function names that we should expand for. -fn validate(input: &mut StructuredInput) -> syn::Result> { +fn validate(input: &mut StructuredInput) -> syn::Result> { // Collect lists of all functions that are provied as macro inputs in various fields (only, // skip, attributes). let attr_mentions = input @@ -398,7 +146,7 @@ fn validate(input: &mut StructuredInput) -> syn::Result syn::Result syn::Result syn::Result { +fn expand(input: StructuredInput, fn_list: &[&MathOpInfo]) -> syn::Result { let mut out = pm2::TokenStream::new(); let default_ident = Ident::new("_", Span::call_site()); let callback = input.callback; @@ -545,7 +293,7 @@ fn expand(input: StructuredInput, fn_list: &[&FunctionInfo]) -> syn::Result pm2::TokenStream::new(), }; - let base_fty = func.base_fty; + let base_fty = func.float_ty; let c_args = &func.c_sig.args; let c_ret = &func.c_sig.returns; let rust_args = &func.rust_sig.args; @@ -648,3 +396,36 @@ fn base_name(name: &str) -> &str { .unwrap_or(name), } } + +impl ToTokens for Ty { + fn to_tokens(&self, tokens: &mut pm2::TokenStream) { + let ts = match self { + Ty::F16 => quote! { f16 }, + Ty::F32 => quote! { f32 }, + Ty::F64 => quote! { f64 }, + Ty::F128 => quote! 
{ f128 }, + Ty::I32 => quote! { i32 }, + Ty::CInt => quote! { ::core::ffi::c_int }, + Ty::MutF16 => quote! { &'a mut f16 }, + Ty::MutF32 => quote! { &'a mut f32 }, + Ty::MutF64 => quote! { &'a mut f64 }, + Ty::MutF128 => quote! { &'a mut f128 }, + Ty::MutI32 => quote! { &'a mut i32 }, + Ty::MutCInt => quote! { &'a mut core::ffi::c_int }, + }; + + tokens.extend(ts); + } +} +impl ToTokens for FloatTy { + fn to_tokens(&self, tokens: &mut pm2::TokenStream) { + let ts = match self { + FloatTy::F16 => quote! { f16 }, + FloatTy::F32 => quote! { f32 }, + FloatTy::F64 => quote! { f64 }, + FloatTy::F128 => quote! { f128 }, + }; + + tokens.extend(ts); + } +} diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs new file mode 100644 index 000000000..100bcc7ad --- /dev/null +++ b/libm/crates/libm-macros/src/shared.rs @@ -0,0 +1,277 @@ +/* List of all functions that is shared between `libm-macros` and `libm-test`. */ + +use std::fmt; +use std::sync::LazyLock; + +const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] = &[ + ( + // `fn(f32) -> f32` + FloatTy::F32, + Signature { args: &[Ty::F32], returns: &[Ty::F32] }, + None, + &[ + "acosf", "acoshf", "asinf", "asinhf", "atanf", "atanhf", "cbrtf", "ceilf", "cosf", + "coshf", "erff", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf", "j0f", "j1f", + "lgammaf", "log10f", "log1pf", "log2f", "logf", "rintf", "roundf", "sinf", "sinhf", + "sqrtf", "tanf", "tanhf", "tgammaf", "truncf", + ], + ), + ( + // `(f64) -> f64` + FloatTy::F64, + Signature { args: &[Ty::F64], returns: &[Ty::F64] }, + None, + &[ + "acos", "acosh", "asin", "asinh", "atan", "atanh", "cbrt", "ceil", "cos", "cosh", + "erf", "exp10", "exp2", "exp", "expm1", "fabs", "floor", "j0", "j1", "lgamma", "log10", + "log1p", "log2", "log", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh", + "tgamma", "trunc", + ], + ), + ( + // `(f32, f32) -> f32` + FloatTy::F32, + Signature { args: &[Ty::F32, Ty::F32], returns: 
&[Ty::F32] }, + None, + &[ + "atan2f", + "copysignf", + "fdimf", + "fmaxf", + "fminf", + "fmodf", + "hypotf", + "nextafterf", + "powf", + "remainderf", + ], + ), + ( + // `(f64, f64) -> f64` + FloatTy::F64, + Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64] }, + None, + &[ + "atan2", + "copysign", + "fdim", + "fmax", + "fmin", + "fmod", + "hypot", + "nextafter", + "pow", + "remainder", + ], + ), + ( + // `(f32, f32, f32) -> f32` + FloatTy::F32, + Signature { args: &[Ty::F32, Ty::F32, Ty::F32], returns: &[Ty::F32] }, + None, + &["fmaf"], + ), + ( + // `(f64, f64, f64) -> f64` + FloatTy::F64, + Signature { args: &[Ty::F64, Ty::F64, Ty::F64], returns: &[Ty::F64] }, + None, + &["fma"], + ), + ( + // `(f32) -> i32` + FloatTy::F32, + Signature { args: &[Ty::F32], returns: &[Ty::I32] }, + None, + &["ilogbf"], + ), + ( + // `(f64) -> i32` + FloatTy::F64, + Signature { args: &[Ty::F64], returns: &[Ty::I32] }, + None, + &["ilogb"], + ), + ( + // `(i32, f32) -> f32` + FloatTy::F32, + Signature { args: &[Ty::I32, Ty::F32], returns: &[Ty::F32] }, + None, + &["jnf"], + ), + ( + // `(i32, f64) -> f64` + FloatTy::F64, + Signature { args: &[Ty::I32, Ty::F64], returns: &[Ty::F64] }, + None, + &["jn"], + ), + ( + // `(f32, i32) -> f32` + FloatTy::F32, + Signature { args: &[Ty::F32, Ty::I32], returns: &[Ty::F32] }, + None, + &["scalbnf", "ldexpf"], + ), + ( + // `(f64, i64) -> f64` + FloatTy::F64, + Signature { args: &[Ty::F64, Ty::I32], returns: &[Ty::F64] }, + None, + &["scalbn", "ldexp"], + ), + ( + // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)` + FloatTy::F32, + Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] }, + Some(Signature { args: &[Ty::F32, Ty::MutF32], returns: &[Ty::F32] }), + &["modff"], + ), + ( + // `(f64, &mut f64) -> f64` as `(f64) -> (f64, f64)` + FloatTy::F64, + Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] }, + Some(Signature { args: &[Ty::F64, Ty::MutF64], returns: &[Ty::F64] }), + &["modf"], + ), + ( + // `(f32, &mut 
c_int) -> f32` as `(f32) -> (f32, i32)` + FloatTy::F32, + Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::I32] }, + Some(Signature { args: &[Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }), + &["frexpf", "lgammaf_r"], + ), + ( + // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)` + FloatTy::F64, + Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::I32] }, + Some(Signature { args: &[Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }), + &["frexp", "lgamma_r"], + ), + ( + // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)` + FloatTy::F32, + Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32, Ty::I32] }, + Some(Signature { args: &[Ty::F32, Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }), + &["remquof"], + ), + ( + // `(f64, f64, &mut c_int) -> f64` as `(f64, f64) -> (f64, i32)` + FloatTy::F64, + Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64, Ty::I32] }, + Some(Signature { args: &[Ty::F64, Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }), + &["remquo"], + ), + ( + // `(f32, &mut f32, &mut f32)` as `(f32) -> (f32, f32)` + FloatTy::F32, + Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] }, + Some(Signature { args: &[Ty::F32, Ty::MutF32, Ty::MutF32], returns: &[] }), + &["sincosf"], + ), + ( + // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)` + FloatTy::F64, + Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] }, + Some(Signature { args: &[Ty::F64, Ty::MutF64, Ty::MutF64], returns: &[] }), + &["sincos"], + ), +]; + +/// A type used in a function signature. +#[allow(dead_code)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum Ty { + F16, + F32, + F64, + F128, + I32, + CInt, + MutF16, + MutF32, + MutF64, + MutF128, + MutI32, + MutCInt, +} + +/// A subset of [`Ty`] representing only floats. 
+#[allow(dead_code)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum FloatTy { + F16, + F32, + F64, + F128, +} + +impl fmt::Display for Ty { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + Ty::F16 => "f16", + Ty::F32 => "f32", + Ty::F64 => "f64", + Ty::F128 => "f128", + Ty::I32 => "i32", + Ty::CInt => "::core::ffi::c_int", + Ty::MutF16 => "&mut f16", + Ty::MutF32 => "&mut f32", + Ty::MutF64 => "&mut f64", + Ty::MutF128 => "&mut f128", + Ty::MutI32 => "&mut i32", + Ty::MutCInt => "&mut ::core::ffi::c_int", + }; + f.write_str(s) + } +} + +impl fmt::Display for FloatTy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + FloatTy::F16 => "f16", + FloatTy::F32 => "f32", + FloatTy::F64 => "f64", + FloatTy::F128 => "f128", + }; + f.write_str(s) + } +} + +/// Representation of e.g. `(f32, f32) -> f32` +#[derive(Debug, Clone)] +pub struct Signature { + pub args: &'static [Ty], + pub returns: &'static [Ty], +} + +/// Combined information about a function implementation. +#[derive(Debug, Clone)] +pub struct MathOpInfo { + pub name: &'static str, + pub float_ty: FloatTy, + /// Function signature for C implementations + pub c_sig: Signature, + /// Function signature for Rust implementations + pub rust_sig: Signature, +} + +/// A flat representation of `ALL_FUNCTIONS`. 
+pub static ALL_OPERATIONS: LazyLock> = LazyLock::new(|| { + let mut ret = Vec::new(); + + for (base_fty, rust_sig, c_sig, names) in ALL_OPERATIONS_NESTED { + for name in *names { + let api = MathOpInfo { + name, + float_ty: *base_fty, + rust_sig: rust_sig.clone(), + c_sig: c_sig.clone().unwrap_or_else(|| rust_sig.clone()), + }; + ret.push(api); + } + } + + ret.sort_by_key(|item| item.name); + ret +}); From 67b7c8577beb274eb60196d4d662f176cb9f2dd8 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 22 Dec 2024 11:41:43 +0000 Subject: [PATCH 0991/1459] Include `shared.rs` in `libm_test::op` These types from `libm-macros` provide a way to get information about an operation at runtime, rather than only being encoded in the type system. Include the file and reexport relevant types. --- libm/crates/libm-test/src/lib.rs | 2 +- libm/crates/libm-test/src/op.rs | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 622b2dec9..e3a690678 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -13,7 +13,7 @@ mod test_traits; pub use f8_impl::f8; pub use libm::support::{Float, Int, IntTy, MinInt}; pub use num::{FloatExt, logspace}; -pub use op::{BaseName, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet}; +pub use op::{BaseName, FloatTy, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet, Ty}; pub use precision::{MaybeOverride, SpecialCase, default_ulp}; pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall}; diff --git a/libm/crates/libm-test/src/op.rs b/libm/crates/libm-test/src/op.rs index bcea31c22..a2f21d3c1 100644 --- a/libm/crates/libm-test/src/op.rs +++ b/libm/crates/libm-test/src/op.rs @@ -13,8 +13,16 @@ //! - "Operation" / "Op": Something that relates a routine to a function or is otherwise higher //! level. `Op` is also used as the name for generic parameters since it is terse. 
+use std::fmt; + +pub use shared::{ALL_OPERATIONS, FloatTy, MathOpInfo, Ty}; + use crate::{CheckOutput, Float, TupleCall}; +mod shared { + include!("../../libm-macros/src/shared.rs"); +} + /// An enum representing each possible symbol name (`sin`, `sinf`, `sinl`, etc). #[libm_macros::function_enum(BaseName)] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] From 91a3a8e2a7bbeaf5f7e88b99149b6a145d953c2c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 29 Dec 2024 08:06:53 +0000 Subject: [PATCH 0992/1459] Add new trait implementations for `Identifier` and `BaseName` These allow for more convenient printing, as well as storage in map types. --- libm/crates/libm-test/src/op.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/libm/crates/libm-test/src/op.rs b/libm/crates/libm-test/src/op.rs index a2f21d3c1..e58c28903 100644 --- a/libm/crates/libm-test/src/op.rs +++ b/libm/crates/libm-test/src/op.rs @@ -25,14 +25,26 @@ mod shared { /// An enum representing each possible symbol name (`sin`, `sinf`, `sinl`, etc). #[libm_macros::function_enum(BaseName)] -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Identifier {} +impl fmt::Display for Identifier { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + /// The name without any type specifier, e.g. `sin` and `sinf` both become `sin`. #[libm_macros::base_name_enum] -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum BaseName {} +impl fmt::Display for BaseName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + /// Attributes ascribed to a `libm` routine including signature, type information, /// and naming. 
pub trait MathOp { From 9ff29d3735ca57af7e4960a2bca2f0be92f73c21 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 22 Dec 2024 11:26:22 +0000 Subject: [PATCH 0993/1459] Add `ALL`, `from_str` and `math_op` to `Identifier` Introduce new API to iterate the function list and associate items with their `MathOp`. --- libm/crates/libm-macros/src/enums.rs | 23 +++++++++++++++++++++ libm/crates/libm-macros/tests/enum.rs | 29 ++++++++++++++++++++++----- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/libm/crates/libm-macros/src/enums.rs b/libm/crates/libm-macros/src/enums.rs index 82dedc66e..864b625ea 100644 --- a/libm/crates/libm-macros/src/enums.rs +++ b/libm/crates/libm-macros/src/enums.rs @@ -31,6 +31,7 @@ pub fn function_enum( let enum_name = &item.ident; let mut as_str_arms = Vec::new(); + let mut from_str_arms = Vec::new(); let mut base_arms = Vec::new(); for func in ALL_OPERATIONS.iter() { @@ -40,6 +41,7 @@ pub fn function_enum( // Match arm for `fn as_str(self)` matcher as_str_arms.push(quote! { Self::#ident => #fn_name }); + from_str_arms.push(quote! { #fn_name => Self::#ident }); // Match arm for `fn base_name(self)` matcher base_arms.push(quote! { Self::#ident => #base_enum::#bname_ident }); @@ -50,11 +52,18 @@ pub fn function_enum( item.variants.push(variant); } + let variants = item.variants.iter(); + let res = quote! { // Instantiate the enum #item impl #enum_name { + /// All variants of this enum. + pub const ALL: &[Self] = &[ + #( Self::#variants, )* + ]; + /// The stringified version of this function name. pub const fn as_str(self) -> &'static str { match self { @@ -62,12 +71,26 @@ pub fn function_enum( } } + /// If `s` is the name of a function, return it. + pub fn from_str(s: &str) -> Option { + let ret = match s { + #( #from_str_arms , )* + _ => return None, + }; + Some(ret) + } + /// The base name enum for this function. 
pub const fn base_name(self) -> #base_enum { match self { #( #base_arms, )* } } + + /// Return information about this operation. + pub fn math_op(self) -> &'static crate::op::MathOpInfo { + crate::op::ALL_OPERATIONS.iter().find(|op| op.name == self.as_str()).unwrap() + } } }; diff --git a/libm/crates/libm-macros/tests/enum.rs b/libm/crates/libm-macros/tests/enum.rs index 884b8d8d6..93e209a0d 100644 --- a/libm/crates/libm-macros/tests/enum.rs +++ b/libm/crates/libm-macros/tests/enum.rs @@ -1,6 +1,6 @@ #[libm_macros::function_enum(BaseName)] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub enum Function {} +pub enum Identifier {} #[libm_macros::base_name_enum] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] @@ -8,12 +8,31 @@ pub enum BaseName {} #[test] fn as_str() { - assert_eq!(Function::Sin.as_str(), "sin"); - assert_eq!(Function::Sinf.as_str(), "sinf"); + assert_eq!(Identifier::Sin.as_str(), "sin"); + assert_eq!(Identifier::Sinf.as_str(), "sinf"); +} + +#[test] +fn from_str() { + assert_eq!(Identifier::from_str("sin").unwrap(), Identifier::Sin); + assert_eq!(Identifier::from_str("sinf").unwrap(), Identifier::Sinf); } #[test] fn basename() { - assert_eq!(Function::Sin.base_name(), BaseName::Sin); - assert_eq!(Function::Sinf.base_name(), BaseName::Sin); + assert_eq!(Identifier::Sin.base_name(), BaseName::Sin); + assert_eq!(Identifier::Sinf.base_name(), BaseName::Sin); } + +#[test] +fn math_op() { + assert_eq!(Identifier::Sin.math_op().float_ty, FloatTy::F64); + assert_eq!(Identifier::Sinf.math_op().float_ty, FloatTy::F32); +} + +// Replicate the structure that we have in `libm-test` +mod op { + include!("../../libm-macros/src/shared.rs"); +} + +use op::FloatTy; From aa8d8ce841e4c23b26f2f6736745dde03efb2998 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 26 Dec 2024 07:43:56 +0000 Subject: [PATCH 0994/1459] Move `CheckBasis` and `CheckCtx` to a new `run_cfg` module These are used more places than just test traits, so this new module should be a 
better home. `run_cfg` will also be expanded in the near future. --- libm/crates/libm-test/src/lib.rs | 4 +- libm/crates/libm-test/src/run_cfg.rs | 51 ++++++++++++++++++++++++ libm/crates/libm-test/src/test_traits.rs | 39 +----------------- 3 files changed, 55 insertions(+), 39 deletions(-) create mode 100644 libm/crates/libm-test/src/run_cfg.rs diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index e3a690678..eb457b0ae 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -8,6 +8,7 @@ pub mod mpfloat; mod num; pub mod op; mod precision; +mod run_cfg; mod test_traits; pub use f8_impl::f8; @@ -15,7 +16,8 @@ pub use libm::support::{Float, Int, IntTy, MinInt}; pub use num::{FloatExt, logspace}; pub use op::{BaseName, FloatTy, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet, Ty}; pub use precision::{MaybeOverride, SpecialCase, default_ulp}; -pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall}; +pub use run_cfg::{CheckBasis, CheckCtx}; +pub use test_traits::{CheckOutput, GenerateInput, Hex, TupleCall}; /// Result type for tests is usually from `anyhow`. Most times there is no success value to /// propagate. diff --git a/libm/crates/libm-test/src/run_cfg.rs b/libm/crates/libm-test/src/run_cfg.rs new file mode 100644 index 000000000..eb7e0e2c1 --- /dev/null +++ b/libm/crates/libm-test/src/run_cfg.rs @@ -0,0 +1,51 @@ +//! Configuration for how tests get run. + +#![allow(unused)] + +use std::collections::BTreeMap; +use std::env; +use std::sync::LazyLock; + +use crate::{BaseName, FloatTy, Identifier, op}; + +pub const EXTENSIVE_ENV: &str = "LIBM_EXTENSIVE_TESTS"; + +/// Context passed to [`CheckOutput`]. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct CheckCtx { + /// Allowed ULP deviation + pub ulp: u32, + pub fn_ident: Identifier, + pub base_name: BaseName, + /// Function name. + pub fn_name: &'static str, + /// Return the unsuffixed version of the function name. 
+ pub base_name_str: &'static str, + /// Source of truth for tests. + pub basis: CheckBasis, +} + +impl CheckCtx { + /// Create a new check context, using the default ULP for the function. + pub fn new(fn_ident: Identifier, basis: CheckBasis) -> Self { + let mut ret = Self { + ulp: 0, + fn_ident, + fn_name: fn_ident.as_str(), + base_name: fn_ident.base_name(), + base_name_str: fn_ident.base_name().as_str(), + basis, + }; + ret.ulp = crate::default_ulp(&ret); + ret + } +} + +/// Possible items to test against +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum CheckBasis { + /// Check against Musl's math sources. + Musl, + /// Check against infinite precision (MPFR). + Mpfr, +} diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index ca933bbda..6b833dfb5 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -11,44 +11,7 @@ use std::fmt; use anyhow::{Context, bail, ensure}; -use crate::{BaseName, Float, Identifier, Int, MaybeOverride, SpecialCase, TestResult}; - -/// Context passed to [`CheckOutput`]. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct CheckCtx { - /// Allowed ULP deviation - pub ulp: u32, - pub fn_ident: Identifier, - pub base_name: BaseName, - /// Function name. - pub fn_name: &'static str, - /// Source of truth for tests. - pub basis: CheckBasis, -} - -impl CheckCtx { - /// Create a new check context, using the default ULP for the function. - pub fn new(fn_ident: Identifier, basis: CheckBasis) -> Self { - let mut ret = Self { - ulp: 0, - fn_ident, - fn_name: fn_ident.as_str(), - base_name: fn_ident.base_name(), - basis, - }; - ret.ulp = crate::default_ulp(&ret); - ret - } -} - -/// Possible items to test against -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum CheckBasis { - /// Check against Musl's math sources. - Musl, - /// Check against infinite precision (MPFR). 
- Mpfr, -} +use crate::{CheckCtx, Float, Int, MaybeOverride, SpecialCase, TestResult}; /// Implement this on types that can generate a sequence of tuples for test input. pub trait GenerateInput { From 20ac6feaa6f49b194cd438e79c774ebd71bbe1c5 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 26 Dec 2024 07:42:13 +0000 Subject: [PATCH 0995/1459] Use `CheckCtx` in more places Rather than passing names or identifiers, just pass `CheckCtx` in a few more places. --- libm/crates/libm-test/examples/plot_domains.rs | 14 ++++++++------ libm/crates/libm-test/src/gen/domain_logspace.rs | 4 ++-- libm/crates/libm-test/src/gen/edge_cases.rs | 4 ++-- libm/crates/libm-test/tests/multiprecision.rs | 8 +++++--- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/libm/crates/libm-test/examples/plot_domains.rs b/libm/crates/libm-test/examples/plot_domains.rs index 630a0c233..626511245 100644 --- a/libm/crates/libm-test/examples/plot_domains.rs +++ b/libm/crates/libm-test/examples/plot_domains.rs @@ -14,7 +14,7 @@ use std::{env, fs}; use libm_test::domain::HasDomain; use libm_test::gen::{domain_logspace, edge_cases}; -use libm_test::{MathOp, op}; +use libm_test::{CheckBasis, CheckCtx, MathOp, op}; const JL_PLOT: &str = "examples/plot_file.jl"; @@ -54,30 +54,32 @@ fn plot_one_operator(out_dir: &Path, config: &mut String) where Op: MathOp + HasDomain, { + let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr); plot_one_generator( out_dir, - Op::BASE_NAME.as_str(), + &ctx, "logspace", config, - domain_logspace::get_test_cases::(), + domain_logspace::get_test_cases::(&ctx), ); plot_one_generator( out_dir, - Op::BASE_NAME.as_str(), + &ctx, "edge_cases", config, - edge_cases::get_test_cases::(), + edge_cases::get_test_cases::(&ctx), ); } /// Plot the output of a single generator. 
fn plot_one_generator( out_dir: &Path, - fn_name: &str, + ctx: &CheckCtx, gen_name: &str, config: &mut String, gen: impl Iterator, ) { + let fn_name = ctx.base_name_str; let text_file = out_dir.join(format!("input-{fn_name}-{gen_name}.txt")); let f = fs::File::create(&text_file).unwrap(); diff --git a/libm/crates/libm-test/src/gen/domain_logspace.rs b/libm/crates/libm-test/src/gen/domain_logspace.rs index e8cdb9d2b..3e69bee34 100644 --- a/libm/crates/libm-test/src/gen/domain_logspace.rs +++ b/libm/crates/libm-test/src/gen/domain_logspace.rs @@ -4,7 +4,7 @@ use libm::support::{IntTy, MinInt}; use crate::domain::HasDomain; use crate::op::OpITy; -use crate::{MathOp, logspace}; +use crate::{CheckCtx, MathOp, logspace}; /// Number of tests to run. // FIXME(ntests): replace this with a more logical algorithm @@ -30,7 +30,7 @@ const NTESTS: usize = { /// /// This allows us to get reasonably thorough coverage without wasting time on values that are /// NaN or out of range. Random tests will still cover values that are excluded here. -pub fn get_test_cases() -> impl Iterator +pub fn get_test_cases(_ctx: &CheckCtx) -> impl Iterator where Op: MathOp + HasDomain, IntTy: TryFrom, diff --git a/libm/crates/libm-test/src/gen/edge_cases.rs b/libm/crates/libm-test/src/gen/edge_cases.rs index 625e18bc7..3387f6c48 100644 --- a/libm/crates/libm-test/src/gen/edge_cases.rs +++ b/libm/crates/libm-test/src/gen/edge_cases.rs @@ -3,7 +3,7 @@ use libm::support::Float; use crate::domain::HasDomain; -use crate::{FloatExt, MathOp}; +use crate::{CheckCtx, FloatExt, MathOp}; /// Number of values near an interesting point to check. // FIXME(ntests): replace this with a more logical algorithm @@ -14,7 +14,7 @@ const AROUND: usize = 100; const MAX_CHECK_POINTS: usize = 10; /// Create a list of values around interesting points (infinities, zeroes, NaNs). 
-pub fn get_test_cases() -> impl Iterator +pub fn get_test_cases(_ctx: &CheckCtx) -> impl Iterator where Op: MathOp + HasDomain, F: Float, diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 5255dc1cf..2675ca018 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -83,21 +83,21 @@ macro_rules! mp_domain_tests { $(#[$meta])* fn [< mp_edge_case_ $fn_name >]() { type Op = libm_test::op::$fn_name::Routine; - domain_test_runner::(edge_cases::get_test_cases::()); + domain_test_runner::(edge_cases::get_test_cases::); } #[test] $(#[$meta])* fn [< mp_logspace_ $fn_name >]() { type Op = libm_test::op::$fn_name::Routine; - domain_test_runner::(domain_logspace::get_test_cases::()); + domain_test_runner::(domain_logspace::get_test_cases::); } } }; } /// Test a single routine against domaine-aware inputs. -fn domain_test_runner(cases: impl Iterator) +fn domain_test_runner(gen: impl FnOnce(&CheckCtx) -> I) where // Complicated generics... 
// The operation must take a single float argument (unary only) @@ -108,9 +108,11 @@ where Op: HasDomain, // The single float argument tuple must be able to call the `RustFn` and return `RustRet` (OpFTy,): TupleCall, Output = OpRustRet>, + I: Iterator, { let mut mp_vals = Op::new_mp(); let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr); + let cases = gen(&ctx); for input in cases { let mp_res = Op::run(&mut mp_vals, input); From ca9f238a789b1f7c5fa48d63402eacdb9fcf8b01 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 29 Dec 2024 09:04:18 +0000 Subject: [PATCH 0996/1459] Don't run `push` CI on anything other than `master` --- libm/.github/workflows/main.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 93cd541f8..83875f368 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -1,5 +1,9 @@ name: CI -on: [push, pull_request] +on: + push: + branches: + - master + pull_request: env: CARGO_TERM_VERBOSE: true From 1c224e10a7c793a755f4ad776dbf9944415a0cd7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 30 Dec 2024 01:53:29 +0000 Subject: [PATCH 0997/1459] Set the allowed FMA ULP to 0 It is currently getting the default of 1 or 2. Since this operation should always be infinite precision, no deviation is allowed. --- libm/crates/libm-test/src/precision.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index b878212fa..058d01c6e 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -24,6 +24,8 @@ const MP_DEFAULT_ULP: u32 = 1; pub fn default_ulp(ctx: &CheckCtx) -> u32 { match (&ctx.basis, ctx.fn_ident) { // Overrides that apply to either basis + // FMA is expected to be infinite precision. 
+ (_, Id::Fma | Id::Fmaf) => 0, (_, Id::J0 | Id::J0f | Id::J1 | Id::J1f) => { // Results seem very target-dependent if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 } From b7a4ccff6a9c9c7bb692aa7e4416f2266448f894 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 30 Dec 2024 05:55:41 +0000 Subject: [PATCH 0998/1459] Remove lossy casting in `logspace` Currently `logspace` does a lossy cast from `F::Int` to `usize`. This could be problematic in the rare cases that this is called with a step count exceeding what is representable in `usize`. Resolve this by instead adding bounds so the float's integer type itself can be iterated. --- libm/crates/libm-test/src/gen/domain_logspace.rs | 3 +++ libm/crates/libm-test/src/num.rs | 10 +++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/libm/crates/libm-test/src/gen/domain_logspace.rs b/libm/crates/libm-test/src/gen/domain_logspace.rs index 3e69bee34..5e37170fa 100644 --- a/libm/crates/libm-test/src/gen/domain_logspace.rs +++ b/libm/crates/libm-test/src/gen/domain_logspace.rs @@ -1,5 +1,7 @@ //! A generator that produces logarithmically spaced values within domain bounds. +use std::ops::RangeInclusive; + use libm::support::{IntTy, MinInt}; use crate::domain::HasDomain; @@ -34,6 +36,7 @@ pub fn get_test_cases(_ctx: &CheckCtx) -> impl Iterator where Op: MathOp + HasDomain, IntTy: TryFrom, + RangeInclusive>: Iterator, { let domain = Op::DOMAIN; let start = domain.range_start(); diff --git a/libm/crates/libm-test/src/num.rs b/libm/crates/libm-test/src/num.rs index 4aa7f61b0..eff2fbc1f 100644 --- a/libm/crates/libm-test/src/num.rs +++ b/libm/crates/libm-test/src/num.rs @@ -1,8 +1,9 @@ //! Helpful numeric operations. use std::cmp::min; +use std::ops::RangeInclusive; -use libm::support::{CastInto, Float}; +use libm::support::Float; use crate::{Int, MinInt}; @@ -214,7 +215,10 @@ fn as_ulp_steps(x: F) -> Option { /// to logarithmic spacing of their values. 
/// /// Note that this tends to skip negative zero, so that needs to be checked explicitly. -pub fn logspace(start: F, end: F, steps: F::Int) -> impl Iterator { +pub fn logspace(start: F, end: F, steps: F::Int) -> impl Iterator +where + RangeInclusive: Iterator, +{ assert!(!start.is_nan()); assert!(!end.is_nan()); assert!(end >= start); @@ -225,7 +229,7 @@ pub fn logspace(start: F, end: F, steps: F::Int) -> impl Iterator Date: Sun, 22 Dec 2024 10:14:56 +0000 Subject: [PATCH 0999/1459] Forward the `CI` environment variable when running in Docker We want to be able to adjust our configuration based on whether we are running in CI; propagate this so our tests can use it. --- libm/ci/run-docker.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/libm/ci/run-docker.sh b/libm/ci/run-docker.sh index a040126df..d9f29656d 100755 --- a/libm/ci/run-docker.sh +++ b/libm/ci/run-docker.sh @@ -28,6 +28,7 @@ run() { docker run \ --rm \ --user "$(id -u):$(id -g)" \ + -e CI \ -e RUSTFLAGS \ -e CARGO_HOME=/cargo \ -e CARGO_TARGET_DIR=/target \ From bfc6ddddb9cff2389dda39bb5c450829d1cf1165 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 31 Dec 2024 22:56:36 +0000 Subject: [PATCH 1000/1459] Use `rustdoc` output to create a list of public API Rather than collecting a list of file names in `libm-test/build.rs`, just use a script to parse rustdoc's JSON output.
--- libm/.github/workflows/main.yml | 4 + libm/crates/libm-test/build.rs | 34 ----- libm/crates/libm-test/src/lib.rs | 12 +- libm/crates/libm-test/tests/check_coverage.rs | 70 ++++++----- libm/etc/function-list.txt | 115 +++++++++++++++++ libm/etc/update-api-list.py | 117 ++++++++++++++++++ 6 files changed, 283 insertions(+), 69 deletions(-) create mode 100644 libm/etc/function-list.txt create mode 100755 libm/etc/update-api-list.py diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 83875f368..0f5becf73 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -96,6 +96,10 @@ jobs: run: ./ci/download-musl.sh shell: bash + - name: Verify API list + if: matrix.os == 'ubuntu-24.04' + run: python3 etc/update-api-list.py --check + # Non-linux tests just use our raw script - name: Run locally if: matrix.os != 'ubuntu-24.04' || contains(matrix.target, 'wasm') diff --git a/libm/crates/libm-test/build.rs b/libm/crates/libm-test/build.rs index f2cd298ba..134fb11ce 100644 --- a/libm/crates/libm-test/build.rs +++ b/libm/crates/libm-test/build.rs @@ -1,42 +1,8 @@ -use std::fmt::Write; -use std::fs; - #[path = "../../configure.rs"] mod configure; use configure::Config; fn main() { let cfg = Config::from_env(); - - list_all_tests(&cfg); - configure::emit_test_config(&cfg); } - -/// Create a list of all source files in an array. This can be used for making sure that -/// all functions are tested or otherwise covered in some way. -// FIXME: it would probably be better to use rustdoc JSON output to get public functions. 
-fn list_all_tests(cfg: &Config) { - let math_src = cfg.manifest_dir.join("../../src/math"); - - let mut files = fs::read_dir(math_src) - .unwrap() - .map(|f| f.unwrap().path()) - .filter(|entry| entry.is_file()) - .map(|f| f.file_stem().unwrap().to_str().unwrap().to_owned()) - .collect::>(); - files.sort(); - - let mut s = "pub const ALL_FUNCTIONS: &[&str] = &[".to_owned(); - for f in files { - if f == "mod" { - // skip mod.rs - continue; - } - write!(s, "\"{f}\",").unwrap(); - } - write!(s, "];").unwrap(); - - let outfile = cfg.out_dir.join("all_files.rs"); - fs::write(outfile, s).unwrap(); -} diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index eb457b0ae..fdba0357f 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -23,9 +23,6 @@ pub use test_traits::{CheckOutput, GenerateInput, Hex, TupleCall}; /// propagate. pub type TestResult = Result; -// List of all files present in libm's source -include!(concat!(env!("OUT_DIR"), "/all_files.rs")); - /// True if `EMULATED` is set and nonempty. Used to determine how many iterations to run. pub const fn emulated() -> bool { match option_env!("EMULATED") { @@ -34,3 +31,12 @@ pub const fn emulated() -> bool { Some(_) => true, } } + +/// True if `CI` is set and nonempty. +pub const fn ci() -> bool { + match option_env!("CI") { + Some(s) if s.is_empty() => false, + None => false, + Some(_) => true, + } +} diff --git a/libm/crates/libm-test/tests/check_coverage.rs b/libm/crates/libm-test/tests/check_coverage.rs index b7988660e..9f85d6424 100644 --- a/libm/crates/libm-test/tests/check_coverage.rs +++ b/libm/crates/libm-test/tests/check_coverage.rs @@ -1,54 +1,60 @@ //! Ensure that `for_each_function!` isn't missing any symbols. -/// Files in `src/` that do not export a testable symbol. 
-const ALLOWED_SKIPS: &[&str] = &[ - // Not a generic test function - "fenv", - // Nonpublic functions - "expo2", - "k_cos", - "k_cosf", - "k_expo2", - "k_expo2f", - "k_sin", - "k_sinf", - "k_tan", - "k_tanf", - "rem_pio2", - "rem_pio2_large", - "rem_pio2f", -]; +use std::collections::HashSet; +use std::env; +use std::path::Path; +use std::process::Command; macro_rules! callback { ( fn_name: $name:ident, - extra: [$push_to:ident], + extra: [$set:ident], ) => { - $push_to.push(stringify!($name)); + let name = stringify!($name); + let new = $set.insert(name); + assert!(new, "duplicate function `{name}` in `ALL_OPERATIONS`"); }; } #[test] fn test_for_each_function_all_included() { - let mut included = Vec::new(); - let mut missing = Vec::new(); + let all_functions: HashSet<_> = include_str!("../../../etc/function-list.txt") + .lines() + .filter(|line| !line.starts_with("#")) + .collect(); + + let mut tested = HashSet::new(); libm_macros::for_each_function! { callback: callback, - extra: [included], + extra: [tested], }; - for f in libm_test::ALL_FUNCTIONS { - if !included.contains(f) && !ALLOWED_SKIPS.contains(f) { - missing.push(f) - } - } - - if !missing.is_empty() { + let untested = all_functions.difference(&tested); + if untested.clone().next().is_some() { panic!( - "missing tests for the following: {missing:#?} \ + "missing tests for the following: {untested:#?} \ \nmake sure any new functions are entered in \ - `ALL_FUNCTIONS` (in `libm-macros`)." + `ALL_OPERATIONS` (in `libm-macros`)." ); } + assert_eq!(all_functions, tested); +} + +#[test] +fn ensure_list_updated() { + if libm_test::ci() { + // Most CI tests run in Docker where we don't have Python or Rustdoc, so it's easiest + // to just run the python file directly when it is available. 
+ eprintln!("skipping test; CI runs the python file directly"); + return; + } + + let res = Command::new("python3") + .arg(Path::new(env!("CARGO_MANIFEST_DIR")).join("../../etc/update-api-list.py")) + .arg("--check") + .status() + .unwrap(); + + assert!(res.success(), "May need to run `./etc/update-api-list.py`"); } diff --git a/libm/etc/function-list.txt b/libm/etc/function-list.txt new file mode 100644 index 000000000..51f5b221c --- /dev/null +++ b/libm/etc/function-list.txt @@ -0,0 +1,115 @@ +# autogenerated by update-api-list.py +acos +acosf +acosh +acoshf +asin +asinf +asinh +asinhf +atan +atan2 +atan2f +atanf +atanh +atanhf +cbrt +cbrtf +ceil +ceilf +copysign +copysignf +cos +cosf +cosh +coshf +erf +erfc +erfcf +erff +exp +exp10 +exp10f +exp2 +exp2f +expf +expm1 +expm1f +fabs +fabsf +fdim +fdimf +floor +floorf +fma +fmaf +fmax +fmaxf +fmin +fminf +fmod +fmodf +frexp +frexpf +hypot +hypotf +ilogb +ilogbf +j0 +j0f +j1 +j1f +jn +jnf +ldexp +ldexpf +lgamma +lgamma_r +lgammaf +lgammaf_r +log +log10 +log10f +log1p +log1pf +log2 +log2f +logf +modf +modff +nextafter +nextafterf +pow +powf +remainder +remainderf +remquo +remquof +rint +rintf +round +roundf +scalbn +scalbnf +sin +sincos +sincosf +sinf +sinh +sinhf +sqrt +sqrtf +tan +tanf +tanh +tanhf +tgamma +tgammaf +trunc +truncf +y0 +y0f +y1 +y1f +yn +ynf diff --git a/libm/etc/update-api-list.py b/libm/etc/update-api-list.py new file mode 100755 index 000000000..7284a628c --- /dev/null +++ b/libm/etc/update-api-list.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +"""Create a text file listing all public API. This can be used to ensure that all +functions are covered by our macros. 
+""" + +import json +import subprocess as sp +import sys +import difflib +from pathlib import Path +from typing import Any + +ETC_DIR = Path(__file__).parent + + +def get_rustdoc_json() -> dict[Any, Any]: + """Get rustdoc's JSON output for the `libm` crate.""" + + librs_path = ETC_DIR.joinpath("../src/lib.rs") + j = sp.check_output( + [ + "rustdoc", + librs_path, + "--edition=2021", + "--output-format=json", + "-Zunstable-options", + "-o-", + ], + text=True, + ) + j = json.loads(j) + return j + + +def list_public_functions() -> list[str]: + """Get a list of public functions from rustdoc JSON output. + + Note that this only finds functions that are reexported in `lib.rs`, this will + need to be adjusted if we need to account for functions that are defined there. + """ + names = [] + index: dict[str, dict[str, Any]] = get_rustdoc_json()["index"] + for item in index.values(): + # Find public items + if item["visibility"] != "public": + continue + + # Find only reexports + if "use" not in item["inner"].keys(): + continue + + # Locate the item that is reexported + id = item["inner"]["use"]["id"] + srcitem = index.get(str(id)) + + # External crate + if srcitem is None: + continue + + # Skip if not a function + if "function" not in srcitem["inner"].keys(): + continue + + names.append(srcitem["name"]) + + names.sort() + return names + + +def diff_and_exit(actual: str, expected: str): + """If the two strings are different, print a diff between them and then exit + with an error. + """ + if actual == expected: + print("output matches expected; success") + return + + a = [f"{line}\n" for line in actual.splitlines()] + b = [f"{line}\n" for line in expected.splitlines()] + + diff = difflib.unified_diff(a, b, "actual", "expected") + sys.stdout.writelines(diff) + print("mismatched function list") + exit(1) + + +def main(): + """By default overwrite the file. If `--check` is passed, print a diff instead and + error if the files are different. 
+ """ + match sys.argv: + case [_]: + check = False + case [_, "--check"]: + check = True + case _: + print("unrecognized arguments") + exit(1) + + names = list_public_functions() + output = "# autogenerated by update-api-list.py\n" + for name in names: + output += f"{name}\n" + + out_file = ETC_DIR.joinpath("function-list.txt") + + if check: + with open(out_file, "r") as f: + current = f.read() + diff_and_exit(current, output) + else: + with open(out_file, "w") as f: + f.write(output) + + +if __name__ == "__main__": + main() From 11f281a99b30c48cb50f47a919cbc4c20b3800ef Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 31 Dec 2024 22:57:21 +0000 Subject: [PATCH 1001/1459] Add missing functions to the macro list Now that we are using rustdoc output to locate public functions, the test is indicating a few that were missed since they don't have their own function. Update everything to now include the following routines: * `erfc` * `erfcf` * `y0` * `y0f` * `y1` * `y1f` * `yn` * `ynf` --- libm/crates/libm-macros/src/shared.rs | 16 ++++++++-------- libm/crates/libm-test/src/domain.rs | 3 +++ libm/crates/libm-test/src/gen/random.rs | 6 +++++- libm/crates/libm-test/src/mpfloat.rs | 17 ++++++++++++++++- libm/crates/libm-test/src/precision.rs | 14 ++++++-------- libm/crates/libm-test/tests/multiprecision.rs | 4 +++- libm/crates/musl-math-sys/src/lib.rs | 1 + 7 files changed, 42 insertions(+), 19 deletions(-) diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index 100bcc7ad..ef0f18801 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ b/libm/crates/libm-macros/src/shared.rs @@ -11,9 +11,9 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] None, &[ "acosf", "acoshf", "asinf", "asinhf", "atanf", "atanhf", "cbrtf", "ceilf", "cosf", - "coshf", "erff", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf", "j0f", "j1f", - "lgammaf", "log10f", "log1pf", "log2f", "logf", "rintf", "roundf", "sinf", "sinhf", - 
"sqrtf", "tanf", "tanhf", "tgammaf", "truncf", + "coshf", "erff", "erfcf", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf", + "j0f", "j1f", "lgammaf", "log10f", "log1pf", "log2f", "logf", "rintf", "roundf", + "sinf", "sinhf", "sqrtf", "tanf", "tanhf", "tgammaf", "truncf", "y0f", "y1f", ], ), ( @@ -23,9 +23,9 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] None, &[ "acos", "acosh", "asin", "asinh", "atan", "atanh", "cbrt", "ceil", "cos", "cosh", - "erf", "exp10", "exp2", "exp", "expm1", "fabs", "floor", "j0", "j1", "lgamma", "log10", - "log1p", "log2", "log", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh", - "tgamma", "trunc", + "erf", "erfc", "exp10", "exp2", "exp", "expm1", "fabs", "floor", "j0", "j1", "lgamma", + "log10", "log1p", "log2", "log", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh", + "tgamma", "trunc", "y0", "y1", ], ), ( @@ -97,14 +97,14 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F32, Signature { args: &[Ty::I32, Ty::F32], returns: &[Ty::F32] }, None, - &["jnf"], + &["jnf", "ynf"], ), ( // `(i32, f64) -> f64` FloatTy::F64, Signature { args: &[Ty::I32, Ty::F64], returns: &[Ty::F64] }, None, - &["jn"], + &["jn", "yn"], ), ( // `(f32, i32) -> f32` diff --git a/libm/crates/libm-test/src/domain.rs b/libm/crates/libm-test/src/domain.rs index 9ee8a19b9..7b5a01b96 100644 --- a/libm/crates/libm-test/src/domain.rs +++ b/libm/crates/libm-test/src/domain.rs @@ -147,6 +147,7 @@ impl_has_domain! { cos => TRIG; cosh => UNBOUNDED; erf => UNBOUNDED; + erfc => UNBOUNDED; exp => UNBOUNDED; exp10 => UNBOUNDED; exp2 => UNBOUNDED; @@ -173,6 +174,8 @@ impl_has_domain! 
{ tanh => UNBOUNDED; tgamma => GAMMA; trunc => UNBOUNDED; + y0 => UNBOUNDED; + y1 => UNBOUNDED; } /* Manual implementations, these functions don't follow `foo`->`foof` naming */ diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs index 527cd1351..4f75da07b 100644 --- a/libm/crates/libm-test/src/gen/random.rs +++ b/libm/crates/libm-test/src/gen/random.rs @@ -110,6 +110,10 @@ pub fn get_test_cases(ctx: &CheckCtx) -> impl Iterator, { - let inputs = if ctx.base_name == BaseName::Jn { &TEST_CASES_JN } else { &TEST_CASES }; + let inputs = if ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn { + &TEST_CASES_JN + } else { + &TEST_CASES + }; inputs.get_cases() } diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 507b077b3..28df916bd 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -130,7 +130,7 @@ libm_macros::for_each_function! { fabsf, ceilf, copysignf, floorf, rintf, roundf, truncf, fmod, fmodf, frexp, frexpf, ilogb, ilogbf, jn, jnf, ldexp, ldexpf, lgamma_r, lgammaf_r, modf, modff, nextafter, nextafterf, pow,powf, - remquo, remquof, scalbn, scalbnf, sincos, sincosf, + remquo, remquof, scalbn, scalbnf, sincos, sincosf, yn, ynf, ], fn_extra: match MACRO_FN_NAME { // Remap function names that are different between mpfr and libm @@ -266,6 +266,21 @@ macro_rules! 
impl_op_for_ty { ) } } + + impl MpOp for crate::op::[]::Routine { + type MpTy = (i32, MpFloat); + + fn new_mp() -> Self::MpTy { + (0, new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0 = input.0; + this.1.assign(input.1); + let ord = this.1.yn_round(this.0, Nearest); + prep_retval::(&mut this.1, ord) + } + } } }; } diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 058d01c6e..6d4561c43 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -26,11 +26,9 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { // Overrides that apply to either basis // FMA is expected to be infinite precision. (_, Id::Fma | Id::Fmaf) => 0, - (_, Id::J0 | Id::J0f | Id::J1 | Id::J1f) => { - // Results seem very target-dependent - if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 } - } - (_, Id::Jn | Id::Jnf) => 1000, + (_, Id::J0 | Id::J0f | Id::J1 | Id::J1f | Id::Y0 | Id::Y0f | Id::Y1 | Id::Y1f) => 800_000, + (_, Id::Jn | Id::Jnf | Id::Yn | Id::Ynf) => 1000, + (_, Id::Erfc | Id::Erfcf) => 4, // Overrides for musl #[cfg(x86_no_sse)] @@ -297,7 +295,7 @@ impl MaybeOverride<(i32, f32)> for SpecialCase { (Musl, _) => bessel_prec_dropoff(input, ulp, ctx), // We return +0.0, MPFR returns -0.0 - (Mpfr, BaseName::Jn) + (Mpfr, BaseName::Jn | BaseName::Yn) if input.1 == f32::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO => { XFAIL @@ -319,7 +317,7 @@ impl MaybeOverride<(i32, f64)> for SpecialCase { (Musl, _) => bessel_prec_dropoff(input, ulp, ctx), // We return +0.0, MPFR returns -0.0 - (Mpfr, BaseName::Jn) + (Mpfr, BaseName::Jn | BaseName::Yn) if input.1 == f64::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO => { XFAIL @@ -336,7 +334,7 @@ fn bessel_prec_dropoff( ulp: &mut u32, ctx: &CheckCtx, ) -> Option { - if ctx.base_name == BaseName::Jn { + if ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn { if input.0 > 4000 { 
return XFAIL; } else if input.0 > 2000 { diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 2675ca018..4821f7446 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -48,7 +48,7 @@ libm_macros::for_each_function! { attributes: [ // Also an assertion failure on i686: at `MPFR_ASSERTN (! mpfr_erangeflag_p ())` #[ignore = "large values are infeasible in MPFR"] - [jn, jnf], + [jn, jnf, yn, ynf], ], skip: [ // FIXME: MPFR tests needed @@ -157,6 +157,8 @@ libm_macros::for_each_function! { remquof, scalbn, scalbnf, + yn, + ynf, // FIXME: MPFR tests needed frexp, diff --git a/libm/crates/musl-math-sys/src/lib.rs b/libm/crates/musl-math-sys/src/lib.rs index db352fab8..07277ef3e 100644 --- a/libm/crates/musl-math-sys/src/lib.rs +++ b/libm/crates/musl-math-sys/src/lib.rs @@ -282,5 +282,6 @@ functions! { musl_y0f: y0f(a: f32) -> f32; musl_y1: y1(a: f64) -> f64; musl_y1f: y1f(a: f32) -> f32; + musl_yn: yn(a: c_int, b: f64) -> f64; musl_ynf: ynf(a: c_int, b: f32) -> f32; } From 4b66b6eca9674dcc3ad43a08b27be9df3e552e95 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 2 Jan 2025 21:23:28 +0000 Subject: [PATCH 1002/1459] macros: Always emit `f16_enabled` and `f128_enabled` attributes Once we start adding `f16` and `f128` routines, we will need to have this cfg for almost all uses of `for_each_function`. Rather than needing to specify this each time, always emit `#[cfg(f16_enabled)]` or `#[cfg(f128_enabled)]` for each function that uses `f16` or `f128`, respectively.
--- libm/crates/libm-macros/src/lib.rs | 37 +++++++++++++------ libm/crates/libm-macros/tests/basic.rs | 10 ++++- libm/crates/libm-test/benches/random.rs | 4 ++ libm/crates/libm-test/src/lib.rs | 2 + libm/crates/libm-test/src/mpfloat.rs | 6 +++ libm/crates/libm-test/src/op.rs | 3 ++ libm/crates/libm-test/tests/check_coverage.rs | 1 + .../libm-test/tests/compare_built_musl.rs | 4 +- libm/crates/libm-test/tests/multiprecision.rs | 10 ++--- 9 files changed, 57 insertions(+), 20 deletions(-) diff --git a/libm/crates/libm-macros/src/lib.rs b/libm/crates/libm-macros/src/lib.rs index 916b539ed..3cee5385b 100644 --- a/libm/crates/libm-macros/src/lib.rs +++ b/libm/crates/libm-macros/src/lib.rs @@ -79,7 +79,7 @@ pub fn base_name_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> p /// // The Rust version's return type (e.g. `(f32, f32)`) /// RustRet: $RustRet:ty, /// // Attributes for the current function, if any -/// attrs: [$($meta:meta)*] +/// attrs: [$($attr:meta),*], /// // Extra tokens passed directly (if any) /// extra: [$extra:ident], /// // Extra function-tokens passed directly (if any) @@ -97,6 +97,9 @@ pub fn base_name_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> p /// skip: [sin, cos], /// // Attributes passed as `attrs` for specific functions. For example, here the invocation /// // with `sinf` and that with `cosf` will both get `meta1` and `meta2`, but no others will. +/// // +/// // Note that `f16_enabled` and `f128_enabled` will always get emitted regardless of whether +/// // or not this is specified. /// attributes: [ /// #[meta1] /// #[meta2] @@ -255,16 +258,28 @@ fn expand(input: StructuredInput, fn_list: &[&MathOpInfo]) -> syn::Result { - let meta = attrs - .iter() - .filter(|map| map.names.contains(&fn_name)) - .flat_map(|map| &map.meta); - quote! 
{ attrs: [ #( #meta )* ] } - } - None => pm2::TokenStream::new(), - }; + let mut meta_fields = Vec::new(); + if let Some(attrs) = &input.attributes { + let meta_iter = attrs + .iter() + .filter(|map| map.names.contains(&fn_name)) + .flat_map(|map| &map.meta) + .map(|v| v.into_token_stream()); + + meta_fields.extend(meta_iter); + } + + // Always emit f16 and f128 meta so this doesn't need to be repeated everywhere + if func.rust_sig.args.contains(&Ty::F16) || func.rust_sig.returns.contains(&Ty::F16) { + let ts = quote! { cfg(f16_enabled) }; + meta_fields.push(ts); + } + if func.rust_sig.args.contains(&Ty::F128) || func.rust_sig.returns.contains(&Ty::F128) { + let ts = quote! { cfg(f128_enabled) }; + meta_fields.push(ts); + } + + let meta_field = quote! { attrs: [ #( #meta_fields ),* ], }; // Prepare extra in an `extra: ...` field, running the replacer let extra_field = match input.extra.clone() { diff --git a/libm/crates/libm-macros/tests/basic.rs b/libm/crates/libm-macros/tests/basic.rs index 2eaba04f4..0aa417f13 100644 --- a/libm/crates/libm-macros/tests/basic.rs +++ b/libm/crates/libm-macros/tests/basic.rs @@ -1,3 +1,5 @@ +#![feature(f16)] +#![feature(f128)] // `STATUS_DLL_NOT_FOUND` on i686 MinGW, not worth looking into. #![cfg(not(all(target_arch = "x86", target_os = "windows", target_env = "gnu")))] @@ -11,11 +13,11 @@ macro_rules! basic { RustFn: $RustFn:ty, RustArgs: $RustArgs:ty, RustRet: $RustRet:ty, - attrs: [$($meta:meta)*] + attrs: [$($attr:meta),*], extra: [$($extra_tt:tt)*], fn_extra: $fn_extra:expr, ) => { - $(#[$meta])* + $(#[$attr])* mod $fn_name { #[allow(unused)] type FTy= $FTy; @@ -60,7 +62,9 @@ mod test_basic { macro_rules! basic_no_extra { ( fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], ) => { + $(#[$attr])* mod $fn_name {} }; } @@ -85,7 +89,9 @@ macro_rules! 
specified_types { fn_name: $fn_name:ident, RustFn: $RustFn:ty, RustArgs: $RustArgs:ty, + attrs: [$($attr:meta),*], ) => { + $(#[$attr])* mod $fn_name { #[allow(unused)] type RustFnTy = $RustFn; diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index b9c39334c..06997cd36 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -18,9 +18,11 @@ struct MuslExtra { macro_rules! musl_rand_benches { ( fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], fn_extra: $skip_on_i586:expr, ) => { paste::paste! { + $(#[$attr])* fn [< musl_bench_ $fn_name >](c: &mut Criterion) { type Op = libm_test::op::$fn_name::Routine; @@ -113,9 +115,11 @@ libm_macros::for_each_function! { macro_rules! run_callback { ( fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], extra: [$criterion:ident], ) => { paste::paste! { + $(#[$attr])* [< musl_bench_ $fn_name >](&mut $criterion) } }; diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index fdba0357f..97907b2a1 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -1,3 +1,5 @@ +#![cfg_attr(f16_enabled, feature(f16))] +#![cfg_attr(f128_enabled, feature(f128))] #![allow(clippy::unusual_byte_groupings)] // sometimes we group by sign_exp_sig pub mod domain; diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 28df916bd..edb4cb962 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -50,9 +50,11 @@ macro_rules! impl_mp_op { ( fn_name: $fn_name:ident, RustFn: fn($_fty:ty,) -> $_ret:ty, + attrs: [$($attr:meta),*], fn_extra: $fn_name_normalized:expr, ) => { paste::paste! { + $(#[$attr])* impl MpOp for crate::op::$fn_name::Routine { type MpTy = MpFloat; @@ -72,9 +74,11 @@ macro_rules! 
impl_mp_op { ( fn_name: $fn_name:ident, RustFn: fn($_fty:ty, $_fty2:ty,) -> $_ret:ty, + attrs: [$($attr:meta),*], fn_extra: $fn_name_normalized:expr, ) => { paste::paste! { + $(#[$attr])* impl MpOp for crate::op::$fn_name::Routine { type MpTy = (MpFloat, MpFloat); @@ -95,9 +99,11 @@ macro_rules! impl_mp_op { ( fn_name: $fn_name:ident, RustFn: fn($_fty:ty, $_fty2:ty, $_fty3:ty,) -> $_ret:ty, + attrs: [$($attr:meta),*], fn_extra: $fn_name_normalized:expr, ) => { paste::paste! { + $(#[$attr])* impl MpOp for crate::op::$fn_name::Routine { type MpTy = (MpFloat, MpFloat, MpFloat); diff --git a/libm/crates/libm-test/src/op.rs b/libm/crates/libm-test/src/op.rs index e58c28903..ee61eb0b8 100644 --- a/libm/crates/libm-test/src/op.rs +++ b/libm/crates/libm-test/src/op.rs @@ -112,8 +112,11 @@ macro_rules! do_thing { RustFn: $RustFn:ty, RustArgs: $RustArgs:ty, RustRet: $RustRet:ty, + attrs: [$($attr:meta),*], + ) => { paste::paste! { + $(#[$attr])* pub mod $fn_name { use super::*; pub struct Routine; diff --git a/libm/crates/libm-test/tests/check_coverage.rs b/libm/crates/libm-test/tests/check_coverage.rs index 9f85d6424..c23298686 100644 --- a/libm/crates/libm-test/tests/check_coverage.rs +++ b/libm/crates/libm-test/tests/check_coverage.rs @@ -8,6 +8,7 @@ use std::process::Command; macro_rules! callback { ( fn_name: $name:ident, + attrs: [$($attr:meta),*], extra: [$set:ident], ) => { let name = stringify!($name); diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 0022ee03c..71f080ab1 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -15,11 +15,11 @@ use libm_test::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleC macro_rules! musl_rand_tests { ( fn_name: $fn_name:ident, - attrs: [$($meta:meta)*] + attrs: [$($attr:meta),*], ) => { paste::paste! 
{ #[test] - $(#[$meta])* + $(#[$attr])* fn [< musl_random_ $fn_name >]() { test_one::(musl_math_sys::$fn_name); } diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 4821f7446..71ff2ff96 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -13,11 +13,11 @@ use libm_test::{ macro_rules! mp_rand_tests { ( fn_name: $fn_name:ident, - attrs: [$($meta:meta)*] + attrs: [$($attr:meta),*], ) => { paste::paste! { #[test] - $(#[$meta])* + $(#[$attr])* fn [< mp_random_ $fn_name >]() { test_one_random::(); } @@ -76,18 +76,18 @@ libm_macros::for_each_function! { macro_rules! mp_domain_tests { ( fn_name: $fn_name:ident, - attrs: [$($meta:meta)*] + attrs: [$($attr:meta),*], ) => { paste::paste! { #[test] - $(#[$meta])* + $(#[$attr])* fn [< mp_edge_case_ $fn_name >]() { type Op = libm_test::op::$fn_name::Routine; domain_test_runner::(edge_cases::get_test_cases::); } #[test] - $(#[$meta])* + $(#[$attr])* fn [< mp_logspace_ $fn_name >]() { type Op = libm_test::op::$fn_name::Routine; domain_test_runner::(domain_logspace::get_test_cases::); From 7491ed579acd500eece49a6657bf304b69526baf Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 2 Jan 2025 10:19:54 +0000 Subject: [PATCH 1003/1459] Rename generic `abs` to `fabs` Using the same name as the routines themselves means this will correctly get picked up by the CI job looking for exhaustive tests. 
--- libm/src/math/fabs.rs | 2 +- libm/src/math/fabsf.rs | 2 +- libm/src/math/generic/{abs.rs => fabs.rs} | 2 +- libm/src/math/generic/mod.rs | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) rename libm/src/math/generic/{abs.rs => fabs.rs} (63%) diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs index 2163637e7..6687fdcc3 100644 --- a/libm/src/math/fabs.rs +++ b/libm/src/math/fabs.rs @@ -9,7 +9,7 @@ pub fn fabs(x: f64) -> f64 { args: x, } - super::generic::abs(x) + super::generic::fabs(x) } #[cfg(test)] diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs index ac77c9201..99bb5b5f1 100644 --- a/libm/src/math/fabsf.rs +++ b/libm/src/math/fabsf.rs @@ -9,7 +9,7 @@ pub fn fabsf(x: f32) -> f32 { args: x, } - super::generic::abs(x) + super::generic::fabs(x) } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 diff --git a/libm/src/math/generic/abs.rs b/libm/src/math/generic/fabs.rs similarity index 63% rename from libm/src/math/generic/abs.rs rename to libm/src/math/generic/fabs.rs index 2c9a43c12..f2c7f0f46 100644 --- a/libm/src/math/generic/abs.rs +++ b/libm/src/math/generic/fabs.rs @@ -1,6 +1,6 @@ use super::super::Float; /// Absolute value. -pub fn abs(x: F) -> F { +pub fn fabs(x: F) -> F { x.abs() } diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index 1ddd08f0e..08524b685 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -1,5 +1,5 @@ -mod abs; mod copysign; +mod fabs; -pub use abs::abs; pub use copysign::copysign; +pub use fabs::fabs; From 85604d9f3f569474249c1151cc7f2b5a87396530 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 3 Jan 2025 02:53:35 +0000 Subject: [PATCH 1004/1459] Shorten prefixes for float constants Change `SIGNIFICAND_*` to `SIG_*` and `EXPONENT_*` to `EXP_*`. This makes things more consistent with `libm`, and terseness is convenient here since there isn't anything to confuse. 
--- src/float/add.rs | 8 +++--- src/float/cmp.rs | 4 +-- src/float/conv.rs | 46 +++++++++++++++---------------- src/float/div.rs | 10 +++---- src/float/extend.rs | 14 +++++----- src/float/mod.rs | 53 +++++++++++++++++------------------- src/float/mul.rs | 14 +++++----- src/float/trunc.rs | 40 +++++++++++++-------------- testcrate/src/lib.rs | 34 +++++++++++------------ testcrate/tests/float_pow.rs | 4 +-- 10 files changed, 111 insertions(+), 116 deletions(-) diff --git a/src/float/add.rs b/src/float/add.rs index ecb96264a..743cc441b 100644 --- a/src/float/add.rs +++ b/src/float/add.rs @@ -13,14 +13,14 @@ where let zero = F::Int::ZERO; let bits = F::BITS.cast(); - let significand_bits = F::SIGNIFICAND_BITS; - let max_exponent = F::EXPONENT_MAX; + let significand_bits = F::SIG_BITS; + let max_exponent = F::EXP_MAX; let implicit_bit = F::IMPLICIT_BIT; - let significand_mask = F::SIGNIFICAND_MASK; + let significand_mask = F::SIG_MASK; let sign_bit = F::SIGN_MASK as F::Int; let abs_mask = sign_bit - one; - let exponent_mask = F::EXPONENT_MASK; + let exponent_mask = F::EXP_MASK; let inf_rep = exponent_mask; let quiet_bit = implicit_bit >> 1; let qnan_rep = exponent_mask | quiet_bit; diff --git a/src/float/cmp.rs b/src/float/cmp.rs index 8b97a0b5c..b9b4d0114 100644 --- a/src/float/cmp.rs +++ b/src/float/cmp.rs @@ -38,7 +38,7 @@ fn cmp(a: F, b: F) -> Result { let sign_bit = F::SIGN_MASK as F::Int; let abs_mask = sign_bit - one; - let exponent_mask = F::EXPONENT_MASK; + let exponent_mask = F::EXP_MASK; let inf_rep = exponent_mask; let a_rep = a.to_bits(); @@ -87,7 +87,7 @@ fn unord(a: F, b: F) -> bool { let sign_bit = F::SIGN_MASK as F::Int; let abs_mask = sign_bit - one; - let exponent_mask = F::EXPONENT_MASK; + let exponent_mask = F::EXP_MASK; let inf_rep = exponent_mask; let a_rep = a.to_bits(); diff --git a/src/float/conv.rs b/src/float/conv.rs index 4aea67c91..40d304719 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -32,7 +32,7 @@ mod int_to_float { /// 
Usually 1 is subtracted from this function's result, so that a mantissa with the implicit /// bit set can be added back later. fn exp>>(n: u32) -> F::Int { - F::Int::cast_from(F::EXPONENT_BIAS - 1 + I::BITS - n) + F::Int::cast_from(F::EXP_BIAS - 1 + I::BITS - n) } /// Adjust a mantissa with dropped bits to perform correct rounding. @@ -54,17 +54,17 @@ mod int_to_float { /// value to cancel it out. fn repr(e: F::Int, m: F::Int) -> F::Int { // + rather than | so the mantissa can overflow into the exponent - (e << F::SIGNIFICAND_BITS) + m + (e << F::SIG_BITS) + m } /// Shift distance from a left-aligned integer to a smaller float. fn shift_f_lt_i() -> u32 { - (I::BITS - F::BITS) + F::EXPONENT_BITS + (I::BITS - F::BITS) + F::EXP_BITS } /// Shift distance from an integer with `n` leading zeros to a smaller float. fn shift_f_gt_i(n: u32) -> u32 { - F::SIGNIFICAND_BITS - I::BITS + 1 + n + F::SIG_BITS - I::BITS + 1 + n } /// Perform a signed operation as unsigned, then add the sign back. @@ -85,9 +85,9 @@ mod int_to_float { } let n = i.leading_zeros(); // Mantissa with implicit bit set (significant bits) - let m_base = (i << n) >> f32::EXPONENT_BITS; + let m_base = (i << n) >> f32::EXP_BITS; // Bits that will be dropped (insignificant bits) - let adj = (i << n) << (f32::SIGNIFICAND_BITS + 1); + let adj = (i << n) << (f32::SIG_BITS + 1); let m = m_adj::(m_base, adj); let e = exp::(n) - 1; repr::(e, m) @@ -116,7 +116,7 @@ mod int_to_float { let m = (i as u64) << (shift_f_gt_i::(n) - 64); let e = exp::(n) as u64 - 1; // High 64 bits of f128 representation. - let h = (e << (f128::SIGNIFICAND_BITS - 64)) + m; + let h = (e << (f128::SIG_BITS - 64)) + m; // Shift back to the high bits, the rest of the mantissa will always be 0. (h as u128) << 64 @@ -128,8 +128,8 @@ mod int_to_float { // Mantissa with implicit bit set let m_base: u32 = (i_m >> shift_f_lt_i::()) as u32; // The entire lower half of `i` will be truncated (masked portion), plus the - // next `EXPONENT_BITS` bits. 
- let adj = (i_m >> f32::EXPONENT_BITS | i_m & 0xFFFF) as u32; + // next `EXP_BITS` bits. + let adj = (i_m >> f32::EXP_BITS | i_m & 0xFFFF) as u32; let m = m_adj::(m_base, adj); let e = if i == 0 { 0 } else { exp::(n) - 1 }; repr::(e, m) @@ -141,8 +141,8 @@ mod int_to_float { } let n = i.leading_zeros(); // Mantissa with implicit bit set - let m_base = (i << n) >> f64::EXPONENT_BITS; - let adj = (i << n) << (f64::SIGNIFICAND_BITS + 1); + let m_base = (i << n) >> f64::EXP_BITS; + let adj = (i << n) << (f64::SIG_BITS + 1); let m = m_adj::(m_base, adj); let e = exp::(n) - 1; repr::(e, m) @@ -167,7 +167,7 @@ mod int_to_float { // Within the upper `F::BITS`, everything except for the signifcand // gets truncated - let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIGNIFICAND_BITS - 1)).cast(); + let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIG_BITS - 1)).cast(); // The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just // check if it is nonzero. @@ -186,8 +186,8 @@ mod int_to_float { // Mantissa with implicit bit set let m_base: u64 = (i_m >> shift_f_lt_i::()) as u64; // The entire lower half of `i` will be truncated (masked portion), plus the - // next `EXPONENT_BITS` bits. - let adj = (i_m >> f64::EXPONENT_BITS | i_m & 0xFFFF_FFFF) as u64; + // next `EXP_BITS` bits. 
+ let adj = (i_m >> f64::EXP_BITS | i_m & 0xFFFF_FFFF) as u64; let m = m_adj::(m_base, adj); let e = if i == 0 { 0 } else { exp::(n) - 1 }; repr::(e, m) @@ -200,8 +200,8 @@ mod int_to_float { } let n = i.leading_zeros(); // Mantissa with implicit bit set - let m_base = (i << n) >> f128::EXPONENT_BITS; - let adj = (i << n) << (f128::SIGNIFICAND_BITS + 1); + let m_base = (i << n) >> f128::EXP_BITS; + let adj = (i << n) << (f128::SIG_BITS + 1); let m = m_adj::(m_base, adj); let e = exp::(n) - 1; repr::(e, m) @@ -362,29 +362,29 @@ where F::Int: CastFrom, u32: CastFrom, { - let int_max_exp = F::EXPONENT_BIAS + I::MAX.ilog2() + 1; - let foobar = F::EXPONENT_BIAS + I::UnsignedInt::BITS - 1; + let int_max_exp = F::EXP_BIAS + I::MAX.ilog2() + 1; + let foobar = F::EXP_BIAS + I::UnsignedInt::BITS - 1; if fbits < F::ONE.to_bits() { // < 0 gets rounded to 0 I::ZERO - } else if fbits < F::Int::cast_from(int_max_exp) << F::SIGNIFICAND_BITS { + } else if fbits < F::Int::cast_from(int_max_exp) << F::SIG_BITS { // >= 1, < integer max let m_base = if I::UnsignedInt::BITS >= F::Int::BITS { - I::UnsignedInt::cast_from(fbits) << (I::BITS - F::SIGNIFICAND_BITS - 1) + I::UnsignedInt::cast_from(fbits) << (I::BITS - F::SIG_BITS - 1) } else { - I::UnsignedInt::cast_from(fbits >> (F::SIGNIFICAND_BITS - I::BITS + 1)) + I::UnsignedInt::cast_from(fbits >> (F::SIG_BITS - I::BITS + 1)) }; // Set the implicit 1-bit. let m: I::UnsignedInt = I::UnsignedInt::ONE << (I::BITS - 1) | m_base; // Shift based on the exponent and bias. - let s: u32 = (foobar) - u32::cast_from(fbits >> F::SIGNIFICAND_BITS); + let s: u32 = (foobar) - u32::cast_from(fbits >> F::SIG_BITS); let unsigned = m >> s; map_inbounds(I::from_unsigned(unsigned)) - } else if fbits <= F::EXPONENT_MASK { + } else if fbits <= F::EXP_MASK { // >= max (incl. 
inf) out_of_bounds() } else { diff --git a/src/float/div.rs b/src/float/div.rs index 4b3f97c35..c211fabad 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -105,16 +105,16 @@ where let hw = F::BITS / 2; let lo_mask = F::Int::MAX >> hw; - let significand_bits = F::SIGNIFICAND_BITS; + let significand_bits = F::SIG_BITS; // Saturated exponent, representing infinity - let exponent_sat: F::Int = F::EXPONENT_MAX.cast(); + let exponent_sat: F::Int = F::EXP_MAX.cast(); - let exponent_bias = F::EXPONENT_BIAS; + let exponent_bias = F::EXP_BIAS; let implicit_bit = F::IMPLICIT_BIT; - let significand_mask = F::SIGNIFICAND_MASK; + let significand_mask = F::SIG_MASK; let sign_bit = F::SIGN_MASK; let abs_mask = sign_bit - one; - let exponent_mask = F::EXPONENT_MASK; + let exponent_mask = F::EXP_MASK; let inf_rep = exponent_mask; let quiet_bit = implicit_bit >> 1; let qnan_rep = exponent_mask | quiet_bit; diff --git a/src/float/extend.rs b/src/float/extend.rs index 2ec79070c..a5d69864b 100644 --- a/src/float/extend.rs +++ b/src/float/extend.rs @@ -15,19 +15,19 @@ where let src_zero = F::Int::ZERO; let src_one = F::Int::ONE; let src_bits = F::BITS; - let src_sign_bits = F::SIGNIFICAND_BITS; - let src_exp_bias = F::EXPONENT_BIAS; + let src_sign_bits = F::SIG_BITS; + let src_exp_bias = F::EXP_BIAS; let src_min_normal = F::IMPLICIT_BIT; - let src_infinity = F::EXPONENT_MASK; + let src_infinity = F::EXP_MASK; let src_sign_mask = F::SIGN_MASK as F::Int; let src_abs_mask = src_sign_mask - src_one; - let src_qnan = F::SIGNIFICAND_MASK; + let src_qnan = F::SIG_MASK; let src_nan_code = src_qnan - src_one; let dst_bits = R::BITS; - let dst_sign_bits = R::SIGNIFICAND_BITS; - let dst_inf_exp = R::EXPONENT_MAX; - let dst_exp_bias = R::EXPONENT_BIAS; + let dst_sign_bits = R::SIG_BITS; + let dst_inf_exp = R::EXP_MAX; + let dst_exp_bias = R::EXP_BIAS; let dst_min_normal = R::IMPLICIT_BIT; let sign_bits_delta = dst_sign_bits - src_sign_bits; diff --git a/src/float/mod.rs b/src/float/mod.rs 
index af8398644..9ba447f97 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -42,32 +42,32 @@ pub(crate) trait Float: const ZERO: Self; const ONE: Self; - /// The bitwidth of the float type + /// The bitwidth of the float type. const BITS: u32; - /// The bitwidth of the significand - const SIGNIFICAND_BITS: u32; + /// The bitwidth of the significand. + const SIG_BITS: u32; - /// The bitwidth of the exponent - const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1; + /// The bitwidth of the exponent. + const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1; /// The saturated value of the exponent (infinite representation), in the rightmost postiion. - const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1; + const EXP_MAX: u32 = (1 << Self::EXP_BITS) - 1; - /// The exponent bias value - const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1; + /// The exponent bias value. + const EXP_BIAS: u32 = Self::EXP_MAX >> 1; - /// A mask for the sign bit + /// A mask for the sign bit. const SIGN_MASK: Self::Int; - /// A mask for the significand - const SIGNIFICAND_MASK: Self::Int; + /// A mask for the significand. + const SIG_MASK: Self::Int; - /// The implicit bit of the float format + /// The implicit bit of the float format. const IMPLICIT_BIT: Self::Int; - /// A mask for the exponent - const EXPONENT_MASK: Self::Int; + /// A mask for the exponent. + const EXP_MASK: Self::Int; /// Returns `self` transmuted to `Self::Int` fn to_bits(self) -> Self::Int; @@ -122,12 +122,12 @@ macro_rules! 
float_impl { const ONE: Self = 1.0; const BITS: u32 = $bits; - const SIGNIFICAND_BITS: u32 = $significand_bits; + const SIG_BITS: u32 = $significand_bits; const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); - const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1; - const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS; - const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK); + const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1; + const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS; + const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK); fn to_bits(self) -> Self::Int { self.to_bits() @@ -142,8 +142,7 @@ macro_rules! float_impl { // necessary builtin (__unordtf2) to test whether `f128` is NaN. // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin // x is NaN if all the bits of the exponent are set and the significand is non-0 - x.to_bits() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK - && x.to_bits() & $ty::SIGNIFICAND_MASK != 0 + x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0 } #[cfg(not(feature = "mangled-names"))] fn is_nan(x: $ty) -> bool { @@ -159,10 +158,10 @@ macro_rules! float_impl { self.is_sign_negative() } fn exp(self) -> Self::ExpInt { - ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt + ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt } fn frac(self) -> Self::Int { - self.to_bits() & Self::SIGNIFICAND_MASK + self.to_bits() & Self::SIG_MASK } fn imp_frac(self) -> Self::Int { self.frac() | Self::IMPLICIT_BIT @@ -173,21 +172,19 @@ macro_rules! 
float_impl { fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self { Self::from_bits( ((negative as Self::Int) << (Self::BITS - 1)) - | ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK) - | (significand & Self::SIGNIFICAND_MASK), + | ((exponent << Self::SIG_BITS) & Self::EXP_MASK) + | (significand & Self::SIG_MASK), ) } fn normalize(significand: Self::Int) -> (i32, Self::Int) { - let shift = significand - .leading_zeros() - .wrapping_sub(Self::EXPONENT_BITS); + let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); ( 1i32.wrapping_sub(shift as i32), significand << shift as Self::Int, ) } fn is_subnormal(self) -> bool { - (self.to_bits() & Self::EXPONENT_MASK) == Self::Int::ZERO + (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO } } }; diff --git a/src/float/mul.rs b/src/float/mul.rs index 77a271d65..62895293a 100644 --- a/src/float/mul.rs +++ b/src/float/mul.rs @@ -13,20 +13,20 @@ where let zero = F::Int::ZERO; let bits = F::BITS; - let significand_bits = F::SIGNIFICAND_BITS; - let max_exponent = F::EXPONENT_MAX; + let significand_bits = F::SIG_BITS; + let max_exponent = F::EXP_MAX; - let exponent_bias = F::EXPONENT_BIAS; + let exponent_bias = F::EXP_BIAS; let implicit_bit = F::IMPLICIT_BIT; - let significand_mask = F::SIGNIFICAND_MASK; - let sign_bit = F::SIGN_MASK as F::Int; + let significand_mask = F::SIG_MASK; + let sign_bit = F::SIGN_MASK; let abs_mask = sign_bit - one; - let exponent_mask = F::EXPONENT_MASK; + let exponent_mask = F::EXP_MASK; let inf_rep = exponent_mask; let quiet_bit = implicit_bit >> 1; let qnan_rep = exponent_mask | quiet_bit; - let exponent_bits = F::EXPONENT_BITS; + let exponent_bits = F::EXP_BITS; let a_rep = a.to_bits(); let b_rep = b.to_bits(); diff --git a/src/float/trunc.rs b/src/float/trunc.rs index 6fe44f50b..c95e9c198 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -14,33 +14,33 @@ where let src_zero = F::Int::ZERO; let src_one = F::Int::ONE; let src_bits = 
F::BITS; - let src_exp_bias = F::EXPONENT_BIAS; + let src_exp_bias = F::EXP_BIAS; let src_min_normal = F::IMPLICIT_BIT; - let src_significand_mask = F::SIGNIFICAND_MASK; - let src_infinity = F::EXPONENT_MASK; + let src_significand_mask = F::SIG_MASK; + let src_infinity = F::EXP_MASK; let src_sign_mask = F::SIGN_MASK; let src_abs_mask = src_sign_mask - src_one; - let round_mask = (src_one << (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)) - src_one; - let halfway = src_one << (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS - 1); - let src_qnan = src_one << (F::SIGNIFICAND_BITS - 1); + let round_mask = (src_one << (F::SIG_BITS - R::SIG_BITS)) - src_one; + let halfway = src_one << (F::SIG_BITS - R::SIG_BITS - 1); + let src_qnan = src_one << (F::SIG_BITS - 1); let src_nan_code = src_qnan - src_one; let dst_zero = R::Int::ZERO; let dst_one = R::Int::ONE; let dst_bits = R::BITS; - let dst_inf_exp = R::EXPONENT_MAX; - let dst_exp_bias = R::EXPONENT_BIAS; + let dst_inf_exp = R::EXP_MAX; + let dst_exp_bias = R::EXP_BIAS; let underflow_exponent: F::Int = (src_exp_bias + 1 - dst_exp_bias).cast(); let overflow_exponent: F::Int = (src_exp_bias + dst_inf_exp - dst_exp_bias).cast(); - let underflow: F::Int = underflow_exponent << F::SIGNIFICAND_BITS; - let overflow: F::Int = overflow_exponent << F::SIGNIFICAND_BITS; + let underflow: F::Int = underflow_exponent << F::SIG_BITS; + let overflow: F::Int = overflow_exponent << F::SIG_BITS; - let dst_qnan = R::Int::ONE << (R::SIGNIFICAND_BITS - 1); + let dst_qnan = R::Int::ONE << (R::SIG_BITS - 1); let dst_nan_code = dst_qnan - dst_one; - let sign_bits_delta = F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS; + let sign_bits_delta = F::SIG_BITS - R::SIG_BITS; // Break a into a sign and representation of the absolute value. let a_abs = a.to_bits() & src_abs_mask; let sign = a.to_bits() & src_sign_mask; @@ -53,7 +53,7 @@ where abs_result = (a_abs >> sign_bits_delta).cast(); // Cast before shifting to prevent overflow. 
let bias_diff: R::Int = src_exp_bias.wrapping_sub(dst_exp_bias).cast(); - let tmp = bias_diff << R::SIGNIFICAND_BITS; + let tmp = bias_diff << R::SIG_BITS; abs_result = abs_result.wrapping_sub(tmp); let round_bits = a_abs & round_mask; @@ -70,26 +70,25 @@ where // bit and inserting the (truncated) trailing NaN field. // Cast before shifting to prevent overflow. let dst_inf_exp: R::Int = dst_inf_exp.cast(); - abs_result = dst_inf_exp << R::SIGNIFICAND_BITS; + abs_result = dst_inf_exp << R::SIG_BITS; abs_result |= dst_qnan; - abs_result |= dst_nan_code - & ((a_abs & src_nan_code) >> (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)).cast(); + abs_result |= dst_nan_code & ((a_abs & src_nan_code) >> (F::SIG_BITS - R::SIG_BITS)).cast(); } else if a_abs >= overflow { // a overflows to infinity. // Cast before shifting to prevent overflow. let dst_inf_exp: R::Int = dst_inf_exp.cast(); - abs_result = dst_inf_exp << R::SIGNIFICAND_BITS; + abs_result = dst_inf_exp << R::SIG_BITS; } else { // a underflows on conversion to the destination type or is an exact // zero. The result may be a denormal or zero. Extract the exponent // to get the shift amount for the denormalization. - let a_exp: u32 = (a_abs >> F::SIGNIFICAND_BITS).cast(); + let a_exp: u32 = (a_abs >> F::SIG_BITS).cast(); let shift = src_exp_bias - dst_exp_bias - a_exp + 1; let significand = (a.to_bits() & src_significand_mask) | src_min_normal; // Right shift by the denormalization amount with sticky. 
- if shift > F::SIGNIFICAND_BITS { + if shift > F::SIG_BITS { abs_result = dst_zero; } else { let sticky = if (significand << (src_bits - shift)) != src_zero { @@ -98,8 +97,7 @@ where src_zero }; let denormalized_significand: F::Int = significand >> shift | sticky; - abs_result = - (denormalized_significand >> (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)).cast(); + abs_result = (denormalized_significand >> (F::SIG_BITS - R::SIG_BITS)).cast(); let round_bits = denormalized_significand & round_mask; // Round to nearest if round_bits > halfway { diff --git a/testcrate/src/lib.rs b/testcrate/src/lib.rs index 4154e0fb3..894c2782a 100644 --- a/testcrate/src/lib.rs +++ b/testcrate/src/lib.rs @@ -178,18 +178,18 @@ fn fuzz_float_step(rng: &mut Xoshiro128StarStar, f: &mut F) { let sign = (rng32 & 1) != 0; // exponent fuzzing. Only 4 bits for the selector needed. - let ones = (F::Int::ONE << F::EXPONENT_BITS) - F::Int::ONE; - let r0 = (rng32 >> 1) % F::EXPONENT_BITS; - let r1 = (rng32 >> 5) % F::EXPONENT_BITS; + let ones = (F::Int::ONE << F::EXP_BITS) - F::Int::ONE; + let r0 = (rng32 >> 1) % F::EXP_BITS; + let r1 = (rng32 >> 5) % F::EXP_BITS; // custom rotate shift. Note that `F::Int` is unsigned, so we can shift right without smearing // the sign bit. 
let mask = if r1 == 0 { ones.wrapping_shr(r0) } else { let tmp = ones.wrapping_shr(r0); - (tmp.wrapping_shl(r1) | tmp.wrapping_shr(F::EXPONENT_BITS - r1)) & ones + (tmp.wrapping_shl(r1) | tmp.wrapping_shr(F::EXP_BITS - r1)) & ones }; - let mut exp = (f.to_bits() & F::EXPONENT_MASK) >> F::SIGNIFICAND_BITS; + let mut exp = (f.to_bits() & F::EXP_MASK) >> F::SIG_BITS; match (rng32 >> 9) % 4 { 0 => exp |= mask, 1 => exp &= mask, @@ -197,9 +197,9 @@ fn fuzz_float_step(rng: &mut Xoshiro128StarStar, f: &mut F) { } // significand fuzzing - let mut sig = f.to_bits() & F::SIGNIFICAND_MASK; + let mut sig = f.to_bits() & F::SIG_MASK; fuzz_step(rng, &mut sig); - sig &= F::SIGNIFICAND_MASK; + sig &= F::SIG_MASK; *f = F::from_parts(sign, exp, sig); } @@ -209,22 +209,22 @@ macro_rules! float_edge_cases { for exponent in [ F::Int::ZERO, F::Int::ONE, - F::Int::ONE << (F::EXPONENT_BITS / 2), - (F::Int::ONE << (F::EXPONENT_BITS - 1)) - F::Int::ONE, - F::Int::ONE << (F::EXPONENT_BITS - 1), - (F::Int::ONE << (F::EXPONENT_BITS - 1)) + F::Int::ONE, - (F::Int::ONE << F::EXPONENT_BITS) - F::Int::ONE, + F::Int::ONE << (F::EXP_BITS / 2), + (F::Int::ONE << (F::EXP_BITS - 1)) - F::Int::ONE, + F::Int::ONE << (F::EXP_BITS - 1), + (F::Int::ONE << (F::EXP_BITS - 1)) + F::Int::ONE, + (F::Int::ONE << F::EXP_BITS) - F::Int::ONE, ] .iter() { for significand in [ F::Int::ZERO, F::Int::ONE, - F::Int::ONE << (F::SIGNIFICAND_BITS / 2), - (F::Int::ONE << (F::SIGNIFICAND_BITS - 1)) - F::Int::ONE, - F::Int::ONE << (F::SIGNIFICAND_BITS - 1), - (F::Int::ONE << (F::SIGNIFICAND_BITS - 1)) + F::Int::ONE, - (F::Int::ONE << F::SIGNIFICAND_BITS) - F::Int::ONE, + F::Int::ONE << (F::SIG_BITS / 2), + (F::Int::ONE << (F::SIG_BITS - 1)) - F::Int::ONE, + F::Int::ONE << (F::SIG_BITS - 1), + (F::Int::ONE << (F::SIG_BITS - 1)) + F::Int::ONE, + (F::Int::ONE << F::SIG_BITS) - F::Int::ONE, ] .iter() { diff --git a/testcrate/tests/float_pow.rs b/testcrate/tests/float_pow.rs index d85ee99df..8d86392f5 100644 --- 
a/testcrate/tests/float_pow.rs +++ b/testcrate/tests/float_pow.rs @@ -19,8 +19,8 @@ macro_rules! pow { use compiler_builtins::float::Float; fuzz_float_2(N, |x: $f, y: $f| { if !(Float::is_subnormal(x) || Float::is_subnormal(y) || x.is_nan()) { - let n = y.to_bits() & !<$f as Float>::SIGNIFICAND_MASK; - let n = (n as <$f as Float>::SignedInt) >> <$f as Float>::SIGNIFICAND_BITS; + let n = y.to_bits() & !<$f as Float>::SIG_MASK; + let n = (n as <$f as Float>::SignedInt) >> <$f as Float>::SIG_BITS; let n = n as i32; let tmp0: $f = x.powi(n); let tmp1: $f = $fn(x, n); From ef00132996a9f50b3cfa4f2f69c143eee6c2975c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 3 Jan 2025 02:56:34 +0000 Subject: [PATCH 1005/1459] Rename `EXP_MAX` to `EXP_SAT` "Maximum" is technically correct here with regards to what the bitpattern can represent, but it is not the numeric maximum value of the exponent which has a relationship with the bias. So, replace the maximum terminology with "saturated" to indicate it only means the full bitpattern. This change is more relevant to `libm` than `compiler-builtins`. 
--- src/float/add.rs | 2 +- src/float/div.rs | 2 +- src/float/extend.rs | 2 +- src/float/mod.rs | 9 ++++++--- src/float/mul.rs | 2 +- src/float/trunc.rs | 2 +- 6 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/float/add.rs b/src/float/add.rs index 743cc441b..004ea3ebc 100644 --- a/src/float/add.rs +++ b/src/float/add.rs @@ -14,7 +14,7 @@ where let bits = F::BITS.cast(); let significand_bits = F::SIG_BITS; - let max_exponent = F::EXP_MAX; + let max_exponent = F::EXP_SAT; let implicit_bit = F::IMPLICIT_BIT; let significand_mask = F::SIG_MASK; diff --git a/src/float/div.rs b/src/float/div.rs index c211fabad..a461397ea 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -107,7 +107,7 @@ where let significand_bits = F::SIG_BITS; // Saturated exponent, representing infinity - let exponent_sat: F::Int = F::EXP_MAX.cast(); + let exponent_sat: F::Int = F::EXP_SAT.cast(); let exponent_bias = F::EXP_BIAS; let implicit_bit = F::IMPLICIT_BIT; diff --git a/src/float/extend.rs b/src/float/extend.rs index a5d69864b..a1a9b9720 100644 --- a/src/float/extend.rs +++ b/src/float/extend.rs @@ -26,7 +26,7 @@ where let dst_bits = R::BITS; let dst_sign_bits = R::SIG_BITS; - let dst_inf_exp = R::EXP_MAX; + let dst_inf_exp = R::EXP_SAT; let dst_exp_bias = R::EXP_BIAS; let dst_min_normal = R::IMPLICIT_BIT; diff --git a/src/float/mod.rs b/src/float/mod.rs index 9ba447f97..6ee55950e 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -51,11 +51,14 @@ pub(crate) trait Float: /// The bitwidth of the exponent. const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1; - /// The saturated value of the exponent (infinite representation), in the rightmost postiion. - const EXP_MAX: u32 = (1 << Self::EXP_BITS) - 1; + /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite + /// representation. + /// + /// This is in the rightmost position, use `EXP_MASK` for the shifted value. + const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1; /// The exponent bias value. 
- const EXP_BIAS: u32 = Self::EXP_MAX >> 1; + const EXP_BIAS: u32 = Self::EXP_SAT >> 1; /// A mask for the sign bit. const SIGN_MASK: Self::Int; diff --git a/src/float/mul.rs b/src/float/mul.rs index 62895293a..f0f261a28 100644 --- a/src/float/mul.rs +++ b/src/float/mul.rs @@ -14,7 +14,7 @@ where let bits = F::BITS; let significand_bits = F::SIG_BITS; - let max_exponent = F::EXP_MAX; + let max_exponent = F::EXP_SAT; let exponent_bias = F::EXP_BIAS; diff --git a/src/float/trunc.rs b/src/float/trunc.rs index c95e9c198..5efeac98e 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -29,7 +29,7 @@ where let dst_zero = R::Int::ZERO; let dst_one = R::Int::ONE; let dst_bits = R::BITS; - let dst_inf_exp = R::EXP_MAX; + let dst_inf_exp = R::EXP_SAT; let dst_exp_bias = R::EXP_BIAS; let underflow_exponent: F::Int = (src_exp_bias + 1 - dst_exp_bias).cast(); From 73b6429a20222b84842fad6a021693045af9400a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 3 Jan 2025 03:09:07 +0000 Subject: [PATCH 1006/1459] Fix new `clippy::precedence` errors `clippy::precedence` now applies to bitwise `&` and `|`. Update with all of its suggestions, including a separate elided lifetime suggestion. 
--- examples/intrinsics.rs | 2 +- src/float/add.rs | 8 ++++---- src/float/conv.rs | 8 ++++---- src/float/div.rs | 2 +- src/float/mul.rs | 2 +- src/float/trunc.rs | 2 +- src/mem/mod.rs | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index ef7a3d430..e13c0fb1f 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -640,7 +640,7 @@ fn run() { fn something_with_a_dtor(f: &dyn Fn()) { struct A<'a>(&'a (dyn Fn() + 'a)); - impl<'a> Drop for A<'a> { + impl Drop for A<'_> { fn drop(&mut self) { (self.0)(); } diff --git a/src/float/add.rs b/src/float/add.rs index 004ea3ebc..ef04ddc16 100644 --- a/src/float/add.rs +++ b/src/float/add.rs @@ -143,9 +143,9 @@ where // If the addition carried up, we need to right-shift the result and // adjust the exponent: - if a_significand & implicit_bit << 4 != MinInt::ZERO { + if a_significand & (implicit_bit << 4) != MinInt::ZERO { let sticky = F::Int::from_bool(a_significand & one != MinInt::ZERO); - a_significand = a_significand >> 1 | sticky; + a_significand = (a_significand >> 1) | sticky; a_exponent += 1; } } @@ -161,7 +161,7 @@ where let shift = (1 - a_exponent).cast(); let sticky = F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != MinInt::ZERO); - a_significand = a_significand >> shift.cast() | sticky; + a_significand = (a_significand >> shift.cast()) | sticky; a_exponent = 0; } @@ -170,7 +170,7 @@ where let round_guard_sticky: i32 = a_significand_i32 & 0x7; // Shift the significand into place, and mask off the implicit bit. - let mut result = a_significand >> 3 & significand_mask; + let mut result = (a_significand >> 3) & significand_mask; // Insert the exponent and sign. 
result |= a_exponent.cast() << significand_bits; diff --git a/src/float/conv.rs b/src/float/conv.rs index 40d304719..83a181c37 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -42,7 +42,7 @@ mod int_to_float { fn m_adj(m_base: F::Int, dropped_bits: F::Int) -> F::Int { // Branchlessly extract a `1` if rounding up should happen, 0 otherwise // This accounts for rounding to even. - let adj = (dropped_bits - (dropped_bits >> (F::BITS - 1) & !m_base)) >> (F::BITS - 1); + let adj = (dropped_bits - ((dropped_bits >> (F::BITS - 1)) & !m_base)) >> (F::BITS - 1); // Add one when we need to round up. Break ties to even. m_base + adj @@ -129,7 +129,7 @@ mod int_to_float { let m_base: u32 = (i_m >> shift_f_lt_i::()) as u32; // The entire lower half of `i` will be truncated (masked portion), plus the // next `EXP_BITS` bits. - let adj = (i_m >> f32::EXP_BITS | i_m & 0xFFFF) as u32; + let adj = ((i_m >> f32::EXP_BITS) | i_m & 0xFFFF) as u32; let m = m_adj::(m_base, adj); let e = if i == 0 { 0 } else { exp::(n) - 1 }; repr::(e, m) @@ -187,7 +187,7 @@ mod int_to_float { let m_base: u64 = (i_m >> shift_f_lt_i::()) as u64; // The entire lower half of `i` will be truncated (masked portion), plus the // next `EXP_BITS` bits. - let adj = (i_m >> f64::EXP_BITS | i_m & 0xFFFF_FFFF) as u64; + let adj = ((i_m >> f64::EXP_BITS) | i_m & 0xFFFF_FFFF) as u64; let m = m_adj::(m_base, adj); let e = if i == 0 { 0 } else { exp::(n) - 1 }; repr::(e, m) @@ -377,7 +377,7 @@ where }; // Set the implicit 1-bit. - let m: I::UnsignedInt = I::UnsignedInt::ONE << (I::BITS - 1) | m_base; + let m: I::UnsignedInt = (I::UnsignedInt::ONE << (I::BITS - 1)) | m_base; // Shift based on the exponent and bias. let s: u32 = (foobar) - u32::cast_from(fbits >> F::SIG_BITS); diff --git a/src/float/div.rs b/src/float/div.rs index a461397ea..21c757dd6 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -261,7 +261,7 @@ where let c_hw = c_hw::(); // Check that the top bit is set, i.e. value is within `[1, 2)`. 
- debug_assert!(b_uq1_hw & one_hw << (HalfRep::::BITS - 1) > zero_hw); + debug_assert!(b_uq1_hw & (one_hw << (HalfRep::::BITS - 1)) > zero_hw); // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572, // so x0 fits to UQ0.HW without wrapping. diff --git a/src/float/mul.rs b/src/float/mul.rs index f0f261a28..58636cb5e 100644 --- a/src/float/mul.rs +++ b/src/float/mul.rs @@ -154,7 +154,7 @@ where // not all zero so that the result is correctly rounded below. let sticky = product_low << (bits - shift) != zero; product_low = - product_high << (bits - shift) | product_low >> shift | (sticky as u32).cast(); + (product_high << (bits - shift)) | (product_low >> shift) | (sticky as u32).cast(); product_high >>= shift; } else { // Result is normal before rounding; insert the exponent. diff --git a/src/float/trunc.rs b/src/float/trunc.rs index 5efeac98e..3759aa7dc 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -96,7 +96,7 @@ where } else { src_zero }; - let denormalized_significand: F::Int = significand >> shift | sticky; + let denormalized_significand: F::Int = (significand >> shift) | sticky; abs_result = (denormalized_significand >> (F::SIG_BITS - R::SIG_BITS)).cast(); let round_bits = denormalized_significand & round_mask; // Round to nearest diff --git a/src/mem/mod.rs b/src/mem/mod.rs index d0ff50158..f10439e2d 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -111,7 +111,7 @@ where let mut x = T::from(c); let mut i = 1; while i < mem::size_of::() { - x = x << 8 | T::from(c); + x = (x << 8) | T::from(c); i += 1; } From e1f71a4d777e148e3960065e036cffb58ae9dd18 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 3 Jan 2025 12:04:45 +0000 Subject: [PATCH 1007/1459] Use intrinsics for `abs` and `copysign` when available Currently our implementations for `abs` and `copysign` are defined on the trait, and these are then called from `generic`. 
It would be better to call core's `.abs()` / `.copysign(y)`, but we can't do this in the generic because calling the standalone function could become recursive (`fabsf` becomes `intrinsics::fabsf32`, which may lower to a call to `fabsf`). Change this so the trait uses the call to `core` if available, falling back to a call to the standalone generic function. In practice the recursion isn't likely to be a problem since LLVM probably always lowers `abs`/`copysign` to assembly, but this pattern should be more correct for functions that we will add in the future (e.g. `fma`). This should eventually be followed by a change to call the trait methods rather than `fabs`/`copysign` directly. --- libm/crates/libm-test/src/f8_impl.rs | 8 ++++++ libm/src/math/generic/copysign.rs | 2 +- libm/src/math/generic/fabs.rs | 3 ++- libm/src/math/mod.rs | 9 ++++++- libm/src/math/support/float_traits.rs | 37 +++++++++++++++++---------- 5 files changed, 43 insertions(+), 16 deletions(-) diff --git a/libm/crates/libm-test/src/f8_impl.rs b/libm/crates/libm-test/src/f8_impl.rs index babcc6357..d378863f2 100644 --- a/libm/crates/libm-test/src/f8_impl.rs +++ b/libm/crates/libm-test/src/f8_impl.rs @@ -70,6 +70,14 @@ impl Float for f8 { Self(a) } + fn abs(self) -> Self { + libm::generic::fabs(self) + } + + fn copysign(self, other: Self) -> Self { + libm::generic::copysign(self, other) + } + fn normalize(_significand: Self::Int) -> (i32, Self::Int) { unimplemented!() } diff --git a/libm/src/math/generic/copysign.rs b/libm/src/math/generic/copysign.rs index d6b814891..04864a359 100644 --- a/libm/src/math/generic/copysign.rs +++ b/libm/src/math/generic/copysign.rs @@ -5,6 +5,6 @@ pub fn copysign(x: F, y: F) -> F { let mut ux = x.to_bits(); let uy = y.to_bits(); ux &= !F::SIGN_MASK; - ux |= uy & (F::SIGN_MASK); + ux |= uy & F::SIGN_MASK; F::from_bits(ux) } diff --git a/libm/src/math/generic/fabs.rs b/libm/src/math/generic/fabs.rs index f2c7f0f46..75b473107 100644 --- a/libm/src/math/generic/fabs.rs 
+++ b/libm/src/math/generic/fabs.rs @@ -2,5 +2,6 @@ use super::super::Float; /// Absolute value. pub fn fabs(x: F) -> F { - x.abs() + let abs_mask = !F::SIGN_MASK; + F::from_bits(x.to_bits() & abs_mask) } diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index ba1995228..e7b21de67 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -83,11 +83,18 @@ pub mod support; #[cfg(not(feature = "unstable-test-support"))] mod support; +cfg_if! { + if #[cfg(feature = "unstable-test-support")] { + pub mod generic; + } else { + mod generic; + } +} + // Private modules mod arch; mod expo2; mod fenv; -mod generic; mod k_cos; mod k_cosf; mod k_expo2; diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index e64640a0d..3b5be4fa3 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -123,23 +123,14 @@ pub trait Float: ) } - fn abs(self) -> Self { - let abs_mask = !Self::SIGN_MASK; - Self::from_bits(self.to_bits() & abs_mask) - } + fn abs(self) -> Self; + + /// Returns a number composed of the magnitude of self and the sign of sign. + fn copysign(self, other: Self) -> Self; /// Returns (normalized exponent, normalized significand) fn normalize(significand: Self::Int) -> (i32, Self::Int); - /// Returns a number composed of the magnitude of self and the sign of sign. - fn copysign(self, other: Self) -> Self { - let mut x = self.to_bits(); - let y = other.to_bits(); - x &= !Self::SIGN_MASK; - x |= y & Self::SIGN_MASK; - Self::from_bits(x) - } - /// Returns a number that represents the sign of self. fn signum(self) -> Self { if self.is_nan() { self } else { Self::ONE.copysign(self) } @@ -206,6 +197,26 @@ macro_rules! float_impl { fn from_bits(a: Self::Int) -> Self { Self::from_bits(a) } + fn abs(self) -> Self { + cfg_if! { + // FIXME(msrv): `abs` is available in `core` starting with 1.85. 
+ if #[cfg(feature = "unstable-intrinsics")] { + self.abs() + } else { + super::super::generic::fabs(self) + } + } + } + fn copysign(self, other: Self) -> Self { + cfg_if! { + // FIXME(msrv): `copysign` is available in `core` starting with 1.85. + if #[cfg(feature = "unstable-intrinsics")] { + self.copysign(other) + } else { + super::super::generic::copysign(self, other) + } + } + } fn normalize(significand: Self::Int) -> (i32, Self::Int) { let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int) From 0dd1d7b17dddfbee681b73ab69b4293e4ac4313b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 3 Jan 2025 22:26:39 +0000 Subject: [PATCH 1008/1459] Change to exhaustive matching for `default_ulp` Make it more obvious what the expected ULP for a given routine is. This also narrows ULP to 0 for operations that require exact results. --- libm/crates/libm-test/src/precision.rs | 130 +++++++++++++++++-------- 1 file changed, 91 insertions(+), 39 deletions(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 6d4561c43..89b66146c 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -4,57 +4,109 @@ use core::f32; use CheckBasis::{Mpfr, Musl}; -use Identifier as Id; +use {BaseName as Bn, Identifier as Id}; use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult}; /// Type implementing [`IgnoreCase`]. pub struct SpecialCase; -/// Default ULP allowed to differ from musl (note that musl itself may not be accurate). -const MUSL_DEFAULT_ULP: u32 = 2; - -/// Default ULP allowed to differ from multiprecision (i.e. infinite) results. -const MP_DEFAULT_ULP: u32 = 1; - /// ULP allowed to differ from the results returned by a test basis. /// /// Note that these results were obtained using 400M rounds of random inputs, which /// is not a value used by default. 
pub fn default_ulp(ctx: &CheckCtx) -> u32 { - match (&ctx.basis, ctx.fn_ident) { - // Overrides that apply to either basis - // FMA is expected to be infinite precision. - (_, Id::Fma | Id::Fmaf) => 0, - (_, Id::J0 | Id::J0f | Id::J1 | Id::J1f | Id::Y0 | Id::Y0f | Id::Y1 | Id::Y1f) => 800_000, - (_, Id::Jn | Id::Jnf | Id::Yn | Id::Ynf) => 1000, - (_, Id::Erfc | Id::Erfcf) => 4, - - // Overrides for musl - #[cfg(x86_no_sse)] - (Musl, Id::Asinh | Id::Asinhf) => 6, - #[cfg(not(target_pointer_width = "64"))] - (Musl, Id::Exp10 | Id::Exp10f) => 4, - (Musl, Id::Lgamma | Id::LgammaR | Id::Lgammaf | Id::LgammafR) => 400, - (Musl, Id::Sincosf) => 500, - (Musl, Id::Tanh | Id::Tanhf) => 4, - (Musl, Id::Tgamma) => 20, - - // Overrides for MPFR - (Mpfr, Id::Acosh) => 4, - (Mpfr, Id::Acoshf) => 4, - (Mpfr, Id::Asinh | Id::Asinhf) => 2, - (Mpfr, Id::Atanh | Id::Atanhf) => 2, - (Mpfr, Id::Exp10 | Id::Exp10f) => 6, - (Mpfr, Id::Lgamma | Id::LgammaR | Id::Lgammaf | Id::LgammafR) => 16, - (Mpfr, Id::Sinh | Id::Sinhf) => 2, - (Mpfr, Id::Tanh | Id::Tanhf) => 2, - (Mpfr, Id::Tgamma) => 20, - - // Defaults - (Musl, _) => MUSL_DEFAULT_ULP, - (Mpfr, _) => MP_DEFAULT_ULP, + // ULP compared to the infinite (MPFR) result. + let mut ulp = match ctx.base_name { + // Operations that require exact results. This list should correlate with what we + // have documented at . + Bn::Ceil + | Bn::Copysign + | Bn::Fabs + | Bn::Fdim + | Bn::Floor + | Bn::Fma + | Bn::Fmax + | Bn::Fmin + | Bn::Fmod + | Bn::Frexp + | Bn::Ldexp + | Bn::Modf + | Bn::Nextafter + | Bn::Remainder + | Bn::Remquo + | Bn::Rint + | Bn::Round + | Bn::Scalbn + | Bn::Sqrt + | Bn::Trunc => 0, + + // Operations that aren't required to be exact, but our implementations are. + Bn::Cbrt if ctx.fn_ident != Id::Cbrt => 0, + Bn::Ilogb => 0, + Bn::Tgamma if ctx.fn_ident != Id::Tgamma => 0, + + // Bessel functions have large inaccuracies. 
+ Bn::J0 | Bn::J1 | Bn::Y0 | Bn::Y1 => 8_000_000, + Bn::Jn | Bn::Yn => 1_000, + + // For all other operations, specify our implementation's worst case precision. + Bn::Acos => 1, + Bn::Acosh => 4, + Bn::Asin => 1, + Bn::Asinh => 2, + Bn::Atan => 1, + Bn::Atan2 => 1, + Bn::Atanh => 2, + Bn::Cbrt => 1, + Bn::Cos => 1, + Bn::Cosh => 1, + Bn::Erf => 1, + Bn::Erfc => 4, + Bn::Exp => 1, + Bn::Exp10 => 6, + Bn::Exp2 => 1, + Bn::Expm1 => 1, + Bn::Hypot => 1, + Bn::Lgamma | Bn::LgammaR => 16, + Bn::Log => 1, + Bn::Log10 => 1, + Bn::Log1p => 1, + Bn::Log2 => 1, + Bn::Pow => 1, + Bn::Sin => 1, + Bn::Sincos => 1, + Bn::Sinh => 2, + Bn::Tan => 1, + Bn::Tanh => 2, + Bn::Tgamma => 20, + }; + + // There are some cases where musl's approximation is less accurate than ours. For these + // cases, increase the ULP. + if ctx.basis == Musl { + match ctx.base_name { + Bn::Cosh => ulp = 2, + Bn::Exp10 if usize::BITS < 64 => ulp = 4, + Bn::Lgamma | Bn::LgammaR => ulp = 400, + Bn::Tanh => ulp = 4, + _ if ctx.fn_ident == Id::Sincosf => ulp = 500, + _ if ctx.fn_ident == Id::Tgamma => ulp = 20, + _ => (), + } } + + // In some cases, our implementation is less accurate than musl on i586. + if cfg!(x86_no_sse) { + match ctx.fn_ident { + Id::Log1p | Id::Log1pf => ulp = 2, + Id::Round => ulp = 1, + Id::Tan => ulp = 2, + _ => (), + } + } + + ulp } /// Don't run further validation on this test case. From 224e3d598f70ebcd35bb20a1ac03f9e96d735e51 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 4 Jan 2025 11:14:31 +0000 Subject: [PATCH 1009/1459] precision: Sort `ilogb` with other precise operations This is a nonfunctional change. 
--- libm/crates/libm-test/src/precision.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 89b66146c..8bedcde44 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -30,6 +30,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { | Bn::Fmin | Bn::Fmod | Bn::Frexp + | Bn::Ilogb | Bn::Ldexp | Bn::Modf | Bn::Nextafter @@ -43,7 +44,6 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { // Operations that aren't required to be exact, but our implementations are. Bn::Cbrt if ctx.fn_ident != Id::Cbrt => 0, - Bn::Ilogb => 0, Bn::Tgamma if ctx.fn_ident != Id::Tgamma => 0, // Bessel functions have large inaccuracies. From 5d9371ceaa43bb57b8502fdad61764b68de3ba42 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 5 Jan 2025 02:06:02 +0000 Subject: [PATCH 1010/1459] Clean up integers stored in `MpTy` There isn't any need to cache the integer since it gets provided as an argument anyway. Simplify this in `jn` and `yn`. --- libm/crates/libm-test/src/mpfloat.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index edb4cb962..5e516ef68 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -241,17 +241,17 @@ macro_rules! impl_op_for_ty { } impl MpOp for crate::op::[]::Routine { - type MpTy = (i32, MpFloat); + type MpTy = MpFloat; fn new_mp() -> Self::MpTy { - (0, new_mpfloat::()) + new_mpfloat::() } fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { - this.0 = input.0; - this.1.assign(input.1); - let ord = this.1.jn_round(this.0, Nearest); - prep_retval::(&mut this.1, ord) + let (n, x) = input; + this.assign(x); + let ord = this.jn_round(n, Nearest); + prep_retval::(this, ord) } } @@ -274,17 +274,17 @@ macro_rules! 
impl_op_for_ty { } impl MpOp for crate::op::[]::Routine { - type MpTy = (i32, MpFloat); + type MpTy = MpFloat; fn new_mp() -> Self::MpTy { - (0, new_mpfloat::()) + new_mpfloat::() } fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { - this.0 = input.0; - this.1.assign(input.1); - let ord = this.1.yn_round(this.0, Nearest); - prep_retval::(&mut this.1, ord) + let (n, x) = input; + this.assign(x); + let ord = this.yn_round(n, Nearest); + prep_retval::(this, ord) } } } From 95601206df4c986cc1f8a71b795b5d6ba09cc8ca Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 4 Jan 2025 09:53:58 +0000 Subject: [PATCH 1011/1459] Add tests against MPFR for `modf` and `modff` Rug provides `trunc_fract_round`, which implements `modf`, use it to add a test. --- libm/crates/libm-test/src/mpfloat.rs | 18 ++++++++++++++++++ libm/crates/libm-test/tests/multiprecision.rs | 4 ---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 5e516ef68..b9e71d68a 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -210,6 +210,24 @@ macro_rules! 
impl_op_for_ty { } } + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(&this.0); + let (ord0, ord1) = this.0.trunc_fract_round(&mut this.1, Nearest); + ( + prep_retval::(&mut this.1, ord0), + prep_retval::(&mut this.0, ord1), + ) + } + } + impl MpOp for crate::op::[]::Routine { type MpTy = (MpFloat, MpFloat); diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 71ff2ff96..54d313059 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -58,8 +58,6 @@ libm_macros::for_each_function! { ilogbf, ldexp, ldexpf, - modf, - modff, remquo, remquof, scalbn, @@ -165,7 +163,5 @@ libm_macros::for_each_function! { frexpf, ilogb, ilogbf, - modf, - modff, ], } From 7ce5096aefd92bf2d37dca5354420d91bc4f3224 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 4 Jan 2025 10:39:27 +0000 Subject: [PATCH 1012/1459] Add tests against MPFR for `frexp` and `frexpf` This implementation comes from `rug::Float::to_f32_exp` [1]. [1]: https://docs.rs/rug/1.26.1/rug/struct.Float.html#method.to_f32_exp --- libm/crates/libm-test/src/mpfloat.rs | 19 +++++++++++++++++++ libm/crates/libm-test/tests/multiprecision.rs | 4 ---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index b9e71d68a..4d40858f2 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -258,6 +258,25 @@ macro_rules! 
impl_op_for_ty { } } + impl MpOp for crate::op::[]::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + // Implementation taken from `rug::Float::to_f32_exp`. + this.assign(input.0); + let exp = this.get_exp().unwrap_or(0); + if exp != 0 { + *this >>= exp; + } + + (prep_retval::(this, Ordering::Equal), exp) + } + } + impl MpOp for crate::op::[]::Routine { type MpTy = MpFloat; diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 54d313059..ade5a2553 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -52,8 +52,6 @@ libm_macros::for_each_function! { ], skip: [ // FIXME: MPFR tests needed - frexp, - frexpf, ilogb, ilogbf, ldexp, @@ -159,8 +157,6 @@ libm_macros::for_each_function! { ynf, // FIXME: MPFR tests needed - frexp, - frexpf, ilogb, ilogbf, ], From f690245dee1344d9a362265ce1eb6675f96f5891 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 3 Jan 2025 05:28:39 +0000 Subject: [PATCH 1013/1459] Add tests against MPFR for `scalbn{f}` and `ldexp{f}` --- libm/crates/libm-test/src/mpfloat.rs | 28 +++++++++++++++++++ libm/crates/libm-test/tests/multiprecision.rs | 4 --- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 4d40858f2..8b8298004 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -292,6 +292,34 @@ macro_rules! impl_op_for_ty { } } + // `ldexp` and `scalbn` are the same for binary floating point, so just forward all + // methods. 
+ impl MpOp for crate::op::[]::Routine { + type MpTy = ]::Routine as MpOp>::MpTy; + + fn new_mp() -> Self::MpTy { + ]::Routine as MpOp>::new_mp() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + ]::Routine as MpOp>::run(this, input) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + *this <<= input.1; + prep_retval::(this, Ordering::Equal) + } + } + impl MpOp for crate::op::[]::Routine { type MpTy = (MpFloat, MpFloat); diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index ade5a2553..4cdba0942 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -54,12 +54,8 @@ libm_macros::for_each_function! { // FIXME: MPFR tests needed ilogb, ilogbf, - ldexp, - ldexpf, remquo, remquof, - scalbn, - scalbnf, // FIXME: test needed, see // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392 From 3cd43facc9f3ce746eb79922e048908aba57446b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 29 Dec 2024 08:45:08 +0000 Subject: [PATCH 1014/1459] Add a way for tests to log to a file Occasionally it is useful to see some information from running tests without making everything noisy from `--nocapture`. Add a function to log this kind of output to a file, and print the file as part of CI. 
--- libm/.github/workflows/main.yml | 5 +++ libm/configure.rs | 15 +++++++++ libm/crates/libm-test/src/lib.rs | 53 ++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 0f5becf73..023ec58c0 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -113,6 +113,11 @@ jobs: rustup target add x86_64-unknown-linux-musl cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} + - name: Print test logs if available + if: always() + run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi + shell: bash + clippy: name: Clippy runs-on: ubuntu-24.04 diff --git a/libm/configure.rs b/libm/configure.rs index 389e86c33..a18937c3c 100644 --- a/libm/configure.rs +++ b/libm/configure.rs @@ -8,6 +8,7 @@ pub struct Config { pub manifest_dir: PathBuf, pub out_dir: PathBuf, pub opt_level: u8, + pub cargo_features: Vec, pub target_arch: String, pub target_env: String, pub target_family: Option, @@ -22,11 +23,16 @@ impl Config { let target_features = env::var("CARGO_CFG_TARGET_FEATURE") .map(|feats| feats.split(',').map(ToOwned::to_owned).collect()) .unwrap_or_default(); + let cargo_features = env::vars() + .filter_map(|(name, _value)| name.strip_prefix("CARGO_FEATURE_").map(ToOwned::to_owned)) + .map(|s| s.to_lowercase().replace("_", "-")) + .collect(); Self { manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()), out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()), opt_level: env::var("OPT_LEVEL").unwrap().parse().unwrap(), + cargo_features, target_arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(), target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(), target_family: env::var("CARGO_CFG_TARGET_FAMILY").ok(), @@ -45,6 +51,7 @@ pub fn emit_libm_config(cfg: &Config) { emit_arch_cfg(); emit_optimization_cfg(cfg); emit_cfg_shorthands(cfg); + emit_cfg_env(cfg); emit_f16_f128_cfg(cfg); } @@ -53,6 +60,7 @@ pub fn 
emit_libm_config(cfg: &Config) { pub fn emit_test_config(cfg: &Config) { emit_optimization_cfg(cfg); emit_cfg_shorthands(cfg); + emit_cfg_env(cfg); emit_f16_f128_cfg(cfg); } @@ -97,6 +105,13 @@ fn emit_cfg_shorthands(cfg: &Config) { } } +/// Reemit config that we make use of for test logging. +fn emit_cfg_env(cfg: &Config) { + println!("cargo:rustc-env=CFG_CARGO_FEATURES={:?}", cfg.cargo_features); + println!("cargo:rustc-env=CFG_OPT_LEVEL={}", cfg.opt_level); + println!("cargo:rustc-env=CFG_TARGET_FEATURES={:?}", cfg.target_features); +} + /// Configure whether or not `f16` and `f128` support should be enabled. fn emit_f16_f128_cfg(cfg: &Config) { println!("cargo:rustc-check-cfg=cfg(f16_enabled)"); diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 97907b2a1..c1aec0230 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -13,6 +13,13 @@ mod precision; mod run_cfg; mod test_traits; +use std::env; +use std::fs::File; +use std::io::Write; +use std::path::PathBuf; +use std::sync::LazyLock; +use std::time::SystemTime; + pub use f8_impl::f8; pub use libm::support::{Float, Int, IntTy, MinInt}; pub use num::{FloatExt, logspace}; @@ -42,3 +49,49 @@ pub const fn ci() -> bool { Some(_) => true, } } + +/// Print to stderr and additionally log it to `target/test-log.txt`. This is useful for saving +/// output that would otherwise be consumed by the test harness. +pub fn test_log(s: &str) { + // Handle to a file opened in append mode, unless a suitable path can't be determined. + static OUTFILE: LazyLock> = LazyLock::new(|| { + // If the target directory is overridden, use that environment variable. Otherwise, save + // at the default path `{workspace_root}/target`. 
+ let target_dir = match env::var("CARGO_TARGET_DIR") { + Ok(s) => PathBuf::from(s), + Err(_) => { + let Ok(x) = env::var("CARGO_MANIFEST_DIR") else { + return None; + }; + + PathBuf::from(x).parent().unwrap().parent().unwrap().join("target") + } + }; + let outfile = target_dir.join("test-log.txt"); + + let mut f = File::options() + .create(true) + .append(true) + .open(outfile) + .expect("failed to open logfile"); + let now = SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap(); + + writeln!(f, "\n\nTest run at {}", now.as_secs()).unwrap(); + writeln!(f, "arch: {}", env::consts::ARCH).unwrap(); + writeln!(f, "os: {}", env::consts::OS).unwrap(); + writeln!(f, "bits: {}", usize::BITS).unwrap(); + writeln!(f, "emulated: {}", emulated()).unwrap(); + writeln!(f, "ci: {}", ci()).unwrap(); + writeln!(f, "cargo features: {}", env!("CFG_CARGO_FEATURES")).unwrap(); + writeln!(f, "opt level: {}", env!("CFG_OPT_LEVEL")).unwrap(); + writeln!(f, "target features: {}", env!("CFG_TARGET_FEATURES")).unwrap(); + + Some(f) + }); + + eprintln!("{s}"); + + if let Some(mut f) = OUTFILE.as_ref() { + writeln!(f, "{s}").unwrap(); + } +} From c92b4a2ed20bfc281ae04d359d0022489e68be7c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 26 Dec 2024 07:44:54 +0000 Subject: [PATCH 1015/1459] Streamline the way that test iteration count is determined Currently, tests use a handful of constants to determine how many iterations to perform: `NTESTS`, `AROUND`, and `MAX_CHECK_POINTS`. This configuration is not very straightforward to adjust and needs to be repeated everywhere it is used. Replace this with new functions in the `run_cfg` module that determine iteration counts in a more reusable and documented way. This only updates `edge_cases` and `domain_logspace`, `random` is refactored in a later commit. 
--- .../libm-test/src/gen/domain_logspace.rs | 31 +-- libm/crates/libm-test/src/gen/edge_cases.rs | 54 +++--- libm/crates/libm-test/src/gen/random.rs | 1 + libm/crates/libm-test/src/lib.rs | 2 +- libm/crates/libm-test/src/run_cfg.rs | 177 +++++++++++++++++- 5 files changed, 208 insertions(+), 57 deletions(-) diff --git a/libm/crates/libm-test/src/gen/domain_logspace.rs b/libm/crates/libm-test/src/gen/domain_logspace.rs index 5e37170fa..3d8a3e7fe 100644 --- a/libm/crates/libm-test/src/gen/domain_logspace.rs +++ b/libm/crates/libm-test/src/gen/domain_logspace.rs @@ -6,41 +6,26 @@ use libm::support::{IntTy, MinInt}; use crate::domain::HasDomain; use crate::op::OpITy; +use crate::run_cfg::{GeneratorKind, iteration_count}; use crate::{CheckCtx, MathOp, logspace}; -/// Number of tests to run. -// FIXME(ntests): replace this with a more logical algorithm -const NTESTS: usize = { - if cfg!(optimizations_enabled) { - if crate::emulated() - || !cfg!(target_pointer_width = "64") - || cfg!(all(target_arch = "x86_64", target_vendor = "apple")) - { - // Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run - // in QEMU. - 100_000 - } else { - 5_000_000 - } - } else { - // Without optimizations just run a quick check - 800 - } -}; - /// Create a range of logarithmically spaced inputs within a function's domain. /// /// This allows us to get reasonably thorough coverage without wasting time on values that are /// NaN or out of range. Random tests will still cover values that are excluded here. 
-pub fn get_test_cases(_ctx: &CheckCtx) -> impl Iterator +pub fn get_test_cases(ctx: &CheckCtx) -> impl Iterator where Op: MathOp + HasDomain, - IntTy: TryFrom, + IntTy: TryFrom, RangeInclusive>: Iterator, { let domain = Op::DOMAIN; + let ntests = iteration_count(ctx, GeneratorKind::Domain, 0); + + // We generate logspaced inputs within a specific range, excluding values that are out of + // range in order to make iterations useful (random tests still cover the full range). let start = domain.range_start(); let end = domain.range_end(); - let steps = OpITy::::try_from(NTESTS).unwrap_or(OpITy::::MAX); + let steps = OpITy::::try_from(ntests).unwrap_or(OpITy::::MAX); logspace(start, end, steps).map(|v| (v,)) } diff --git a/libm/crates/libm-test/src/gen/edge_cases.rs b/libm/crates/libm-test/src/gen/edge_cases.rs index 3387f6c48..1f27c1467 100644 --- a/libm/crates/libm-test/src/gen/edge_cases.rs +++ b/libm/crates/libm-test/src/gen/edge_cases.rs @@ -3,18 +3,11 @@ use libm::support::Float; use crate::domain::HasDomain; +use crate::run_cfg::{check_near_count, check_point_count}; use crate::{CheckCtx, FloatExt, MathOp}; -/// Number of values near an interesting point to check. -// FIXME(ntests): replace this with a more logical algorithm -const AROUND: usize = 100; - -/// Functions have infinite asymptotes, limit how many we check. -// FIXME(ntests): replace this with a more logical algorithm -const MAX_CHECK_POINTS: usize = 10; - /// Create a list of values around interesting points (infinities, zeroes, NaNs). 
-pub fn get_test_cases(_ctx: &CheckCtx) -> impl Iterator +pub fn get_test_cases(ctx: &CheckCtx) -> impl Iterator where Op: MathOp + HasDomain, F: Float, @@ -25,23 +18,26 @@ where let domain_start = domain.range_start(); let domain_end = domain.range_end(); + let check_points = check_point_count(ctx); + let near_points = check_near_count(ctx); + // Check near some notable constants - count_up(F::ONE, values); - count_up(F::ZERO, values); - count_up(F::NEG_ONE, values); - count_down(F::ONE, values); - count_down(F::ZERO, values); - count_down(F::NEG_ONE, values); + count_up(F::ONE, near_points, values); + count_up(F::ZERO, near_points, values); + count_up(F::NEG_ONE, near_points, values); + count_down(F::ONE, near_points, values); + count_down(F::ZERO, near_points, values); + count_down(F::NEG_ONE, near_points, values); values.push(F::NEG_ZERO); // Check values near the extremes - count_up(F::NEG_INFINITY, values); - count_down(F::INFINITY, values); - count_down(domain_end, values); - count_up(domain_start, values); - count_down(domain_start, values); - count_up(domain_end, values); - count_down(domain_end, values); + count_up(F::NEG_INFINITY, near_points, values); + count_down(F::INFINITY, near_points, values); + count_down(domain_end, near_points, values); + count_up(domain_start, near_points, values); + count_down(domain_start, near_points, values); + count_up(domain_end, near_points, values); + count_down(domain_end, near_points, values); // Check some special values that aren't included in the above ranges values.push(F::NAN); @@ -50,9 +46,9 @@ where // Check around asymptotes if let Some(f) = domain.check_points { let iter = f(); - for x in iter.take(MAX_CHECK_POINTS) { - count_up(x, values); - count_down(x, values); + for x in iter.take(check_points) { + count_up(x, near_points, values); + count_down(x, near_points, values); } } @@ -65,11 +61,11 @@ where /// Add `AROUND` values starting at and including `x` and counting up. 
Uses the smallest possible /// increments (1 ULP). -fn count_up(mut x: F, values: &mut Vec) { +fn count_up(mut x: F, points: u64, values: &mut Vec) { assert!(!x.is_nan()); let mut count = 0; - while x < F::INFINITY && count < AROUND { + while x < F::INFINITY && count < points { values.push(x); x = x.next_up(); count += 1; @@ -78,11 +74,11 @@ fn count_up(mut x: F, values: &mut Vec) { /// Add `AROUND` values starting at and including `x` and counting down. Uses the smallest possible /// increments (1 ULP). -fn count_down(mut x: F, values: &mut Vec) { +fn count_down(mut x: F, points: u64, values: &mut Vec) { assert!(!x.is_nan()); let mut count = 0; - while x > F::NEG_INFINITY && count < AROUND { + while x > F::NEG_INFINITY && count < points { values.push(x); x = x.next_down(); count += 1; diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs index 4f75da07b..a30a3674e 100644 --- a/libm/crates/libm-test/src/gen/random.rs +++ b/libm/crates/libm-test/src/gen/random.rs @@ -12,6 +12,7 @@ use crate::{BaseName, CheckCtx, GenerateInput}; const SEED: [u8; 32] = *b"3.141592653589793238462643383279"; /// Number of tests to run. +// FIXME(ntests): clean this up when possible const NTESTS: usize = { if cfg!(optimizations_enabled) { if crate::emulated() diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index c1aec0230..80ec23736 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -25,7 +25,7 @@ pub use libm::support::{Float, Int, IntTy, MinInt}; pub use num::{FloatExt, logspace}; pub use op::{BaseName, FloatTy, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet, Ty}; pub use precision::{MaybeOverride, SpecialCase, default_ulp}; -pub use run_cfg::{CheckBasis, CheckCtx}; +pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind}; pub use test_traits::{CheckOutput, GenerateInput, Hex, TupleCall}; /// Result type for tests is usually from `anyhow`. 
Most times there is no success value to diff --git a/libm/crates/libm-test/src/run_cfg.rs b/libm/crates/libm-test/src/run_cfg.rs index eb7e0e2c1..46a6a1fad 100644 --- a/libm/crates/libm-test/src/run_cfg.rs +++ b/libm/crates/libm-test/src/run_cfg.rs @@ -1,13 +1,11 @@ //! Configuration for how tests get run. -#![allow(unused)] - -use std::collections::BTreeMap; use std::env; use std::sync::LazyLock; -use crate::{BaseName, FloatTy, Identifier, op}; +use crate::{BaseName, FloatTy, Identifier, test_log}; +/// The environment variable indicating which extensive tests should be run. pub const EXTENSIVE_ENV: &str = "LIBM_EXTENSIVE_TESTS"; /// Context passed to [`CheckOutput`]. @@ -49,3 +47,174 @@ pub enum CheckBasis { /// Check against infinite precision (MPFR). Mpfr, } + +/// The different kinds of generators that provide test input. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum GeneratorKind { + Domain, + Random, +} + +/// A list of all functions that should get extensive tests. +/// +/// This also supports the special test name `all` to run all tests, as well as `all_f16`, +/// `all_f32`, `all_f64`, and `all_f128` to run all tests for a specific float type. 
+static EXTENSIVE: LazyLock> = LazyLock::new(|| { + let var = env::var(EXTENSIVE_ENV).unwrap_or_default(); + let list = var.split(",").filter(|s| !s.is_empty()).collect::>(); + let mut ret = Vec::new(); + + let append_ty_ops = |ret: &mut Vec<_>, fty: FloatTy| { + let iter = Identifier::ALL.iter().filter(move |id| id.math_op().float_ty == fty).copied(); + ret.extend(iter); + }; + + for item in list { + match item { + "all" => ret = Identifier::ALL.to_owned(), + "all_f16" => append_ty_ops(&mut ret, FloatTy::F16), + "all_f32" => append_ty_ops(&mut ret, FloatTy::F32), + "all_f64" => append_ty_ops(&mut ret, FloatTy::F64), + "all_f128" => append_ty_ops(&mut ret, FloatTy::F128), + s => { + let id = Identifier::from_str(s) + .unwrap_or_else(|| panic!("unrecognized test name `{s}`")); + ret.push(id); + } + } + } + + ret +}); + +/// Information about the function to be tested. +#[derive(Debug)] +struct TestEnv { + /// Tests should be reduced because the platform is slow. E.g. 32-bit or emulated. + slow_platform: bool, + /// The float cannot be tested exhaustively, `f64` or `f128`. + large_float_ty: bool, + /// Env indicates that an extensive test should be run. + should_run_extensive: bool, + /// Multiprecision tests will be run. + mp_tests_enabled: bool, + /// The number of inputs to the function. + input_count: usize, +} + +impl TestEnv { + fn from_env(ctx: &CheckCtx) -> Self { + let id = ctx.fn_ident; + let op = id.math_op(); + + let will_run_mp = cfg!(feature = "test-multiprecision"); + + // Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run in QEMU. Start + // with a reduced number on these platforms. 
+ let slow_on_ci = crate::emulated() + || usize::BITS < 64 + || cfg!(all(target_arch = "x86_64", target_vendor = "apple")); + let slow_platform = slow_on_ci && crate::ci(); + + let large_float_ty = match op.float_ty { + FloatTy::F16 | FloatTy::F32 => false, + FloatTy::F64 | FloatTy::F128 => true, + }; + + let will_run_extensive = EXTENSIVE.contains(&id); + + let input_count = op.rust_sig.args.len(); + + Self { + slow_platform, + large_float_ty, + should_run_extensive: will_run_extensive, + mp_tests_enabled: will_run_mp, + input_count, + } + } +} + +/// The number of iterations to run for a given test. +pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -> u64 { + let t_env = TestEnv::from_env(ctx); + + // Ideally run 5M tests + let mut domain_iter_count: u64 = 4_000_000; + + // Start with a reduced number of tests on slow platforms. + if t_env.slow_platform { + domain_iter_count = 100_000; + } + + // Larger float types get more iterations. + if t_env.large_float_ty { + domain_iter_count *= 4; + } + + // Functions with more arguments get more iterations. + let arg_multiplier = 1 << (t_env.input_count - 1); + domain_iter_count *= arg_multiplier; + + // If we will be running tests against MPFR, we don't need to test as much against musl. + // However, there are some platforms where we have to test against musl since MPFR can't be + // built. + if t_env.mp_tests_enabled && ctx.basis == CheckBasis::Musl { + domain_iter_count /= 100; + } + + // Run fewer random tests than domain tests. + let random_iter_count = domain_iter_count / 100; + + let mut total_iterations = match gen_kind { + GeneratorKind::Domain => domain_iter_count, + GeneratorKind::Random => random_iter_count, + }; + + if cfg!(optimizations_enabled) { + // Always run at least 10,000 tests. + total_iterations = total_iterations.max(10_000); + } else { + // Without optimizations, just run a quick check regardless of other parameters. 
+ total_iterations = 800; + } + + // Adjust for the number of inputs + let ntests = match t_env.input_count { + 1 => total_iterations, + 2 => (total_iterations as f64).sqrt().ceil() as u64, + 3 => (total_iterations as f64).cbrt().ceil() as u64, + _ => panic!("test has more than three arguments"), + }; + let total = ntests.pow(t_env.input_count.try_into().unwrap()); + + test_log(&format!( + "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {ntests} iterations \ + ({total} total)", + basis = ctx.basis, + fn_ident = ctx.fn_ident, + arg = argnum + 1, + args = t_env.input_count, + )); + + ntests +} + +/// For domain tests, limit how many asymptotes or specified check points we test. +pub fn check_point_count(ctx: &CheckCtx) -> usize { + let t_env = TestEnv::from_env(ctx); + if t_env.slow_platform || !cfg!(optimizations_enabled) { 4 } else { 10 } +} + +/// When validating points of interest (e.g. asymptotes, inflection points, extremes), also check +/// this many surrounding values. +pub fn check_near_count(_ctx: &CheckCtx) -> u64 { + if cfg!(optimizations_enabled) { 100 } else { 10 } +} + +/// Check whether extensive actions should be run or skipped. +#[expect(dead_code, reason = "extensive tests have not yet been added")] +pub fn skip_extensive_test(ctx: &CheckCtx) -> bool { + let t_env = TestEnv::from_env(ctx); + !t_env.should_run_extensive +} From 19de96c22cfba18d38b74ca79c77088c16a4ff63 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 30 Dec 2024 06:12:16 +0000 Subject: [PATCH 1016/1459] Add an iterator that ensures known size Introduce the `KnownSize` iterator wrapper, which allows providing the size at construction time. This provides an `ExactSizeIterator` implementation so we can check a generator's value count during testing. 
--- libm/crates/libm-test/src/gen.rs | 37 ++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/libm/crates/libm-test/src/gen.rs b/libm/crates/libm-test/src/gen.rs index 2d15915d9..2305d2a23 100644 --- a/libm/crates/libm-test/src/gen.rs +++ b/libm/crates/libm-test/src/gen.rs @@ -5,6 +5,43 @@ pub mod domain_logspace; pub mod edge_cases; pub mod random; +/// A wrapper to turn any iterator into an `ExactSizeIterator`. Asserts the final result to ensure +/// the provided size was correct. +#[derive(Debug)] +pub struct KnownSize { + total: u64, + current: u64, + iter: I, +} + +impl KnownSize { + pub fn new(iter: I, total: u64) -> Self { + Self { total, current: 0, iter } + } +} + +impl Iterator for KnownSize { + type Item = I::Item; + + fn next(&mut self) -> Option { + let next = self.iter.next(); + if next.is_some() { + self.current += 1; + return next; + } + + assert_eq!(self.current, self.total, "total items did not match expected"); + None + } + + fn size_hint(&self) -> (usize, Option) { + let remaining = usize::try_from(self.total - self.current).unwrap(); + (remaining, Some(remaining)) + } +} + +impl ExactSizeIterator for KnownSize {} + /// Helper type to turn any reusable input into a generator. #[derive(Clone, Debug, Default)] pub struct CachedInput { From 02ee86956d6d49f501a43b815965b96bd3c9a465 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 29 Dec 2024 11:23:08 +0000 Subject: [PATCH 1017/1459] Rewrite the random test generator Currently, all inputs are generated and then cached. This works reasonably well but it isn't very configurable or extensible (adding `f16` and `f128` is awkward). Replace this with a trait for generating random sequences of tuples. This also removes possible storage limitations of caching all inputs. 
--- libm/crates/libm-test/benches/random.rs | 7 +- libm/crates/libm-test/src/gen.rs | 69 ------ libm/crates/libm-test/src/gen/random.rs | 206 +++++++++--------- libm/crates/libm-test/src/lib.rs | 2 +- libm/crates/libm-test/src/run_cfg.rs | 32 ++- libm/crates/libm-test/src/test_traits.rs | 8 +- .../libm-test/tests/compare_built_musl.rs | 11 +- libm/crates/libm-test/tests/multiprecision.rs | 9 +- 8 files changed, 148 insertions(+), 196 deletions(-) diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 06997cd36..23f429455 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -2,8 +2,9 @@ use std::hint::black_box; use std::time::Duration; use criterion::{Criterion, criterion_main}; -use libm_test::gen::{CachedInput, random}; -use libm_test::{CheckBasis, CheckCtx, GenerateInput, MathOp, TupleCall}; +use libm_test::gen::random; +use libm_test::gen::random::RandomInput; +use libm_test::{CheckBasis, CheckCtx, MathOp, TupleCall}; /// Benchmark with this many items to get a variety const BENCH_ITER_ITEMS: usize = if cfg!(feature = "short-benchmarks") { 50 } else { 500 }; @@ -47,7 +48,7 @@ macro_rules! musl_rand_benches { fn bench_one(c: &mut Criterion, musl_extra: MuslExtra) where Op: MathOp, - CachedInput: GenerateInput, + Op::RustArgs: RandomInput, { let name = Op::NAME; diff --git a/libm/crates/libm-test/src/gen.rs b/libm/crates/libm-test/src/gen.rs index 2305d2a23..83e00f31d 100644 --- a/libm/crates/libm-test/src/gen.rs +++ b/libm/crates/libm-test/src/gen.rs @@ -1,6 +1,5 @@ //! Different generators that can create random or systematic bit patterns. -use crate::GenerateInput; pub mod domain_logspace; pub mod edge_cases; pub mod random; @@ -41,71 +40,3 @@ impl Iterator for KnownSize { } impl ExactSizeIterator for KnownSize {} - -/// Helper type to turn any reusable input into a generator. 
-#[derive(Clone, Debug, Default)] -pub struct CachedInput { - pub inputs_f32: Vec<(f32, f32, f32)>, - pub inputs_f64: Vec<(f64, f64, f64)>, - pub inputs_i32: Vec<(i32, i32, i32)>, -} - -impl GenerateInput<(f32,)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - self.inputs_f32.iter().map(|f| (f.0,)) - } -} - -impl GenerateInput<(f32, f32)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - self.inputs_f32.iter().map(|f| (f.0, f.1)) - } -} - -impl GenerateInput<(i32, f32)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - self.inputs_i32.iter().zip(self.inputs_f32.iter()).map(|(i, f)| (i.0, f.0)) - } -} - -impl GenerateInput<(f32, i32)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - GenerateInput::<(i32, f32)>::get_cases(self).map(|(i, f)| (f, i)) - } -} - -impl GenerateInput<(f32, f32, f32)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - self.inputs_f32.iter().copied() - } -} - -impl GenerateInput<(f64,)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - self.inputs_f64.iter().map(|f| (f.0,)) - } -} - -impl GenerateInput<(f64, f64)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - self.inputs_f64.iter().map(|f| (f.0, f.1)) - } -} - -impl GenerateInput<(i32, f64)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - self.inputs_i32.iter().zip(self.inputs_f64.iter()).map(|(i, f)| (i.0, f.0)) - } -} - -impl GenerateInput<(f64, i32)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - GenerateInput::<(i32, f64)>::get_cases(self).map(|(i, f)| (f, i)) - } -} - -impl GenerateInput<(f64, f64, f64)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - self.inputs_f64.iter().copied() - } -} diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs index a30a3674e..6df944317 100644 --- a/libm/crates/libm-test/src/gen/random.rs +++ b/libm/crates/libm-test/src/gen/random.rs @@ -1,120 +1,118 @@ -//! 
A simple generator that produces deterministic random input, caching to use the same -//! inputs for all functions. - +use std::env; +use std::ops::RangeInclusive; use std::sync::LazyLock; +use libm::support::Float; +use rand::distributions::{Alphanumeric, Standard}; +use rand::prelude::Distribution; use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; -use super::CachedInput; -use crate::{BaseName, CheckCtx, GenerateInput}; - -const SEED: [u8; 32] = *b"3.141592653589793238462643383279"; - -/// Number of tests to run. -// FIXME(ntests): clean this up when possible -const NTESTS: usize = { - if cfg!(optimizations_enabled) { - if crate::emulated() - || !cfg!(target_pointer_width = "64") - || cfg!(all(target_arch = "x86_64", target_vendor = "apple")) - { - // Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run - // in QEMU. - 100_000 - } else { - 5_000_000 - } - } else { - // Without optimizations just run a quick check - 800 - } -}; - -/// Tested inputs. -static TEST_CASES: LazyLock = LazyLock::new(|| make_test_cases(NTESTS)); - -/// The first argument to `jn` and `jnf` is the number of iterations. Make this a reasonable -/// value so tests don't run forever. 
-static TEST_CASES_JN: LazyLock = LazyLock::new(|| { - // Start with regular test cases - let mut cases = (*TEST_CASES).clone(); - - // These functions are extremely slow, limit them - let ntests_jn = (NTESTS / 1000).max(80); - cases.inputs_i32.truncate(ntests_jn); - cases.inputs_f32.truncate(ntests_jn); - cases.inputs_f64.truncate(ntests_jn); - - // It is easy to overflow the stack with these in debug mode - let max_iterations = if cfg!(optimizations_enabled) && cfg!(target_pointer_width = "64") { - 0xffff - } else if cfg!(windows) { - 0x00ff - } else { - 0x0fff - }; +use super::KnownSize; +use crate::run_cfg::{int_range, iteration_count}; +use crate::{CheckCtx, GeneratorKind}; - let mut rng = ChaCha8Rng::from_seed(SEED); +pub(crate) const SEED_ENV: &str = "LIBM_SEED"; - for case in cases.inputs_i32.iter_mut() { - case.0 = rng.gen_range(3..=max_iterations); - } +pub(crate) static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| { + let s = env::var(SEED_ENV).unwrap_or_else(|_| { + let mut rng = rand::thread_rng(); + (0..32).map(|_| rng.sample(Alphanumeric) as char).collect() + }); - cases + s.as_bytes().try_into().unwrap_or_else(|_| { + panic!("Seed must be 32 characters, got `{s}`"); + }) }); -fn make_test_cases(ntests: usize) -> CachedInput { - let mut rng = ChaCha8Rng::from_seed(SEED); - - // make sure we include some basic cases - let mut inputs_i32 = vec![(0, 0, 0), (1, 1, 1), (-1, -1, -1)]; - let mut inputs_f32 = vec![ - (0.0, 0.0, 0.0), - (f32::EPSILON, f32::EPSILON, f32::EPSILON), - (f32::INFINITY, f32::INFINITY, f32::INFINITY), - (f32::NEG_INFINITY, f32::NEG_INFINITY, f32::NEG_INFINITY), - (f32::MAX, f32::MAX, f32::MAX), - (f32::MIN, f32::MIN, f32::MIN), - (f32::MIN_POSITIVE, f32::MIN_POSITIVE, f32::MIN_POSITIVE), - (f32::NAN, f32::NAN, f32::NAN), - ]; - let mut inputs_f64 = vec![ - (0.0, 0.0, 0.0), - (f64::EPSILON, f64::EPSILON, f64::EPSILON), - (f64::INFINITY, f64::INFINITY, f64::INFINITY), - (f64::NEG_INFINITY, f64::NEG_INFINITY, f64::NEG_INFINITY), - 
(f64::MAX, f64::MAX, f64::MAX), - (f64::MIN, f64::MIN, f64::MIN), - (f64::MIN_POSITIVE, f64::MIN_POSITIVE, f64::MIN_POSITIVE), - (f64::NAN, f64::NAN, f64::NAN), - ]; - - inputs_i32.extend((0..(ntests - inputs_i32.len())).map(|_| rng.gen::<(i32, i32, i32)>())); - - // Generate integers to get a full range of bitpatterns, then convert back to - // floats. - inputs_f32.extend((0..(ntests - inputs_f32.len())).map(|_| { - let ints = rng.gen::<(u32, u32, u32)>(); - (f32::from_bits(ints.0), f32::from_bits(ints.1), f32::from_bits(ints.2)) - })); - inputs_f64.extend((0..(ntests - inputs_f64.len())).map(|_| { - let ints = rng.gen::<(u64, u64, u64)>(); - (f64::from_bits(ints.0), f64::from_bits(ints.1), f64::from_bits(ints.2)) - })); - - CachedInput { inputs_f32, inputs_f64, inputs_i32 } +/// Generate a sequence of random values of this type. +pub trait RandomInput { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator; } -/// Create a test case iterator. -pub fn get_test_cases(ctx: &CheckCtx) -> impl Iterator +/// Generate a sequence of deterministically random floats. +fn random_floats(count: u64) -> impl Iterator where - CachedInput: GenerateInput, + Standard: Distribution, { - let inputs = if ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn { - &TEST_CASES_JN - } else { - &TEST_CASES + let mut rng = ChaCha8Rng::from_seed(*SEED); + + // Generate integers to get a full range of bitpatterns (including NaNs), then convert back + // to the float type. + (0..count).map(move |_| F::from_bits(rng.gen::())) +} + +/// Generate a sequence of deterministically random `i32`s within a specified range. +fn random_ints(count: u64, range: RangeInclusive) -> impl Iterator { + let mut rng = ChaCha8Rng::from_seed(*SEED); + (0..count).map(move |_| rng.gen_range::(range.clone())) +} + +macro_rules! 
impl_random_input { + ($fty:ty) => { + impl RandomInput for ($fty,) { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + let count = iteration_count(ctx, GeneratorKind::Random, 0); + let iter = random_floats(count).map(|f: $fty| (f,)); + KnownSize::new(iter, count) + } + } + + impl RandomInput for ($fty, $fty) { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + let count0 = iteration_count(ctx, GeneratorKind::Random, 0); + let count1 = iteration_count(ctx, GeneratorKind::Random, 1); + let iter = random_floats(count0) + .flat_map(move |f1: $fty| random_floats(count1).map(move |f2: $fty| (f1, f2))); + KnownSize::new(iter, count0 * count1) + } + } + + impl RandomInput for ($fty, $fty, $fty) { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + let count0 = iteration_count(ctx, GeneratorKind::Random, 0); + let count1 = iteration_count(ctx, GeneratorKind::Random, 1); + let count2 = iteration_count(ctx, GeneratorKind::Random, 2); + let iter = random_floats(count0).flat_map(move |f1: $fty| { + random_floats(count1).flat_map(move |f2: $fty| { + random_floats(count2).map(move |f3: $fty| (f1, f2, f3)) + }) + }); + KnownSize::new(iter, count0 * count1 * count2) + } + } + + impl RandomInput for (i32, $fty) { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + let count0 = iteration_count(ctx, GeneratorKind::Random, 0); + let count1 = iteration_count(ctx, GeneratorKind::Random, 1); + let range0 = int_range(ctx, 0); + let iter = random_ints(count0, range0) + .flat_map(move |f1: i32| random_floats(count1).map(move |f2: $fty| (f1, f2))); + KnownSize::new(iter, count0 * count1) + } + } + + impl RandomInput for ($fty, i32) { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + let count0 = iteration_count(ctx, GeneratorKind::Random, 0); + let count1 = iteration_count(ctx, GeneratorKind::Random, 1); + let range1 = int_range(ctx, 1); + let iter = random_floats(count0).flat_map(move |f1: $fty| { + random_ints(count1, 
range1.clone()).map(move |f2: i32| (f1, f2)) + }); + KnownSize::new(iter, count0 * count1) + } + } }; - inputs.get_cases() +} + +impl_random_input!(f32); +impl_random_input!(f64); + +/// Create a test case iterator. +pub fn get_test_cases( + ctx: &CheckCtx, +) -> impl Iterator + use<'_, RustArgs> { + RustArgs::get_cases(ctx) } diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 80ec23736..8a4e782df 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -26,7 +26,7 @@ pub use num::{FloatExt, logspace}; pub use op::{BaseName, FloatTy, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet, Ty}; pub use precision::{MaybeOverride, SpecialCase, default_ulp}; pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind}; -pub use test_traits::{CheckOutput, GenerateInput, Hex, TupleCall}; +pub use test_traits::{CheckOutput, Hex, TupleCall}; /// Result type for tests is usually from `anyhow`. Most times there is no success value to /// propagate. diff --git a/libm/crates/libm-test/src/run_cfg.rs b/libm/crates/libm-test/src/run_cfg.rs index 46a6a1fad..9cede0cc7 100644 --- a/libm/crates/libm-test/src/run_cfg.rs +++ b/libm/crates/libm-test/src/run_cfg.rs @@ -1,8 +1,10 @@ //! Configuration for how tests get run. -use std::env; +use std::ops::RangeInclusive; use std::sync::LazyLock; +use std::{env, str}; +use crate::gen::random::{SEED, SEED_ENV}; use crate::{BaseName, FloatTy, Identifier, test_log}; /// The environment variable indicating which extensive tests should be run. 
@@ -188,9 +190,16 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) - }; let total = ntests.pow(t_env.input_count.try_into().unwrap()); + let seed_msg = match gen_kind { + GeneratorKind::Domain => String::new(), + GeneratorKind::Random => { + format!(" using `{SEED_ENV}={}`", str::from_utf8(SEED.as_slice()).unwrap()) + } + }; + test_log(&format!( "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {ntests} iterations \ - ({total} total)", + ({total} total){seed_msg}", basis = ctx.basis, fn_ident = ctx.fn_ident, arg = argnum + 1, @@ -200,6 +209,25 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) - ntests } +/// Some tests require that an integer be kept within reasonable limits; generate that here. +pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive { + let t_env = TestEnv::from_env(ctx); + + if !matches!(ctx.base_name, BaseName::Jn | BaseName::Yn) { + return i32::MIN..=i32::MAX; + } + + assert_eq!(argnum, 0, "For `jn`/`yn`, only the first argument takes an integer"); + + // The integer argument to `jn` is an iteration count. Limit this to ensure tests can be + // completed in a reasonable amount of time. + if t_env.slow_platform || !cfg!(optimizations_enabled) { + (-0xf)..=0xff + } else { + (-0xff)..=0xffff + } +} + /// For domain tests, limit how many asymptotes or specified check points we test. pub fn check_point_count(ctx: &CheckCtx) -> usize { let t_env = TestEnv::from_env(ctx); diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index 6b833dfb5..261d1f254 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -1,8 +1,7 @@ //! Traits related to testing. //! -//! There are three main traits in this module: +//! There are two main traits in this module: //! -//! - `GenerateInput`: implemented on any types that create test cases. //! 
- `TupleCall`: implemented on tuples to allow calling them as function arguments. //! - `CheckOutput`: implemented on anything that is an output type for validation against an //! expected value. @@ -13,11 +12,6 @@ use anyhow::{Context, bail, ensure}; use crate::{CheckCtx, Float, Int, MaybeOverride, SpecialCase, TestResult}; -/// Implement this on types that can generate a sequence of tuples for test input. -pub trait GenerateInput { - fn get_cases(&self) -> impl Iterator; -} - /// Trait for calling a function with a tuple as arguments. /// /// Implemented on the tuple with the function signature as the generic (so we can use the same diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 71f080ab1..ecd379a0a 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -9,8 +9,9 @@ // There are some targets we can't build musl for #![cfg(feature = "build-musl")] -use libm_test::gen::{CachedInput, random}; -use libm_test::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall}; +use libm_test::gen::random; +use libm_test::gen::random::RandomInput; +use libm_test::{CheckBasis, CheckCtx, CheckOutput, MathOp, TupleCall}; macro_rules! musl_rand_tests { ( @@ -21,16 +22,16 @@ macro_rules! 
musl_rand_tests { #[test] $(#[$attr])* fn [< musl_random_ $fn_name >]() { - test_one::(musl_math_sys::$fn_name); + test_one_random::(musl_math_sys::$fn_name); } } }; } -fn test_one(musl_fn: Op::CFn) +fn test_one_random(musl_fn: Op::CFn) where Op: MathOp, - CachedInput: GenerateInput, + Op::RustArgs: RandomInput, { let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl); let cases = random::get_test_cases::(&ctx); diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 4cdba0942..960c370d4 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -3,11 +3,10 @@ #![cfg(feature = "test-multiprecision")] use libm_test::domain::HasDomain; -use libm_test::gen::{CachedInput, domain_logspace, edge_cases, random}; +use libm_test::gen::random::RandomInput; +use libm_test::gen::{domain_logspace, edge_cases, random}; use libm_test::mpfloat::MpOp; -use libm_test::{ - CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, OpFTy, OpRustFn, OpRustRet, TupleCall, -}; +use libm_test::{CheckBasis, CheckCtx, CheckOutput, MathOp, OpFTy, OpRustFn, OpRustRet, TupleCall}; /// Test against MPFR with random inputs. macro_rules! mp_rand_tests { @@ -29,7 +28,7 @@ macro_rules! 
mp_rand_tests { fn test_one_random() where Op: MathOp + MpOp, - CachedInput: GenerateInput, + Op::RustArgs: RandomInput, { let mut mp_vals = Op::new_mp(); let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr); From 04238bc7adf76edb54fdbc1aa77b82c8ead30f39 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 Jan 2025 00:36:18 +0000 Subject: [PATCH 1018/1459] Update precision based on new test results --- libm/crates/libm-test/src/precision.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 8bedcde44..a8efe1015 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -90,8 +90,15 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { Bn::Exp10 if usize::BITS < 64 => ulp = 4, Bn::Lgamma | Bn::LgammaR => ulp = 400, Bn::Tanh => ulp = 4, - _ if ctx.fn_ident == Id::Sincosf => ulp = 500, - _ if ctx.fn_ident == Id::Tgamma => ulp = 20, + _ => (), + } + + match ctx.fn_ident { + // FIXME(#401): musl has an incorrect result here. + Id::Fdim => ulp = 2, + Id::Jnf | Id::Ynf => ulp = 4000, + Id::Sincosf => ulp = 500, + Id::Tgamma => ulp = 20, _ => (), } } @@ -99,6 +106,8 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { // In some cases, our implementation is less accurate than musl on i586. if cfg!(x86_no_sse) { match ctx.fn_ident { + Id::Asinh => ulp = 3, + Id::Asinhf => ulp = 3, Id::Log1p | Id::Log1pf => ulp = 2, Id::Round => ulp = 1, Id::Tan => ulp = 2, From 5f725f3683fc736fcd79b36825e241eed9743990 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 Jan 2025 02:52:24 +0000 Subject: [PATCH 1019/1459] Rename `unstable-test-support` to `unstable-public-internals` The `support` module that this feature makes public will be useful for implementations in `compiler-builtins`, not only for testing. Give this feature a more accurate name. 
--- libm/Cargo.toml | 2 +- libm/crates/compiler-builtins-smoke-test/Cargo.toml | 13 +++++-------- libm/crates/libm-test/Cargo.toml | 2 +- libm/src/math/mod.rs | 6 +++--- 4 files changed, 10 insertions(+), 13 deletions(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index bfc11509e..dc362779e 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -27,7 +27,7 @@ unstable = ["unstable-intrinsics", "unstable-float"] unstable-intrinsics = [] # Make some internal things public for testing. -unstable-test-support = [] +unstable-public-internals = [] # Enable the nightly-only `f16` and `f128`. unstable-float = [] diff --git a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml index 82cfeecb9..1f09ce99c 100644 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -9,14 +9,6 @@ publish = false test = false bench = false -[features] -# Duplicated from libm's Cargo.toml -unstable = [] -unstable-intrinsics = [] -unstable-test-support = [] -checked = [] -force-soft-floats = [] - [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = [ "cfg(arch_enabled)", @@ -24,4 +16,9 @@ unexpected_cfgs = { level = "warn", check-cfg = [ "cfg(f128_enabled)", "cfg(f16_enabled)", "cfg(intrinsics_enabled)", + 'cfg(feature, values("checked"))', + 'cfg(feature, values("force-soft-floats"))', + 'cfg(feature, values("unstable"))', + 'cfg(feature, values("unstable-intrinsics"))', + 'cfg(feature, values("unstable-public-internals"))', ] } diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index f2dd88fa1..9b3ab5c53 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -26,7 +26,7 @@ short-benchmarks = [] [dependencies] anyhow = "1.0.90" az = { version = "1.2.1", optional = true } -libm = { path = "../..", features = ["unstable-test-support"] } +libm = { path = "../..", features = ["unstable-public-internals"] } 
libm-macros = { path = "../libm-macros" } musl-math-sys = { path = "../musl-math-sys", optional = true } paste = "1.0.15" diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index e7b21de67..9003a8342 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -76,15 +76,15 @@ macro_rules! div { // `support` may be public for testing #[macro_use] -#[cfg(feature = "unstable-test-support")] +#[cfg(feature = "unstable-public-internals")] pub mod support; #[macro_use] -#[cfg(not(feature = "unstable-test-support"))] +#[cfg(not(feature = "unstable-public-internals"))] mod support; cfg_if! { - if #[cfg(feature = "unstable-test-support")] { + if #[cfg(feature = "unstable-public-internals")] { pub mod generic; } else { mod generic; From 6fdcbdc0b240d1cc2a22aba8f3ec32a726b912e9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 Jan 2025 03:00:54 +0000 Subject: [PATCH 1020/1459] Increase the precision for `jn` and `jnf` New random seeds seem to indicate that this test does have some more failures, this is a recent failure on i586: ---- musl_random_jnf stdout ---- Random Musl jnf arg 1/2: 100 iterations (10000 total) using `LIBM_SEED=nLfzQ3U1OBVvqWaMBcto84UTMsC5FIaC` Random Musl jnf arg 2/2: 100 iterations (10000 total) using `LIBM_SEED=nLfzQ3U1OBVvqWaMBcto84UTMsC5FIaC` thread 'musl_random_jnf' panicked at crates/libm-test/tests/compare_built_musl.rs:43:51: called `Result::unwrap()` on an `Err` value: input: (205, 5497.891) (0x000000cd, 0x45abcf21) expected: 7.3291517e-6 0x36f5ecef actual: 7.331668e-6 0x36f6028c Caused by: ulp 5533 > 4000 It seems unlikely that `jn` would somehow have better precision than `j0`/`j1`, so just use the same precision. 
--- libm/crates/libm-test/src/precision.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index a8efe1015..4a6ca8af7 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -47,8 +47,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { Bn::Tgamma if ctx.fn_ident != Id::Tgamma => 0, // Bessel functions have large inaccuracies. - Bn::J0 | Bn::J1 | Bn::Y0 | Bn::Y1 => 8_000_000, - Bn::Jn | Bn::Yn => 1_000, + Bn::J0 | Bn::J1 | Bn::Y0 | Bn::Y1 | Bn::Jn | Bn::Yn => 8_000_000, // For all other operations, specify our implementation's worst case precision. Bn::Acos => 1, @@ -96,7 +95,6 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { match ctx.fn_ident { // FIXME(#401): musl has an incorrect result here. Id::Fdim => ulp = 2, - Id::Jnf | Id::Ynf => ulp = 4000, Id::Sincosf => ulp = 500, Id::Tgamma => ulp = 20, _ => (), From 80a0d387cbf0e3b60f685f53fad2fa35a6584e98 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 5 Jan 2025 01:58:40 +0000 Subject: [PATCH 1021/1459] Add tests against MPFR for `ilogb` and `ilogbf` --- libm/crates/libm-test/src/mpfloat.rs | 23 +++++++++++++++++++ libm/crates/libm-test/tests/multiprecision.rs | 6 ----- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 8b8298004..ad98fafc8 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -277,6 +277,29 @@ macro_rules! impl_op_for_ty { } } + impl MpOp for crate::op::[]::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + + // `get_exp` follows `frexp` for `0.5 <= |m| < 1.0`. Adjust the exponent by + // one to scale the significand to `1.0 <= |m| < 2.0`. 
+ this.get_exp().map(|v| v - 1).unwrap_or_else(|| { + if this.is_infinite() { + i32::MAX + } else { + // Zero or NaN + i32::MIN + } + }) + } + } + impl MpOp for crate::op::[]::Routine { type MpTy = MpFloat; diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 960c370d4..42ec965c1 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -51,8 +51,6 @@ libm_macros::for_each_function! { ], skip: [ // FIXME: MPFR tests needed - ilogb, - ilogbf, remquo, remquof, @@ -150,9 +148,5 @@ libm_macros::for_each_function! { scalbnf, yn, ynf, - - // FIXME: MPFR tests needed - ilogb, - ilogbf, ], } From 7960a9d12286943c4dab5591994ed6733e4a2379 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 2 Jan 2025 10:21:35 +0000 Subject: [PATCH 1022/1459] Add more detailed definition output for `update-api-list.py` Update the script to produce, in addition to the simple text list, a JSON file listing routine names, the types they work with, and the source files that contain a function with the routine name. This gets consumed by another script and will be used to determine which extensive CI jobs to run. --- libm/etc/function-definitions.json | 764 +++++++++++++++++++++++++++++ libm/etc/update-api-list.py | 246 +++++++--- 2 files changed, 945 insertions(+), 65 deletions(-) create mode 100644 libm/etc/function-definitions.json diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json new file mode 100644 index 000000000..4b10812c3 --- /dev/null +++ b/libm/etc/function-definitions.json @@ -0,0 +1,764 @@ +{ + "__comment": "Autogenerated by update-api-list.py. List of files that define a function with a given name. 
This file is checked in to make it obvious if refactoring breaks things", + "acos": { + "sources": [ + "src/libm_helper.rs", + "src/math/acos.rs" + ], + "type": "f64" + }, + "acosf": { + "sources": [ + "src/math/acosf.rs" + ], + "type": "f32" + }, + "acosh": { + "sources": [ + "src/libm_helper.rs", + "src/math/acosh.rs" + ], + "type": "f64" + }, + "acoshf": { + "sources": [ + "src/math/acoshf.rs" + ], + "type": "f32" + }, + "asin": { + "sources": [ + "src/libm_helper.rs", + "src/math/asin.rs" + ], + "type": "f64" + }, + "asinf": { + "sources": [ + "src/math/asinf.rs" + ], + "type": "f32" + }, + "asinh": { + "sources": [ + "src/libm_helper.rs", + "src/math/asinh.rs" + ], + "type": "f64" + }, + "asinhf": { + "sources": [ + "src/math/asinhf.rs" + ], + "type": "f32" + }, + "atan": { + "sources": [ + "src/libm_helper.rs", + "src/math/atan.rs" + ], + "type": "f64" + }, + "atan2": { + "sources": [ + "src/libm_helper.rs", + "src/math/atan2.rs" + ], + "type": "f64" + }, + "atan2f": { + "sources": [ + "src/math/atan2f.rs" + ], + "type": "f32" + }, + "atanf": { + "sources": [ + "src/math/atanf.rs" + ], + "type": "f32" + }, + "atanh": { + "sources": [ + "src/libm_helper.rs", + "src/math/atanh.rs" + ], + "type": "f64" + }, + "atanhf": { + "sources": [ + "src/math/atanhf.rs" + ], + "type": "f32" + }, + "cbrt": { + "sources": [ + "src/libm_helper.rs", + "src/math/cbrt.rs" + ], + "type": "f64" + }, + "cbrtf": { + "sources": [ + "src/math/cbrtf.rs" + ], + "type": "f32" + }, + "ceil": { + "sources": [ + "src/libm_helper.rs", + "src/math/arch/i586.rs", + "src/math/arch/intrinsics.rs", + "src/math/ceil.rs" + ], + "type": "f64" + }, + "ceilf": { + "sources": [ + "src/math/arch/intrinsics.rs", + "src/math/ceilf.rs" + ], + "type": "f32" + }, + "copysign": { + "sources": [ + "src/libm_helper.rs", + "src/math/copysign.rs", + "src/math/generic/copysign.rs", + "src/math/support/float_traits.rs" + ], + "type": "f64" + }, + "copysignf": { + "sources": [ + "src/math/copysignf.rs", + 
"src/math/generic/copysign.rs" + ], + "type": "f32" + }, + "cos": { + "sources": [ + "src/libm_helper.rs", + "src/math/cos.rs" + ], + "type": "f64" + }, + "cosf": { + "sources": [ + "src/math/cosf.rs" + ], + "type": "f32" + }, + "cosh": { + "sources": [ + "src/libm_helper.rs", + "src/math/cosh.rs" + ], + "type": "f64" + }, + "coshf": { + "sources": [ + "src/math/coshf.rs" + ], + "type": "f32" + }, + "erf": { + "sources": [ + "src/libm_helper.rs", + "src/math/erf.rs" + ], + "type": "f64" + }, + "erfc": { + "sources": [ + "src/libm_helper.rs", + "src/math/erf.rs" + ], + "type": "f64" + }, + "erfcf": { + "sources": [ + "src/math/erff.rs" + ], + "type": "f32" + }, + "erff": { + "sources": [ + "src/math/erff.rs" + ], + "type": "f32" + }, + "exp": { + "sources": [ + "src/libm_helper.rs", + "src/math/exp.rs", + "src/math/support/float_traits.rs" + ], + "type": "f64" + }, + "exp10": { + "sources": [ + "src/libm_helper.rs", + "src/math/exp10.rs" + ], + "type": "f64" + }, + "exp10f": { + "sources": [ + "src/math/exp10f.rs" + ], + "type": "f32" + }, + "exp2": { + "sources": [ + "src/libm_helper.rs", + "src/math/exp2.rs" + ], + "type": "f64" + }, + "exp2f": { + "sources": [ + "src/math/exp2f.rs" + ], + "type": "f32" + }, + "expf": { + "sources": [ + "src/math/expf.rs" + ], + "type": "f32" + }, + "expm1": { + "sources": [ + "src/libm_helper.rs", + "src/math/expm1.rs" + ], + "type": "f64" + }, + "expm1f": { + "sources": [ + "src/math/expm1f.rs" + ], + "type": "f32" + }, + "fabs": { + "sources": [ + "src/libm_helper.rs", + "src/math/arch/intrinsics.rs", + "src/math/fabs.rs", + "src/math/generic/fabs.rs" + ], + "type": "f64" + }, + "fabsf": { + "sources": [ + "src/math/arch/intrinsics.rs", + "src/math/fabsf.rs", + "src/math/generic/fabs.rs" + ], + "type": "f32" + }, + "fdim": { + "sources": [ + "src/libm_helper.rs", + "src/math/fdim.rs" + ], + "type": "f64" + }, + "fdimf": { + "sources": [ + "src/math/fdimf.rs" + ], + "type": "f32" + }, + "floor": { + "sources": [ + 
"src/libm_helper.rs", + "src/math/arch/i586.rs", + "src/math/arch/intrinsics.rs", + "src/math/floor.rs" + ], + "type": "f64" + }, + "floorf": { + "sources": [ + "src/math/arch/intrinsics.rs", + "src/math/floorf.rs" + ], + "type": "f32" + }, + "fma": { + "sources": [ + "src/libm_helper.rs", + "src/math/fma.rs" + ], + "type": "f64" + }, + "fmaf": { + "sources": [ + "src/math/fmaf.rs" + ], + "type": "f32" + }, + "fmax": { + "sources": [ + "src/libm_helper.rs", + "src/math/fmax.rs" + ], + "type": "f64" + }, + "fmaxf": { + "sources": [ + "src/math/fmaxf.rs" + ], + "type": "f32" + }, + "fmin": { + "sources": [ + "src/libm_helper.rs", + "src/math/fmin.rs" + ], + "type": "f64" + }, + "fminf": { + "sources": [ + "src/math/fminf.rs" + ], + "type": "f32" + }, + "fmod": { + "sources": [ + "src/libm_helper.rs", + "src/math/fmod.rs" + ], + "type": "f64" + }, + "fmodf": { + "sources": [ + "src/math/fmodf.rs" + ], + "type": "f32" + }, + "frexp": { + "sources": [ + "src/libm_helper.rs", + "src/math/frexp.rs" + ], + "type": "f64" + }, + "frexpf": { + "sources": [ + "src/math/frexpf.rs" + ], + "type": "f32" + }, + "hypot": { + "sources": [ + "src/libm_helper.rs", + "src/math/hypot.rs" + ], + "type": "f64" + }, + "hypotf": { + "sources": [ + "src/math/hypotf.rs" + ], + "type": "f32" + }, + "ilogb": { + "sources": [ + "src/libm_helper.rs", + "src/math/ilogb.rs" + ], + "type": "f64" + }, + "ilogbf": { + "sources": [ + "src/math/ilogbf.rs" + ], + "type": "f32" + }, + "j0": { + "sources": [ + "src/libm_helper.rs", + "src/math/j0.rs" + ], + "type": "f64" + }, + "j0f": { + "sources": [ + "src/math/j0f.rs" + ], + "type": "f32" + }, + "j1": { + "sources": [ + "src/libm_helper.rs", + "src/math/j1.rs" + ], + "type": "f64" + }, + "j1f": { + "sources": [ + "src/math/j1f.rs" + ], + "type": "f32" + }, + "jn": { + "sources": [ + "src/libm_helper.rs", + "src/math/jn.rs" + ], + "type": "f64" + }, + "jnf": { + "sources": [ + "src/math/jnf.rs" + ], + "type": "f32" + }, + "ldexp": { + "sources": [ + 
"src/libm_helper.rs", + "src/math/ldexp.rs" + ], + "type": "f64" + }, + "ldexpf": { + "sources": [ + "src/math/ldexpf.rs" + ], + "type": "f32" + }, + "lgamma": { + "sources": [ + "src/libm_helper.rs", + "src/math/lgamma.rs" + ], + "type": "f64" + }, + "lgamma_r": { + "sources": [ + "src/libm_helper.rs", + "src/math/lgamma_r.rs" + ], + "type": "f64" + }, + "lgammaf": { + "sources": [ + "src/math/lgammaf.rs" + ], + "type": "f32" + }, + "lgammaf_r": { + "sources": [ + "src/math/lgammaf_r.rs" + ], + "type": "f32" + }, + "log": { + "sources": [ + "src/libm_helper.rs", + "src/math/log.rs" + ], + "type": "f64" + }, + "log10": { + "sources": [ + "src/libm_helper.rs", + "src/math/log10.rs" + ], + "type": "f64" + }, + "log10f": { + "sources": [ + "src/math/log10f.rs" + ], + "type": "f32" + }, + "log1p": { + "sources": [ + "src/libm_helper.rs", + "src/math/log1p.rs" + ], + "type": "f64" + }, + "log1pf": { + "sources": [ + "src/math/log1pf.rs" + ], + "type": "f32" + }, + "log2": { + "sources": [ + "src/libm_helper.rs", + "src/math/log2.rs" + ], + "type": "f64" + }, + "log2f": { + "sources": [ + "src/math/log2f.rs" + ], + "type": "f32" + }, + "logf": { + "sources": [ + "src/math/logf.rs" + ], + "type": "f32" + }, + "modf": { + "sources": [ + "src/libm_helper.rs", + "src/math/modf.rs" + ], + "type": "f64" + }, + "modff": { + "sources": [ + "src/math/modff.rs" + ], + "type": "f32" + }, + "nextafter": { + "sources": [ + "src/libm_helper.rs", + "src/math/nextafter.rs" + ], + "type": "f64" + }, + "nextafterf": { + "sources": [ + "src/math/nextafterf.rs" + ], + "type": "f32" + }, + "pow": { + "sources": [ + "src/libm_helper.rs", + "src/math/pow.rs" + ], + "type": "f64" + }, + "powf": { + "sources": [ + "src/math/powf.rs" + ], + "type": "f32" + }, + "remainder": { + "sources": [ + "src/libm_helper.rs", + "src/math/remainder.rs" + ], + "type": "f64" + }, + "remainderf": { + "sources": [ + "src/math/remainderf.rs" + ], + "type": "f32" + }, + "remquo": { + "sources": [ + 
"src/libm_helper.rs", + "src/math/remquo.rs" + ], + "type": "f64" + }, + "remquof": { + "sources": [ + "src/math/remquof.rs" + ], + "type": "f32" + }, + "rint": { + "sources": [ + "src/libm_helper.rs", + "src/math/rint.rs" + ], + "type": "f64" + }, + "rintf": { + "sources": [ + "src/math/rintf.rs" + ], + "type": "f32" + }, + "round": { + "sources": [ + "src/libm_helper.rs", + "src/math/round.rs" + ], + "type": "f64" + }, + "roundf": { + "sources": [ + "src/math/roundf.rs" + ], + "type": "f32" + }, + "scalbn": { + "sources": [ + "src/libm_helper.rs", + "src/math/scalbn.rs" + ], + "type": "f64" + }, + "scalbnf": { + "sources": [ + "src/math/scalbnf.rs" + ], + "type": "f32" + }, + "sin": { + "sources": [ + "src/libm_helper.rs", + "src/math/sin.rs" + ], + "type": "f64" + }, + "sincos": { + "sources": [ + "src/libm_helper.rs", + "src/math/sincos.rs" + ], + "type": "f64" + }, + "sincosf": { + "sources": [ + "src/math/sincosf.rs" + ], + "type": "f32" + }, + "sinf": { + "sources": [ + "src/math/sinf.rs" + ], + "type": "f32" + }, + "sinh": { + "sources": [ + "src/libm_helper.rs", + "src/math/sinh.rs" + ], + "type": "f64" + }, + "sinhf": { + "sources": [ + "src/math/sinhf.rs" + ], + "type": "f32" + }, + "sqrt": { + "sources": [ + "src/libm_helper.rs", + "src/math/arch/i686.rs", + "src/math/arch/intrinsics.rs", + "src/math/sqrt.rs" + ], + "type": "f64" + }, + "sqrtf": { + "sources": [ + "src/math/arch/i686.rs", + "src/math/arch/intrinsics.rs", + "src/math/sqrtf.rs" + ], + "type": "f32" + }, + "tan": { + "sources": [ + "src/libm_helper.rs", + "src/math/tan.rs" + ], + "type": "f64" + }, + "tanf": { + "sources": [ + "src/math/tanf.rs" + ], + "type": "f32" + }, + "tanh": { + "sources": [ + "src/libm_helper.rs", + "src/math/tanh.rs" + ], + "type": "f64" + }, + "tanhf": { + "sources": [ + "src/math/tanhf.rs" + ], + "type": "f32" + }, + "tgamma": { + "sources": [ + "src/libm_helper.rs", + "src/math/tgamma.rs" + ], + "type": "f64" + }, + "tgammaf": { + "sources": [ + 
"src/math/tgammaf.rs" + ], + "type": "f32" + }, + "trunc": { + "sources": [ + "src/libm_helper.rs", + "src/math/arch/intrinsics.rs", + "src/math/trunc.rs" + ], + "type": "f64" + }, + "truncf": { + "sources": [ + "src/math/arch/intrinsics.rs", + "src/math/truncf.rs" + ], + "type": "f32" + }, + "y0": { + "sources": [ + "src/libm_helper.rs", + "src/math/j0.rs" + ], + "type": "f64" + }, + "y0f": { + "sources": [ + "src/math/j0f.rs" + ], + "type": "f32" + }, + "y1": { + "sources": [ + "src/libm_helper.rs", + "src/math/j1.rs" + ], + "type": "f64" + }, + "y1f": { + "sources": [ + "src/math/j1f.rs" + ], + "type": "f32" + }, + "yn": { + "sources": [ + "src/libm_helper.rs", + "src/math/jn.rs" + ], + "type": "f64" + }, + "ynf": { + "sources": [ + "src/math/jnf.rs" + ], + "type": "f32" + } +} diff --git a/libm/etc/update-api-list.py b/libm/etc/update-api-list.py index 7284a628c..a4587aa81 100755 --- a/libm/etc/update-api-list.py +++ b/libm/etc/update-api-list.py @@ -3,68 +3,166 @@ functions are covered by our macros. """ +import difflib import json import subprocess as sp import sys -import difflib +from dataclasses import dataclass +from glob import glob from pathlib import Path -from typing import Any +from typing import Any, TypeAlias ETC_DIR = Path(__file__).parent +IndexTy: TypeAlias = dict[str, dict[str, Any]] +"""Type of the `index` item in rustdoc's JSON output""" -def get_rustdoc_json() -> dict[Any, Any]: - """Get rustdoc's JSON output for the `libm` crate.""" - - librs_path = ETC_DIR.joinpath("../src/lib.rs") - j = sp.check_output( - [ - "rustdoc", - librs_path, - "--edition=2021", - "--output-format=json", - "-Zunstable-options", - "-o-", - ], - text=True, - ) - j = json.loads(j) - return j - -def list_public_functions() -> list[str]: - """Get a list of public functions from rustdoc JSON output. - - Note that this only finds functions that are reexported in `lib.rs`, this will - need to be adjusted if we need to account for functions that are defined there. 
+@dataclass +class Crate: + """Representation of public interfaces and function defintion locations in + `libm`. """ - names = [] - index: dict[str, dict[str, Any]] = get_rustdoc_json()["index"] - for item in index.values(): - # Find public items - if item["visibility"] != "public": - continue - - # Find only reexports - if "use" not in item["inner"].keys(): - continue - # Locate the item that is reexported - id = item["inner"]["use"]["id"] - srcitem = index.get(str(id)) - - # External crate - if srcitem is None: - continue - - # Skip if not a function - if "function" not in srcitem["inner"].keys(): - continue - - names.append(srcitem["name"]) - - names.sort() - return names + public_functions: list[str] + """List of all public functions.""" + defs: dict[str, list[str]] + """Map from `name->[source files]` to find all places that define a public + function. We track this to know which tests need to be rerun when specific files + get updated. + """ + types: dict[str, str] + """Map from `name->type`.""" + + def __init__(self) -> None: + self.public_functions = [] + self.defs = {} + self.types = {} + + j = self.get_rustdoc_json() + index: IndexTy = j["index"] + self._init_function_list(index) + self._init_defs(index) + self._init_types() + + @staticmethod + def get_rustdoc_json() -> dict[Any, Any]: + """Get rustdoc's JSON output for the `libm` crate.""" + + j = sp.check_output( + [ + "rustdoc", + "src/lib.rs", + "--edition=2021", + "--document-private-items", + "--output-format=json", + "-Zunstable-options", + "-o-", + ], + cwd=ETC_DIR.parent, + text=True, + ) + j = json.loads(j) + return j + + def _init_function_list(self, index: IndexTy) -> None: + """Get a list of public functions from rustdoc JSON output. + + Note that this only finds functions that are reexported in `lib.rs`, this will + need to be adjusted if we need to account for functions that are defined there, or + glob reexports in other locations. 
+ """ + # Filter out items that are not public + public = [i for i in index.values() if i["visibility"] == "public"] + + # Collect a list of source IDs for reexported items in `lib.rs` or `mod math`. + use = (i for i in public if "use" in i["inner"]) + use = ( + i for i in use if i["span"]["filename"] in ["src/math/mod.rs", "src/lib.rs"] + ) + reexported_ids = [item["inner"]["use"]["id"] for item in use] + + # Collect a list of reexported items that are functions + for id in reexported_ids: + srcitem = index.get(str(id)) + # External crate + if srcitem is None: + continue + + # Skip if not a function + if "function" not in srcitem["inner"]: + continue + + self.public_functions.append(srcitem["name"]) + self.public_functions.sort() + + def _init_defs(self, index: IndexTy) -> None: + defs = {name: set() for name in self.public_functions} + funcs = (i for i in index.values() if "function" in i["inner"]) + funcs = (f for f in funcs if f["name"] in self.public_functions) + for func in funcs: + defs[func["name"]].add(func["span"]["filename"]) + + # A lot of the `arch` module is often configured out so doesn't show up in docs. Use + # string matching as a fallback. 
+ for fname in glob("src/math/arch/**.rs", root_dir=ETC_DIR.parent): + contents = Path(fname).read_text() + + for name in self.public_functions: + if f"fn {name}" in contents: + defs[name].add(fname) + + for name, sources in defs.items(): + base_sources = defs[base_name(name)[0]] + for src in (s for s in base_sources if "generic" in s): + sources.add(src) + + # Sort the set + self.defs = {k: sorted(v) for (k, v) in defs.items()} + + def _init_types(self) -> None: + self.types = {name: base_name(name)[1] for name in self.public_functions} + + def write_function_list(self, check: bool) -> None: + """Collect the list of public functions to a simple text file.""" + output = "# autogenerated by update-api-list.py\n" + for name in self.public_functions: + output += f"{name}\n" + + out_file = ETC_DIR.joinpath("function-list.txt") + + if check: + with open(out_file, "r") as f: + current = f.read() + diff_and_exit(current, output) + else: + with open(out_file, "w") as f: + f.write(output) + + def write_function_defs(self, check: bool) -> None: + """Collect the list of information about public functions to a JSON file .""" + comment = ( + "Autogenerated by update-api-list.py. " + "List of files that define a function with a given name. " + "This file is checked in to make it obvious if refactoring breaks things" + ) + + d = {"__comment": comment} + d |= { + name: {"sources": self.defs[name], "type": self.types[name]} + for name in self.public_functions + } + + out_file = ETC_DIR.joinpath("function-definitions.json") + output = json.dumps(d, indent=4) + "\n" + + if check: + with open(out_file, "r") as f: + current = f.read() + diff_and_exit(current, output) + else: + with open(out_file, "w") as f: + f.write(output) def diff_and_exit(actual: str, expected: str): @@ -84,6 +182,35 @@ def diff_and_exit(actual: str, expected: str): exit(1) +def base_name(name: str) -> tuple[str, str]: + """Return the basename and type from a full function name. 
Keep in sync with Rust's + `fn base_name`. + """ + known_mappings = [ + ("erff", ("erf", "f32")), + ("erf", ("erf", "f64")), + ("modff", ("modf", "f32")), + ("modf", ("modf", "f64")), + ("lgammaf_r", ("lgamma_r", "f32")), + ("lgamma_r", ("lgamma_r", "f64")), + ] + + found = next((base for (full, base) in known_mappings if full == name), None) + if found is not None: + return found + + if name.endswith("f"): + return (name.rstrip("f"), "f32") + + if name.endswith("f16"): + return (name.rstrip("f16"), "f16") + + if name.endswith("f128"): + return (name.rstrip("f128"), "f128") + + return (name, "f64") + + def main(): """By default overwrite the file. If `--check` is passed, print a diff instead and error if the files are different. @@ -97,20 +224,9 @@ def main(): print("unrecognized arguments") exit(1) - names = list_public_functions() - output = "# autogenerated by update-api-list.py\n" - for name in names: - output += f"{name}\n" - - out_file = ETC_DIR.joinpath("function-list.txt") - - if check: - with open(out_file, "r") as f: - current = f.read() - diff_and_exit(current, output) - else: - with open(out_file, "w") as f: - f.write(output) + crate = Crate() + crate.write_function_list(check) + crate.write_function_defs(check) if __name__ == "__main__": From 2b3d83d81197e348accef35c6c6c019221f971b1 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 22 Dec 2024 11:47:44 +0000 Subject: [PATCH 1023/1459] Add extensive and exhaustive tests Add a generator that will test all inputs for input spaces `u32::MAX` or smaller (e.g. single-argument `f32` routines). For anything larger, still run approximately `u32::MAX` tests, but distribute inputs evenly across the function domain. Since we often only want to run one of these tests at a time, this implementation parallelizes within each test using `rayon`. A custom test runner is used so a progress bar is possible. Specific tests must be enabled by setting the `LIBM_EXTENSIVE_TESTS` environment variable, e.g. 
LIBM_EXTENSIVE_TESTS=all_f16,cos,cosf cargo run ... Testing on a recent machine, most tests take about two minutes or less. The Bessel functions are quite slow and take closer to 10 minutes, and FMA is increased to run for about the same. --- libm/crates/libm-test/Cargo.toml | 9 + libm/crates/libm-test/src/gen.rs | 1 + libm/crates/libm-test/src/gen/extensive.rs | 153 ++++++++++++ libm/crates/libm-test/src/gen/random.rs | 4 +- libm/crates/libm-test/src/lib.rs | 4 +- libm/crates/libm-test/src/num.rs | 2 +- libm/crates/libm-test/src/run_cfg.rs | 37 ++- .../libm-test/tests/z_extensive/main.rs | 14 ++ .../crates/libm-test/tests/z_extensive/run.rs | 234 ++++++++++++++++++ 9 files changed, 450 insertions(+), 8 deletions(-) create mode 100644 libm/crates/libm-test/src/gen/extensive.rs create mode 100644 libm/crates/libm-test/tests/z_extensive/main.rs create mode 100644 libm/crates/libm-test/tests/z_extensive/run.rs diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 9b3ab5c53..69e96034e 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -26,12 +26,14 @@ short-benchmarks = [] [dependencies] anyhow = "1.0.90" az = { version = "1.2.1", optional = true } +indicatif = { version = "0.17.9", default-features = false } libm = { path = "../..", features = ["unstable-public-internals"] } libm-macros = { path = "../libm-macros" } musl-math-sys = { path = "../musl-math-sys", optional = true } paste = "1.0.15" rand = "0.8.5" rand_chacha = "0.3.1" +rayon = "1.10.0" rug = { version = "1.26.1", optional = true, default-features = false, features = ["float", "std"] } [target.'cfg(target_family = "wasm")'.dependencies] @@ -43,11 +45,18 @@ rand = { version = "0.8.5", optional = true } [dev-dependencies] criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] } +libtest-mimic = "0.8.1" [[bench]] name = "random" harness = false +[[test]] +# No harness so that we can skip tests at 
runtime based on env. Prefixed with +# `z` so these tests get run last. +name = "z_extensive" +harness = false + [lints.rust] # Values from the chared config.rs used by `libm` but not the test crate unexpected_cfgs = { level = "warn", check-cfg = [ diff --git a/libm/crates/libm-test/src/gen.rs b/libm/crates/libm-test/src/gen.rs index 83e00f31d..e2bfcdf34 100644 --- a/libm/crates/libm-test/src/gen.rs +++ b/libm/crates/libm-test/src/gen.rs @@ -2,6 +2,7 @@ pub mod domain_logspace; pub mod edge_cases; +pub mod extensive; pub mod random; /// A wrapper to turn any iterator into an `ExactSizeIterator`. Asserts the final result to ensure diff --git a/libm/crates/libm-test/src/gen/extensive.rs b/libm/crates/libm-test/src/gen/extensive.rs new file mode 100644 index 000000000..d8b991b2a --- /dev/null +++ b/libm/crates/libm-test/src/gen/extensive.rs @@ -0,0 +1,153 @@ +use std::fmt; +use std::ops::RangeInclusive; + +use libm::support::MinInt; + +use crate::domain::HasDomain; +use crate::gen::KnownSize; +use crate::op::OpITy; +use crate::run_cfg::{int_range, iteration_count}; +use crate::{CheckCtx, GeneratorKind, MathOp, logspace}; + +/// Generate a sequence of inputs that either cover the domain in completeness (for smaller float +/// types and single argument functions) or provide evenly spaced inputs across the domain with +/// approximately `u32::MAX` total iterations. +pub trait ExtensiveInput { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator + Send; +} + +/// Construct an iterator from `logspace` and also calculate the total number of steps expected +/// for that iterator. 
+fn logspace_steps( + start: Op::FTy, + end: Op::FTy, + ctx: &CheckCtx, + argnum: usize, +) -> (impl Iterator + Clone, u64) +where + Op: MathOp, + OpITy: TryFrom, + RangeInclusive>: Iterator, +{ + let max_steps = iteration_count(ctx, GeneratorKind::Extensive, argnum); + let max_steps = OpITy::::try_from(max_steps).unwrap_or(OpITy::::MAX); + let iter = logspace(start, end, max_steps); + + // `logspace` can't implement `ExactSizeIterator` because of the range, but its size hint + // should be accurate (assuming <= usize::MAX iterations). + let size_hint = iter.size_hint(); + assert_eq!(size_hint.0, size_hint.1.unwrap()); + + (iter, size_hint.0.try_into().unwrap()) +} + +macro_rules! impl_extensive_input { + ($fty:ty) => { + impl ExtensiveInput for ($fty,) + where + Op: MathOp, + Op: HasDomain, + { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + let start = Op::DOMAIN.range_start(); + let end = Op::DOMAIN.range_end(); + let (iter0, steps0) = logspace_steps::(start, end, ctx, 0); + let iter0 = iter0.map(|v| (v,)); + KnownSize::new(iter0, steps0) + } + } + + impl ExtensiveInput for ($fty, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + let start = <$fty>::NEG_INFINITY; + let end = <$fty>::INFINITY; + let (iter0, steps0) = logspace_steps::(start, end, ctx, 0); + let (iter1, steps1) = logspace_steps::(start, end, ctx, 1); + let iter = + iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); + let count = steps0.checked_mul(steps1).unwrap(); + KnownSize::new(iter, count) + } + } + + impl ExtensiveInput for ($fty, $fty, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + let start = <$fty>::NEG_INFINITY; + let end = <$fty>::INFINITY; + + let (iter0, steps0) = logspace_steps::(start, end, ctx, 0); + let (iter1, steps1) = logspace_steps::(start, end, ctx, 1); + let (iter2, steps2) = logspace_steps::(start, end, ctx, 2); + + let iter = iter0 + 
.flat_map(move |first| iter1.clone().map(move |second| (first, second))) + .flat_map(move |(first, second)| { + iter2.clone().map(move |third| (first, second, third)) + }); + let count = steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap(); + + KnownSize::new(iter, count) + } + } + + impl ExtensiveInput for (i32, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + let start = <$fty>::NEG_INFINITY; + let end = <$fty>::INFINITY; + + let iter0 = int_range(ctx, GeneratorKind::Extensive, 0); + let steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0); + let (iter1, steps1) = logspace_steps::(start, end, ctx, 1); + + let iter = + iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); + let count = steps0.checked_mul(steps1).unwrap(); + + KnownSize::new(iter, count) + } + } + + impl ExtensiveInput for ($fty, i32) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + let start = <$fty>::NEG_INFINITY; + let end = <$fty>::INFINITY; + + let (iter0, steps0) = logspace_steps::(start, end, ctx, 0); + let iter1 = int_range(ctx, GeneratorKind::Extensive, 0); + let steps1 = iteration_count(ctx, GeneratorKind::Extensive, 0); + + let iter = + iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); + let count = steps0.checked_mul(steps1).unwrap(); + + KnownSize::new(iter, count) + } + } + }; +} + +impl_extensive_input!(f32); +impl_extensive_input!(f64); + +/// Create a test case iterator for extensive inputs. +pub fn get_test_cases( + ctx: &CheckCtx, +) -> impl ExactSizeIterator + Send + use<'_, Op> +where + Op: MathOp, + Op::RustArgs: ExtensiveInput, +{ + Op::RustArgs::get_cases(ctx) +} diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs index 6df944317..29a9dcd2b 100644 --- a/libm/crates/libm-test/src/gen/random.rs +++ b/libm/crates/libm-test/src/gen/random.rs @@ -86,7 +86,7 @@ macro_rules! 
impl_random_input { fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { let count0 = iteration_count(ctx, GeneratorKind::Random, 0); let count1 = iteration_count(ctx, GeneratorKind::Random, 1); - let range0 = int_range(ctx, 0); + let range0 = int_range(ctx, GeneratorKind::Random, 0); let iter = random_ints(count0, range0) .flat_map(move |f1: i32| random_floats(count1).map(move |f2: $fty| (f1, f2))); KnownSize::new(iter, count0 * count1) @@ -97,7 +97,7 @@ macro_rules! impl_random_input { fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { let count0 = iteration_count(ctx, GeneratorKind::Random, 0); let count1 = iteration_count(ctx, GeneratorKind::Random, 1); - let range1 = int_range(ctx, 1); + let range1 = int_range(ctx, GeneratorKind::Random, 1); let iter = random_floats(count0).flat_map(move |f1: $fty| { random_ints(count1, range1.clone()).map(move |f2: i32| (f1, f2)) }); diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 8a4e782df..a940db1d2 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -25,7 +25,8 @@ pub use libm::support::{Float, Int, IntTy, MinInt}; pub use num::{FloatExt, logspace}; pub use op::{BaseName, FloatTy, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet, Ty}; pub use precision::{MaybeOverride, SpecialCase, default_ulp}; -pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind}; +use run_cfg::EXTENSIVE_MAX_ITERATIONS; +pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, skip_extensive_test}; pub use test_traits::{CheckOutput, Hex, TupleCall}; /// Result type for tests is usually from `anyhow`. 
Most times there is no success value to @@ -85,6 +86,7 @@ pub fn test_log(s: &str) { writeln!(f, "cargo features: {}", env!("CFG_CARGO_FEATURES")).unwrap(); writeln!(f, "opt level: {}", env!("CFG_OPT_LEVEL")).unwrap(); writeln!(f, "target features: {}", env!("CFG_TARGET_FEATURES")).unwrap(); + writeln!(f, "extensive iterations {}", *EXTENSIVE_MAX_ITERATIONS).unwrap(); Some(f) }); diff --git a/libm/crates/libm-test/src/num.rs b/libm/crates/libm-test/src/num.rs index eff2fbc1f..f693ef02f 100644 --- a/libm/crates/libm-test/src/num.rs +++ b/libm/crates/libm-test/src/num.rs @@ -215,7 +215,7 @@ fn as_ulp_steps(x: F) -> Option { /// to logarithmic spacing of their values. /// /// Note that this tends to skip negative zero, so that needs to be checked explicitly. -pub fn logspace(start: F, end: F, steps: F::Int) -> impl Iterator +pub fn logspace(start: F, end: F, steps: F::Int) -> impl Iterator + Clone where RangeInclusive: Iterator, { diff --git a/libm/crates/libm-test/src/run_cfg.rs b/libm/crates/libm-test/src/run_cfg.rs index 9cede0cc7..48a654caa 100644 --- a/libm/crates/libm-test/src/run_cfg.rs +++ b/libm/crates/libm-test/src/run_cfg.rs @@ -10,6 +10,22 @@ use crate::{BaseName, FloatTy, Identifier, test_log}; /// The environment variable indicating which extensive tests should be run. pub const EXTENSIVE_ENV: &str = "LIBM_EXTENSIVE_TESTS"; +/// Specify the number of iterations via this environment variable, rather than using the default. +pub const EXTENSIVE_ITER_ENV: &str = "LIBM_EXTENSIVE_ITERATIONS"; + +/// Maximum number of iterations to run for a single routine. +/// +/// The default value of one greater than `u32::MAX` allows testing single-argument `f32` routines +/// and single- or double-argument `f16` routines exhaustively. `f64` and `f128` can't feasibly +/// be tested exhaustively; however, [`EXTENSIVE_ITER_ENV`] can be set to run tests for multiple +/// hours. 
+pub static EXTENSIVE_MAX_ITERATIONS: LazyLock = LazyLock::new(|| { + let default = 1 << 32; + env::var(EXTENSIVE_ITER_ENV) + .map(|v| v.parse().expect("failed to parse iteration count")) + .unwrap_or(default) +}); + /// Context passed to [`CheckOutput`]. #[derive(Clone, Debug, PartialEq, Eq)] pub struct CheckCtx { @@ -54,6 +70,7 @@ pub enum CheckBasis { #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum GeneratorKind { Domain, + Extensive, Random, } @@ -171,8 +188,14 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) - let mut total_iterations = match gen_kind { GeneratorKind::Domain => domain_iter_count, GeneratorKind::Random => random_iter_count, + GeneratorKind::Extensive => *EXTENSIVE_MAX_ITERATIONS, }; + // FMA has a huge domain but is reasonably fast to run, so increase iterations. + if ctx.base_name == BaseName::Fma { + total_iterations *= 4; + } + if cfg!(optimizations_enabled) { // Always run at least 10,000 tests. total_iterations = total_iterations.max(10_000); @@ -191,7 +214,7 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) - let total = ntests.pow(t_env.input_count.try_into().unwrap()); let seed_msg = match gen_kind { - GeneratorKind::Domain => String::new(), + GeneratorKind::Domain | GeneratorKind::Extensive => String::new(), GeneratorKind::Random => { format!(" using `{SEED_ENV}={}`", str::from_utf8(SEED.as_slice()).unwrap()) } @@ -210,7 +233,7 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) - } /// Some tests require that an integer be kept within reasonable limits; generate that here. 
-pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive { +pub fn int_range(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -> RangeInclusive { let t_env = TestEnv::from_env(ctx); if !matches!(ctx.base_name, BaseName::Jn | BaseName::Yn) { @@ -221,10 +244,17 @@ pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive { // The integer argument to `jn` is an iteration count. Limit this to ensure tests can be // completed in a reasonable amount of time. - if t_env.slow_platform || !cfg!(optimizations_enabled) { + let non_extensive_range = if t_env.slow_platform || !cfg!(optimizations_enabled) { (-0xf)..=0xff } else { (-0xff)..=0xffff + }; + + let extensive_range = (-0xfff)..=0xfffff; + + match gen_kind { + GeneratorKind::Extensive => extensive_range, + GeneratorKind::Domain | GeneratorKind::Random => non_extensive_range, } } @@ -241,7 +271,6 @@ pub fn check_near_count(_ctx: &CheckCtx) -> u64 { } /// Check whether extensive actions should be run or skipped. -#[expect(dead_code, reason = "extensive tests have not yet been added")] pub fn skip_extensive_test(ctx: &CheckCtx) -> bool { let t_env = TestEnv::from_env(ctx); !t_env.should_run_extensive diff --git a/libm/crates/libm-test/tests/z_extensive/main.rs b/libm/crates/libm-test/tests/z_extensive/main.rs new file mode 100644 index 000000000..3a2af88bd --- /dev/null +++ b/libm/crates/libm-test/tests/z_extensive/main.rs @@ -0,0 +1,14 @@ +//! `main` is just a wrapper to handle configuration. + +#[cfg(not(feature = "test-multiprecision"))] +fn main() { + eprintln!("multiprecision not enabled; skipping extensive tests"); +} + +#[cfg(feature = "test-multiprecision")] +mod run; + +#[cfg(feature = "test-multiprecision")] +fn main() { + run::run(); +} diff --git a/libm/crates/libm-test/tests/z_extensive/run.rs b/libm/crates/libm-test/tests/z_extensive/run.rs new file mode 100644 index 000000000..7acff5324 --- /dev/null +++ b/libm/crates/libm-test/tests/z_extensive/run.rs @@ -0,0 +1,234 @@ +//! 
Exhaustive tests for `f16` and `f32`, high-iteration for `f64` and `f128`. + +use std::fmt; +use std::io::{self, IsTerminal}; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::Duration; + +use indicatif::{ProgressBar, ProgressStyle}; +use libm_test::gen::extensive::{self, ExtensiveInput}; +use libm_test::mpfloat::MpOp; +use libm_test::{ + CheckBasis, CheckCtx, CheckOutput, MathOp, TestResult, TupleCall, skip_extensive_test, +}; +use libtest_mimic::{Arguments, Trial}; +use rayon::prelude::*; + +/// Run the extensive test suite. +pub fn run() { + let mut args = Arguments::from_args(); + // Prevent multiple tests from running in parallel, each test gets parallized internally. + args.test_threads = Some(1); + let tests = register_all_tests(); + + // With default parallelism, the CPU doesn't saturate. We don't need to be nice to + // other processes, so do 1.5x to make sure we use all available resources. + let threads = std::thread::available_parallelism().map(Into::into).unwrap_or(0) * 3 / 2; + rayon::ThreadPoolBuilder::new().num_threads(threads).build_global().unwrap(); + + libtest_mimic::run(&args, tests).exit(); +} + +macro_rules! mp_extensive_tests { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + extra: [$push_to:ident], + ) => { + $(#[$attr])* + register_single_test::(&mut $push_to); + }; +} + +/// Create a list of tests for consumption by `libtest_mimic`. +fn register_all_tests() -> Vec { + let mut all_tests = Vec::new(); + + libm_macros::for_each_function! { + callback: mp_extensive_tests, + extra: [all_tests], + skip: [ + // FIXME: MPFR tests needed + remquo, + remquof, + + // FIXME: test needed, see + // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392 + nextafter, + nextafterf, + ], + } + + all_tests +} + +/// Add a single test to the list. 
+fn register_single_test(all: &mut Vec) +where + Op: MathOp + MpOp, + Op::RustArgs: ExtensiveInput + Send, +{ + let test_name = format!("mp_extensive_{}", Op::NAME); + let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr); + let skip = skip_extensive_test(&ctx); + + let runner = move || { + if !cfg!(optimizations_enabled) { + panic!("extensive tests should be run with --release"); + } + + let res = run_single_test::(); + let e = match res { + Ok(()) => return Ok(()), + Err(e) => e, + }; + + // Format with the `Debug` implementation so we get the error cause chain, and print it + // here so we see the result immediately (rather than waiting for all tests to conclude). + let e = format!("{e:?}"); + eprintln!("failure testing {}:{e}\n", Op::IDENTIFIER); + + Err(e.into()) + }; + + all.push(Trial::test(test_name, runner).with_ignored_flag(skip)); +} + +/// Test runner for a signle routine. +fn run_single_test() -> TestResult +where + Op: MathOp + MpOp, + Op::RustArgs: ExtensiveInput + Send, +{ + // Small delay before printing anything so other output from the runner has a chance to flush. + std::thread::sleep(Duration::from_millis(500)); + eprintln!(); + + let completed = AtomicU64::new(0); + let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr); + let cases = &mut extensive::get_test_cases::(&ctx); + let total: u64 = cases.len().try_into().unwrap(); + let pb = Progress::new(Op::NAME, total); + + let test_single_chunk = |mp_vals: &mut Op::MpTy, input_vec: Vec| -> TestResult { + for input in input_vec { + // Test the input. + let mp_res = Op::run(mp_vals, input); + let crate_res = input.call(Op::ROUTINE); + crate_res.validate(mp_res, input, &ctx)?; + + let completed = completed.fetch_add(1, Ordering::Relaxed) + 1; + pb.update(completed, input); + } + + Ok(()) + }; + + // Chunk the cases so Rayon doesn't switch threads between each iterator item. 50k seems near + // a performance sweet spot. 
Ideally we would reuse these allocations rather than discarding, + // but that is difficult with Rayon's API. + let chunk_size = 50_000; + let chunks = std::iter::from_fn(move || { + let mut v = Vec::with_capacity(chunk_size); + v.extend(cases.take(chunk_size)); + (!v.is_empty()).then_some(v) + }); + + // Run the actual tests + let res = chunks.par_bridge().try_for_each_init(Op::new_mp, test_single_chunk); + + let real_total = completed.load(Ordering::Relaxed); + pb.complete(real_total); + + if res.is_ok() && real_total != total { + // Provide a warning if our estimate needs to be updated. + panic!("total run {real_total} does not match expected {total}"); + } + + res +} + +/// Wrapper around a `ProgressBar` that handles styles and non-TTY messages. +struct Progress { + pb: ProgressBar, + name_padded: String, + final_style: ProgressStyle, + is_tty: bool, +} + +impl Progress { + const PB_TEMPLATE: &str = "[{elapsed:3} {percent:3}%] {bar:20.cyan/blue} NAME \ + {human_pos:>13}/{human_len:13} {per_sec:18} eta {eta:8} {msg}"; + const PB_TEMPLATE_FINAL: &str = "[{elapsed:3} {percent:3}%] {bar:20.cyan/blue} NAME \ + {human_pos:>13}/{human_len:13} {per_sec:18} done in {elapsed_precise}"; + + fn new(name: &str, total: u64) -> Self { + eprintln!("starting extensive tests for `{name}`"); + let name_padded = format!("{name:9}"); + let is_tty = io::stderr().is_terminal(); + + let initial_style = + ProgressStyle::with_template(&Self::PB_TEMPLATE.replace("NAME", &name_padded)) + .unwrap() + .progress_chars("##-"); + + let final_style = + ProgressStyle::with_template(&Self::PB_TEMPLATE_FINAL.replace("NAME", &name_padded)) + .unwrap() + .progress_chars("##-"); + + let pb = ProgressBar::new(total); + pb.set_style(initial_style); + + Self { pb, final_style, name_padded, is_tty } + } + + fn update(&self, completed: u64, input: impl fmt::Debug) { + // Infrequently update the progress bar. 
+ if completed % 20_000 == 0 { + self.pb.set_position(completed); + } + + if completed % 500_000 == 0 { + self.pb.set_message(format!("input: {input:<24?}")); + } + + if !self.is_tty && completed % 5_000_000 == 0 { + let len = self.pb.length().unwrap_or_default(); + eprintln!( + "[{elapsed:3?}s {percent:3.0}%] {name} \ + {human_pos:>10}/{human_len:<10} {per_sec:14.2}/s eta {eta:4}s {input:<24?}", + elapsed = self.pb.elapsed().as_secs(), + percent = completed as f32 * 100.0 / len as f32, + name = self.name_padded, + human_pos = completed, + human_len = len, + per_sec = self.pb.per_sec(), + eta = self.pb.eta().as_secs() + ); + } + } + + fn complete(self, real_total: u64) { + self.pb.set_style(self.final_style); + self.pb.set_position(real_total); + self.pb.abandon(); + + if !self.is_tty { + let len = self.pb.length().unwrap_or_default(); + eprintln!( + "[{elapsed:3}s {percent:3.0}%] {name} \ + {human_pos:>10}/{human_len:<10} {per_sec:14.2}/s done in {elapsed_precise}", + elapsed = self.pb.elapsed().as_secs(), + percent = real_total as f32 * 100.0 / len as f32, + name = self.name_padded, + human_pos = real_total, + human_len = len, + per_sec = self.pb.per_sec(), + elapsed_precise = self.pb.elapsed().as_secs(), + ); + } + + eprintln!(); + } +} From 00bdc5859deca65cfa4bc0e289e6526eedd8a541 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 22 Dec 2024 11:47:53 +0000 Subject: [PATCH 1024/1459] Update precision based on failures from extensive tests --- libm/crates/libm-test/src/precision.rs | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 4a6ca8af7..696bb3735 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -55,7 +55,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { Bn::Asin => 1, Bn::Asinh => 2, Bn::Atan => 1, - Bn::Atan2 => 1, + Bn::Atan2 => 2, Bn::Atanh => 2, Bn::Cbrt => 1, Bn::Cos => 1, @@ 
-187,6 +187,20 @@ impl MaybeOverride<(f32,)> for SpecialCase { return XFAIL; } + if (ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR) + && input.0 > 4e36 + && expected.is_infinite() + && !actual.is_infinite() + { + // This result should saturate but we return a finite value. + return XFAIL; + } + + if ctx.base_name == BaseName::J0 && input.0 < -1e34 { + // Errors get huge close to -inf + return XFAIL; + } + maybe_check_nan_bits(actual, expected, ctx) } @@ -248,6 +262,11 @@ impl MaybeOverride<(f64,)> for SpecialCase { return XFAIL; } + if ctx.base_name == BaseName::J0 && input.0 < -1e300 { + // Errors get huge close to -inf + return XFAIL; + } + maybe_check_nan_bits(actual, expected, ctx) } @@ -364,6 +383,7 @@ impl MaybeOverride<(i32, f32)> for SpecialCase { } } } + impl MaybeOverride<(i32, f64)> for SpecialCase { fn check_float( input: (i32, f64), From c963b54868a5ae4883b71689cf270a8e9d793ba7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 2 Jan 2025 10:25:27 +0000 Subject: [PATCH 1025/1459] Run extensive tests in CI when relevant files change Add a CI job with a dynamically calculated matrix that runs extensive jobs on changed files. This makes use of the new `function-definitions.json` file to determine which changed files require full tests for a routine to run. 
--- libm/.github/workflows/main.yml | 59 +++++++++- libm/ci/calculate-exhaustive-matrix.py | 148 +++++++++++++++++++++++++ 2 files changed, 206 insertions(+), 1 deletion(-) create mode 100755 libm/ci/calculate-exhaustive-matrix.py diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 023ec58c0..1b2fd12ba 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -14,7 +14,7 @@ env: jobs: test: name: Build and test - timeout-minutes: 20 + timeout-minutes: 25 strategy: fail-fast: false matrix: @@ -186,6 +186,62 @@ jobs: rustup component add rustfmt - run: cargo fmt -- --check + # Determine which extensive tests should be run based on changed files. + calculate_extensive_matrix: + name: Calculate job matrix + runs-on: ubuntu-24.04 + outputs: + matrix: ${{ steps.script.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 100 + - name: Fetch pull request ref + run: git fetch origin "$GITHUB_REF:$GITHUB_REF" + - run: python3 ci/calculate-exhaustive-matrix.py >> "$GITHUB_OUTPUT" + id: script + + extensive: + name: Extensive tests for ${{ matrix.ty }} + needs: + # Wait on `clippy` so we have some confidence that the crate will build + - clippy + - calculate_extensive_matrix + runs-on: ubuntu-24.04 + timeout-minutes: 80 + strategy: + matrix: + # Use the output from `calculate_extensive_matrix` to calculate the matrix + # FIXME: it would be better to run all jobs (i.e. all types) but mark those that + # didn't change as skipped, rather than completely excluding the job. However, + # this is not currently possible https://github.com/actions/runner/issues/1985. 
+ include: ${{ fromJSON(needs.calculate_extensive_matrix.outputs.matrix).matrix }} + env: + CHANGED: ${{ matrix.changed }} + steps: + - uses: actions/checkout@v4 + - name: Install Rust + run: | + rustup update nightly --no-self-update + rustup default nightly + - uses: Swatinem/rust-cache@v2 + - name: Download musl source + run: ./ci/download-musl.sh + - name: Run extensive tests + run: | + echo "Changed: '$CHANGED'" + if [ -z "$CHANGED" ]; then + echo "No tests to run, exiting." + exit + fi + + LIBM_EXTENSIVE_TESTS="$CHANGED" cargo t \ + --features test-multiprecision,unstable \ + --release -- extensive + - name: Print test logs if available + run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi + shell: bash + success: needs: - test @@ -193,6 +249,7 @@ jobs: - benchmarks - msrv - rustfmt + - extensive runs-on: ubuntu-24.04 # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency # failed" as success. So we have to do some contortions to ensure the job fails if any of its diff --git a/libm/ci/calculate-exhaustive-matrix.py b/libm/ci/calculate-exhaustive-matrix.py new file mode 100755 index 000000000..8b42f9389 --- /dev/null +++ b/libm/ci/calculate-exhaustive-matrix.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +"""Calculate which exhaustive tests should be run as part of CI. + +This dynamically prepares a list of routines that had a source file change based on +git history. +""" + +import subprocess as sp +import sys +import json +from dataclasses import dataclass +from os import getenv +from pathlib import Path +from typing import TypedDict + + +REPO_ROOT = Path(__file__).parent.parent +GIT = ["git", "-C", REPO_ROOT] + +# Don't run exhaustive tests if these files change, even if they contaiin a function +# definition. 
+IGNORE_FILES = [ + "src/math/support/", + "src/libm_helper.rs", + "src/math/arch/intrinsics.rs", +] + +TYPES = ["f16", "f32", "f64", "f128"] + + +class FunctionDef(TypedDict): + """Type for an entry in `function-definitions.json`""" + + sources: list[str] + type: str + + +@dataclass +class Context: + gh_ref: str | None + changed: list[Path] + defs: dict[str, FunctionDef] + + def __init__(self) -> None: + self.gh_ref = getenv("GITHUB_REF") + self.changed = [] + self._init_change_list() + + with open(REPO_ROOT.joinpath("etc/function-definitions.json")) as f: + defs = json.load(f) + + defs.pop("__comment", None) + self.defs = defs + + def _init_change_list(self): + """Create a list of files that have been changed. This uses GITHUB_REF if + available, otherwise a diff between `HEAD` and `master`. + """ + + # For pull requests, GitHub creates a ref `refs/pull/1234/merge` (1234 being + # the PR number), and sets this as `GITHUB_REF`. + ref = self.gh_ref + eprint(f"using ref `{ref}`") + if ref is None or "merge" not in ref: + # If the ref is not for `merge` then we are not in PR CI + eprint("No diff available for ref") + return + + # The ref is for a dummy merge commit. We can extract the merge base by + # inspecting all parents (`^@`). 
+ merge_sha = sp.check_output( + GIT + ["show-ref", "--hash", ref], text=True + ).strip() + merge_log = sp.check_output(GIT + ["log", "-1", merge_sha], text=True) + eprint(f"Merge:\n{merge_log}\n") + + parents = ( + sp.check_output(GIT + ["rev-parse", f"{merge_sha}^@"], text=True) + .strip() + .splitlines() + ) + assert len(parents) == 2, f"expected two-parent merge but got:\n{parents}" + base = parents[0].strip() + incoming = parents[1].strip() + + eprint(f"base: {base}, incoming: {incoming}") + textlist = sp.check_output( + GIT + ["diff", base, incoming, "--name-only"], text=True + ) + self.changed = [Path(p) for p in textlist.splitlines()] + + @staticmethod + def _ignore_file(fname: str) -> bool: + return any(fname.startswith(pfx) for pfx in IGNORE_FILES) + + def changed_routines(self) -> dict[str, list[str]]: + """Create a list of routines for which one or more files have been updated, + separated by type. + """ + routines = set() + for name, meta in self.defs.items(): + # Don't update if changes to the file should be ignored + sources = (f for f in meta["sources"] if not self._ignore_file(f)) + + # Select changed files + changed = [f for f in sources if Path(f) in self.changed] + + if len(changed) > 0: + eprint(f"changed files for {name}: {changed}") + routines.add(name) + + ret = {} + for r in sorted(routines): + ret.setdefault(self.defs[r]["type"], []).append(r) + + return ret + + def make_workflow_output(self) -> str: + """Create a JSON object a list items for each type's changed files, if any + did change, and the routines that were affected by the change. 
+ """ + changed = self.changed_routines() + ret = [] + for ty in TYPES: + ty_changed = changed.get(ty, []) + item = { + "ty": ty, + "changed": ",".join(ty_changed), + } + ret.append(item) + output = json.dumps({"matrix": ret}, separators=(",", ":")) + eprint(f"output: {output}") + return output + + +def eprint(*args, **kwargs): + """Print to stderr.""" + print(*args, file=sys.stderr, **kwargs) + + +def main(): + ctx = Context() + output = ctx.make_workflow_output() + print(f"matrix={output}") + + +if __name__ == "__main__": + main() From 74a98a0c84813d8b78eef858fb8cadb846f74247 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 2 Jan 2025 21:41:58 +0000 Subject: [PATCH 1026/1459] Enable `f16` and `f128` when creating the API change list Additionally, read glob output as absoulte paths. This enables the script to work properly even when invoked from a different directory. --- libm/etc/update-api-list.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libm/etc/update-api-list.py b/libm/etc/update-api-list.py index a4587aa81..67f73e59c 100755 --- a/libm/etc/update-api-list.py +++ b/libm/etc/update-api-list.py @@ -13,6 +13,7 @@ from typing import Any, TypeAlias ETC_DIR = Path(__file__).parent +ROOT_DIR = ETC_DIR.parent IndexTy: TypeAlias = dict[str, dict[str, Any]] """Type of the `index` item in rustdoc's JSON output""" @@ -56,10 +57,12 @@ def get_rustdoc_json() -> dict[Any, Any]: "--edition=2021", "--document-private-items", "--output-format=json", + "--cfg=f16_enabled", + "--cfg=f128_enabled", "-Zunstable-options", "-o-", ], - cwd=ETC_DIR.parent, + cwd=ROOT_DIR, text=True, ) j = json.loads(j) @@ -105,8 +108,8 @@ def _init_defs(self, index: IndexTy) -> None: # A lot of the `arch` module is often configured out so doesn't show up in docs. Use # string matching as a fallback. 
- for fname in glob("src/math/arch/**.rs", root_dir=ETC_DIR.parent): - contents = Path(fname).read_text() + for fname in glob("src/math/arch/**.rs", root_dir=ROOT_DIR): + contents = (ROOT_DIR.joinpath(fname)).read_text() for name in self.public_functions: if f"fn {name}" in contents: From 3fe6d7e16dd6aa4224c5f63c8208e03978f78b6d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 2 Jan 2025 21:11:50 +0000 Subject: [PATCH 1027/1459] Add `fabsf16`, `fabsf128`, `copysignf16`, and `copysignf128` Use the generic implementations to provide these simple methods. --- libm/src/libm_helper.rs | 24 ++++++++++++++++++++--- libm/src/math/copysignf128.rs | 8 ++++++++ libm/src/math/copysignf16.rs | 8 ++++++++ libm/src/math/fabs.rs | 1 + libm/src/math/fabsf.rs | 1 + libm/src/math/fabsf128.rs | 37 +++++++++++++++++++++++++++++++++++ libm/src/math/fabsf16.rs | 37 +++++++++++++++++++++++++++++++++++ libm/src/math/mod.rs | 20 +++++++++++++++++++ 8 files changed, 133 insertions(+), 3 deletions(-) create mode 100644 libm/src/math/copysignf128.rs create mode 100644 libm/src/math/copysignf16.rs create mode 100644 libm/src/math/fabsf128.rs create mode 100644 libm/src/math/fabsf16.rs diff --git a/libm/src/libm_helper.rs b/libm/src/libm_helper.rs index 52d0c4c2a..f087267e4 100644 --- a/libm/src/libm_helper.rs +++ b/libm/src/libm_helper.rs @@ -30,7 +30,7 @@ macro_rules! libm_helper { } }; - ({$($func:tt);*}) => { + ({$($func:tt;)*}) => { $( libm_helper! { $func } )* @@ -103,7 +103,7 @@ libm_helper! { (fn trunc(x: f32) -> (f32); => truncf); (fn y0(x: f32) -> (f32); => y0f); (fn y1(x: f32) -> (f32); => y1f); - (fn yn(n: i32, x: f32) -> (f32); => ynf) + (fn yn(n: i32, x: f32) -> (f32); => ynf); } } @@ -166,6 +166,24 @@ libm_helper! { (fn trunc(x: f64) -> (f64); => trunc); (fn y0(x: f64) -> (f64); => y0); (fn y1(x: f64) -> (f64); => y1); - (fn yn(n: i32, x: f64) -> (f64); => yn) + (fn yn(n: i32, x: f64) -> (f64); => yn); + } +} + +#[cfg(f16_enabled)] +libm_helper! 
{ + f16, + funcs: { + (fn copysign(x: f16, y: f16) -> (f16); => copysignf16); + (fn fabs(x: f16) -> (f16); => fabsf16); + } +} + +#[cfg(f128_enabled)] +libm_helper! { + f128, + funcs: { + (fn copysign(x: f128, y: f128) -> (f128); => copysignf128); + (fn fabs(x: f128) -> (f128); => fabsf128); } } diff --git a/libm/src/math/copysignf128.rs b/libm/src/math/copysignf128.rs new file mode 100644 index 000000000..7bd81d42b --- /dev/null +++ b/libm/src/math/copysignf128.rs @@ -0,0 +1,8 @@ +/// Sign of Y, magnitude of X (f128) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf128(x: f128, y: f128) -> f128 { + super::generic::copysign(x, y) +} diff --git a/libm/src/math/copysignf16.rs b/libm/src/math/copysignf16.rs new file mode 100644 index 000000000..820658686 --- /dev/null +++ b/libm/src/math/copysignf16.rs @@ -0,0 +1,8 @@ +/// Sign of Y, magnitude of X (f16) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf16(x: f16, y: f16) -> f16 { + super::generic::copysign(x, y) +} diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs index 6687fdcc3..46c0d88a5 100644 --- a/libm/src/math/fabs.rs +++ b/libm/src/math/fabs.rs @@ -1,4 +1,5 @@ /// Absolute value (magnitude) (f64) +/// /// Calculates the absolute value (magnitude) of the argument `x`, /// by direct manipulation of the bit representation of `x`. 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs index 99bb5b5f1..d5775b600 100644 --- a/libm/src/math/fabsf.rs +++ b/libm/src/math/fabsf.rs @@ -1,4 +1,5 @@ /// Absolute value (magnitude) (f32) +/// /// Calculates the absolute value (magnitude) of the argument `x`, /// by direct manipulation of the bit representation of `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] diff --git a/libm/src/math/fabsf128.rs b/libm/src/math/fabsf128.rs new file mode 100644 index 000000000..ef531bd91 --- /dev/null +++ b/libm/src/math/fabsf128.rs @@ -0,0 +1,37 @@ +/// Absolute value (magnitude) (f128) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf128(x: f128) -> f128 { + select_implementation! { + name: fabsf, + use_intrinsic: target_arch = "wasm32", + args: x, + } + + super::generic::fabs(x) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sanity_check() { + assert_eq!(fabsf128(-1.0), 1.0); + assert_eq!(fabsf128(2.8), 2.8); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs + #[test] + fn spec_tests() { + assert!(fabsf128(f128::NAN).is_nan()); + for f in [0.0, -0.0].iter().copied() { + assert_eq!(fabsf128(f), 0.0); + } + for f in [f128::INFINITY, f128::NEG_INFINITY].iter().copied() { + assert_eq!(fabsf128(f), f128::INFINITY); + } + } +} diff --git a/libm/src/math/fabsf16.rs b/libm/src/math/fabsf16.rs new file mode 100644 index 000000000..eb41f7391 --- /dev/null +++ b/libm/src/math/fabsf16.rs @@ -0,0 +1,37 @@ +/// Absolute value (magnitude) (f16) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf16(x: f16) -> f16 { + select_implementation! 
{ + name: fabsf, + use_intrinsic: target_arch = "wasm32", + args: x, + } + + super::generic::fabs(x) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sanity_check() { + assert_eq!(fabsf16(-1.0), 1.0); + assert_eq!(fabsf16(2.8), 2.8); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs + #[test] + fn spec_tests() { + assert!(fabsf16(f16::NAN).is_nan()); + for f in [0.0, -0.0].iter().copied() { + assert_eq!(fabsf16(f), 0.0); + } + for f in [f16::INFINITY, f16::NEG_INFINITY].iter().copied() { + assert_eq!(fabsf16(f), f16::INFINITY); + } + } +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 9003a8342..5baf35e42 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -339,6 +339,26 @@ pub use self::tgammaf::tgammaf; pub use self::trunc::trunc; pub use self::truncf::truncf; +cfg_if! { + if #[cfg(f16_enabled)] { + mod copysignf16; + mod fabsf16; + + pub use self::copysignf16::copysignf16; + pub use self::fabsf16::fabsf16; + } +} + +cfg_if! { + if #[cfg(f128_enabled)] { + mod copysignf128; + mod fabsf128; + + pub use self::copysignf128::copysignf128; + pub use self::fabsf128::fabsf128; + } +} + #[inline] fn get_high_word(x: f64) -> u32 { (x.to_bits() >> 32) as u32 From d3ab0e8b94c6cd20d62a6eaf7b1de4cc66ec0320 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 3 Jan 2025 00:12:53 +0000 Subject: [PATCH 1028/1459] Add test infrastructure for `f16` and `f128` Update test traits to support `f16` and `f128`, as applicable. Add the new routines (`fabs` and `copysign` for `f16` and `f128`) to the list of all operations. 
--- libm/crates/libm-macros/src/shared.rs | 28 +++++++ libm/crates/libm-test/Cargo.toml | 2 +- libm/crates/libm-test/benches/random.rs | 47 ++++++----- libm/crates/libm-test/src/domain.rs | 12 +++ libm/crates/libm-test/src/gen/extensive.rs | 4 + libm/crates/libm-test/src/gen/random.rs | 4 + libm/crates/libm-test/src/mpfloat.rs | 77 +++++++++++++------ libm/crates/libm-test/src/precision.rs | 32 ++++++++ libm/crates/libm-test/src/test_traits.rs | 6 ++ .../libm-test/tests/compare_built_musl.rs | 2 + libm/crates/libm-test/tests/multiprecision.rs | 2 + libm/etc/function-definitions.json | 28 +++++++ libm/etc/function-list.txt | 4 + 13 files changed, 205 insertions(+), 43 deletions(-) diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index ef0f18801..16547404f 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ b/libm/crates/libm-macros/src/shared.rs @@ -4,6 +4,13 @@ use std::fmt; use std::sync::LazyLock; const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] = &[ + ( + // `fn(f16) -> f16` + FloatTy::F16, + Signature { args: &[Ty::F16], returns: &[Ty::F16] }, + None, + &["fabsf16"], + ), ( // `fn(f32) -> f32` FloatTy::F32, @@ -28,6 +35,20 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] "tgamma", "trunc", "y0", "y1", ], ), + ( + // `fn(f128) -> f128` + FloatTy::F128, + Signature { args: &[Ty::F128], returns: &[Ty::F128] }, + None, + &["fabsf128"], + ), + ( + // `(f16, f16) -> f16` + FloatTy::F16, + Signature { args: &[Ty::F16, Ty::F16], returns: &[Ty::F16] }, + None, + &["copysignf16"], + ), ( // `(f32, f32) -> f32` FloatTy::F32, @@ -64,6 +85,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] "remainder", ], ), + ( + // `(f128, f128) -> f128` + FloatTy::F128, + Signature { args: &[Ty::F128, Ty::F128], returns: &[Ty::F128] }, + None, + &["copysignf128"], + ), ( // `(f32, f32, f32) -> f32` FloatTy::F32, diff --git a/libm/crates/libm-test/Cargo.toml 
b/libm/crates/libm-test/Cargo.toml index 69e96034e..2761d3d52 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -8,7 +8,7 @@ publish = false default = ["unstable-float"] # Propagated from libm because this affects which functions we test. -unstable-float = ["libm/unstable-float"] +unstable-float = ["libm/unstable-float", "rug?/nightly-float"] # Generate tests which are random inputs and the outputs are calculated with # musl libc. diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 23f429455..cd1e2d2cc 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -20,7 +20,7 @@ macro_rules! musl_rand_benches { ( fn_name: $fn_name:ident, attrs: [$($attr:meta),*], - fn_extra: $skip_on_i586:expr, + fn_extra: ($skip_on_i586:expr, $musl_fn:expr), ) => { paste::paste! { $(#[$attr])* @@ -28,15 +28,15 @@ macro_rules! musl_rand_benches { type Op = libm_test::op::$fn_name::Routine; #[cfg(feature = "build-musl")] - let musl_extra = MuslExtra { - musl_fn: Some(musl_math_sys::$fn_name as libm_test::OpCFn), - skip_on_i586: $skip_on_i586 + let musl_extra = MuslExtra::> { + musl_fn: $musl_fn, + skip_on_i586: $skip_on_i586, }; #[cfg(not(feature = "build-musl"))] let musl_extra = MuslExtra { musl_fn: None, - skip_on_i586: $skip_on_i586 + skip_on_i586: $skip_on_i586, }; bench_one::(c, musl_extra); @@ -67,7 +67,10 @@ where break; } - let musl_res = input.call(musl_extra.musl_fn.unwrap()); + let Some(musl_fn) = musl_extra.musl_fn else { + continue; + }; + let musl_res = input.call(musl_fn); let crate_res = input.call(Op::ROUTINE); crate_res.validate(musl_res, input, &ctx).context(name).unwrap(); @@ -91,15 +94,16 @@ where // Don't test against musl if it is not available #[cfg(feature = "build-musl")] { - let musl_fn = musl_extra.musl_fn.unwrap(); - group.bench_function("musl", |b| { - b.iter(|| { - let f = black_box(musl_fn); - for input in 
benchvec.iter().copied() { - input.call(f); - } - }) - }); + if let Some(musl_fn) = musl_extra.musl_fn { + group.bench_function("musl", |b| { + b.iter(|| { + let f = black_box(musl_fn); + for input in benchvec.iter().copied() { + input.call(f); + } + }) + }); + } } } @@ -107,9 +111,16 @@ libm_macros::for_each_function! { callback: musl_rand_benches, skip: [], fn_extra: match MACRO_FN_NAME { - // FIXME(correctness): wrong result on i586 - exp10 | exp10f | exp2 | exp2f => true, - _ => false + // We pass a tuple of `(skip_on_i586, musl_fn)` + + // FIXME(correctness): exp functions have the wrong result on i586 + exp10 | exp10f | exp2 | exp2f => (true, Some(musl_math_sys::MACRO_FN_NAME)), + + // Musl does not provide `f16` and `f128` functions + copysignf16 | copysignf128 | fabsf16 | fabsf128 => (false, None), + + // By default we never skip (false) and always have a musl function available + _ => (false, Some(musl_math_sys::MACRO_FN_NAME)) } } diff --git a/libm/crates/libm-test/src/domain.rs b/libm/crates/libm-test/src/domain.rs index 7b5a01b96..52393d402 100644 --- a/libm/crates/libm-test/src/domain.rs +++ b/libm/crates/libm-test/src/domain.rs @@ -187,3 +187,15 @@ impl HasDomain for crate::op::lgammaf_r::Routine { impl HasDomain for crate::op::lgamma_r::Routine { const DOMAIN: Domain = Domain::::LGAMMA; } + +/* Not all `f16` and `f128` functions exist yet so we can't easily use the macros. */ + +#[cfg(f16_enabled)] +impl HasDomain for crate::op::fabsf16::Routine { + const DOMAIN: Domain = Domain::::UNBOUNDED; +} + +#[cfg(f128_enabled)] +impl HasDomain for crate::op::fabsf128::Routine { + const DOMAIN: Domain = Domain::::UNBOUNDED; +} diff --git a/libm/crates/libm-test/src/gen/extensive.rs b/libm/crates/libm-test/src/gen/extensive.rs index d8b991b2a..d724226e9 100644 --- a/libm/crates/libm-test/src/gen/extensive.rs +++ b/libm/crates/libm-test/src/gen/extensive.rs @@ -138,8 +138,12 @@ macro_rules! 
impl_extensive_input { }; } +#[cfg(f16_enabled)] +impl_extensive_input!(f16); impl_extensive_input!(f32); impl_extensive_input!(f64); +#[cfg(f128_enabled)] +impl_extensive_input!(f128); /// Create a test case iterator for extensive inputs. pub fn get_test_cases( diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs index 29a9dcd2b..6b08e560d 100644 --- a/libm/crates/libm-test/src/gen/random.rs +++ b/libm/crates/libm-test/src/gen/random.rs @@ -107,8 +107,12 @@ macro_rules! impl_random_input { }; } +#[cfg(f16_enabled)] +impl_random_input!(f16); impl_random_input!(f32); impl_random_input!(f64); +#[cfg(f128_enabled)] +impl_random_input!(f128); /// Create a test case iterator. pub fn get_test_cases( diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index ad98fafc8..f2b7b2f25 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -137,6 +137,7 @@ libm_macros::for_each_function! { fmod, fmodf, frexp, frexpf, ilogb, ilogbf, jn, jnf, ldexp, ldexpf, lgamma_r, lgammaf_r, modf, modff, nextafter, nextafterf, pow,powf, remquo, remquof, scalbn, scalbnf, sincos, sincosf, yn, ynf, + copysignf16, copysignf128, fabsf16, fabsf128, ], fn_extra: match MACRO_FN_NAME { // Remap function names that are different between mpfr and libm @@ -157,10 +158,8 @@ libm_macros::for_each_function! { /// Implement unary functions that don't have a `_round` version macro_rules! impl_no_round { // Unary matcher - ($($fn_name:ident, $rug_name:ident;)*) => { + ($($fn_name:ident => $rug_name:ident;)*) => { paste::paste! { - // Implement for both f32 and f64 - $( impl_no_round!{ @inner_unary [< $fn_name f >], $rug_name } )* $( impl_no_round!{ @inner_unary $fn_name, $rug_name } )* } }; @@ -183,33 +182,34 @@ macro_rules! impl_no_round { } impl_no_round! 
{ - fabs, abs_mut; - ceil, ceil_mut; - floor, floor_mut; - rint, round_even_mut; // FIXME: respect rounding mode - round, round_mut; - trunc, trunc_mut; + ceil => ceil_mut; + ceilf => ceil_mut; + fabs => abs_mut; + fabsf => abs_mut; + floor => floor_mut; + floorf => floor_mut; + rint => round_even_mut; // FIXME: respect rounding mode + rintf => round_even_mut; // FIXME: respect rounding mode + round => round_mut; + roundf => round_mut; + trunc => trunc_mut; + truncf => trunc_mut; +} + +#[cfg(f16_enabled)] +impl_no_round! { + fabsf16 => abs_mut; +} + +#[cfg(f128_enabled)] +impl_no_round! { + fabsf128 => abs_mut; } /// Some functions are difficult to do in a generic way. Implement them here. macro_rules! impl_op_for_ty { ($fty:ty, $suffix:literal) => { paste::paste! { - impl MpOp for crate::op::[]::Routine { - type MpTy = (MpFloat, MpFloat); - - fn new_mp() -> Self::MpTy { - (new_mpfloat::(), new_mpfloat::()) - } - - fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { - this.0.assign(input.0); - this.1.assign(input.1); - this.0.copysign_mut(&this.1); - prep_retval::(&mut this.0, Ordering::Equal) - } - } - impl MpOp for crate::op::[]::Routine { type MpTy = (MpFloat, MpFloat); @@ -379,9 +379,38 @@ macro_rules! impl_op_for_ty { }; } +/// Version of `impl_op_for_ty` with only functions that have `f16` and `f128` implementations. +macro_rules! impl_op_for_ty_all { + ($fty:ty, $suffix:literal) => { + paste::paste! 
{ + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + this.0.copysign_mut(&this.1); + prep_retval::(&mut this.0, Ordering::Equal) + } + } + } + }; +} + impl_op_for_ty!(f32, "f"); impl_op_for_ty!(f64, ""); +#[cfg(f16_enabled)] +impl_op_for_ty_all!(f16, "f16"); +impl_op_for_ty_all!(f32, "f"); +impl_op_for_ty_all!(f64, ""); +#[cfg(f128_enabled)] +impl_op_for_ty_all!(f128, "f128"); + // `lgamma_r` is not a simple suffix so we can't use the above macro. impl MpOp for crate::op::lgamma_r::Routine { type MpTy = MpFloat; diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 696bb3735..f8c3a7b8f 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -157,6 +157,9 @@ pub trait MaybeOverride { } } +#[cfg(f16_enabled)] +impl MaybeOverride<(f16,)> for SpecialCase {} + impl MaybeOverride<(f32,)> for SpecialCase { fn check_float( input: (f32,), @@ -290,6 +293,9 @@ impl MaybeOverride<(f64,)> for SpecialCase { } } +#[cfg(f128_enabled)] +impl MaybeOverride<(f128,)> for SpecialCase {} + /// Check NaN bits if the function requires it fn maybe_check_nan_bits(actual: F, expected: F, ctx: &CheckCtx) -> Option { if !(ctx.base_name == BaseName::Fabs || ctx.base_name == BaseName::Copysign) { @@ -317,6 +323,19 @@ fn maybe_check_nan_bits(actual: F, expected: F, ctx: &CheckCtx) -> Opt } } +#[cfg(f16_enabled)] +impl MaybeOverride<(f16, f16)> for SpecialCase { + fn check_float( + input: (f16, f16), + _actual: F, + expected: F, + _ulp: &mut u32, + ctx: &CheckCtx, + ) -> Option { + maybe_skip_binop_nan(input, expected, ctx) + } +} + impl MaybeOverride<(f32, f32)> for SpecialCase { fn check_float( input: (f32, f32), @@ -341,6 +360,19 @@ impl MaybeOverride<(f64, f64)> for SpecialCase { } } 
+#[cfg(f128_enabled)] +impl MaybeOverride<(f128, f128)> for SpecialCase { + fn check_float( + input: (f128, f128), + _actual: F, + expected: F, + _ulp: &mut u32, + ctx: &CheckCtx, + ) -> Option { + maybe_skip_binop_nan(input, expected, ctx) + } +} + /// Musl propagates NaNs if one is provided as the input, but we return the other input. // F1 and F2 are always the same type, this is just to please generics fn maybe_skip_binop_nan( diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index 261d1f254..0a4baa2e3 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -303,6 +303,12 @@ where impl_float!(f32, f64); +#[cfg(f16_enabled)] +impl_float!(f16); + +#[cfg(f128_enabled)] +impl_float!(f128); + /* trait implementations for compound types */ /// Implement `CheckOutput` for combinations of types. diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index ecd379a0a..3e11d322a 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -46,6 +46,8 @@ where libm_macros::for_each_function! { callback: musl_rand_tests, + // Musl does not support `f16` and `f128` on all platforms. + skip: [copysignf16, copysignf128, fabsf16, fabsf128], attributes: [ #[cfg_attr(x86_no_sse, ignore)] // FIXME(correctness): wrong result on i586 [exp10, exp10f, exp2, exp2f, rint] diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 42ec965c1..7961b0802 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -120,6 +120,8 @@ libm_macros::for_each_function! 
{ atan2f, copysign, copysignf, + copysignf16, + copysignf128, fdim, fdimf, fma, diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 4b10812c3..0b2d6214f 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -136,6 +136,20 @@ ], "type": "f32" }, + "copysignf128": { + "sources": [ + "src/math/copysignf128.rs", + "src/math/generic/copysign.rs" + ], + "type": "f128" + }, + "copysignf16": { + "sources": [ + "src/math/copysignf16.rs", + "src/math/generic/copysign.rs" + ], + "type": "f16" + }, "cos": { "sources": [ "src/libm_helper.rs", @@ -258,6 +272,20 @@ ], "type": "f32" }, + "fabsf128": { + "sources": [ + "src/math/fabsf128.rs", + "src/math/generic/fabs.rs" + ], + "type": "f128" + }, + "fabsf16": { + "sources": [ + "src/math/fabsf16.rs", + "src/math/generic/fabs.rs" + ], + "type": "f16" + }, "fdim": { "sources": [ "src/libm_helper.rs", diff --git a/libm/etc/function-list.txt b/libm/etc/function-list.txt index 51f5b221c..0a1bbab24 100644 --- a/libm/etc/function-list.txt +++ b/libm/etc/function-list.txt @@ -19,6 +19,8 @@ ceil ceilf copysign copysignf +copysignf128 +copysignf16 cos cosf cosh @@ -37,6 +39,8 @@ expm1 expm1f fabs fabsf +fabsf128 +fabsf16 fdim fdimf floor From eec6e2dd67071556c4fbbcbd09edeef27a984d35 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 28 Dec 2024 09:52:19 +0000 Subject: [PATCH 1029/1459] Add domain and edge case tests to musl This provides an increase in test coverage on platforms that cannot test against MPFR. 
--- libm/crates/libm-test/src/lib.rs | 4 +- libm/crates/libm-test/src/op.rs | 2 + .../libm-test/tests/compare_built_musl.rs | 97 ++++++++++++++++++- 3 files changed, 101 insertions(+), 2 deletions(-) diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index a940db1d2..251114a0d 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -23,7 +23,9 @@ use std::time::SystemTime; pub use f8_impl::f8; pub use libm::support::{Float, Int, IntTy, MinInt}; pub use num::{FloatExt, logspace}; -pub use op::{BaseName, FloatTy, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet, Ty}; +pub use op::{ + BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustFn, OpRustRet, Ty, +}; pub use precision::{MaybeOverride, SpecialCase, default_ulp}; use run_cfg::EXTENSIVE_MAX_ITERATIONS; pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, skip_extensive_test}; diff --git a/libm/crates/libm-test/src/op.rs b/libm/crates/libm-test/src/op.rs index ee61eb0b8..8329d3424 100644 --- a/libm/crates/libm-test/src/op.rs +++ b/libm/crates/libm-test/src/op.rs @@ -96,6 +96,8 @@ pub type OpFTy = ::FTy; pub type OpITy = <::FTy as Float>::Int; /// Access the associated `CFn` type from an op (helper to avoid ambiguous associated types). pub type OpCFn = ::CFn; +/// Access the associated `CRet` type from an op (helper to avoid ambiguous associated types). +pub type OpCRet = ::CRet; /// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types). pub type OpRustFn = ::RustFn; /// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types). 
diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 3e11d322a..b91d7f9f5 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -9,8 +9,9 @@ // There are some targets we can't build musl for #![cfg(feature = "build-musl")] -use libm_test::gen::random; +use libm_test::domain::HasDomain; use libm_test::gen::random::RandomInput; +use libm_test::gen::{domain_logspace, edge_cases, random}; use libm_test::{CheckBasis, CheckCtx, CheckOutput, MathOp, TupleCall}; macro_rules! musl_rand_tests { @@ -53,3 +54,97 @@ libm_macros::for_each_function! { [exp10, exp10f, exp2, exp2f, rint] ], } + +/// Test against musl with generators from a domain. +macro_rules! musl_domain_tests { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + ) => { + paste::paste! { + #[test] + $(#[$attr])* + fn [< musl_edge_case_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + domain_test_runner::( + edge_cases::get_test_cases::, + musl_math_sys::$fn_name, + ); + } + + #[test] + $(#[$attr])* + fn [< musl_logspace_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + domain_test_runner::( + domain_logspace::get_test_cases::, + musl_math_sys::$fn_name, + ); + } + } + }; +} + +/// Test a single routine against domain-aware inputs. +fn domain_test_runner(gen: impl FnOnce(&CheckCtx) -> I, musl_fn: Op::CFn) +where + Op: MathOp, + Op: HasDomain, + I: Iterator, +{ + let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl); + let cases = gen(&ctx); + + for input in cases { + let musl_res = input.call(musl_fn); + let crate_res = input.call(Op::ROUTINE); + + crate_res.validate(musl_res, input, &ctx).unwrap(); + } +} + +libm_macros::for_each_function! 
{ + callback: musl_domain_tests, + attributes: [], + skip: [ + // Functions with multiple inputs + atan2, + atan2f, + copysign, + copysignf, + copysignf16, + copysignf128, + fdim, + fdimf, + fma, + fmaf, + fmax, + fmaxf, + fmin, + fminf, + fmod, + fmodf, + hypot, + hypotf, + jn, + jnf, + ldexp, + ldexpf, + nextafter, + nextafterf, + pow, + powf, + remainder, + remainderf, + remquo, + remquof, + scalbn, + scalbnf, + yn, + ynf, + + // Not provided by musl + fabsf16, + fabsf128, + ], +} From 6f3344d7b24284209fe280c5940051c41339711e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 Jan 2025 09:26:27 +0000 Subject: [PATCH 1030/1459] Add an override for failing ceil/floor tests on i586 --- libm/crates/libm-test/src/precision.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index f8c3a7b8f..817ea0fae 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -270,6 +270,16 @@ impl MaybeOverride<(f64,)> for SpecialCase { return XFAIL; } + if (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor) + && expected.eq_repr(F::NEG_ZERO) + && actual.eq_repr(F::ZERO) + && cfg!(x86_no_sse) + { + // FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0. 
+ // See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955 + return XFAIL; + } + maybe_check_nan_bits(actual, expected, ctx) } From f5eb6f42cfff1560bdffdedd7a9f12e12358ad53 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 Jan 2025 09:46:16 +0000 Subject: [PATCH 1031/1459] Loosen precision on i586 based on new tests --- libm/crates/libm-test/src/precision.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 817ea0fae..1a66a430c 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -106,7 +106,11 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { match ctx.fn_ident { Id::Asinh => ulp = 3, Id::Asinhf => ulp = 3, + Id::Exp10 | Id::Exp10f => ulp = 1_000_000, + Id::Exp2 | Id::Exp2f => ulp = 10_000_000, + Id::Fmaf => ulp = 1, Id::Log1p | Id::Log1pf => ulp = 2, + Id::Rint => ulp = 100_000, Id::Round => ulp = 1, Id::Tan => ulp = 2, _ => (), @@ -271,15 +275,23 @@ impl MaybeOverride<(f64,)> for SpecialCase { } if (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor) + && cfg!(x86_no_sse) && expected.eq_repr(F::NEG_ZERO) && actual.eq_repr(F::ZERO) - && cfg!(x86_no_sse) { // FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0. // See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955 return XFAIL; } + if (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2) + && cfg!(x86_no_sse) + { + // FIXME: i586 has very imprecise results with ULP > u32::MAX for these + // operations so we can't reasonably provide a limit. 
+ return XFAIL; + } + maybe_check_nan_bits(actual, expected, ctx) } From 9592711f794ff20fc389aa995b9efce8a65db5f3 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 Jan 2025 20:17:38 +0000 Subject: [PATCH 1032/1459] ci: Only update the github ref for pull requests On master, this fetch fails with: fatal: refusing to fetch into branch 'refs/heads/master' checked out at '/home/runner/work/libm/libm' Just skip the command when this shouldn't be needed. --- libm/.github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 1b2fd12ba..320800f2e 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -198,6 +198,7 @@ jobs: fetch-depth: 100 - name: Fetch pull request ref run: git fetch origin "$GITHUB_REF:$GITHUB_REF" + if: github.event_name == 'pull_request' - run: python3 ci/calculate-exhaustive-matrix.py >> "$GITHUB_OUTPUT" id: script From 7fe41b29ca3fa4de7aed8fb3915ebe9bf25828d7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 Jan 2025 05:15:40 +0000 Subject: [PATCH 1033/1459] Enable MPFR tests on i586 MPFR does build and run correctly without SSE, but requires `force-cross` be enabled. 
--- libm/ci/docker/i586-unknown-linux-gnu/Dockerfile | 2 +- libm/ci/run.sh | 2 +- libm/crates/libm-test/Cargo.toml | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/libm/ci/docker/i586-unknown-linux-gnu/Dockerfile b/libm/ci/docker/i586-unknown-linux-gnu/Dockerfile index 3b0bfc0d3..37e206a84 100644 --- a/libm/ci/docker/i586-unknown-linux-gnu/Dockerfile +++ b/libm/ci/docker/i586-unknown-linux-gnu/Dockerfile @@ -2,4 +2,4 @@ FROM ubuntu:24.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ - gcc-multilib libc6-dev ca-certificates + gcc-multilib m4 make libc6-dev ca-certificates diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 7e514a1cd..70fc271f1 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -45,8 +45,8 @@ case "$target" in # FIXME(ci): we should be able to enable aarch64 Linux here once GHA # support rolls out. x86_64*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;; - # i686 works fine, i586 does not i686*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;; + i586*) extra_flags="$extra_flags --features libm-test/test-multiprecision --features gmp-mpfr-sys/force-cross" ;; # Apple aarch64 is native aarch64*apple*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;; esac diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 2761d3d52..371beb19a 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -12,7 +12,7 @@ unstable-float = ["libm/unstable-float", "rug?/nightly-float"] # Generate tests which are random inputs and the outputs are calculated with # musl libc. 
-test-multiprecision = ["dep:az", "dep:rug"] +test-multiprecision = ["dep:az", "dep:rug", "dep:gmp-mpfr-sys"] # Build our own musl for testing and benchmarks build-musl = ["dep:musl-math-sys"] @@ -26,6 +26,7 @@ short-benchmarks = [] [dependencies] anyhow = "1.0.90" az = { version = "1.2.1", optional = true } +gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false, features = ["mpfr"] } indicatif = { version = "0.17.9", default-features = false } libm = { path = "../..", features = ["unstable-public-internals"] } libm-macros = { path = "../libm-macros" } From 1acec06c03560cce4e5f3f347b15702c1ec26ec7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 Jan 2025 20:27:48 +0000 Subject: [PATCH 1034/1459] Increase the allowed precision for failing tests on i586 These will need to be fixed, for now just xfail them so this doesn't block better test coverage. --- libm/crates/libm-test/src/precision.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 1a66a430c..03bf7cecc 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -104,11 +104,14 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { // In some cases, our implementation is less accurate than musl on i586. if cfg!(x86_no_sse) { match ctx.fn_ident { + // FIXME(#401): these need to be correctly rounded but are not. 
+ Id::Fmaf => ulp = 1, + Id::Fdim => ulp = 1, + Id::Asinh => ulp = 3, Id::Asinhf => ulp = 3, Id::Exp10 | Id::Exp10f => ulp = 1_000_000, Id::Exp2 | Id::Exp2f => ulp = 10_000_000, - Id::Fmaf => ulp = 1, Id::Log1p | Id::Log1pf => ulp = 2, Id::Rint => ulp = 100_000, Id::Round => ulp = 1, From 59485a381096d3ae7dbe5f49fb32b4ba0462d568 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 Jan 2025 21:57:29 +0000 Subject: [PATCH 1035/1459] Switch from using `unstable-intrinsics` to `intrinsics_enabled` Unlike `unstable-intrinsics`, `intrinsics_enabled` gets disabled with `force-soft-floats` which is what we want here. --- libm/src/math/arch/intrinsics.rs | 6 ++---- libm/src/math/support/float_traits.rs | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/libm/src/math/arch/intrinsics.rs b/libm/src/math/arch/intrinsics.rs index 1cf9291f4..733af0f1a 100644 --- a/libm/src/math/arch/intrinsics.rs +++ b/libm/src/math/arch/intrinsics.rs @@ -12,13 +12,11 @@ pub fn ceilf(x: f32) -> f32 { } pub fn fabs(x: f64) -> f64 { - // SAFETY: safe intrinsic with no preconditions - unsafe { core::intrinsics::fabsf64(x) } + x.abs() } pub fn fabsf(x: f32) -> f32 { - // SAFETY: safe intrinsic with no preconditions - unsafe { core::intrinsics::fabsf32(x) } + x.abs() } pub fn floor(x: f64) -> f64 { diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 3b5be4fa3..697050966 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -200,7 +200,7 @@ macro_rules! float_impl { fn abs(self) -> Self { cfg_if! { // FIXME(msrv): `abs` is available in `core` starting with 1.85. - if #[cfg(feature = "unstable-intrinsics")] { + if #[cfg(intrinsics_enabled)] { self.abs() } else { super::super::generic::fabs(self) @@ -210,7 +210,7 @@ macro_rules! float_impl { fn copysign(self, other: Self) -> Self { cfg_if! { // FIXME(msrv): `copysign` is available in `core` starting with 1.85. 
- if #[cfg(feature = "unstable-intrinsics")] { + if #[cfg(intrinsics_enabled)] { self.copysign(other) } else { super::super::generic::copysign(self, other) From 59fc5ba1b6b6a091c307055c1884935ecc722cf5 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 Jan 2025 22:52:01 +0000 Subject: [PATCH 1036/1459] Remove an unused `feature = "force-soft-floats"` gate --- libm/src/math/arch/i686.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/libm/src/math/arch/i686.rs b/libm/src/math/arch/i686.rs index 80f7face1..ad54d8b61 100644 --- a/libm/src/math/arch/i686.rs +++ b/libm/src/math/arch/i686.rs @@ -1,7 +1,5 @@ //! Architecture-specific support for x86-32 and x86-64 with SSE2 -#![cfg(not(feature = "force-soft-floats"))] - #[cfg(target_arch = "x86")] use core::arch::x86::*; #[cfg(target_arch = "x86_64")] From 71ef73384d77ae582ff3200e96d7004bcb400d1d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 Jan 2025 23:17:38 +0000 Subject: [PATCH 1037/1459] Don't use intrinsics abs for `f16` and `f128` on wasm32 This configuration was duplicated from `fabs` and `fabsf`, but wasm is unlikely to have an intrinsic lowering for these float types. So, just always use the generic. --- libm/src/math/fabsf128.rs | 6 ------ libm/src/math/fabsf16.rs | 6 ------ 2 files changed, 12 deletions(-) diff --git a/libm/src/math/fabsf128.rs b/libm/src/math/fabsf128.rs index ef531bd91..46429ca49 100644 --- a/libm/src/math/fabsf128.rs +++ b/libm/src/math/fabsf128.rs @@ -4,12 +4,6 @@ /// by direct manipulation of the bit representation of `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fabsf128(x: f128) -> f128 { - select_implementation! 
{ - name: fabsf, - use_intrinsic: target_arch = "wasm32", - args: x, - } - super::generic::fabs(x) } diff --git a/libm/src/math/fabsf16.rs b/libm/src/math/fabsf16.rs index eb41f7391..eee42ac6a 100644 --- a/libm/src/math/fabsf16.rs +++ b/libm/src/math/fabsf16.rs @@ -4,12 +4,6 @@ /// by direct manipulation of the bit representation of `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fabsf16(x: f16) -> f16 { - select_implementation! { - name: fabsf, - use_intrinsic: target_arch = "wasm32", - args: x, - } - super::generic::fabs(x) } From 4359549eb8ee01d7b8298a0a3257a1fae7d02501 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 6 Jan 2025 01:10:04 +0000 Subject: [PATCH 1038/1459] Update the `libm` submodule --- build.rs | 49 ++++++++++++++++++++++++++++++++++++++++++++----- configure.rs | 8 ++++++++ libm | 2 +- src/math.rs | 1 + 4 files changed, 54 insertions(+), 6 deletions(-) diff --git a/build.rs b/build.rs index 22ec9e4d2..2bb8651d7 100644 --- a/build.rs +++ b/build.rs @@ -14,12 +14,9 @@ fn main() { configure_check_cfg(); configure_f16_f128(&target); - println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display()); + configure_libm(&target); - // Activate libm's unstable features to make full use of Nightly. - println!("cargo::rustc-check-cfg=cfg(feature, values(\"unstable\", \"force-soft-floats\"))"); - println!("cargo:rustc-cfg=feature=\"unstable\""); - println!("cargo:rustc-cfg=feature=\"force-soft-floats\""); + println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display()); // Emscripten's runtime includes all the builtins if target.os == "emscripten" { @@ -104,6 +101,48 @@ fn main() { } } +/// Run configuration for `libm` since it is included directly. +/// +/// Much of this is copied from `libm/configure.rs`. 
+fn configure_libm(target: &Target) { + println!("cargo:rustc-check-cfg=cfg(intrinsics_enabled)"); + println!("cargo:rustc-check-cfg=cfg(arch_enabled)"); + println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)"); + println!("cargo:rustc-check-cfg=cfg(feature, values(\"unstable-public-internals\"))"); + + // Always use intrinsics + println!("cargo:rustc-cfg=intrinsics_enabled"); + + // The arch module may contain assembly. + if cfg!(feature = "no-asm") { + println!("cargo:rustc-cfg=feature=\"force-soft-floats\""); + } else { + println!("cargo:rustc-cfg=arch_enabled"); + } + + println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)"); + if target.opt_level >= 2 { + println!("cargo:rustc-cfg=optimizations_enabled"); + } + + // Config shorthands + println!("cargo:rustc-check-cfg=cfg(x86_no_sse)"); + if target.arch == "x86" && !target.features.iter().any(|f| f == "sse") { + // Shorthand to detect i586 targets + println!("cargo:rustc-cfg=x86_no_sse"); + } + + println!( + "cargo:rustc-env=CFG_CARGO_FEATURES={:?}", + target.cargo_features + ); + println!("cargo:rustc-env=CFG_OPT_LEVEL={}", target.opt_level); + println!("cargo:rustc-env=CFG_TARGET_FEATURES={:?}", target.features); + + // Activate libm's unstable features to make full use of Nightly. 
+ println!("cargo:rustc-cfg=feature=\"unstable-intrinsics\""); +} + fn aarch64_symbol(ordering: Ordering) -> &'static str { match ordering { Ordering::Relaxed => "relax", diff --git a/configure.rs b/configure.rs index e20c717ec..6cfbe11c2 100644 --- a/configure.rs +++ b/configure.rs @@ -6,6 +6,8 @@ use std::env; #[allow(dead_code)] pub struct Target { pub triple: String, + pub opt_level: u8, + pub cargo_features: Vec, pub os: String, pub arch: String, pub vendor: String, @@ -22,10 +24,16 @@ impl Target { "big" => false, x => panic!("unknown endian {x}"), }; + let cargo_features = env::vars() + .filter_map(|(name, _value)| name.strip_prefix("CARGO_FEATURE_").map(ToOwned::to_owned)) + .map(|s| s.to_lowercase().replace("_", "-")) + .collect(); Self { triple: env::var("TARGET").unwrap(), os: env::var("CARGO_CFG_TARGET_OS").unwrap(), + opt_level: env::var("OPT_LEVEL").unwrap().parse().unwrap(), + cargo_features, arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(), vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), env: env::var("CARGO_CFG_TARGET_ENV").unwrap(), diff --git a/libm b/libm index f4e5b38ae..424c3ece1 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit f4e5b38aee0e0c592a82ed45b21cd068c9b6c89a +Subproject commit 424c3ece1a7546de8530fa9d0fbf90d3b182cd18 diff --git a/src/math.rs b/src/math.rs index 477dfe365..da208239e 100644 --- a/src/math.rs +++ b/src/math.rs @@ -1,3 +1,4 @@ +#[rustfmt::skip] #[allow(dead_code)] #[allow(unused_imports)] #[allow(clippy::all)] From a498d478440b62dcabfeb7f6a44fccce45ce7298 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 7 Jan 2025 00:19:26 +0000 Subject: [PATCH 1039/1459] Replace "intrinsic" config with "arch" config WASM is the only architecture we use `intrinsics::` for. We probably don't want to do this for any other architectures since it is better to use assembly, or work toward getting the functions available in `core`. 
To more accurately reflect the relationship between arch and intrinsics, make wasm32 an `arch` module and call the intrinsics from there. --- libm/etc/function-definitions.json | 20 +++++++-------- libm/src/math/arch/mod.rs | 8 +++--- .../math/arch/{intrinsics.rs => wasm32.rs} | 6 +++-- libm/src/math/ceil.rs | 2 +- libm/src/math/ceilf.rs | 2 +- libm/src/math/fabs.rs | 2 +- libm/src/math/fabsf.rs | 2 +- libm/src/math/floor.rs | 2 +- libm/src/math/floorf.rs | 2 +- libm/src/math/sqrt.rs | 6 +++-- libm/src/math/sqrtf.rs | 6 +++-- libm/src/math/support/macros.rs | 25 +++---------------- libm/src/math/trunc.rs | 2 +- libm/src/math/truncf.rs | 2 +- 14 files changed, 37 insertions(+), 50 deletions(-) rename libm/src/math/arch/{intrinsics.rs => wasm32.rs} (82%) diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 0b2d6214f..3cf7e0fed 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -108,14 +108,14 @@ "sources": [ "src/libm_helper.rs", "src/math/arch/i586.rs", - "src/math/arch/intrinsics.rs", + "src/math/arch/wasm32.rs", "src/math/ceil.rs" ], "type": "f64" }, "ceilf": { "sources": [ - "src/math/arch/intrinsics.rs", + "src/math/arch/wasm32.rs", "src/math/ceilf.rs" ], "type": "f32" @@ -258,7 +258,7 @@ "fabs": { "sources": [ "src/libm_helper.rs", - "src/math/arch/intrinsics.rs", + "src/math/arch/wasm32.rs", "src/math/fabs.rs", "src/math/generic/fabs.rs" ], @@ -266,7 +266,7 @@ }, "fabsf": { "sources": [ - "src/math/arch/intrinsics.rs", + "src/math/arch/wasm32.rs", "src/math/fabsf.rs", "src/math/generic/fabs.rs" ], @@ -303,14 +303,14 @@ "sources": [ "src/libm_helper.rs", "src/math/arch/i586.rs", - "src/math/arch/intrinsics.rs", + "src/math/arch/wasm32.rs", "src/math/floor.rs" ], "type": "f64" }, "floorf": { "sources": [ - "src/math/arch/intrinsics.rs", + "src/math/arch/wasm32.rs", "src/math/floorf.rs" ], "type": "f32" @@ -683,7 +683,7 @@ "sources": [ "src/libm_helper.rs", "src/math/arch/i686.rs", - 
"src/math/arch/intrinsics.rs", + "src/math/arch/wasm32.rs", "src/math/sqrt.rs" ], "type": "f64" @@ -691,7 +691,7 @@ "sqrtf": { "sources": [ "src/math/arch/i686.rs", - "src/math/arch/intrinsics.rs", + "src/math/arch/wasm32.rs", "src/math/sqrtf.rs" ], "type": "f32" @@ -738,14 +738,14 @@ "trunc": { "sources": [ "src/libm_helper.rs", - "src/math/arch/intrinsics.rs", + "src/math/arch/wasm32.rs", "src/math/trunc.rs" ], "type": "f64" }, "truncf": { "sources": [ - "src/math/arch/intrinsics.rs", + "src/math/arch/wasm32.rs", "src/math/truncf.rs" ], "type": "f32" diff --git a/libm/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs index cf9547117..bd79ae1c6 100644 --- a/libm/src/math/arch/mod.rs +++ b/libm/src/math/arch/mod.rs @@ -5,14 +5,14 @@ //! is used when calling the function directly. This helps anyone who uses `libm` directly, as //! well as improving things when these routines are called as part of other implementations. -#[cfg(intrinsics_enabled)] -pub mod intrinsics; - // Most implementations should be defined here, to ensure they are not made available when // soft floats are required. #[cfg(arch_enabled)] cfg_if! { - if #[cfg(target_feature = "sse2")] { + if #[cfg(all(target_arch = "wasm32", intrinsics_enabled))] { + mod wasm32; + pub use wasm32::{ceil, ceilf, fabs, fabsf, floor, floorf, sqrt, sqrtf, trunc, truncf}; + } else if #[cfg(target_feature = "sse2")] { mod i686; pub use i686::{sqrt, sqrtf}; } diff --git a/libm/src/math/arch/intrinsics.rs b/libm/src/math/arch/wasm32.rs similarity index 82% rename from libm/src/math/arch/intrinsics.rs rename to libm/src/math/arch/wasm32.rs index 733af0f1a..09df8624e 100644 --- a/libm/src/math/arch/intrinsics.rs +++ b/libm/src/math/arch/wasm32.rs @@ -1,5 +1,7 @@ -// Config is needed for times when this module is available but we don't call everything -#![allow(dead_code)] +//! Wasm asm is not stable; just use intrinsics for operations that have asm routine equivalents. +//! +//! 
Note that we need to be absolutely certain that everything here lowers to assembly operations, +//! otherwise libcalls will be recursive. pub fn ceil(x: f64) -> f64 { // SAFETY: safe intrinsic with no preconditions diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index b0576f3dc..398bfee47 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -10,8 +10,8 @@ const TOINT: f64 = 1. / f64::EPSILON; pub fn ceil(x: f64) -> f64 { select_implementation! { name: ceil, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")), - use_intrinsic: target_arch = "wasm32", args: x, } diff --git a/libm/src/math/ceilf.rs b/libm/src/math/ceilf.rs index 9eb2ec07a..9e8e78e3e 100644 --- a/libm/src/math/ceilf.rs +++ b/libm/src/math/ceilf.rs @@ -7,7 +7,7 @@ use core::f32; pub fn ceilf(x: f32) -> f32 { select_implementation! { name: ceilf, - use_intrinsic: target_arch = "wasm32", + use_arch: all(target_arch = "wasm32", intrinsics_enabled), args: x, } diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs index 46c0d88a5..22867fab0 100644 --- a/libm/src/math/fabs.rs +++ b/libm/src/math/fabs.rs @@ -6,7 +6,7 @@ pub fn fabs(x: f64) -> f64 { select_implementation! { name: fabs, - use_intrinsic: target_arch = "wasm32", + use_arch: all(target_arch = "wasm32", intrinsics_enabled), args: x, } diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs index d5775b600..e5820a26c 100644 --- a/libm/src/math/fabsf.rs +++ b/libm/src/math/fabsf.rs @@ -6,7 +6,7 @@ pub fn fabsf(x: f32) -> f32 { select_implementation! { name: fabsf, - use_intrinsic: target_arch = "wasm32", + use_arch: all(target_arch = "wasm32", intrinsics_enabled), args: x, } diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index e478f6d54..2823bf44d 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -10,8 +10,8 @@ const TOINT: f64 = 1. / f64::EPSILON; pub fn floor(x: f64) -> f64 { select_implementation! 
{ name: floor, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")), - use_intrinsic: target_arch = "wasm32", args: x, } diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs index bd1570c86..23a18c0f7 100644 --- a/libm/src/math/floorf.rs +++ b/libm/src/math/floorf.rs @@ -7,7 +7,7 @@ use core::f32; pub fn floorf(x: f32) -> f32 { select_implementation! { name: floorf, - use_intrinsic: target_arch = "wasm32", + use_arch: all(target_arch = "wasm32", intrinsics_enabled), args: x, } diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 3f1a10fdd..2fd7070b1 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -83,8 +83,10 @@ use core::f64; pub fn sqrt(x: f64) -> f64 { select_implementation! { name: sqrt, - use_arch: target_feature = "sse2", - use_intrinsic: target_arch = "wasm32", + use_arch: any( + all(target_arch = "wasm32", intrinsics_enabled), + target_feature = "sse2" + ), args: x, } diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index 23f9a8443..319335163 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -18,8 +18,10 @@ pub fn sqrtf(x: f32) -> f32 { select_implementation! { name: sqrtf, - use_arch: target_feature = "sse2", - use_intrinsic: target_arch = "wasm32", + use_arch: any( + all(target_arch = "wasm32", intrinsics_enabled), + target_feature = "sse2" + ), args: x, } diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs index 9441eace5..f5094b9da 100644 --- a/libm/src/math/support/macros.rs +++ b/libm/src/math/support/macros.rs @@ -39,13 +39,8 @@ macro_rules! cfg_if { (@__identity $($tokens:tt)*) => { $($tokens)* }; } -/// Choose among using an intrinsic, an arch-specific implementation, and the function body. -/// Returns directly if the intrinsic or arch is used, otherwise continue with the rest of the -/// function. 
-/// -/// Specify a `use_intrinsic` meta field if the intrinsic is (1) available on the platforms (i.e. -/// LLVM lowers it without libcalls that may recurse), (2) it is likely to be more performant. -/// Intrinsics require wrappers in the `math::arch::intrinsics` module. +/// Choose between using an arch-specific implementation and the function body. Returns directly +/// if the arch implementation is used, otherwise continue with the rest of the function. /// /// Specify a `use_arch` meta field if an architecture-specific implementation is provided. /// These live in the `math::arch::some_target_arch` module. @@ -53,8 +48,7 @@ macro_rules! cfg_if { /// Specify a `use_arch_required` meta field if something architecture-specific must be used /// regardless of feature configuration (`force-soft-floats`). /// -/// The passed meta options do not need to account for relevant Cargo features -/// (`unstable-intrinsics`, `arch`, `force-soft-floats`), this macro handles that part. +/// The passed meta options do not need to account for the `arch` target feature. macro_rules! select_implementation { ( name: $fn_name:ident, @@ -64,15 +58,12 @@ macro_rules! select_implementation { // Configuration meta for when to use the arch module regardless of whether softfloats // have been requested. $( use_arch_required: $use_arch_required:meta, )? - // Configuration meta for when to call intrinsics and let LLVM figure it out - $( use_intrinsic: $use_intrinsic:meta, )? args: $($arg:ident),+ , ) => { // FIXME: these use paths that are a pretty fragile (`super`). We should figure out // something better w.r.t. how this is vendored into compiler-builtins. // However, we do need a few things from `arch` that are used even with soft floats. - // select_implementation! { @cfg $($use_arch_required)?; if true { @@ -89,16 +80,6 @@ macro_rules! 
select_implementation { return super::arch::$fn_name( $($arg),+ ); } } - - // Never use intrinsics if we are forcing soft floats, and only enable with the - // `unstable-intrinsics` feature. - #[cfg(intrinsics_enabled)] - select_implementation! { - @cfg $( $use_intrinsic )?; - if true { - return super::arch::intrinsics::$fn_name( $($arg),+ ); - } - } }; // Coalesce helper to construct an expression only if a config is provided diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs index d85bffb40..7e5c4f2c2 100644 --- a/libm/src/math/trunc.rs +++ b/libm/src/math/trunc.rs @@ -7,7 +7,7 @@ use core::f64; pub fn trunc(x: f64) -> f64 { select_implementation! { name: trunc, - use_intrinsic: target_arch = "wasm32", + use_arch: all(target_arch = "wasm32", intrinsics_enabled), args: x, } diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs index 82017b87b..b491747d9 100644 --- a/libm/src/math/truncf.rs +++ b/libm/src/math/truncf.rs @@ -7,7 +7,7 @@ use core::f32; pub fn truncf(x: f32) -> f32 { select_implementation! 
{ name: truncf, - use_intrinsic: target_arch = "wasm32", + use_arch: all(target_arch = "wasm32", intrinsics_enabled), args: x, } From eb035f6647e5f7426a3336969ec74976ced16049 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 7 Jan 2025 01:49:52 +0000 Subject: [PATCH 1040/1459] Update the `libm` submodule --- libm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm b/libm index 424c3ece1..44770b969 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 424c3ece1a7546de8530fa9d0fbf90d3b182cd18 +Subproject commit 44770b96920557baf38990d2ee4142e166be579d From 537d20e865053d0066a366e60abff472ac9dd3ac Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 7 Jan 2025 01:57:53 +0000 Subject: [PATCH 1041/1459] chore: release v0.1.141 --- CHANGELOG.md | 9 +++++++++ Cargo.toml | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f88859143..ccfc97d1b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.141](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.140...compiler_builtins-v0.1.141) - 2025-01-07 + +### Other + +- Update the `libm` submodule +- Fix new `clippy::precedence` errors +- Rename `EXP_MAX` to `EXP_SAT` +- Shorten prefixes for float constants + ## [0.1.140](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.139...compiler_builtins-v0.1.140) - 2024-12-26 ### Other diff --git a/Cargo.toml b/Cargo.toml index 33d5c06b7..a0c4cfaaf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.140" +version = "0.1.141" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 
9899d0048358be799fbf19b494aecd1be3a7c59e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 7 Jan 2025 02:54:37 +0000 Subject: [PATCH 1042/1459] Increase the allowed ULP for `tgammaf` Extensive tests report that the precision isn't actually 0: ---- mp_extensive_tgammaf ---- input: (-0.00063536887,) (0xba268ee2,) expected: -1574.4668 0xc4c4cef0 actual: -1574.4667 0xc4c4ceef Caused by: ulp 1 > 0 Update ULP to reflect this. After this change, `tgammaf` extensive tests pass. --- libm/crates/libm-test/src/precision.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 03bf7cecc..cbe4bdf88 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -44,7 +44,6 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { // Operations that aren't required to be exact, but our implementations are. Bn::Cbrt if ctx.fn_ident != Id::Cbrt => 0, - Bn::Tgamma if ctx.fn_ident != Id::Tgamma => 0, // Bessel functions have large inaccuracies. Bn::J0 | Bn::J1 | Bn::Y0 | Bn::Y1 | Bn::Jn | Bn::Yn => 8_000_000, @@ -78,6 +77,8 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { Bn::Sinh => 2, Bn::Tan => 1, Bn::Tanh => 2, + // tgammaf has higher accuracy than tgamma. 
+ Bn::Tgamma if ctx.fn_ident != Id::Tgamma => 1, Bn::Tgamma => 20, }; From 6f72bab33177a146387a1d859468a648d724c6d2 Mon Sep 17 00:00:00 2001 From: beetrees Date: Tue, 7 Jan 2025 13:51:15 +0000 Subject: [PATCH 1043/1459] Make extensive tests exhaustive if there are enough iterations available --- .../libm-test/src/gen/domain_logspace.rs | 2 +- libm/crates/libm-test/src/gen/extensive.rs | 237 +++++++++++++----- libm/crates/libm-test/src/lib.rs | 2 +- libm/crates/libm-test/src/num.rs | 89 ++++++- .../crates/libm-test/tests/z_extensive/run.rs | 3 +- 5 files changed, 252 insertions(+), 81 deletions(-) diff --git a/libm/crates/libm-test/src/gen/domain_logspace.rs b/libm/crates/libm-test/src/gen/domain_logspace.rs index 3d8a3e7fe..c6963ad43 100644 --- a/libm/crates/libm-test/src/gen/domain_logspace.rs +++ b/libm/crates/libm-test/src/gen/domain_logspace.rs @@ -27,5 +27,5 @@ where let start = domain.range_start(); let end = domain.range_end(); let steps = OpITy::::try_from(ntests).unwrap_or(OpITy::::MAX); - logspace(start, end, steps).map(|v| (v,)) + logspace(start, end, steps).0.map(|v| (v,)) } diff --git a/libm/crates/libm-test/src/gen/extensive.rs b/libm/crates/libm-test/src/gen/extensive.rs index d724226e9..fb709e546 100644 --- a/libm/crates/libm-test/src/gen/extensive.rs +++ b/libm/crates/libm-test/src/gen/extensive.rs @@ -1,19 +1,18 @@ use std::fmt; use std::ops::RangeInclusive; -use libm::support::MinInt; +use libm::support::{Float, MinInt}; use crate::domain::HasDomain; -use crate::gen::KnownSize; use crate::op::OpITy; use crate::run_cfg::{int_range, iteration_count}; -use crate::{CheckCtx, GeneratorKind, MathOp, logspace}; +use crate::{CheckCtx, GeneratorKind, MathOp, linear_ints, logspace}; /// Generate a sequence of inputs that either cover the domain in completeness (for smaller float /// types and single argument functions) or provide evenly spaced inputs across the domain with /// approximately `u32::MAX` total iterations. 
pub trait ExtensiveInput { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator + Send; + fn get_cases(ctx: &CheckCtx) -> (impl Iterator + Send, u64); } /// Construct an iterator from `logspace` and also calculate the total number of steps expected @@ -21,24 +20,60 @@ pub trait ExtensiveInput { fn logspace_steps( start: Op::FTy, end: Op::FTy, - ctx: &CheckCtx, - argnum: usize, + max_steps: u64, ) -> (impl Iterator + Clone, u64) where Op: MathOp, OpITy: TryFrom, + u64: TryFrom, Error: fmt::Debug>, RangeInclusive>: Iterator, { - let max_steps = iteration_count(ctx, GeneratorKind::Extensive, argnum); let max_steps = OpITy::::try_from(max_steps).unwrap_or(OpITy::::MAX); - let iter = logspace(start, end, max_steps); + let (iter, steps) = logspace(start, end, max_steps); + + // `steps` will be <= the original `max_steps`, which is a `u64`. + (iter, steps.try_into().unwrap()) +} + +/// Represents the iterator in either `A` or `B`. +enum EitherIter { + A(A), + B(B), +} - // `logspace` can't implement `ExactSizeIterator` because of the range, but its size hint - // should be accurate (assuming <= usize::MAX iterations). - let size_hint = iter.size_hint(); - assert_eq!(size_hint.0, size_hint.1.unwrap()); +impl, B: Iterator> Iterator for EitherIter { + type Item = T; - (iter, size_hint.0.try_into().unwrap()) + fn next(&mut self) -> Option { + match self { + Self::A(iter) => iter.next(), + Self::B(iter) => iter.next(), + } + } + + fn size_hint(&self) -> (usize, Option) { + match self { + Self::A(iter) => iter.size_hint(), + Self::B(iter) => iter.size_hint(), + } + } +} + +/// Gets the total number of possible values, returning `None` if that number doesn't fit in a +/// `u64`. +fn value_count() -> Option +where + u64: TryFrom, +{ + u64::try_from(F::Int::MAX).ok().and_then(|max| max.checked_add(1)) +} + +/// Returns an iterator of every possible value of type `F`.
+fn all_values() -> impl Iterator +where + RangeInclusive: Iterator, +{ + (F::Int::MIN..=F::Int::MAX).map(|bits| F::from_bits(bits)) } macro_rules! impl_extensive_input { @@ -48,12 +83,23 @@ macro_rules! impl_extensive_input { Op: MathOp, Op: HasDomain, { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { - let start = Op::DOMAIN.range_start(); - let end = Op::DOMAIN.range_end(); - let (iter0, steps0) = logspace_steps::(start, end, ctx, 0); - let iter0 = iter0.map(|v| (v,)); - KnownSize::new(iter0, steps0) + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0); + // `f16` and `f32` can have exhaustive tests. + match value_count::() { + Some(steps0) if steps0 <= max_steps0 => { + let iter0 = all_values(); + let iter0 = iter0.map(|v| (v,)); + (EitherIter::A(iter0), steps0) + } + _ => { + let start = Op::DOMAIN.range_start(); + let end = Op::DOMAIN.range_end(); + let (iter0, steps0) = logspace_steps::(start, end, max_steps0); + let iter0 = iter0.map(|v| (v,)); + (EitherIter::B(iter0), steps0) + } + } } } @@ -61,15 +107,28 @@ macro_rules! impl_extensive_input { where Op: MathOp, { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { - let start = <$fty>::NEG_INFINITY; - let end = <$fty>::INFINITY; - let (iter0, steps0) = logspace_steps::(start, end, ctx, 0); - let (iter1, steps1) = logspace_steps::(start, end, ctx, 1); - let iter = - iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); - let count = steps0.checked_mul(steps1).unwrap(); - KnownSize::new(iter, count) + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0); + let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1); + // `f16` can have exhaustive tests. 
+ match value_count::() { + Some(count) if count <= max_steps0 && count <= max_steps1 => { + let iter = all_values() + .flat_map(|first| all_values().map(move |second| (first, second))); + (EitherIter::A(iter), count.checked_mul(count).unwrap()) + } + _ => { + let start = <$fty>::NEG_INFINITY; + let end = <$fty>::INFINITY; + let (iter0, steps0) = logspace_steps::(start, end, max_steps0); + let (iter1, steps1) = logspace_steps::(start, end, max_steps1); + let iter = iter0.flat_map(move |first| { + iter1.clone().map(move |second| (first, second)) + }); + let count = steps0.checked_mul(steps1).unwrap(); + (EitherIter::B(iter), count) + } + } } } @@ -77,22 +136,41 @@ macro_rules! impl_extensive_input { where Op: MathOp, { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { - let start = <$fty>::NEG_INFINITY; - let end = <$fty>::INFINITY; - - let (iter0, steps0) = logspace_steps::(start, end, ctx, 0); - let (iter1, steps1) = logspace_steps::(start, end, ctx, 1); - let (iter2, steps2) = logspace_steps::(start, end, ctx, 2); - - let iter = iter0 - .flat_map(move |first| iter1.clone().map(move |second| (first, second))) - .flat_map(move |(first, second)| { - iter2.clone().map(move |third| (first, second, third)) - }); - let count = steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap(); - - KnownSize::new(iter, count) + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0); + let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1); + let max_steps2 = iteration_count(ctx, GeneratorKind::Extensive, 2); + // `f16` can be exhaustively tested if `LIBM_EXTENSIVE_TESTS` is increased.
+ match value_count::() { + Some(count) + if count <= max_steps0 && count <= max_steps1 && count <= max_steps2 => + { + let iter = all_values().flat_map(|first| { + all_values().flat_map(move |second| { + all_values().map(move |third| (first, second, third)) + }) + }); + (EitherIter::A(iter), count.checked_pow(3).unwrap()) + } + _ => { + let start = <$fty>::NEG_INFINITY; + let end = <$fty>::INFINITY; + + let (iter0, steps0) = logspace_steps::(start, end, max_steps0); + let (iter1, steps1) = logspace_steps::(start, end, max_steps1); + let (iter2, steps2) = logspace_steps::(start, end, max_steps2); + + let iter = iter0 + .flat_map(move |first| iter1.clone().map(move |second| (first, second))) + .flat_map(move |(first, second)| { + iter2.clone().map(move |third| (first, second, third)) + }); + let count = + steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap(); + + (EitherIter::B(iter), count) + } + } } } @@ -100,19 +178,32 @@ macro_rules! impl_extensive_input { where Op: MathOp, { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { - let start = <$fty>::NEG_INFINITY; - let end = <$fty>::INFINITY; + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let range0 = int_range(ctx, GeneratorKind::Extensive, 0); + let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0); + let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1); + match value_count::() { + Some(count1) if count1 <= max_steps1 => { + let (iter0, steps0) = linear_ints(range0, max_steps0); + let iter = iter0 + .flat_map(move |first| all_values().map(move |second| (first, second))); + (EitherIter::A(iter), steps0.checked_mul(count1).unwrap()) + } + _ => { + let start = <$fty>::NEG_INFINITY; + let end = <$fty>::INFINITY; - let iter0 = int_range(ctx, GeneratorKind::Extensive, 0); - let steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0); - let (iter1, steps1) = logspace_steps::(start, end, ctx, 1); + let (iter0, steps0) = linear_ints(range0, max_steps0); + let 
(iter1, steps1) = logspace_steps::(start, end, max_steps1); - let iter = - iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); - let count = steps0.checked_mul(steps1).unwrap(); + let iter = iter0.flat_map(move |first| { + iter1.clone().map(move |second| (first, second)) + }); + let count = steps0.checked_mul(steps1).unwrap(); - KnownSize::new(iter, count) + (EitherIter::B(iter), count) + } + } } } @@ -120,19 +211,33 @@ macro_rules! impl_extensive_input { where Op: MathOp, { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { - let start = <$fty>::NEG_INFINITY; - let end = <$fty>::INFINITY; + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0); + let range1 = int_range(ctx, GeneratorKind::Extensive, 1); + let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1); + match value_count::() { + Some(count0) if count0 <= max_steps0 => { + let (iter1, steps1) = linear_ints(range1, max_steps1); + let iter = all_values().flat_map(move |first| { + iter1.clone().map(move |second| (first, second)) + }); + (EitherIter::A(iter), count0.checked_mul(steps1).unwrap()) + } + _ => { + let start = <$fty>::NEG_INFINITY; + let end = <$fty>::INFINITY; - let (iter0, steps0) = logspace_steps::(start, end, ctx, 0); - let iter1 = int_range(ctx, GeneratorKind::Extensive, 0); - let steps1 = iteration_count(ctx, GeneratorKind::Extensive, 0); + let (iter0, steps0) = logspace_steps::(start, end, max_steps0); + let (iter1, steps1) = linear_ints(range1, max_steps1); - let iter = - iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); - let count = steps0.checked_mul(steps1).unwrap(); + let iter = iter0.flat_map(move |first| { + iter1.clone().map(move |second| (first, second)) + }); + let count = steps0.checked_mul(steps1).unwrap(); - KnownSize::new(iter, count) + (EitherIter::B(iter), count) + } + } } } }; @@ -145,10 +250,10 @@ impl_extensive_input!(f64); 
#[cfg(f128_enabled)] impl_extensive_input!(f128); -/// Create a test case iterator for extensive inputs. +/// Create a test case iterator for extensive inputs. Also returns the total test case count. pub fn get_test_cases( ctx: &CheckCtx, -) -> impl ExactSizeIterator + Send + use<'_, Op> +) -> (impl Iterator + Send + use<'_, Op>, u64) where Op: MathOp, Op::RustArgs: ExtensiveInput, diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 251114a0d..6e7017f09 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -22,7 +22,7 @@ use std::time::SystemTime; pub use f8_impl::f8; pub use libm::support::{Float, Int, IntTy, MinInt}; -pub use num::{FloatExt, logspace}; +pub use num::{FloatExt, linear_ints, logspace}; pub use op::{ BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustFn, OpRustRet, Ty, }; diff --git a/libm/crates/libm-test/src/num.rs b/libm/crates/libm-test/src/num.rs index f693ef02f..eed941423 100644 --- a/libm/crates/libm-test/src/num.rs +++ b/libm/crates/libm-test/src/num.rs @@ -215,7 +215,13 @@ fn as_ulp_steps(x: F) -> Option { /// to logarithmic spacing of their values. /// /// Note that this tends to skip negative zero, so that needs to be checked explicitly. -pub fn logspace(start: F, end: F, steps: F::Int) -> impl Iterator + Clone +/// +/// Returns `(iterator, iterator_length)`. 
+pub fn logspace( + start: F, + end: F, + steps: F::Int, +) -> (impl Iterator + Clone, F::Int) where RangeInclusive: Iterator, { @@ -223,17 +229,42 @@ where assert!(!end.is_nan()); assert!(end >= start); - let mut steps = steps.checked_sub(F::Int::ONE).expect("`steps` must be at least 2"); + let steps = steps.checked_sub(F::Int::ONE).expect("`steps` must be at least 2"); let between = ulp_between(start, end).expect("`start` or `end` is NaN"); let spacing = (between / steps).max(F::Int::ONE); - steps = steps.min(between); // At maximum, one step per ULP + let steps = steps.min(between); // At maximum, one step per ULP let mut x = start; - (F::Int::ZERO..=steps).map(move |_| { - let ret = x; - x = x.n_up(spacing); - ret - }) + ( + (F::Int::ZERO..=steps).map(move |_| { + let ret = x; + x = x.n_up(spacing); + ret + }), + steps + F::Int::ONE, + ) +} + +/// Returns an iterator of up to `steps` integers evenly distributed. +pub fn linear_ints( + range: RangeInclusive, + steps: u64, +) -> (impl Iterator + Clone, u64) { + let steps = steps.checked_sub(1).unwrap(); + let between = u64::from(range.start().abs_diff(*range.end())); + let spacing = i32::try_from((between / steps).max(1)).unwrap(); + let steps = steps.min(between); + let mut x: i32 = *range.start(); + ( + (0..=steps).map(move |_| { + let res = x; + // Wrapping add to avoid panic on last item (where `x` could overflow past i32::MAX as + // there is no next item). 
+ x = x.wrapping_add(spacing); + res + }), + steps + 1, + ) } #[cfg(test)] @@ -422,19 +453,55 @@ mod tests { #[test] fn test_logspace() { - let ls: Vec<_> = logspace(f8::from_bits(0x0), f8::from_bits(0x4), 2).collect(); + let (ls, count) = logspace(f8::from_bits(0x0), f8::from_bits(0x4), 2); + let ls: Vec<_> = ls.collect(); let exp = [f8::from_bits(0x0), f8::from_bits(0x4)]; assert_eq!(ls, exp); + assert_eq!(ls.len(), usize::from(count)); - let ls: Vec<_> = logspace(f8::from_bits(0x0), f8::from_bits(0x4), 3).collect(); + let (ls, count) = logspace(f8::from_bits(0x0), f8::from_bits(0x4), 3); + let ls: Vec<_> = ls.collect(); let exp = [f8::from_bits(0x0), f8::from_bits(0x2), f8::from_bits(0x4)]; assert_eq!(ls, exp); + assert_eq!(ls.len(), usize::from(count)); // Check that we include all values with no repeats if `steps` exceeds the maximum number // of steps. - let ls: Vec<_> = logspace(f8::from_bits(0x0), f8::from_bits(0x3), 10).collect(); + let (ls, count) = logspace(f8::from_bits(0x0), f8::from_bits(0x3), 10); + let ls: Vec<_> = ls.collect(); let exp = [f8::from_bits(0x0), f8::from_bits(0x1), f8::from_bits(0x2), f8::from_bits(0x3)]; assert_eq!(ls, exp); + assert_eq!(ls.len(), usize::from(count)); + } + + #[test] + fn test_linear_ints() { + let (ints, count) = linear_ints(0..=4, 2); + let ints: Vec<_> = ints.collect(); + let exp = [0, 4]; + assert_eq!(ints, exp); + assert_eq!(ints.len(), usize::try_from(count).unwrap()); + + let (ints, count) = linear_ints(0..=4, 3); + let ints: Vec<_> = ints.collect(); + let exp = [0, 2, 4]; + assert_eq!(ints, exp); + assert_eq!(ints.len(), usize::try_from(count).unwrap()); + + // Check that we include all values with no repeats if `steps` exceeds the maximum number + // of steps. + let (ints, count) = linear_ints(0x0..=0x3, 10); + let ints: Vec<_> = ints.collect(); + let exp = [0, 1, 2, 3]; + assert_eq!(ints, exp); + assert_eq!(ints.len(), usize::try_from(count).unwrap()); + + // Check that there are no panics around `i32::MAX`. 
+ let (ints, count) = linear_ints(i32::MAX - 1..=i32::MAX, 5); + let ints: Vec<_> = ints.collect(); + let exp = [i32::MAX - 1, i32::MAX]; + assert_eq!(ints, exp); + assert_eq!(ints.len(), usize::try_from(count).unwrap()); } #[test] diff --git a/libm/crates/libm-test/tests/z_extensive/run.rs b/libm/crates/libm-test/tests/z_extensive/run.rs index 7acff5324..07f4d5370 100644 --- a/libm/crates/libm-test/tests/z_extensive/run.rs +++ b/libm/crates/libm-test/tests/z_extensive/run.rs @@ -106,8 +106,7 @@ where let completed = AtomicU64::new(0); let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr); - let cases = &mut extensive::get_test_cases::(&ctx); - let total: u64 = cases.len().try_into().unwrap(); + let (ref mut cases, total) = extensive::get_test_cases::(&ctx); let pb = Progress::new(Op::NAME, total); let test_single_chunk = |mp_vals: &mut Op::MpTy, input_vec: Vec| -> TestResult { From 463e9df3f0d44af2684e235492b4961f17412168 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 7 Jan 2025 17:30:05 -0500 Subject: [PATCH 1044/1459] Account for optimization levels other than numbers The build script currently panics with `opt-level=z` or `opt-level=s`. Account for this here. 
--- libm/configure.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libm/configure.rs b/libm/configure.rs index a18937c3c..8b8ba9815 100644 --- a/libm/configure.rs +++ b/libm/configure.rs @@ -7,7 +7,7 @@ use std::path::PathBuf; pub struct Config { pub manifest_dir: PathBuf, pub out_dir: PathBuf, - pub opt_level: u8, + pub opt_level: String, pub cargo_features: Vec, pub target_arch: String, pub target_env: String, @@ -31,7 +31,7 @@ impl Config { Self { manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()), out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()), - opt_level: env::var("OPT_LEVEL").unwrap().parse().unwrap(), + opt_level: env::var("OPT_LEVEL").unwrap(), cargo_features, target_arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(), target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(), @@ -91,7 +91,7 @@ fn emit_arch_cfg() { fn emit_optimization_cfg(cfg: &Config) { println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)"); - if cfg.opt_level >= 2 { + if !matches!(cfg.opt_level.as_str(), "0" | "1") { println!("cargo:rustc-cfg=optimizations_enabled"); } } From ad261507454428a9825700290ef2cf596bb5040f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 7 Jan 2025 18:11:40 -0500 Subject: [PATCH 1045/1459] Update the `libm` submodule This includes [1], which fixes a bug parsing non-numeric optimization levels. [1]: https://github.com/rust-lang/libm/pull/417 --- libm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm b/libm index 44770b969..8e82616f1 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 44770b96920557baf38990d2ee4142e166be579d +Subproject commit 8e82616f154b06cf4ee9cdb82a4f56474a403d04 From 4e3cc6d13e0331c4a3851828a851933a7ba130b4 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 7 Jan 2025 18:12:19 -0500 Subject: [PATCH 1046/1459] Account for optimization levels other than numbers The build script currently panics with `opt-level=z` or `opt-level=s`. Account for this here. 
This is the `compiler-builtins` version of [1]. Fixes: https://github.com/rust-lang/compiler-builtins/issues/742 [1]: https://github.com/rust-lang/libm/pull/417 --- build.rs | 2 +- configure.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/build.rs b/build.rs index 2bb8651d7..f512fc2e6 100644 --- a/build.rs +++ b/build.rs @@ -121,7 +121,7 @@ fn configure_libm(target: &Target) { } println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)"); - if target.opt_level >= 2 { + if !matches!(target.opt_level.as_str(), "0" | "1") { println!("cargo:rustc-cfg=optimizations_enabled"); } diff --git a/configure.rs b/configure.rs index 6cfbe11c2..87bc7a0ee 100644 --- a/configure.rs +++ b/configure.rs @@ -6,7 +6,7 @@ use std::env; #[allow(dead_code)] pub struct Target { pub triple: String, - pub opt_level: u8, + pub opt_level: String, pub cargo_features: Vec, pub os: String, pub arch: String, @@ -32,7 +32,7 @@ impl Target { Self { triple: env::var("TARGET").unwrap(), os: env::var("CARGO_CFG_TARGET_OS").unwrap(), - opt_level: env::var("OPT_LEVEL").unwrap().parse().unwrap(), + opt_level: env::var("OPT_LEVEL").unwrap(), cargo_features, arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(), vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), From 84eed866d3c2f131ce5892b14faca317d2ed2449 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 7 Jan 2025 23:21:35 +0000 Subject: [PATCH 1047/1459] chore: release v0.1.142 --- CHANGELOG.md | 6 ++++++ Cargo.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ccfc97d1b..305f2790f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.142](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.141...compiler_builtins-v0.1.142) - 2025-01-07 + +### Other + +- Account for 
optimization levels other than numbers + ## [0.1.141](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.140...compiler_builtins-v0.1.141) - 2025-01-07 ### Other diff --git a/Cargo.toml b/Cargo.toml index a0c4cfaaf..943adc4c1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.141" +version = "0.1.142" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 6bb43c7c29d8a09a16c1e75335d25c9c812c6864 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 5 Jan 2025 11:21:00 +0000 Subject: [PATCH 1048/1459] Add tests against MPFR for `remquo` and `remquof` Rug does not yet expose this function, but it is possible to use the MPFR bindings directly. --- libm/crates/libm-test/Cargo.toml | 2 +- libm/crates/libm-test/src/mpfloat.rs | 50 +++++++++++++++++++ libm/crates/libm-test/tests/multiprecision.rs | 4 -- .../crates/libm-test/tests/z_extensive/run.rs | 4 -- 4 files changed, 51 insertions(+), 9 deletions(-) diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 371beb19a..621e587c5 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -35,7 +35,7 @@ paste = "1.0.15" rand = "0.8.5" rand_chacha = "0.3.1" rayon = "1.10.0" -rug = { version = "1.26.1", optional = true, default-features = false, features = ["float", "std"] } +rug = { version = "1.26.1", optional = true, default-features = false, features = ["float", "integer", "std"] } [target.'cfg(target_family = "wasm")'.dependencies] # Enable randomness on WASM diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index f2b7b2f25..f71e72cd5 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -4,10 +4,13 @@ //! a struct named `Operation` that implements [`MpOp`]. 
use std::cmp::Ordering; +use std::ffi::{c_int, c_long}; use az::Az; +use gmp_mpfr_sys::mpfr::rnd_t; use rug::Assign; pub use rug::Float as MpFloat; +use rug::float::Round; use rug::float::Round::Nearest; use rug::ops::{PowAssignRound, RemAssignRound}; @@ -361,6 +364,32 @@ macro_rules! impl_op_for_ty { } } + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + ( + new_mpfloat::(), + new_mpfloat::(), + new_mpfloat::() + ) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let (ord, ql) = mpfr_remquo(&mut this.2, &this.0, &this.1, Nearest); + + // `remquo` integer results are sign-magnitude representation. Transfer the + // sign bit from the long result to the int result. + let clear = !(1 << (c_int::BITS - 1)); + let sign = ((ql >> (c_long::BITS - 1)) as i32) << (c_int::BITS - 1); + let q = (ql as i32) & clear | sign; + + (prep_retval::(&mut this.2, ord), q) + } + } + impl MpOp for crate::op::[]::Routine { type MpTy = MpFloat; @@ -441,3 +470,24 @@ impl MpOp for crate::op::lgammaf_r::Routine { (ret, sign as i32) } } + +/// `rug` does not provide `remquo` so this exposes `mpfr_remquo`. See rug#76. +fn mpfr_remquo(r: &mut MpFloat, x: &MpFloat, y: &MpFloat, round: Round) -> (Ordering, c_long) { + let r = r.as_raw_mut(); + let x = x.as_raw(); + let y = y.as_raw(); + let mut q: c_long = 0; + + let round = match round { + Round::Nearest => rnd_t::RNDN, + Round::Zero => rnd_t::RNDZ, + Round::Up => rnd_t::RNDU, + Round::Down => rnd_t::RNDD, + Round::AwayZero => rnd_t::RNDA, + _ => unreachable!(), + }; + + // SAFETY: mutable and const pointers are valid and do not alias, by Rust's rules. 
+ let ord = unsafe { gmp_mpfr_sys::mpfr::remquo(r, &mut q, x, y, round) }; + (ord.cmp(&0), q) +} diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 7961b0802..e2766cfda 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -50,10 +50,6 @@ libm_macros::for_each_function! { [jn, jnf, yn, ynf], ], skip: [ - // FIXME: MPFR tests needed - remquo, - remquof, - // FIXME: test needed, see // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392 nextafter, diff --git a/libm/crates/libm-test/tests/z_extensive/run.rs b/libm/crates/libm-test/tests/z_extensive/run.rs index 07f4d5370..7ee967851 100644 --- a/libm/crates/libm-test/tests/z_extensive/run.rs +++ b/libm/crates/libm-test/tests/z_extensive/run.rs @@ -48,10 +48,6 @@ fn register_all_tests() -> Vec { callback: mp_extensive_tests, extra: [all_tests], skip: [ - // FIXME: MPFR tests needed - remquo, - remquof, - // FIXME: test needed, see // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392 nextafter, From 3ba613de5ed3c7240b17dab0f979a43ac82c8b50 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 11 Jan 2025 21:24:06 +0000 Subject: [PATCH 1049/1459] Use `core::arch::wasm` functions rather than intrinsics These wasm functions are available in `core::arch::wasm32` since [1], so we can use them while avoiding the possibly-recursive `intrinsics::*` calls (in practice none of those should always lower to libcalls on wasm, but that is up to LLVM). Since these require an unstable feature, they are still gated under `unstable-intrinsics`. 
[1]: https://github.com/rust-lang/stdarch/pull/1677 --- libm/src/lib.rs | 1 + libm/src/math/arch/wasm32.rs | 30 ++++++++++-------------------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 327e3d6e6..b0e431211 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -2,6 +2,7 @@ #![no_std] #![cfg_attr(intrinsics_enabled, allow(internal_features))] #![cfg_attr(intrinsics_enabled, feature(core_intrinsics))] +#![cfg_attr(all(intrinsics_enabled, target_family = "wasm"), feature(wasm_numeric_instr))] #![cfg_attr(f128_enabled, feature(f128))] #![cfg_attr(f16_enabled, feature(f16))] #![allow(clippy::assign_op_pattern)] diff --git a/libm/src/math/arch/wasm32.rs b/libm/src/math/arch/wasm32.rs index 09df8624e..384445f12 100644 --- a/libm/src/math/arch/wasm32.rs +++ b/libm/src/math/arch/wasm32.rs @@ -1,16 +1,12 @@ -//! Wasm asm is not stable; just use intrinsics for operations that have asm routine equivalents. -//! -//! Note that we need to be absolutely certain that everything here lowers to assembly operations, -//! otherwise libcalls will be recursive. +//! Wasm has builtins for simple float operations. Use the unstable `core::arch` intrinsics which +//! are significantly faster than soft float operations. 
pub fn ceil(x: f64) -> f64 { - // SAFETY: safe intrinsic with no preconditions - unsafe { core::intrinsics::ceilf64(x) } + core::arch::wasm32::f64_ceil(x) } pub fn ceilf(x: f32) -> f32 { - // SAFETY: safe intrinsic with no preconditions - unsafe { core::intrinsics::ceilf32(x) } + core::arch::wasm32::f32_ceil(x) } pub fn fabs(x: f64) -> f64 { @@ -22,31 +18,25 @@ pub fn fabsf(x: f32) -> f32 { } pub fn floor(x: f64) -> f64 { - // SAFETY: safe intrinsic with no preconditions - unsafe { core::intrinsics::floorf64(x) } + core::arch::wasm32::f64_floor(x) } pub fn floorf(x: f32) -> f32 { - // SAFETY: safe intrinsic with no preconditions - unsafe { core::intrinsics::floorf32(x) } + core::arch::wasm32::f32_floor(x) } pub fn sqrt(x: f64) -> f64 { - // SAFETY: safe intrinsic with no preconditions - unsafe { core::intrinsics::sqrtf64(x) } + core::arch::wasm32::f64_sqrt(x) } pub fn sqrtf(x: f32) -> f32 { - // SAFETY: safe intrinsic with no preconditions - unsafe { core::intrinsics::sqrtf32(x) } + core::arch::wasm32::f32_sqrt(x) } pub fn trunc(x: f64) -> f64 { - // SAFETY: safe intrinsic with no preconditions - unsafe { core::intrinsics::truncf64(x) } + core::arch::wasm32::f64_trunc(x) } pub fn truncf(x: f32) -> f32 { - // SAFETY: safe intrinsic with no preconditions - unsafe { core::intrinsics::truncf32(x) } + core::arch::wasm32::f32_trunc(x) } From 5295c42bce1ad84dd32b7ee3eec677bfba57185a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 11 Jan 2025 22:04:32 +0000 Subject: [PATCH 1050/1459] Split `cast` into `cast` and `cast_lossy` There is a difference in intent between wishing to cast and truncate the value, and expecting the input to be within range. To make this clear, add separate `cast_lossy` and `cast_from_lossy` to indicate that truncation is intended, leaving `cast` and `cast_from` to only be casts that are expected not to truncate. Actually enforcing this at runtime is likely to have a cost, so just `debug_assert!` that `cast` doesn't truncate.
--- libm/src/math/support/int_traits.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index 380313c1e..0f2d72d9b 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -343,18 +343,30 @@ impl_h_int!( /// Trait to express (possibly lossy) casting of integers #[allow(unused)] pub trait CastInto: Copy { + /// By default, casts should be exact. fn cast(self) -> T; + + /// Call for casts that are expected to truncate. + fn cast_lossy(self) -> T; } #[allow(unused)] pub trait CastFrom: Copy { + /// By default, casts should be exact. fn cast_from(value: T) -> Self; + + /// Call for casts that are expected to truncate. + fn cast_from_lossy(value: T) -> Self; } impl + Copy> CastFrom for T { fn cast_from(value: U) -> Self { value.cast() } + + fn cast_from_lossy(value: U) -> Self { + value.cast_lossy() + } } macro_rules! cast_into { @@ -364,6 +376,13 @@ macro_rules! cast_into { ($ty:ty; $($into:ty),*) => {$( impl CastInto<$into> for $ty { fn cast(self) -> $into { + // All we can really do to enforce casting rules is check the rules when in + // debug mode. + debug_assert!(<$into>::try_from(self).is_ok(), "failed cast from {self}"); + self as $into + } + + fn cast_lossy(self) -> $into { self as $into } } From ace0c2667f2dd78c66b78e82c2b44cdbede75534 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 11 Jan 2025 23:22:12 +0000 Subject: [PATCH 1051/1459] Reorder tests in `run.sh` I do not believe Cargo separately caches crates with different sets of features enabled. So, ensuring that tests run with `unstable-intrinsics` are always grouped should slightly reduce runtime. As an added benefit, all the debug mode tests run first so initial feedback is available faster. 
--- libm/ci/run.sh | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 70fc271f1..89c9c8631 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -75,16 +75,14 @@ if [ "${BUILD_ONLY:-}" = "1" ]; then else cmd="cargo test --all --target $target $extra_flags" - # Test without intrinsics + # Test once without intrinsics, once with intrinsics enabled $cmd - $cmd --release - - # Test with intrinsic use $cmd --features unstable-intrinsics + $cmd --features unstable-intrinsics --benches + + # Test the same in release mode, which also increases coverage. + $cmd --release $cmd --release --features unstable-intrinsics - - # Make sure benchmarks have correct results - $cmd --benches - $cmd --benches --release + $cmd --release --features unstable-intrinsics --benches fi From 6077675ee544aab95969dbb13c2bebde78602bfe Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 11 Jan 2025 23:40:19 +0000 Subject: [PATCH 1052/1459] Remove `ExpInt` from `Float`, always use `i32` instead `ExpInt` is likely to only have performance benefits on 16-bit platforms, but makes working with the exponent more difficult. It seems like a worthwhile tradeoff to instead just use `i32`, so do that here. 
--- libm/crates/libm-test/src/f8_impl.rs | 5 ----- libm/src/math/support/float_traits.rs | 22 ++++++++-------------- libm/src/math/support/int_traits.rs | 2 ++ 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/libm/crates/libm-test/src/f8_impl.rs b/libm/crates/libm-test/src/f8_impl.rs index d378863f2..299553d20 100644 --- a/libm/crates/libm-test/src/f8_impl.rs +++ b/libm/crates/libm-test/src/f8_impl.rs @@ -20,7 +20,6 @@ pub struct f8(u8); impl Float for f8 { type Int = u8; type SignedInt = i8; - type ExpInt = i8; const ZERO: Self = Self(0b0_0000_000); const NEG_ZERO: Self = Self(0b1_0000_000); @@ -62,10 +61,6 @@ impl Float for f8 { self.0 & Self::SIGN_MASK != 0 } - fn exp(self) -> Self::ExpInt { - unimplemented!() - } - fn from_bits(a: Self::Int) -> Self { Self(a) } diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 697050966..f795527db 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -1,6 +1,6 @@ use core::{fmt, mem, ops}; -use super::int_traits::{Int, MinInt}; +use super::int_traits::{CastInto, Int, MinInt}; /// Trait for some basic operations on floats #[allow(dead_code)] @@ -25,9 +25,6 @@ pub trait Float: /// A int of the same width as the float type SignedInt: Int + MinInt; - /// An int capable of containing the exponent bits plus a sign bit. This is signed. - type ExpInt: Int; - const ZERO: Self; const NEG_ZERO: Self; const ONE: Self; @@ -98,7 +95,9 @@ pub trait Float: } /// Returns the exponent, not adjusting for bias. - fn exp(self) -> Self::ExpInt; + fn exp(self) -> i32 { + ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS).cast() + } /// Returns the significand with no implicit bit (or the "fractional" part) fn frac(self) -> Self::Int { @@ -146,7 +145,6 @@ macro_rules! float_impl { $ty:ident, $ity:ident, $sity:ident, - $expty:ident, $bits:expr, $significand_bits:expr, $from_bits:path @@ -154,7 +152,6 @@ macro_rules! 
float_impl { impl Float for $ty { type Int = $ity; type SignedInt = $sity; - type ExpInt = $expty; const ZERO: Self = 0.0; const NEG_ZERO: Self = -0.0; @@ -191,9 +188,6 @@ macro_rules! float_impl { fn is_sign_negative(self) -> bool { self.is_sign_negative() } - fn exp(self) -> Self::ExpInt { - ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt - } fn from_bits(a: Self::Int) -> Self { Self::from_bits(a) } @@ -226,11 +220,11 @@ macro_rules! float_impl { } #[cfg(f16_enabled)] -float_impl!(f16, u16, i16, i8, 16, 10, f16::from_bits); -float_impl!(f32, u32, i32, i16, 32, 23, f32_from_bits); -float_impl!(f64, u64, i64, i16, 64, 52, f64_from_bits); +float_impl!(f16, u16, i16, 16, 10, f16::from_bits); +float_impl!(f32, u32, i32, 32, 23, f32_from_bits); +float_impl!(f64, u64, i64, 64, 52, f64_from_bits); #[cfg(f128_enabled)] -float_impl!(f128, u128, i128, i16, 128, 112, f128::from_bits); +float_impl!(f128, u128, i128, 128, 112, f128::from_bits); /* FIXME(msrv): vendor some things that are not const stable at our MSRV */ diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index 0f2d72d9b..459f0a58b 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -55,6 +55,8 @@ pub trait Int: + ops::BitAnd + cmp::Ord + CastInto + + CastInto + + CastFrom + CastFrom { fn signed(self) -> OtherSign; From ba4aebc77f5d7669341997f9f3085ec40afcdc17 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 11 Jan 2025 22:30:30 +0000 Subject: [PATCH 1053/1459] Add a `release-checked` profile with debug and overflow assertions A failing debug assertion or overflow without correctly wrapping or saturating is a bug, but the `debug` profile that has these enabled does not run enough test cases to hit edge cases that may trigger these. Add a new `release-checked` profile that enables debug assertions and overflow checks. 
This seems to only extend per-function test time by a few seconds (or around a minute on longer extensive tests), so enable this as the default on CI. In order to ensure `no_panic` still gets checked, add a build-only step to CI. --- libm/.github/workflows/main.yml | 3 ++- libm/Cargo.toml | 7 +++++++ libm/build.rs | 6 +++++- libm/ci/run.sh | 8 +++++--- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 320800f2e..98505ea35 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -238,7 +238,8 @@ jobs: LIBM_EXTENSIVE_TESTS="$CHANGED" cargo t \ --features test-multiprecision,unstable \ - --release -- extensive + --profile release-checked \ + -- extensive - name: Print test logs if available run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi shell: bash diff --git a/libm/Cargo.toml b/libm/Cargo.toml index dc362779e..0e444b583 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -61,3 +61,10 @@ no-panic = "0.1.30" # This is needed for no-panic to correctly detect the lack of panics [profile.release] lto = "fat" + +# Release mode with debug assertions +[profile.release-checked] +inherits = "release" +debug-assertions = true +lto = "fat" +overflow-checks = true diff --git a/libm/build.rs b/libm/build.rs index 9c9e0e723..ca4a639a1 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -13,8 +13,12 @@ fn main() { #[allow(unexpected_cfgs)] if !cfg!(feature = "checked") { let lvl = env::var("OPT_LEVEL").unwrap(); - if lvl != "0" { + if lvl != "0" && !cfg!(debug_assertions) { println!("cargo:rustc-cfg=assert_no_panic"); + } else if env::var("ENSURE_NO_PANIC").is_ok() { + // Give us a defensive way of ensuring that no-panic is checked when we + // expect it to be. 
+ panic!("`assert_no_panic `was not enabled"); } } diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 89c9c8631..244a22a07 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -81,8 +81,10 @@ else $cmd --features unstable-intrinsics --benches # Test the same in release mode, which also increases coverage. - $cmd --release - $cmd --release --features unstable-intrinsics - $cmd --release --features unstable-intrinsics --benches + $cmd --profile release-checked + $cmd --profile release-checked --features unstable-intrinsics + $cmd --profile release-checked --features unstable-intrinsics --benches + + ENSURE_NO_PANIC=1 cargo build --target "$target" --release fi From 198cfe8484813bc67f45b8977885a09b8e622c29 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 12 Jan 2025 03:25:25 +0000 Subject: [PATCH 1054/1459] Add `biteq` and `exp_unbiased` to `Float` These are two convenience methods. Additionally, add tests for the trait methods, and an `assert_biteq!` macro to check and print the output. --- libm/src/math/support/float_traits.rs | 125 ++++++++++++++++++++++++-- libm/src/math/support/int_traits.rs | 8 +- libm/src/math/support/macros.rs | 20 +++++ 3 files changed, 143 insertions(+), 10 deletions(-) diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index f795527db..3aa0d844a 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -1,6 +1,6 @@ use core::{fmt, mem, ops}; -use super::int_traits::{CastInto, Int, MinInt}; +use super::int_traits::{CastFrom, CastInto, Int, MinInt}; /// Trait for some basic operations on floats #[allow(dead_code)] @@ -73,11 +73,18 @@ pub trait Float: self.to_bits().signed() } + /// Check bitwise equality. + fn biteq(self, rhs: Self) -> bool { + self.to_bits() == rhs.to_bits() + } + /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be - /// represented in multiple different ways. 
This method returns `true` if two NaNs are - /// compared. + /// represented in multiple different ways. + /// + /// This method returns `true` if two NaNs are compared. Use [`biteq`](Self::biteq) instead + /// if `NaN` should not be treated separately. fn eq_repr(self, rhs: Self) -> bool { - if self.is_nan() && rhs.is_nan() { true } else { self.to_bits() == rhs.to_bits() } + if self.is_nan() && rhs.is_nan() { true } else { self.biteq(rhs) } } /// Returns true if the value is NaN. @@ -94,17 +101,22 @@ pub trait Float: (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO } - /// Returns the exponent, not adjusting for bias. + /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero. fn exp(self) -> i32 { ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS).cast() } + /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero. + fn exp_unbiased(self) -> i32 { + self.exp() - (Self::EXP_BIAS as i32) + } + /// Returns the significand with no implicit bit (or the "fractional" part) fn frac(self) -> Self::Int { self.to_bits() & Self::SIG_MASK } - /// Returns the significand with implicit bit + /// Returns the significand with implicit bit. fn imp_frac(self) -> Self::Int { self.frac() | Self::IMPLICIT_BIT } @@ -113,11 +125,11 @@ pub trait Float: fn from_bits(a: Self::Int) -> Self; /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. 
- fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self { + fn from_parts(negative: bool, exponent: i32, significand: Self::Int) -> Self { let sign = if negative { Self::Int::ONE } else { Self::Int::ZERO }; Self::from_bits( (sign << (Self::BITS - 1)) - | ((exponent << Self::SIG_BITS) & Self::EXP_MASK) + | (Self::Int::cast_from(exponent as u32 & Self::EXP_MAX) << Self::SIG_BITS) | (significand & Self::SIG_MASK), ) } @@ -239,3 +251,100 @@ pub const fn f64_from_bits(bits: u64) -> f64 { // SAFETY: POD cast with no preconditions unsafe { mem::transmute::(bits) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[cfg(f16_enabled)] + fn check_f16() { + // Constants + assert_eq!(f16::EXP_MAX, 0b11111); + assert_eq!(f16::EXP_BIAS, 15); + + // `exp_unbiased` + assert_eq!(f16::FRAC_PI_2.exp_unbiased(), 0); + assert_eq!((1.0f16 / 2.0).exp_unbiased(), -1); + assert_eq!(f16::MAX.exp_unbiased(), 15); + assert_eq!(f16::MIN.exp_unbiased(), 15); + assert_eq!(f16::MIN_POSITIVE.exp_unbiased(), -14); + // This is a convenience method and not ldexp, `exp_unbiased` does not return correct + // results for zero and subnormals. + assert_eq!(f16::ZERO.exp_unbiased(), -15); + assert_eq!(f16::from_bits(0x1).exp_unbiased(), -15); + + // `from_parts` + assert_biteq!(f16::from_parts(true, f16::EXP_BIAS as i32, 0), -1.0f16); + assert_biteq!(f16::from_parts(false, 0, 1), f16::from_bits(0x1)); + } + + #[test] + fn check_f32() { + // Constants + assert_eq!(f32::EXP_MAX, 0b11111111); + assert_eq!(f32::EXP_BIAS, 127); + + // `exp_unbiased` + assert_eq!(f32::FRAC_PI_2.exp_unbiased(), 0); + assert_eq!((1.0f32 / 2.0).exp_unbiased(), -1); + assert_eq!(f32::MAX.exp_unbiased(), 127); + assert_eq!(f32::MIN.exp_unbiased(), 127); + assert_eq!(f32::MIN_POSITIVE.exp_unbiased(), -126); + // This is a convenience method and not ldexp, `exp_unbiased` does not return correct + // results for zero and subnormals. 
+ assert_eq!(f32::ZERO.exp_unbiased(), -127); + assert_eq!(f32::from_bits(0x1).exp_unbiased(), -127); + + // `from_parts` + assert_biteq!(f32::from_parts(true, f32::EXP_BIAS as i32, 0), -1.0f32); + assert_biteq!(f32::from_parts(false, 10 + f32::EXP_BIAS as i32, 0), hf32!("0x1p10")); + assert_biteq!(f32::from_parts(false, 0, 1), f32::from_bits(0x1)); + } + + #[test] + fn check_f64() { + // Constants + assert_eq!(f64::EXP_MAX, 0b11111111111); + assert_eq!(f64::EXP_BIAS, 1023); + + // `exp_unbiased` + assert_eq!(f64::FRAC_PI_2.exp_unbiased(), 0); + assert_eq!((1.0f64 / 2.0).exp_unbiased(), -1); + assert_eq!(f64::MAX.exp_unbiased(), 1023); + assert_eq!(f64::MIN.exp_unbiased(), 1023); + assert_eq!(f64::MIN_POSITIVE.exp_unbiased(), -1022); + // This is a convenience method and not ldexp, `exp_unbiased` does not return correct + // results for zero and subnormals. + assert_eq!(f64::ZERO.exp_unbiased(), -1023); + assert_eq!(f64::from_bits(0x1).exp_unbiased(), -1023); + + // `from_parts` + assert_biteq!(f64::from_parts(true, f64::EXP_BIAS as i32, 0), -1.0f64); + assert_biteq!(f64::from_parts(false, 10 + f64::EXP_BIAS as i32, 0), hf64!("0x1p10")); + assert_biteq!(f64::from_parts(false, 0, 1), f64::from_bits(0x1)); + } + + #[test] + #[cfg(f128_enabled)] + fn check_f128() { + // Constants + assert_eq!(f128::EXP_MAX, 0b111111111111111); + assert_eq!(f128::EXP_BIAS, 16383); + + // `exp_unbiased` + assert_eq!(f128::FRAC_PI_2.exp_unbiased(), 0); + assert_eq!((1.0f128 / 2.0).exp_unbiased(), -1); + assert_eq!(f128::MAX.exp_unbiased(), 16383); + assert_eq!(f128::MIN.exp_unbiased(), 16383); + assert_eq!(f128::MIN_POSITIVE.exp_unbiased(), -16382); + // This is a convenience method and not ldexp, `exp_unbiased` does not return correct + // results for zero and subnormals. 
+ assert_eq!(f128::ZERO.exp_unbiased(), -16383); + assert_eq!(f128::from_bits(0x1).exp_unbiased(), -16383); + + // `from_parts` + assert_biteq!(f128::from_parts(true, f128::EXP_BIAS as i32, 0), -1.0f128); + assert_biteq!(f128::from_parts(false, 0, 1), f128::from_bits(0x1)); + } +} diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index 459f0a58b..db799c030 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -54,10 +54,14 @@ pub trait Int: + ops::BitXor + ops::BitAnd + cmp::Ord - + CastInto - + CastInto + CastFrom + + CastFrom + CastFrom + + CastFrom + + CastInto + + CastInto + + CastInto + + CastInto { fn signed(self) -> OtherSign; fn unsigned(self) -> Self::Unsigned; diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs index f5094b9da..076fdf1f7 100644 --- a/libm/src/math/support/macros.rs +++ b/libm/src/math/support/macros.rs @@ -106,3 +106,23 @@ macro_rules! hf64 { X }}; } + +/// Assert `F::biteq` with better messages. +#[cfg(test)] +macro_rules! assert_biteq { + ($left:expr, $right:expr, $($arg:tt)*) => {{ + let bits = ($left.to_bits() * 0).leading_zeros(); // hack to get the width from the value + assert!( + $left.biteq($right), + "\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})", + l = $left, + lb = $left.to_bits(), + r = $right, + rb = $right.to_bits(), + width = ((bits / 4) + 2) as usize + ); + }}; + ($left:expr, $right:expr $(,)?) => { + assert_biteq!($left, $right,) + }; +} From 08dd8a3d1303ab5bf8d2481eb22207228e4dba2e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 12 Jan 2025 03:55:46 +0000 Subject: [PATCH 1055/1459] Always use the same seed for benchmarking It would be preferable to switch to a different generator, or at least set the seed within the benchmark, but this is the most straightforward way to make things simple. 
--- libm/.github/workflows/main.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 98505ea35..b14ab40ec 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -156,7 +156,11 @@ jobs: - uses: Swatinem/rust-cache@v2 - name: Download musl source run: ./ci/download-musl.sh - - run: cargo bench --all --features libm-test/short-benchmarks,libm-test/build-musl + - run: | + # Always use the same seed for benchmarks. Ideally we should switch to a + # non-random generator. + export LIBM_SEED=benchesbenchesbenchesbencheswoo! + cargo bench --all --features libm-test/short-benchmarks,libm-test/build-musl msrv: name: Check MSRV From ca21cbda01637397a10c37339df8cab45bd90cba Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 12 Jan 2025 07:43:24 +0000 Subject: [PATCH 1056/1459] Expose C versions of `libm` functions in the `cb` crate `compiler_builtins` exposes an `extern "C"` version of `libm` routines, so add the same here. There really isn't much to test here (unless we later add tests against C `libm` suites), but one nice benefit is this gives us a library with unmangled names that is easy to `objdump`. In accordance with that, also update `cb` to be a `staticlib`. Unfortunately this also means we have to remove it from the workspace, since Cargo doesn't allow setting `panic = "abort"` for a single crate. 
--- libm/.github/workflows/main.yml | 2 +- libm/.gitignore | 3 +- libm/Cargo.toml | 5 +- .../compiler-builtins-smoke-test/Cargo.toml | 18 +- .../compiler-builtins-smoke-test/build.rs | 7 + .../compiler-builtins-smoke-test/src/lib.rs | 171 ++++++++++++++++++ 6 files changed, 200 insertions(+), 6 deletions(-) create mode 100644 libm/crates/compiler-builtins-smoke-test/build.rs diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index b14ab40ec..7e371d613 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -144,7 +144,7 @@ jobs: - name: Install Rust run: rustup update nightly --no-self-update && rustup default nightly - uses: Swatinem/rust-cache@v2 - - run: cargo build -p cb + - run: cargo test --manifest-path crates/compiler-builtins-smoke-test/Cargo.toml benchmarks: name: Benchmarks diff --git a/libm/.gitignore b/libm/.gitignore index b6a532751..4e9c9c03d 100644 --- a/libm/.gitignore +++ b/libm/.gitignore @@ -2,8 +2,7 @@ .#* /bin /math/src -/math/target -/target +target Cargo.lock musl/ **.tar.gz diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 0e444b583..fc881b77e 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -43,7 +43,6 @@ force-soft-floats = [] [workspace] resolver = "2" members = [ - "crates/compiler-builtins-smoke-test", "crates/libm-macros", "crates/libm-test", "crates/musl-math-sys", @@ -53,6 +52,10 @@ default-members = [ "crates/libm-macros", "crates/libm-test", ] +exclude = [ + # Requires `panic = abort` so can't be a member of the workspace + "crates/compiler-builtins-smoke-test", +] [dev-dependencies] no-panic = "0.1.30" diff --git a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml index 1f09ce99c..d578b0dcd 100644 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -6,15 +6,21 @@ edition = "2021" publish = false [lib] +crate-type = ["staticlib"] test = false 
bench = false +[features] +default = ["arch", "unstable-float"] + +# Copied from `libm`'s root `Cargo.toml`' +unstable-float = [] +arch = [] + [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = [ "cfg(arch_enabled)", "cfg(assert_no_panic)", - "cfg(f128_enabled)", - "cfg(f16_enabled)", "cfg(intrinsics_enabled)", 'cfg(feature, values("checked"))', 'cfg(feature, values("force-soft-floats"))', @@ -22,3 +28,11 @@ unexpected_cfgs = { level = "warn", check-cfg = [ 'cfg(feature, values("unstable-intrinsics"))', 'cfg(feature, values("unstable-public-internals"))', ] } + +[profile.dev] +panic = "abort" + +[profile.release] +panic = "abort" +codegen-units = 1 +lto = "fat" diff --git a/libm/crates/compiler-builtins-smoke-test/build.rs b/libm/crates/compiler-builtins-smoke-test/build.rs new file mode 100644 index 000000000..401b7e1eb --- /dev/null +++ b/libm/crates/compiler-builtins-smoke-test/build.rs @@ -0,0 +1,7 @@ +#[path = "../../configure.rs"] +mod configure; + +fn main() { + let cfg = configure::Config::from_env(); + configure::emit_libm_config(&cfg); +} diff --git a/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/libm/crates/compiler-builtins-smoke-test/src/lib.rs index e3a51a575..95ecb840d 100644 --- a/libm/crates/compiler-builtins-smoke-test/src/lib.rs +++ b/libm/crates/compiler-builtins-smoke-test/src/lib.rs @@ -1,8 +1,12 @@ //! Fake compiler-builtins crate //! //! This is used to test that we can source import `libm` into the compiler-builtins crate. +//! Additionally, it provides a `#[no_mangle]` C API that can be easier to inspect than the +//! default `.rlib`. #![feature(core_intrinsics)] +#![feature(f16)] +#![feature(f128)] #![allow(internal_features)] #![no_std] @@ -10,3 +14,170 @@ #[allow(clippy::all)] // We don't get `libm`'s list of `allow`s, so just ignore Clippy. #[path = "../../../src/math/mod.rs"] pub mod libm; + +use core::ffi::c_int; + +/// Mark functions `#[no_mangle]` and with the C ABI. +macro_rules! 
no_mangle { + ($( $name:ident( $($tt:tt)+ ) -> $ret:ty; )+) => { + $( no_mangle!(@inner $name( $($tt)+ ) -> $ret); )+ + }; + + // Handle simple functions with single return types + (@inner $name:ident( $($arg:ident: $aty:ty),+ ) -> $ret:ty) => { + #[no_mangle] + extern "C" fn $name($($arg: $aty),+) -> $ret { + libm::$name($($arg),+) + } + }; + + + // Functions with `&mut` return values need to be handled differently, use `|` to + // separate inputs vs. outputs. + ( + @inner $name:ident( $($arg:ident: $aty:ty),+ | $($rarg:ident: $rty:ty),+) -> $ret:ty + ) => { + #[no_mangle] + extern "C" fn $name($($arg: $aty,)+ $($rarg: $rty),+) -> $ret { + let ret; + (ret, $(*$rarg),+) = libm::$name($($arg),+); + ret + } + }; +} + +no_mangle! { + frexp(x: f64 | y: &mut c_int) -> f64; + frexpf(x: f32 | y: &mut c_int) -> f32; + acos(x: f64) -> f64; + acosf(x: f32) -> f32; + acosh(x: f64) -> f64; + acoshf(x: f32) -> f32; + asin(x: f64) -> f64; + asinf(x: f32) -> f32; + asinh(x: f64) -> f64; + asinhf(x: f32) -> f32; + atan(x: f64) -> f64; + atan2(x: f64, y: f64) -> f64; + atan2f(x: f32, y: f32) -> f32; + atanf(x: f32) -> f32; + atanh(x: f64) -> f64; + atanhf(x: f32) -> f32; + cbrt(x: f64) -> f64; + cbrtf(x: f32) -> f32; + ceil(x: f64) -> f64; + ceilf(x: f32) -> f32; + copysign(x: f64, y: f64) -> f64; + copysignf(x: f32, y: f32) -> f32; + copysignf128(x: f128, y: f128) -> f128; + copysignf16(x: f16, y: f16) -> f16; + cos(x: f64) -> f64; + cosf(x: f32) -> f32; + cosh(x: f64) -> f64; + coshf(x: f32) -> f32; + erf(x: f64) -> f64; + erfc(x: f64) -> f64; + erfcf(x: f32) -> f32; + erff(x: f32) -> f32; + exp(x: f64) -> f64; + exp10(x: f64) -> f64; + exp10f(x: f32) -> f32; + exp2(x: f64) -> f64; + exp2f(x: f32) -> f32; + expf(x: f32) -> f32; + expm1(x: f64) -> f64; + expm1f(x: f32) -> f32; + fabs(x: f64) -> f64; + fabsf(x: f32) -> f32; + fabsf128(x: f128) -> f128; + fabsf16(x: f16) -> f16; + fdim(x: f64, y: f64) -> f64; + fdimf(x: f32, y: f32) -> f32; + floor(x: f64) -> f64; + floorf(x: f32) 
-> f32; + fma(x: f64, y: f64, z: f64) -> f64; + fmaf(x: f32, y: f32, z: f32) -> f32; + fmax(x: f64, y: f64) -> f64; + fmaxf(x: f32, y: f32) -> f32; + fmin(x: f64, y: f64) -> f64; + fminf(x: f32, y: f32) -> f32; + fmod(x: f64, y: f64) -> f64; + fmodf(x: f32, y: f32) -> f32; + hypot(x: f64, y: f64) -> f64; + hypotf(x: f32, y: f32) -> f32; + ilogb(x: f64) -> c_int; + ilogbf(x: f32) -> c_int; + j0(x: f64) -> f64; + j0f(x: f32) -> f32; + j1(x: f64) -> f64; + j1f(x: f32) -> f32; + jn(x: c_int, y: f64) -> f64; + jnf(x: c_int, y: f32) -> f32; + ldexp(x: f64, y: c_int) -> f64; + ldexpf(x: f32, y: c_int) -> f32; + lgamma(x: f64) -> f64; + lgamma_r(x: f64 | r: &mut c_int) -> f64; + lgammaf(x: f32) -> f32; + lgammaf_r(x: f32 | r: &mut c_int) -> f32; + log(x: f64) -> f64; + log10(x: f64) -> f64; + log10f(x: f32) -> f32; + log1p(x: f64) -> f64; + log1pf(x: f32) -> f32; + log2(x: f64) -> f64; + log2f(x: f32) -> f32; + logf(x: f32) -> f32; + modf(x: f64 | r: &mut f64) -> f64; + modff(x: f32 | r: &mut f32) -> f32; + nextafter(x: f64, y: f64) -> f64; + nextafterf(x: f32, y: f32) -> f32; + pow(x: f64, y: f64) -> f64; + powf(x: f32, y: f32) -> f32; + remainder(x: f64, y: f64) -> f64; + remainderf(x: f32, y: f32) -> f32; + remquo(x: f64, y: f64 | q: &mut c_int) -> f64; + remquof(x: f32, y: f32 | q: &mut c_int) -> f32; + rint(x: f64) -> f64; + rintf(x: f32) -> f32; + round(x: f64) -> f64; + roundf(x: f32) -> f32; + scalbn(x: f64, y: c_int) -> f64; + scalbnf(x: f32, y: c_int) -> f32; + sin(x: f64) -> f64; + sinf(x: f32) -> f32; + sinh(x: f64) -> f64; + sinhf(x: f32) -> f32; + sqrt(x: f64) -> f64; + sqrtf(x: f32) -> f32; + tan(x: f64) -> f64; + tanf(x: f32) -> f32; + tanh(x: f64) -> f64; + tanhf(x: f32) -> f32; + tgamma(x: f64) -> f64; + tgammaf(x: f32) -> f32; + trunc(x: f64) -> f64; + truncf(x: f32) -> f32; + y0(x: f64) -> f64; + y0f(x: f32) -> f32; + y1(x: f64) -> f64; + y1f(x: f32) -> f32; + yn(x: c_int, y: f64) -> f64; + ynf(x: c_int, y: f32) -> f32; +} + +/* sincos has no direct 
return type, not worth handling in the macro */ + +#[no_mangle] +extern "C" fn sincos(x: f64, s: &mut f64, c: &mut f64) { + (*s, *c) = libm::sincos(x); +} + +#[no_mangle] +extern "C" fn sincosf(x: f32, s: &mut f32, c: &mut f32) { + (*s, *c) = libm::sincosf(x); +} + +#[panic_handler] +fn panic(_info: &core::panic::PanicInfo) -> ! { + loop {} +} From fc5ed90c68ccfe75a7685a97eee8e97aa6efd676 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 12 Jan 2025 04:24:03 +0000 Subject: [PATCH 1057/1459] Add a new precision adjustment for i586 `rint` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `rint` had a couple recent failures from the random tests: ---- mp_random_rint stdout ---- Random Mpfr rint arg 1/1: 10000 iterations (10000 total) using `LIBM_SEED=Fl1f69DaJnwkHN2FeuCXaBFRvJYsPvEY` thread 'mp_random_rint' panicked at crates/libm-test/tests/multiprecision.rs:41:49: called `Result::unwrap()` on an `Err` value: input: (-849751480.5001163,) (0xc1c95316dc4003d0,) expected: -849751481.0 0xc1c95316dc800000 actual: -849751480.0 0xc1c95316dc000000 Caused by: ulp 8388608 > 100000 And: ---- mp_random_rint stdout ---- Random Mpfr rint arg 1/1: 10000 iterations (10000 total) using `LIBM_SEED=XN7VCGhX3Wu6Mzn8COvJPITyZlGP7gN7` thread 'mp_random_rint' panicked at crates/libm-test/tests/multiprecision.rs:41:49: called `Result::unwrap()` on an `Err` value: input: (-12493089.499809155,) (0xc167d4242ffe6fc5,) expected: -12493089.0 0xc167d42420000000 actual: -12493090.0 0xc167d42440000000 Caused by: ulp 536870912 > 100000 It seems we just implement an incorrect rounding mode. Replace the existing `rint` override with an xfail if the difference is 0.0 <= ε <= 1.0. 
--- libm/crates/libm-test/src/precision.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index cbe4bdf88..75b99c652 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -114,7 +114,6 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { Id::Exp10 | Id::Exp10f => ulp = 1_000_000, Id::Exp2 | Id::Exp2f => ulp = 10_000_000, Id::Log1p | Id::Log1pf => ulp = 2, - Id::Rint => ulp = 100_000, Id::Round => ulp = 1, Id::Tan => ulp = 2, _ => (), @@ -261,6 +260,15 @@ impl MaybeOverride<(f64,)> for SpecialCase { } } + if cfg!(x86_no_sse) + && ctx.base_name == BaseName::Rint + && (expected - actual).abs() <= F::ONE + && (expected - actual).abs() > F::ZERO + { + // Our rounding mode is incorrect. + return XFAIL; + } + if ctx.base_name == BaseName::Acosh && input.0 < 1.0 { // The function is undefined for the inputs, musl and our libm both return // random results. From 1666f4116b28bea716dde20d398d96a779866883 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 12 Jan 2025 08:02:44 +0000 Subject: [PATCH 1058/1459] Add a new precision adjustment for i586 `exp2f` There was a recent failure from the random tests: ---- mp_random_exp2f stdout ---- Random Mpfr exp2f arg 1/1: 10000 iterations (10000 total) using `LIBM_SEED=fqgMuzs6eqH1VZSEmQpLnThnaIyRUOWe` thread 'mp_random_exp2f' panicked at crates/libm-test/tests/multiprecision.rs:41:49: called `Result::unwrap()` on an `Err` value: input: (127.97238,) (0x42fff1dc,) expected: 3.3383009e38 0x7f7b2556 actual: inf 0x7f800000 Caused by: mismatched infinities Add an xfail for mismatched infinities on i586. 
--- libm/crates/libm-test/src/precision.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 75b99c652..15913fe6d 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -211,6 +211,15 @@ impl MaybeOverride<(f32,)> for SpecialCase { return XFAIL; } + if cfg!(x86_no_sse) + && ctx.base_name == BaseName::Exp2 + && !expected.is_infinite() + && actual.is_infinite() + { + // We return infinity when there is a representable value. Test input: 127.97238 + return XFAIL; + } + maybe_check_nan_bits(actual, expected, ctx) } From 45af77150a5ab873cf6d7850dc30f559b41ca2cb Mon Sep 17 00:00:00 2001 From: Hanna Kruppe Date: Sun, 12 Jan 2025 10:56:30 +0100 Subject: [PATCH 1059/1459] Use wasm32 arch intrinsics for rint{,f} --- libm/etc/function-definitions.json | 2 ++ libm/src/math/arch/mod.rs | 4 +++- libm/src/math/arch/wasm32.rs | 8 ++++++++ libm/src/math/rint.rs | 6 ++++++ libm/src/math/rintf.rs | 6 ++++++ 5 files changed, 25 insertions(+), 1 deletion(-) diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 3cf7e0fed..f60a7e567 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -604,12 +604,14 @@ "rint": { "sources": [ "src/libm_helper.rs", + "src/math/arch/wasm32.rs", "src/math/rint.rs" ], "type": "f64" }, "rintf": { "sources": [ + "src/math/arch/wasm32.rs", "src/math/rintf.rs" ], "type": "f32" diff --git a/libm/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs index bd79ae1c6..3992419cb 100644 --- a/libm/src/math/arch/mod.rs +++ b/libm/src/math/arch/mod.rs @@ -11,7 +11,9 @@ cfg_if! 
{ if #[cfg(all(target_arch = "wasm32", intrinsics_enabled))] { mod wasm32; - pub use wasm32::{ceil, ceilf, fabs, fabsf, floor, floorf, sqrt, sqrtf, trunc, truncf}; + pub use wasm32::{ + ceil, ceilf, fabs, fabsf, floor, floorf, rint, rintf, sqrt, sqrtf, trunc, truncf, + }; } else if #[cfg(target_feature = "sse2")] { mod i686; pub use i686::{sqrt, sqrtf}; diff --git a/libm/src/math/arch/wasm32.rs b/libm/src/math/arch/wasm32.rs index 384445f12..de80c8a58 100644 --- a/libm/src/math/arch/wasm32.rs +++ b/libm/src/math/arch/wasm32.rs @@ -25,6 +25,14 @@ pub fn floorf(x: f32) -> f32 { core::arch::wasm32::f32_floor(x) } +pub fn rint(x: f64) -> f64 { + core::arch::wasm32::f64_nearest(x) +} + +pub fn rintf(x: f32) -> f32 { + core::arch::wasm32::f32_nearest(x) +} + pub fn sqrt(x: f64) -> f64 { core::arch::wasm32::f64_sqrt(x) } diff --git a/libm/src/math/rint.rs b/libm/src/math/rint.rs index cbdc3c2b9..50192ffdf 100644 --- a/libm/src/math/rint.rs +++ b/libm/src/math/rint.rs @@ -1,5 +1,11 @@ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn rint(x: f64) -> f64 { + select_implementation! { + name: rint, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + let one_over_e = 1.0 / f64::EPSILON; let as_u64: u64 = x.to_bits(); let exponent: u64 = (as_u64 >> 52) & 0x7ff; diff --git a/libm/src/math/rintf.rs b/libm/src/math/rintf.rs index 2d22c9393..64968b6be 100644 --- a/libm/src/math/rintf.rs +++ b/libm/src/math/rintf.rs @@ -1,5 +1,11 @@ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn rintf(x: f32) -> f32 { + select_implementation! 
{ + name: rintf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + let one_over_e = 1.0 / f32::EPSILON; let as_u32: u32 = x.to_bits(); let exponent: u32 = (as_u32 >> 23) & 0xff; From 5562dd36496a781a0a514e9f2432485b557b8f62 Mon Sep 17 00:00:00 2001 From: Hanna Kruppe Date: Sun, 12 Jan 2025 11:16:40 +0100 Subject: [PATCH 1060/1459] Introduce arch::aarch64 and use it for rint{,f} --- libm/etc/function-definitions.json | 2 ++ libm/src/math/arch/aarch64.rs | 33 ++++++++++++++++++++++++++++++ libm/src/math/arch/mod.rs | 7 +++++++ libm/src/math/rint.rs | 5 ++++- libm/src/math/rintf.rs | 5 ++++- 5 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 libm/src/math/arch/aarch64.rs diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index f60a7e567..39b6c9702 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -604,6 +604,7 @@ "rint": { "sources": [ "src/libm_helper.rs", + "src/math/arch/aarch64.rs", "src/math/arch/wasm32.rs", "src/math/rint.rs" ], @@ -611,6 +612,7 @@ }, "rintf": { "sources": [ + "src/math/arch/aarch64.rs", "src/math/arch/wasm32.rs", "src/math/rintf.rs" ], diff --git a/libm/src/math/arch/aarch64.rs b/libm/src/math/arch/aarch64.rs new file mode 100644 index 000000000..374ec11bf --- /dev/null +++ b/libm/src/math/arch/aarch64.rs @@ -0,0 +1,33 @@ +use core::arch::aarch64::{ + float32x2_t, float64x1_t, vdup_n_f32, vdup_n_f64, vget_lane_f32, vget_lane_f64, vrndn_f32, + vrndn_f64, +}; + +pub fn rint(x: f64) -> f64 { + // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module. + let x_vec: float64x1_t = unsafe { vdup_n_f64(x) }; + + // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module. + let result_vec: float64x1_t = unsafe { vrndn_f64(x_vec) }; + + // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module. 
+ let result: f64 = unsafe { vget_lane_f64::<0>(result_vec) }; + + result +} + +pub fn rintf(x: f32) -> f32 { + // There's a scalar form of this instruction (FRINTN) but core::arch doesn't expose it, so we + // have to use the vector form and drop the other lanes afterwards. + + // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module. + let x_vec: float32x2_t = unsafe { vdup_n_f32(x) }; + + // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module. + let result_vec: float32x2_t = unsafe { vrndn_f32(x_vec) }; + + // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module. + let result: f32 = unsafe { vget_lane_f32::<0>(result_vec) }; + + result +} diff --git a/libm/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs index 3992419cb..091d7650a 100644 --- a/libm/src/math/arch/mod.rs +++ b/libm/src/math/arch/mod.rs @@ -17,6 +17,13 @@ cfg_if! { } else if #[cfg(target_feature = "sse2")] { mod i686; pub use i686::{sqrt, sqrtf}; + } else if #[cfg(all( + target_arch = "aarch64", // TODO: also arm64ec? + target_feature = "neon", + target_endian = "little", // see https://github.com/rust-lang/stdarch/issues/1484 + ))] { + mod aarch64; + pub use aarch64::{rint, rintf}; } } diff --git a/libm/src/math/rint.rs b/libm/src/math/rint.rs index 50192ffdf..c9ea6402e 100644 --- a/libm/src/math/rint.rs +++ b/libm/src/math/rint.rs @@ -2,7 +2,10 @@ pub fn rint(x: f64) -> f64 { select_implementation! { name: rint, - use_arch: all(target_arch = "wasm32", intrinsics_enabled), + use_arch: any( + all(target_arch = "wasm32", intrinsics_enabled), + all(target_arch = "aarch64", target_feature = "neon", target_endian = "little"), + ), args: x, } diff --git a/libm/src/math/rintf.rs b/libm/src/math/rintf.rs index 64968b6be..33b5b3dde 100644 --- a/libm/src/math/rintf.rs +++ b/libm/src/math/rintf.rs @@ -2,7 +2,10 @@ pub fn rintf(x: f32) -> f32 { select_implementation! 
{ name: rintf, - use_arch: all(target_arch = "wasm32", intrinsics_enabled), + use_arch: any( + all(target_arch = "wasm32", intrinsics_enabled), + all(target_arch = "aarch64", target_feature = "neon", target_endian = "little"), + ), args: x, } From aecd822aabcf26a331de29cda793aaffb95c6c0e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 03:14:43 +0000 Subject: [PATCH 1061/1459] Rename the `test-multiprecision` feature to `build-mpfr` Currently the features that control what we test against are `build-musl` and `test-multiprecision`. I didn't name them very consistently and there isn't really any reason for that. Rename `test-multiprecision` to `build-mpfr` to better reflect what it actually does and to be more consistent with `build-musl`. --- libm/.github/workflows/main.yml | 4 ++-- libm/CONTRIBUTING.md | 2 +- libm/ci/run.sh | 8 ++++---- libm/crates/libm-test/Cargo.toml | 2 +- libm/crates/libm-test/src/lib.rs | 2 +- libm/crates/libm-test/src/run_cfg.rs | 2 +- libm/crates/libm-test/tests/multiprecision.rs | 2 +- libm/crates/libm-test/tests/z_extensive/main.rs | 6 +++--- 8 files changed, 14 insertions(+), 14 deletions(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 7e371d613..d54288574 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -133,7 +133,7 @@ jobs: run: ./ci/download-musl.sh - run: | cargo clippy --all \ - --features libm-test/build-musl,libm-test/test-multiprecision \ + --features libm-test/build-musl,libm-test/build-mpfr \ --all-targets builtins: @@ -241,7 +241,7 @@ jobs: fi LIBM_EXTENSIVE_TESTS="$CHANGED" cargo t \ - --features test-multiprecision,unstable \ + --features build-mpfr,unstable \ --profile release-checked \ -- extensive - name: Print test logs if available diff --git a/libm/CONTRIBUTING.md b/libm/CONTRIBUTING.md index aadcdf036..f7560878d 100644 --- a/libm/CONTRIBUTING.md +++ b/libm/CONTRIBUTING.md @@ -69,7 +69,7 @@ If you'd like to run tests with 
randomized inputs that get compared against infinite-precision results, run: ```sh -cargo test --features libm-test/test-multiprecision,libm-test/build-musl --release +cargo test --features libm-test/build-mpfr,libm-test/build-musl --release ``` The multiprecision tests use the [`rug`] crate for bindings to MPFR. MPFR can diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 244a22a07..63678620c 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -44,11 +44,11 @@ case "$target" in # Targets that aren't cross compiled work fine # FIXME(ci): we should be able to enable aarch64 Linux here once GHA # support rolls out. - x86_64*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;; - i686*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;; - i586*) extra_flags="$extra_flags --features libm-test/test-multiprecision --features gmp-mpfr-sys/force-cross" ;; + x86_64*) extra_flags="$extra_flags --features libm-test/build-mpfr" ;; + i686*) extra_flags="$extra_flags --features libm-test/build-mpfr" ;; + i586*) extra_flags="$extra_flags --features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross" ;; # Apple aarch64 is native - aarch64*apple*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;; + aarch64*apple*) extra_flags="$extra_flags --features libm-test/build-mpfr" ;; esac # FIXME: `STATUS_DLL_NOT_FOUND` testing macros on CI. diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 621e587c5..3cf4a08e1 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -12,7 +12,7 @@ unstable-float = ["libm/unstable-float", "rug?/nightly-float"] # Generate tests which are random inputs and the outputs are calculated with # musl libc. 
-test-multiprecision = ["dep:az", "dep:rug", "dep:gmp-mpfr-sys"] +build-mpfr = ["dep:az", "dep:rug", "dep:gmp-mpfr-sys"] # Build our own musl for testing and benchmarks build-musl = ["dep:musl-math-sys"] diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 6e7017f09..cb89f1c8b 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -5,7 +5,7 @@ pub mod domain; mod f8_impl; pub mod gen; -#[cfg(feature = "test-multiprecision")] +#[cfg(feature = "build-mpfr")] pub mod mpfloat; mod num; pub mod op; diff --git a/libm/crates/libm-test/src/run_cfg.rs b/libm/crates/libm-test/src/run_cfg.rs index 48a654caa..4a52091fe 100644 --- a/libm/crates/libm-test/src/run_cfg.rs +++ b/libm/crates/libm-test/src/run_cfg.rs @@ -126,7 +126,7 @@ impl TestEnv { let id = ctx.fn_ident; let op = id.math_op(); - let will_run_mp = cfg!(feature = "test-multiprecision"); + let will_run_mp = cfg!(feature = "build-mpfr"); // Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run in QEMU. Start // with a reduced number on these platforms. diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index e2766cfda..2d8856e16 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -1,6 +1,6 @@ //! Test with "infinite precision" -#![cfg(feature = "test-multiprecision")] +#![cfg(feature = "build-mpfr")] use libm_test::domain::HasDomain; use libm_test::gen::random::RandomInput; diff --git a/libm/crates/libm-test/tests/z_extensive/main.rs b/libm/crates/libm-test/tests/z_extensive/main.rs index 3a2af88bd..5448cb6ea 100644 --- a/libm/crates/libm-test/tests/z_extensive/main.rs +++ b/libm/crates/libm-test/tests/z_extensive/main.rs @@ -1,14 +1,14 @@ //! `main` is just a wrapper to handle configuration. 
-#[cfg(not(feature = "test-multiprecision"))] +#[cfg(not(feature = "build-mpfr"))] fn main() { eprintln!("multiprecision not enabled; skipping extensive tests"); } -#[cfg(feature = "test-multiprecision")] +#[cfg(feature = "build-mpfr")] mod run; -#[cfg(feature = "test-multiprecision")] +#[cfg(feature = "build-mpfr")] fn main() { run::run(); } From 6eef4db29e170b75aed8d9e4f40e0fe756d0d245 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 03:17:54 +0000 Subject: [PATCH 1062/1459] Enable `build-mpfr` and `build-musl` by default Most users who are developing this crate are likely running on a Unix system, since there isn't much to test against otherwise. For convenience, enable the features required to run these tests by default. --- libm/.github/workflows/main.yml | 5 +---- libm/CONTRIBUTING.md | 11 ++++------- libm/ci/run.sh | 27 ++++++++++++++++----------- libm/crates/libm-test/Cargo.toml | 2 +- 4 files changed, 22 insertions(+), 23 deletions(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index d54288574..35b307f77 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -131,10 +131,7 @@ jobs: - uses: Swatinem/rust-cache@v2 - name: Download musl source run: ./ci/download-musl.sh - - run: | - cargo clippy --all \ - --features libm-test/build-musl,libm-test/build-mpfr \ - --all-targets + - run: cargo clippy --all --all-features --all-targets builtins: name: Check use with compiler-builtins diff --git a/libm/CONTRIBUTING.md b/libm/CONTRIBUTING.md index f7560878d..ba7f78ca0 100644 --- a/libm/CONTRIBUTING.md +++ b/libm/CONTRIBUTING.md @@ -62,15 +62,12 @@ Check [PR #65] for an example. 
Normal tests can be executed with: ```sh -cargo test +# `--release` enables more test cases +cargo test --release ``` -If you'd like to run tests with randomized inputs that get compared against -infinite-precision results, run: - -```sh -cargo test --features libm-test/build-mpfr,libm-test/build-musl --release -``` +If you are on a system that cannot build musl or MPFR, passing +`--no-default-features` will run some limited tests. The multiprecision tests use the [`rug`] crate for bindings to MPFR. MPFR can be difficult to build on non-Unix systems, refer to [`gmp_mpfr_sys`] for help. diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 63678620c..35b84809f 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -14,7 +14,15 @@ if [ -z "$target" ]; then target="$host_target" fi -extra_flags="" +# We enumerate features manually. +extra_flags="--no-default-features" + +# Enable arch-specific routines when available. +extra_flags="$extra_flags --features arch" + +# Always enable `unstable-float` since it expands available API but does not +# change any implementations. +extra_flags="$extra_flags --features unstable-float" # We need to specifically skip tests for musl-math-sys on systems that can't # build musl since otherwise `--all` will activate it. @@ -57,14 +65,8 @@ case "$target" in *windows-gnu) extra_flags="$extra_flags --exclude libm-macros" ;; esac -# Make sure we can build with overriding features. We test the indibidual -# features it controls separately. -cargo check --no-default-features -cargo check --features "force-soft-floats" - -# Always enable `unstable-float` since it expands available API but does not -# change any implementations. -extra_flags="$extra_flags --features unstable-float" +# Make sure we can build with overriding features.
+cargo check -p libm --no-default-features if [ "${BUILD_ONLY:-}" = "1" ]; then cmd="cargo build --target $target --package libm" @@ -80,11 +82,14 @@ else $cmd --features unstable-intrinsics $cmd --features unstable-intrinsics --benches - # Test the same in release mode, which also increases coverage. + # Test the same in release mode, which also increases coverage. Also ensure + # the soft float routines are checked. $cmd --profile release-checked + $cmd --profile release-checked --features force-soft-floats $cmd --profile release-checked --features unstable-intrinsics $cmd --profile release-checked --features unstable-intrinsics --benches - ENSURE_NO_PANIC=1 cargo build --target "$target" --release + # Ensure that the routines do not panic. + ENSURE_NO_PANIC=1 cargo build -p libm --target "$target" --no-default-features --release fi diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 3cf4a08e1..d3f18ab3e 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" publish = false [features] -default = ["unstable-float"] +default = ["build-mpfr", "build-musl", "unstable-float"] # Propagated from libm because this affects which functions we test. unstable-float = ["libm/unstable-float", "rug?/nightly-float"] From 22dd3158618a34b4376fd72519130fd6243ba2e6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 03:12:12 +0000 Subject: [PATCH 1063/1459] Add a utility crate for quick evaluation Introduce a simple binary that can run arbitrary input against any of the available implementations (musl, MPFR, our libm). This provides an easy way to check results, or run specific cases against a debugger. 
Examples: $ cargo run -p util -- eval libm pow 1.6 2.4 3.089498284311124 $ cargo run -p util -- eval mpfr pow 1.6 2.4 3.089498284311124 $ cargo run -p util -- eval musl tgamma 1.2344597839132 0.9097442657960874 $ cargo run -p util -- eval mpfr tgamma 1.2344597839132 0.9097442657960874 $ cargo run -p util -- eval libm tgamma 1.2344597839132 0.9097442657960871 $ cargo run -p util -- eval musl sincos 3.1415926535 (8.979318433952318e-11, -1.0) --- libm/Cargo.toml | 1 + libm/crates/libm-test/src/mpfloat.rs | 26 +++ libm/crates/util/Cargo.toml | 19 +++ libm/crates/util/build.rs | 9 + libm/crates/util/src/main.rs | 243 +++++++++++++++++++++++++++ 5 files changed, 298 insertions(+) create mode 100644 libm/crates/util/Cargo.toml create mode 100644 libm/crates/util/build.rs create mode 100644 libm/crates/util/src/main.rs diff --git a/libm/Cargo.toml b/libm/Cargo.toml index fc881b77e..820c01347 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -46,6 +46,7 @@ members = [ "crates/libm-macros", "crates/libm-test", "crates/musl-math-sys", + "crates/util", ] default-members = [ ".", diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index f71e72cd5..092f5f1d2 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -471,6 +471,32 @@ impl MpOp for crate::op::lgammaf_r::Routine { } } +/* stub implementations so we don't need to special case them */ + +impl MpOp for crate::op::nextafter::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + unimplemented!("nextafter does not yet have a MPFR operation"); + } + + fn run(_this: &mut Self::MpTy, _input: Self::RustArgs) -> Self::RustRet { + unimplemented!("nextafter does not yet have a MPFR operation"); + } +} + +impl MpOp for crate::op::nextafterf::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + unimplemented!("nextafter does not yet have a MPFR operation"); + } + + fn run(_this: &mut Self::MpTy, _input: Self::RustArgs) -> 
Self::RustRet { + unimplemented!("nextafter does not yet have a MPFR operation"); + } +} + /// `rug` does not provide `remquo` so this exposes `mpfr_remquo`. See rug#76. fn mpfr_remquo(r: &mut MpFloat, x: &MpFloat, y: &MpFloat, round: Round) -> (Ordering, c_long) { let r = r.as_raw_mut(); diff --git a/libm/crates/util/Cargo.toml b/libm/crates/util/Cargo.toml new file mode 100644 index 000000000..acf5db704 --- /dev/null +++ b/libm/crates/util/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "util" +version = "0.1.0" +edition = "2021" +publish = false + +[features] +default = ["build-musl", "build-mpfr", "unstable-float"] +build-musl = ["libm-test/build-musl", "dep:musl-math-sys"] +build-mpfr = ["libm-test/build-mpfr", "dep:az", "dep:rug"] +unstable-float = ["libm/unstable-float", "libm-test/unstable-float", "rug?/nightly-float"] + +[dependencies] +az = { version = "1.2.1", optional = true } +libm = { path = "../..", default-features = false } +libm-macros = { path = "../libm-macros" } +libm-test = { path = "../libm-test", default-features = false } +musl-math-sys = { path = "../musl-math-sys", optional = true } +rug = { version = "1.26.1", optional = true, default-features = false, features = ["float", "std"] } diff --git a/libm/crates/util/build.rs b/libm/crates/util/build.rs new file mode 100644 index 000000000..0745ef3dd --- /dev/null +++ b/libm/crates/util/build.rs @@ -0,0 +1,9 @@ +#![allow(unexpected_cfgs)] + +#[path = "../../configure.rs"] +mod configure; + +fn main() { + let cfg = configure::Config::from_env(); + configure::emit_libm_config(&cfg); +} diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs new file mode 100644 index 000000000..f7bd31bb6 --- /dev/null +++ b/libm/crates/util/src/main.rs @@ -0,0 +1,243 @@ +//! Helper CLI utility for common tasks. 
+ +#![cfg_attr(f16_enabled, feature(f16))] +#![cfg_attr(f128_enabled, feature(f128))] + +use std::any::type_name; +use std::env; +use std::str::FromStr; + +#[cfg(feature = "build-mpfr")] +use az::Az; +#[cfg(feature = "build-mpfr")] +use libm_test::mpfloat::MpOp; +use libm_test::{MathOp, TupleCall}; + +const USAGE: &str = "\ +usage: + +cargo run -p util -- <SUBCOMMAND> + +SUBCOMMAND: + eval <BASIS> <OP> inputs... + Evaluate the expression with a given basis. This can be useful for + running routines with a debugger, or quickly checking input. Examples: + * eval musl sinf 1.234 # print the results of musl sinf(1.234f32) + * eval mpfr pow 1.234 2.432 # print the results of mpfr pow(1.234, 2.432) +"; + +fn main() { + let args = env::args().collect::<Vec<_>>(); + let str_args = args.iter().map(|s| s.as_str()).collect::<Vec<_>>(); + + match &str_args.as_slice()[1..] { + ["eval", basis, op, inputs @ ..] => do_eval(basis, op, inputs), + _ => { + println!("{USAGE}\nunrecognized input `{str_args:?}`"); + std::process::exit(1); + } + } +} + +macro_rules! handle_call { + ( + fn_name: $fn_name:ident, + CFn: $CFn:ty, + RustFn: $RustFn:ty, + RustArgs: $RustArgs:ty, + attrs: [$($attr:meta),*], + extra: ($basis:ident, $op:ident, $inputs:ident), + fn_extra: $musl_fn:expr, + ) => { + $(#[$attr])* + if $op == stringify!($fn_name) { + type Op = libm_test::op::$fn_name::Routine; + + let input = <$RustArgs>::parse($inputs); + let libm_fn: <Op as MathOp>::RustFn = libm::$fn_name; + + let output = match $basis { + "libm" => input.call(libm_fn), + #[cfg(feature = "build-musl")] + "musl" => { + let musl_fn: <Op as MathOp>::CFn = + $musl_fn.unwrap_or_else(|| panic!("no musl function for {}", $op)); + input.call(musl_fn) + } + #[cfg(feature = "build-mpfr")] + "mpfr" => { + let mut mp = <Op as MpOp>::new_mp(); + Op::run(&mut mp, input) + } + _ => panic!("unrecognized or disabled basis '{}'", $basis), + }; + println!("{output:?}"); + return; + } + }; +} + +/// Evaluate the specified operation with a given basis.
+fn do_eval(basis: &str, op: &str, inputs: &[&str]) { + libm_macros::for_each_function! { + callback: handle_call, + emit_types: [CFn, RustFn, RustArgs], + extra: (basis, op, inputs), + fn_extra: match MACRO_FN_NAME { + copysignf16 | copysignf128 | fabsf16 | fabsf128 => None, + _ => Some(musl_math_sys::MACRO_FN_NAME) + } + } + + panic!("no operation matching {op}"); +} + +/// Parse a tuple from a space-delimited string. +trait ParseTuple { + fn parse(input: &[&str]) -> Self; +} + +macro_rules! impl_parse_tuple { + ($ty:ty) => { + impl ParseTuple for ($ty,) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 1, "expected a single argument, got {input:?}"); + (parse(input, 0),) + } + } + + impl ParseTuple for ($ty, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse(input, 0), parse(input, 1)) + } + } + + impl ParseTuple for ($ty, i32) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse(input, 0), parse(input, 1)) + } + } + + impl ParseTuple for (i32, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse(input, 0), parse(input, 1)) + } + } + + impl ParseTuple for ($ty, $ty, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 3, "expected three arguments, got {input:?}"); + (parse(input, 0), parse(input, 1), parse(input, 2)) + } + } + }; +} + +#[allow(unused_macros)] +#[cfg(feature = "build-mpfr")] +macro_rules! 
impl_parse_tuple_via_rug { + ($ty:ty) => { + impl ParseTuple for ($ty,) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 1, "expected a single argument, got {input:?}"); + (parse_rug(input, 0),) + } + } + + impl ParseTuple for ($ty, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse_rug(input, 0), parse_rug(input, 1)) + } + } + + impl ParseTuple for ($ty, i32) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse_rug(input, 0), parse(input, 1)) + } + } + + impl ParseTuple for (i32, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse(input, 0), parse_rug(input, 1)) + } + } + + impl ParseTuple for ($ty, $ty, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 3, "expected three arguments, got {input:?}"); + (parse_rug(input, 0), parse_rug(input, 1), parse_rug(input, 2)) + } + } + }; +} + +// Fallback for when Rug is not built. +#[allow(unused_macros)] +#[cfg(not(feature = "build-mpfr"))] +macro_rules! 
impl_parse_tuple_via_rug { + ($ty:ty) => { + impl ParseTuple for ($ty,) { + fn parse(_input: &[&str]) -> Self { + panic!("parsing this type requires the `build-mpfr` feature") + } + } + + impl ParseTuple for ($ty, $ty) { + fn parse(_input: &[&str]) -> Self { + panic!("parsing this type requires the `build-mpfr` feature") + } + } + + impl ParseTuple for ($ty, i32) { + fn parse(_input: &[&str]) -> Self { + panic!("parsing this type requires the `build-mpfr` feature") + } + } + + impl ParseTuple for (i32, $ty) { + fn parse(_input: &[&str]) -> Self { + panic!("parsing this type requires the `build-mpfr` feature") + } + } + + impl ParseTuple for ($ty, $ty, $ty) { + fn parse(_input: &[&str]) -> Self { + panic!("parsing this type requires the `build-mpfr` feature") + } + } + }; +} + +impl_parse_tuple!(f32); +impl_parse_tuple!(f64); + +#[cfg(f16_enabled)] +impl_parse_tuple_via_rug!(f16); +#[cfg(f128_enabled)] +impl_parse_tuple_via_rug!(f128); + +/// Try to parse the number, printing a nice message on failure. +fn parse<F: FromStr>(input: &[&str], idx: usize) -> F { + let s = input[idx]; + s.parse().unwrap_or_else(|_| panic!("invalid {} input '{s}'", type_name::<F>())) +} + +/// Try to parse the float type going via `rug`, for `f16` and `f128` which don't yet implement +/// `FromStr`. +#[cfg(feature = "build-mpfr")] +fn parse_rug<F: libm_test::Float>(input: &[&str], idx: usize) -> F +where + rug::Float: az::Cast<F>, +{ + let s = input[idx]; + let x = + rug::Float::parse(s).unwrap_or_else(|_| panic!("invalid {} input '{s}'", type_name::<F>())); + let x = rug::Float::with_val(F::BITS, x); + x.az() +} From 3950fb08873bd80158d85bf54bd36c6de3270d62 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 11 Jan 2025 23:38:19 +0000 Subject: [PATCH 1064/1459] Add a generic version of `trunc` The algorithm is identical for both types, so this is a straightforward routine to port.
--- libm/src/math/generic/mod.rs | 2 ++ libm/src/math/generic/trunc.rs | 54 ++++++++++++++++++++++++++++++++++ libm/src/math/mod.rs | 2 +- libm/src/math/trunc.rs | 30 +------------------ libm/src/math/truncf.rs | 22 +------------- 5 files changed, 59 insertions(+), 51 deletions(-) create mode 100644 libm/src/math/generic/trunc.rs diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index 08524b685..e5166ca10 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -1,5 +1,7 @@ mod copysign; mod fabs; +mod trunc; pub use copysign::copysign; pub use fabs::fabs; +pub use trunc::trunc; diff --git a/libm/src/math/generic/trunc.rs b/libm/src/math/generic/trunc.rs new file mode 100644 index 000000000..5d0ba6109 --- /dev/null +++ b/libm/src/math/generic/trunc.rs @@ -0,0 +1,54 @@ +use super::super::{Float, Int, IntTy, MinInt}; + +pub fn trunc<F: Float>(x: F) -> F { + let mut xi: F::Int = x.to_bits(); + let e: i32 = x.exp_unbiased(); + + // C1: The represented value has no fractional part, so no truncation is needed + if e >= F::SIG_BITS as i32 { + return x; + } + + let mask = if e < 0 { + // C2: If the exponent is negative, the result will be zero so we mask out everything + // except the sign. + F::SIGN_MASK + } else { + // C3: Otherwise, we mask out the last `e` bits of the significand. + !(F::SIG_MASK >> e.unsigned()) + }; + + // C4: If the to-be-masked-out portion is already zero, we have an exact result + if (xi & !mask) == IntTy::<F>::ZERO { + return x; + } + + // C5: Otherwise the result is inexact and we will truncate. Raise `FE_INEXACT`, mask the + // result, and return. 
+ force_eval!(x + F::MAX); + xi &= mask; + F::from_bits(xi) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sanity_check() { + assert_biteq!(trunc(1.1f32), 1.0); + assert_biteq!(trunc(1.1f64), 1.0); + + // C1 + assert_biteq!(trunc(hf32!("0x1p23")), hf32!("0x1p23")); + assert_biteq!(trunc(hf64!("0x1p52")), hf64!("0x1p52")); + assert_biteq!(trunc(hf32!("-0x1p23")), hf32!("-0x1p23")); + assert_biteq!(trunc(hf64!("-0x1p52")), hf64!("-0x1p52")); + + // C2 + assert_biteq!(trunc(hf32!("0x1p-1")), 0.0); + assert_biteq!(trunc(hf64!("0x1p-1")), 0.0); + assert_biteq!(trunc(hf32!("-0x1p-1")), -0.0); + assert_biteq!(trunc(hf64!("-0x1p-1")), -0.0); + } +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 5baf35e42..c0d038a0c 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -121,7 +121,7 @@ use self::rem_pio2::rem_pio2; use self::rem_pio2_large::rem_pio2_large; use self::rem_pio2f::rem_pio2f; #[allow(unused_imports)] -use self::support::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt}; +use self::support::{CastFrom, CastInto, DInt, Float, HInt, Int, IntTy, MinInt}; // Public modules mod acos; diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs index 7e5c4f2c2..2cc8aaa7e 100644 --- a/libm/src/math/trunc.rs +++ b/libm/src/math/trunc.rs @@ -1,5 +1,3 @@ -use core::f64; - /// Rounds the number toward 0 to the closest integral value (f64). /// /// This effectively removes the decimal part of the number, leaving the integral part. 
@@ -11,31 +9,5 @@ pub fn trunc(x: f64) -> f64 { args: x, } - let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 - - let mut i: u64 = x.to_bits(); - let mut e: i64 = ((i >> 52) & 0x7ff) as i64 - 0x3ff + 12; - let m: u64; - - if e >= 52 + 12 { - return x; - } - if e < 12 { - e = 1; - } - m = -1i64 as u64 >> e; - if (i & m) == 0 { - return x; - } - force_eval!(x + x1p120); - i &= !m; - f64::from_bits(i) -} - -#[cfg(test)] -mod tests { - #[test] - fn sanity_check() { - assert_eq!(super::trunc(1.1), 1.0); - } + super::generic::trunc(x) } diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs index b491747d9..14533a267 100644 --- a/libm/src/math/truncf.rs +++ b/libm/src/math/truncf.rs @@ -1,5 +1,3 @@ -use core::f32; - /// Rounds the number toward 0 to the closest integral value (f32). /// /// This effectively removes the decimal part of the number, leaving the integral part. @@ -11,25 +9,7 @@ pub fn truncf(x: f32) -> f32 { args: x, } - let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 - - let mut i: u32 = x.to_bits(); - let mut e: i32 = ((i >> 23) & 0xff) as i32 - 0x7f + 9; - let m: u32; - - if e >= 23 + 9 { - return x; - } - if e < 9 { - e = 1; - } - m = -1i32 as u32 >> e; - if (i & m) == 0 { - return x; - } - force_eval!(x + x1p120); - i &= !m; - f32::from_bits(i) + super::generic::trunc(x) } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 From a5d4d3c316a85ce8d87a393e35241d1a31e622b6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 12 Jan 2025 04:12:56 +0000 Subject: [PATCH 1065/1459] Add `truncf16` and `truncf128` Use the generic algorithms to provide implementations for these routines. 
--- .../compiler-builtins-smoke-test/src/lib.rs | 2 ++ libm/crates/libm-macros/src/shared.rs | 4 ++-- libm/crates/libm-test/benches/random.rs | 2 +- libm/crates/libm-test/src/domain.rs | 10 ++++++++++ libm/crates/libm-test/src/mpfloat.rs | 3 +++ .../crates/libm-test/tests/compare_built_musl.rs | 4 +++- libm/crates/util/src/main.rs | 2 +- libm/etc/function-definitions.json | 16 ++++++++++++++++ libm/etc/function-list.txt | 2 ++ libm/src/math/generic/trunc.rs | 3 +++ libm/src/math/mod.rs | 4 ++++ libm/src/math/truncf128.rs | 7 +++++++ libm/src/math/truncf16.rs | 7 +++++++ 13 files changed, 61 insertions(+), 5 deletions(-) create mode 100644 libm/src/math/truncf128.rs create mode 100644 libm/src/math/truncf16.rs diff --git a/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/libm/crates/compiler-builtins-smoke-test/src/lib.rs index 95ecb840d..3416a2229 100644 --- a/libm/crates/compiler-builtins-smoke-test/src/lib.rs +++ b/libm/crates/compiler-builtins-smoke-test/src/lib.rs @@ -157,6 +157,8 @@ no_mangle! 
{ tgammaf(x: f32) -> f32; trunc(x: f64) -> f64; truncf(x: f32) -> f32; + truncf128(x: f128) -> f128; + truncf16(x: f16) -> f16; y0(x: f64) -> f64; y0f(x: f32) -> f32; y1(x: f64) -> f64; diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index 16547404f..24fccd6f2 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ b/libm/crates/libm-macros/src/shared.rs @@ -9,7 +9,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F16, Signature { args: &[Ty::F16], returns: &[Ty::F16] }, None, - &["fabsf16"], + &["fabsf16", "truncf16"], ), ( // `fn(f32) -> f32` @@ -40,7 +40,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F128, Signature { args: &[Ty::F128], returns: &[Ty::F128] }, None, - &["fabsf128"], + &["fabsf128", "truncf128"], ), ( // `(f16, f16) -> f16` diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index cd1e2d2cc..8c6afff25 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -117,7 +117,7 @@ libm_macros::for_each_function! 
{ exp10 | exp10f | exp2 | exp2f => (true, Some(musl_math_sys::MACRO_FN_NAME)), // Musl does not provide `f16` and `f128` functions - copysignf16 | copysignf128 | fabsf16 | fabsf128 => (false, None), + copysignf16 | copysignf128 | fabsf16 | fabsf128 | truncf16 | truncf128 => (false, None), // By default we never skip (false) and always have a musl function available _ => (false, Some(musl_math_sys::MACRO_FN_NAME)) diff --git a/libm/crates/libm-test/src/domain.rs b/libm/crates/libm-test/src/domain.rs index 52393d402..adafb9faa 100644 --- a/libm/crates/libm-test/src/domain.rs +++ b/libm/crates/libm-test/src/domain.rs @@ -199,3 +199,13 @@ impl HasDomain for crate::op::fabsf16::Routine { impl HasDomain for crate::op::fabsf128::Routine { const DOMAIN: Domain = Domain::::UNBOUNDED; } + +#[cfg(f16_enabled)] +impl HasDomain for crate::op::truncf16::Routine { + const DOMAIN: Domain = Domain::::UNBOUNDED; +} + +#[cfg(f128_enabled)] +impl HasDomain for crate::op::truncf128::Routine { + const DOMAIN: Domain = Domain::::UNBOUNDED; +} diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 092f5f1d2..2a740ed47 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -141,6 +141,7 @@ libm_macros::for_each_function! { lgamma_r, lgammaf_r, modf, modff, nextafter, nextafterf, pow,powf, remquo, remquof, scalbn, scalbnf, sincos, sincosf, yn, ynf, copysignf16, copysignf128, fabsf16, fabsf128, + truncf16, truncf128, ], fn_extra: match MACRO_FN_NAME { // Remap function names that are different between mpfr and libm @@ -202,11 +203,13 @@ impl_no_round! { #[cfg(f16_enabled)] impl_no_round! { fabsf16 => abs_mut; + truncf16 => trunc_mut; } #[cfg(f128_enabled)] impl_no_round! { fabsf128 => abs_mut; + truncf128 => trunc_mut; } /// Some functions are difficult to do in a generic way. Implement them here. 
diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index b91d7f9f5..a395c6c5d 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -48,7 +48,7 @@ where libm_macros::for_each_function! { callback: musl_rand_tests, // Musl does not support `f16` and `f128` on all platforms. - skip: [copysignf16, copysignf128, fabsf16, fabsf128], + skip: [copysignf16, copysignf128, fabsf16, fabsf128, truncf16, truncf128], attributes: [ #[cfg_attr(x86_no_sse, ignore)] // FIXME(correctness): wrong result on i586 [exp10, exp10f, exp2, exp2f, rint] @@ -146,5 +146,7 @@ libm_macros::for_each_function! { // Not provided by musl fabsf16, fabsf128, + truncf16, + truncf128, ], } diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index f7bd31bb6..c8a03068a 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -84,7 +84,7 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { emit_types: [CFn, RustFn, RustArgs], extra: (basis, op, inputs), fn_extra: match MACRO_FN_NAME { - copysignf16 | copysignf128 | fabsf16 | fabsf128 => None, + copysignf16 | copysignf128 | fabsf16 | fabsf128 | truncf16 | truncf128 => None, _ => Some(musl_math_sys::MACRO_FN_NAME) } } diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 39b6c9702..86fa02101 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -743,6 +743,7 @@ "sources": [ "src/libm_helper.rs", "src/math/arch/wasm32.rs", + "src/math/generic/trunc.rs", "src/math/trunc.rs" ], "type": "f64" @@ -750,10 +751,25 @@ "truncf": { "sources": [ "src/math/arch/wasm32.rs", + "src/math/generic/trunc.rs", "src/math/truncf.rs" ], "type": "f32" }, + "truncf128": { + "sources": [ + "src/math/generic/trunc.rs", + "src/math/truncf128.rs" + ], + "type": "f128" + }, + "truncf16": { + "sources": [ + "src/math/generic/trunc.rs", + 
"src/math/truncf16.rs" + ], + "type": "f16" + }, "y0": { "sources": [ "src/libm_helper.rs", diff --git a/libm/etc/function-list.txt b/libm/etc/function-list.txt index 0a1bbab24..8aa901762 100644 --- a/libm/etc/function-list.txt +++ b/libm/etc/function-list.txt @@ -111,6 +111,8 @@ tgamma tgammaf trunc truncf +truncf128 +truncf16 y0 y0f y1 diff --git a/libm/src/math/generic/trunc.rs b/libm/src/math/generic/trunc.rs index 5d0ba6109..ca5f1bdd6 100644 --- a/libm/src/math/generic/trunc.rs +++ b/libm/src/math/generic/trunc.rs @@ -1,3 +1,6 @@ +/* SPDX-License-Identifier: MIT + * origin: musl src/math/trunc.c */ + use super::super::{Float, Int, IntTy, MinInt}; pub fn trunc(x: F) -> F { diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index c0d038a0c..723be0e1d 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -343,9 +343,11 @@ cfg_if! { if #[cfg(f16_enabled)] { mod copysignf16; mod fabsf16; + mod truncf16; pub use self::copysignf16::copysignf16; pub use self::fabsf16::fabsf16; + pub use self::truncf16::truncf16; } } @@ -353,9 +355,11 @@ cfg_if! { if #[cfg(f128_enabled)] { mod copysignf128; mod fabsf128; + mod truncf128; pub use self::copysignf128::copysignf128; pub use self::fabsf128::fabsf128; + pub use self::truncf128::truncf128; } } diff --git a/libm/src/math/truncf128.rs b/libm/src/math/truncf128.rs new file mode 100644 index 000000000..9dccc0d0e --- /dev/null +++ b/libm/src/math/truncf128.rs @@ -0,0 +1,7 @@ +/// Rounds the number toward 0 to the closest integral value (f128). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf128(x: f128) -> f128 { + super::generic::trunc(x) +} diff --git a/libm/src/math/truncf16.rs b/libm/src/math/truncf16.rs new file mode 100644 index 000000000..d7c3d225c --- /dev/null +++ b/libm/src/math/truncf16.rs @@ -0,0 +1,7 @@ +/// Rounds the number toward 0 to the closest integral value (f16). 
+/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf16(x: f16) -> f16 { + super::generic::trunc(x) +} From bd2ad96f5e882b62b291a98ec4c0c5baddad8f5a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 10:17:51 +0000 Subject: [PATCH 1066/1459] Disable `util` and `libm-macros` for optimized tests These crates take time building in CI, especially with the release profile having LTO enabled, but there isn't really any reason to test them with different features or in release mode. Disable this to save some CI runtime. --- libm/ci/run.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 35b84809f..94ff54cb7 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -77,8 +77,14 @@ if [ "${BUILD_ONLY:-}" = "1" ]; then else cmd="cargo test --all --target $target $extra_flags" - # Test once without intrinsics, once with intrinsics enabled + # Test once without intrinsics $cmd + + # Exclude the macros and utile crates from the rest of the tests to save CI + # runtime, they shouldn't have anything feature- or opt-level-dependent. + cmd="$cmd --exclude util --exclude libm-macros" + + # Test once with intrinsics enabled $cmd --features unstable-intrinsics $cmd --features unstable-intrinsics --benches From 85a2553600f02b79804d475b3e4cd3682607494b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 10:29:56 +0000 Subject: [PATCH 1067/1459] Format the MPFR manual implementation list --- libm/crates/libm-test/src/mpfloat.rs | 48 ++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 2a740ed47..a4aad81f7 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -135,13 +135,47 @@ libm_macros::for_each_function! 
{ emit_types: [RustFn], skip: [ // Most of these need a manual implementation - fabs, ceil, copysign, floor, rint, round, trunc, - fabsf, ceilf, copysignf, floorf, rintf, roundf, truncf, - fmod, fmodf, frexp, frexpf, ilogb, ilogbf, jn, jnf, ldexp, ldexpf, - lgamma_r, lgammaf_r, modf, modff, nextafter, nextafterf, pow,powf, - remquo, remquof, scalbn, scalbnf, sincos, sincosf, yn, ynf, - copysignf16, copysignf128, fabsf16, fabsf128, - truncf16, truncf128, + ceil, + ceilf, + copysign, + copysignf, + copysignf128, + copysignf16, + fabs, + fabsf, + fabsf128, + fabsf16,floor, + floorf, + fmod, + fmodf, + frexp, + frexpf, + ilogb, + ilogbf, + jn, + jnf, + ldexp,ldexpf, + lgamma_r, + lgammaf_r, + modf, + modff, + nextafter, + nextafterf, + pow, + powf,remquo, + remquof, + rint, + rintf, + round, + roundf, + scalbn, + scalbnf, + sincos,sincosf, + trunc, + truncf, + truncf128, + truncf16,yn, + ynf, ], fn_extra: match MACRO_FN_NAME { // Remap function names that are different between mpfr and libm From f9e6cd7a7d13812fb8e52947cec143119c746350 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 13:49:43 +0000 Subject: [PATCH 1068/1459] Add a generic version of `fdim` --- libm/etc/function-definitions.json | 6 ++++-- libm/src/math/fdim.rs | 12 +----------- libm/src/math/fdimf.rs | 12 +----------- libm/src/math/generic/fdim.rs | 13 +++++++++++++ libm/src/math/generic/mod.rs | 2 ++ 5 files changed, 21 insertions(+), 24 deletions(-) create mode 100644 libm/src/math/generic/fdim.rs diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 86fa02101..1e6d260fe 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -289,13 +289,15 @@ "fdim": { "sources": [ "src/libm_helper.rs", - "src/math/fdim.rs" + "src/math/fdim.rs", + "src/math/generic/fdim.rs" ], "type": "f64" }, "fdimf": { "sources": [ - "src/math/fdimf.rs" + "src/math/fdimf.rs", + "src/math/generic/fdim.rs" ], "type": "f32" }, diff --git 
a/libm/src/math/fdim.rs b/libm/src/math/fdim.rs index 7c58cb5a9..10ffa2881 100644 --- a/libm/src/math/fdim.rs +++ b/libm/src/math/fdim.rs @@ -1,5 +1,3 @@ -use core::f64; - /// Positive difference (f64) /// /// Determines the positive difference between arguments, returning: @@ -10,13 +8,5 @@ use core::f64; /// A range error may occur. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fdim(x: f64, y: f64) -> f64 { - if x.is_nan() { - x - } else if y.is_nan() { - y - } else if x > y { - x - y - } else { - 0.0 - } + super::generic::fdim(x, y) } diff --git a/libm/src/math/fdimf.rs b/libm/src/math/fdimf.rs index 2abd49a64..367ef517c 100644 --- a/libm/src/math/fdimf.rs +++ b/libm/src/math/fdimf.rs @@ -1,5 +1,3 @@ -use core::f32; - /// Positive difference (f32) /// /// Determines the positive difference between arguments, returning: @@ -10,13 +8,5 @@ use core::f32; /// A range error may occur. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fdimf(x: f32, y: f32) -> f32 { - if x.is_nan() { - x - } else if y.is_nan() { - y - } else if x > y { - x - y - } else { - 0.0 - } + super::generic::fdim(x, y) } diff --git a/libm/src/math/generic/fdim.rs b/libm/src/math/generic/fdim.rs new file mode 100644 index 000000000..2e54a41de --- /dev/null +++ b/libm/src/math/generic/fdim.rs @@ -0,0 +1,13 @@ +use super::super::Float; + +pub fn fdim<F: Float>(x: F, y: F) -> F { + if x.is_nan() { + x + } else if y.is_nan() { + y + } else if x > y { + x - y + } else { + F::ZERO + } +} diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index e5166ca10..2b068d6c5 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -1,7 +1,9 @@ mod copysign; mod fabs; +mod fdim; mod trunc; pub use copysign::copysign; pub use fabs::fabs; +pub use fdim::fdim; pub use trunc::trunc; From 2d2fcd4e83df56e35646b83fc8f6a35babd12be5 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 13:58:40 +0000 Subject: [PATCH 1069/1459] Add
`fdimf16` and `fdimf128` Use the generic algorithms to provide implementations for these routines. --- .../compiler-builtins-smoke-test/src/lib.rs | 2 ++ libm/crates/libm-macros/src/shared.rs | 4 ++-- libm/crates/libm-test/benches/random.rs | 9 ++++++++- libm/crates/libm-test/src/domain.rs | 10 ++++++++++ libm/crates/libm-test/src/mpfloat.rs | 2 +- .../libm-test/tests/compare_built_musl.rs | 17 ++++++++++++++--- libm/crates/libm-test/tests/multiprecision.rs | 2 ++ libm/crates/util/src/main.rs | 9 ++++++++- libm/etc/function-definitions.json | 14 ++++++++++++++ libm/etc/function-list.txt | 2 ++ libm/src/libm_helper.rs | 2 ++ libm/src/math/fdimf128.rs | 12 ++++++++++++ libm/src/math/fdimf16.rs | 12 ++++++++++++ libm/src/math/mod.rs | 4 ++++ 14 files changed, 93 insertions(+), 8 deletions(-) create mode 100644 libm/src/math/fdimf128.rs create mode 100644 libm/src/math/fdimf16.rs diff --git a/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/libm/crates/compiler-builtins-smoke-test/src/lib.rs index 3416a2229..b9521eb07 100644 --- a/libm/crates/compiler-builtins-smoke-test/src/lib.rs +++ b/libm/crates/compiler-builtins-smoke-test/src/lib.rs @@ -93,6 +93,8 @@ no_mangle! 
{ fabsf16(x: f16) -> f16; fdim(x: f64, y: f64) -> f64; fdimf(x: f32, y: f32) -> f32; + fdimf128(x: f128, y: f128) -> f128; + fdimf16(x: f16, y: f16) -> f16; floor(x: f64) -> f64; floorf(x: f32) -> f32; fma(x: f64, y: f64, z: f64) -> f64; diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index 24fccd6f2..608381962 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ b/libm/crates/libm-macros/src/shared.rs @@ -47,7 +47,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F16, Signature { args: &[Ty::F16, Ty::F16], returns: &[Ty::F16] }, None, - &["copysignf16"], + &["copysignf16", "fdimf16"], ), ( // `(f32, f32) -> f32` @@ -90,7 +90,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F128, Signature { args: &[Ty::F128, Ty::F128], returns: &[Ty::F128] }, None, - &["copysignf128"], + &["copysignf128", "fdimf128"], ), ( // `(f32, f32, f32) -> f32` diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 8c6afff25..e79002277 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -117,7 +117,14 @@ libm_macros::for_each_function! 
{ exp10 | exp10f | exp2 | exp2f => (true, Some(musl_math_sys::MACRO_FN_NAME)), // Musl does not provide `f16` and `f128` functions - copysignf16 | copysignf128 | fabsf16 | fabsf128 | truncf16 | truncf128 => (false, None), + copysignf128 + | copysignf16 + | fabsf128 + | fabsf16 + | fdimf128 + | fdimf16 + | truncf128 + | truncf16 => (false, None), // By default we never skip (false) and always have a musl function available _ => (false, Some(musl_math_sys::MACRO_FN_NAME)) diff --git a/libm/crates/libm-test/src/domain.rs b/libm/crates/libm-test/src/domain.rs index adafb9faa..68b91bf02 100644 --- a/libm/crates/libm-test/src/domain.rs +++ b/libm/crates/libm-test/src/domain.rs @@ -200,6 +200,16 @@ impl HasDomain for crate::op::fabsf128::Routine { const DOMAIN: Domain = Domain::::UNBOUNDED; } +#[cfg(f16_enabled)] +impl HasDomain for crate::op::fdimf16::Routine { + const DOMAIN: Domain = Domain::::UNBOUNDED; +} + +#[cfg(f128_enabled)] +impl HasDomain for crate::op::fdimf128::Routine { + const DOMAIN: Domain = Domain::::UNBOUNDED; +} + #[cfg(f16_enabled)] impl HasDomain for crate::op::truncf16::Routine { const DOMAIN: Domain = Domain::::UNBOUNDED; diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index a4aad81f7..9d95356d3 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -181,7 +181,7 @@ libm_macros::for_each_function! 
{ // Remap function names that are different between mpfr and libm expm1 | expm1f => exp_m1, fabs | fabsf => abs, - fdim | fdimf => positive_diff, + fdim | fdimf | fdimf16 | fdimf128 => positive_diff, fma | fmaf => mul_add, fmax | fmaxf => max, fmin | fminf => min, diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index a395c6c5d..836c425a5 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -48,7 +48,16 @@ where libm_macros::for_each_function! { callback: musl_rand_tests, // Musl does not support `f16` and `f128` on all platforms. - skip: [copysignf16, copysignf128, fabsf16, fabsf128, truncf16, truncf128], + skip: [ + copysignf128, + copysignf16, + fabsf128, + fabsf16, + fdimf128, + fdimf16, + truncf128, + truncf16, + ], attributes: [ #[cfg_attr(x86_no_sse, ignore)] // FIXME(correctness): wrong result on i586 [exp10, exp10f, exp2, exp2f, rint] @@ -144,9 +153,11 @@ libm_macros::for_each_function! { ynf, // Not provided by musl - fabsf16, fabsf128, - truncf16, + fabsf16, + fdimf128, + fdimf16, truncf128, + truncf16, ], } diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 2d8856e16..123abfdaf 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -120,6 +120,8 @@ libm_macros::for_each_function! 
{ copysignf128, fdim, fdimf, + fdimf16, + fdimf128, fma, fmaf, fmax, diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index c8a03068a..b979c60ad 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -84,7 +84,14 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { emit_types: [CFn, RustFn, RustArgs], extra: (basis, op, inputs), fn_extra: match MACRO_FN_NAME { - copysignf16 | copysignf128 | fabsf16 | fabsf128 | truncf16 | truncf128 => None, + copysignf128 + | copysignf16 + | fabsf128 + | fabsf16 + | fdimf128 + | fdimf16 + | truncf128 + | truncf16 => None, _ => Some(musl_math_sys::MACRO_FN_NAME) } } diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 1e6d260fe..dbaac931c 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -301,6 +301,20 @@ ], "type": "f32" }, + "fdimf128": { + "sources": [ + "src/math/fdimf128.rs", + "src/math/generic/fdim.rs" + ], + "type": "f128" + }, + "fdimf16": { + "sources": [ + "src/math/fdimf16.rs", + "src/math/generic/fdim.rs" + ], + "type": "f16" + }, "floor": { "sources": [ "src/libm_helper.rs", diff --git a/libm/etc/function-list.txt b/libm/etc/function-list.txt index 8aa901762..7f96a4362 100644 --- a/libm/etc/function-list.txt +++ b/libm/etc/function-list.txt @@ -43,6 +43,8 @@ fabsf128 fabsf16 fdim fdimf +fdimf128 +fdimf16 floor floorf fma diff --git a/libm/src/libm_helper.rs b/libm/src/libm_helper.rs index f087267e4..73bae4567 100644 --- a/libm/src/libm_helper.rs +++ b/libm/src/libm_helper.rs @@ -176,6 +176,7 @@ libm_helper! { funcs: { (fn copysign(x: f16, y: f16) -> (f16); => copysignf16); (fn fabs(x: f16) -> (f16); => fabsf16); + (fn fdim(x: f16, y: f16) -> (f16); => fdimf16); } } @@ -185,5 +186,6 @@ libm_helper! 
{ funcs: { (fn copysign(x: f128, y: f128) -> (f128); => copysignf128); (fn fabs(x: f128) -> (f128); => fabsf128); + (fn fdim(x: f128, y: f128) -> (f128); => fdimf128); } } diff --git a/libm/src/math/fdimf128.rs b/libm/src/math/fdimf128.rs new file mode 100644 index 000000000..6f3d1d0ff --- /dev/null +++ b/libm/src/math/fdimf128.rs @@ -0,0 +1,12 @@ +/// Positive difference (f128) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf128(x: f128, y: f128) -> f128 { + super::generic::fdim(x, y) +} diff --git a/libm/src/math/fdimf16.rs b/libm/src/math/fdimf16.rs new file mode 100644 index 000000000..37bd68858 --- /dev/null +++ b/libm/src/math/fdimf16.rs @@ -0,0 +1,12 @@ +/// Positive difference (f16) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf16(x: f16, y: f16) -> f16 { + super::generic::fdim(x, y) +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 723be0e1d..03adb6be1 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -343,10 +343,12 @@ cfg_if! { if #[cfg(f16_enabled)] { mod copysignf16; mod fabsf16; + mod fdimf16; mod truncf16; pub use self::copysignf16::copysignf16; pub use self::fabsf16::fabsf16; + pub use self::fdimf16::fdimf16; pub use self::truncf16::truncf16; } } @@ -355,10 +357,12 @@ cfg_if! 
{ if #[cfg(f128_enabled)] { mod copysignf128; mod fabsf128; + mod fdimf128; mod truncf128; pub use self::copysignf128::copysignf128; pub use self::fabsf128::fabsf128; + pub use self::fdimf128::fdimf128; pub use self::truncf128::truncf128; } } From 926b597929c7bb99c401e05a94d12e03219dae58 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 22:57:38 +0000 Subject: [PATCH 1070/1459] Don't set `codegen-units=1` by default in CI We can set this only for the release profile, there isn't any reason to have it set for debug tests. --- libm/Cargo.toml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 820c01347..f84f3eac6 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -61,14 +61,15 @@ exclude = [ [dev-dependencies] no-panic = "0.1.30" - -# This is needed for no-panic to correctly detect the lack of panics [profile.release] +# Options for no-panic to correctly detect the lack of panics +codegen-units = 1 lto = "fat" # Release mode with debug assertions [profile.release-checked] -inherits = "release" +codegen-units = 1 debug-assertions = true +inherits = "release" lto = "fat" overflow-checks = true From 4009f0d2ae2e9661d469b361fb0672abed0fba8b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 23:00:39 +0000 Subject: [PATCH 1071/1459] Reduce indentation in `run.sh` using early return --- libm/ci/run.sh | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 94ff54cb7..008f32d5b 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -3,8 +3,6 @@ set -eux export RUST_BACKTRACE="${RUST_BACKTRACE:-full}" -# Needed for no-panic to correct detect a lack of panics -export RUSTFLAGS="${RUSTFLAGS:-} -Ccodegen-units=1" target="${1:-}" @@ -69,33 +67,36 @@ esac cargo check -p libm --no-default-features if [ "${BUILD_ONLY:-}" = "1" ]; then + # If we are on targets that can't run tests, verify that 
we can build. cmd="cargo build --target $target --package libm" $cmd $cmd --features unstable-intrinsics echo "can't run tests on $target; skipping" -else - cmd="cargo test --all --target $target $extra_flags" + exit +fi - # Test once without intrinsics - $cmd +# Otherwise, run the test suite. - # Exclude the macros and utile crates from the rest of the tests to save CI - # runtime, they shouldn't have anything feature- or opt-level-dependent. - cmd="$cmd --exclude util --exclude libm-macros" +cmd="cargo test --all --target $target $extra_flags" - # Test once with intrinsics enabled - $cmd --features unstable-intrinsics - $cmd --features unstable-intrinsics --benches - - # Test the same in release mode, which also increases coverage. Also ensure - # the soft float routines are checked. - $cmd --profile release-checked - $cmd --profile release-checked --features force-soft-floats - $cmd --profile release-checked --features unstable-intrinsics - $cmd --profile release-checked --features unstable-intrinsics --benches - - # Ensure that the routines do not panic. - ENSURE_NO_PANIC=1 cargo build -p libm --target "$target" --no-default-features --release -fi +# Test once without intrinsics +$cmd + +# Exclude the macros and utile crates from the rest of the tests to save CI +# runtime, they shouldn't have anything feature- or opt-level-dependent. +cmd="$cmd --exclude util --exclude libm-macros" + +# Test once with intrinsics enabled +$cmd --features unstable-intrinsics +$cmd --features unstable-intrinsics --benches + +# Test the same in release mode, which also increases coverage. Also ensure +# the soft float routines are checked. +$cmd --profile release-checked +$cmd --profile release-checked --features force-soft-floats +$cmd --profile release-checked --features unstable-intrinsics +$cmd --profile release-checked --features unstable-intrinsics --benches +# Ensure that the routines do not panic. 
+ENSURE_NO_PANIC=1 cargo build -p libm --target "$target" --no-default-features --release From af19b000a5afb28ea3471f10b38da1fc9332842d Mon Sep 17 00:00:00 2001 From: quaternic <57393910+quaternic@users.noreply.github.com> Date: Tue, 14 Jan 2025 03:55:26 +0200 Subject: [PATCH 1072/1459] Simplify and optimize `fdim` (#442) The cases with NaN arguments can be handled by the same x - y expression, and this generates much better code: https://godbolt.org/z/f3rnT8jx4. --- libm/src/math/generic/fdim.rs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/libm/src/math/generic/fdim.rs b/libm/src/math/generic/fdim.rs index 2e54a41de..bf971cd7d 100644 --- a/libm/src/math/generic/fdim.rs +++ b/libm/src/math/generic/fdim.rs @@ -1,13 +1,5 @@ use super::super::Float; pub fn fdim<F: Float>(x: F, y: F) -> F { - if x.is_nan() { - x - } else if y.is_nan() { - y - } else if x > y { - x - y - } else { - F::ZERO - } + if x <= y { F::ZERO } else { x - y } } From dc3c73a62918f096f2d94e4b88be0e6758c00de6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 23:40:42 +0000 Subject: [PATCH 1073/1459] Use cargo-nextest for running tests in CI The test suite for this repo has quite a lot of tests, and it is difficult to tell which contribute the most to the long CI runtime. libtest does have an unstable flag to report test times, but that is inconvenient to use because it needs to be passed only to libtest binaries. Switch to cargo-nextest [1] which provides time reporting and, overall, a better test UI. It may also improve test runtime, though this seems unlikely since we have larger test binaries with many small tests (nextest benefits the most when there are larger binaries that can be run in parallel). For anyone running locally without it, `run.sh` should still fall back to `cargo test` if `cargo-nextest` is not available. This diff includes some cleanup and consistency changes to other CI-related files.
[1]: https://nexte.st --- libm/.github/workflows/main.yml | 2 ++ libm/.github/workflows/publish.yml | 2 +- libm/ci/download-musl.sh | 2 +- libm/ci/run-docker.sh | 8 +++-- libm/ci/run.sh | 58 ++++++++++++++++++++---------- 5 files changed, 48 insertions(+), 24 deletions(-) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yml index 35b307f77..30976d472 100644 --- a/libm/.github/workflows/main.yml +++ b/libm/.github/workflows/main.yml @@ -6,6 +6,7 @@ on: pull_request: env: + CARGO_TERM_COLOR: always CARGO_TERM_VERBOSE: true RUSTDOCFLAGS: -Dwarnings RUSTFLAGS: -Dwarnings @@ -88,6 +89,7 @@ jobs: rustup default "$channel" rustup target add "${{ matrix.target }}" rustup component add clippy llvm-tools-preview + - uses: taiki-e/install-action@nextest - uses: Swatinem/rust-cache@v2 with: key: ${{ matrix.target }} diff --git a/libm/.github/workflows/publish.yml b/libm/.github/workflows/publish.yml index e715c6187..15904079d 100644 --- a/libm/.github/workflows/publish.yml +++ b/libm/.github/workflows/publish.yml @@ -12,7 +12,7 @@ on: jobs: release-plz: name: Release-plz - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - name: Checkout repository uses: actions/checkout@v4 diff --git a/libm/ci/download-musl.sh b/libm/ci/download-musl.sh index d0d8b310e..039e96157 100755 --- a/libm/ci/download-musl.sh +++ b/libm/ci/download-musl.sh @@ -7,7 +7,7 @@ fname=musl-1.2.5.tar.gz sha=a9a118bbe84d8764da0ea0d28b3ab3fae8477fc7e4085d90102b8596fc7c75e4 mkdir musl -curl "https://musl.libc.org/releases/$fname" -O +curl -L "https://musl.libc.org/releases/$fname" -O case "$(uname -s)" in MINGW*) diff --git a/libm/ci/run-docker.sh b/libm/ci/run-docker.sh index d9f29656d..6626e7226 100755 --- a/libm/ci/run-docker.sh +++ b/libm/ci/run-docker.sh @@ -24,12 +24,14 @@ run() { # will be owned by root mkdir -p target - docker build -t "$target" "ci/docker/$target" + set_env="HOME=/tmp PATH=\$PATH:/rust/bin:/cargo/bin" + docker build -t "libm-$target" "ci/docker/$target" 
docker run \ --rm \ --user "$(id -u):$(id -g)" \ -e CI \ -e RUSTFLAGS \ + -e CARGO_TERM_COLOR \ -e CARGO_HOME=/cargo \ -e CARGO_TARGET_DIR=/target \ -e "EMULATED=$emulated" \ @@ -39,8 +41,8 @@ run() { -v "$(rustc --print sysroot):/rust:ro" \ --init \ -w /checkout \ - "$target" \ - sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/run.sh $target" + "libm-$target" \ + sh -c "$set_env exec ci/run.sh $target" } if [ -z "$1" ]; then diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 008f32d5b..08ffaa81c 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -3,8 +3,10 @@ set -eux export RUST_BACKTRACE="${RUST_BACKTRACE:-full}" +export NEXTEST_STATUS_LEVEL=all target="${1:-}" +flags="" if [ -z "$target" ]; then host_target=$(rustc -vV | awk '/^host/ { print $2 }') @@ -13,22 +15,22 @@ if [ -z "$target" ]; then fi # We enumerate features manually. -extra_flags="--no-default-features" +flags="$flags --no-default-features" # Enable arch-specific routines when available. -extra_flags="$extra_flags --features arch" +flags="$flags --features arch" # Always enable `unstable-float` since it expands available API but does not # change any implementations. -extra_flags="$extra_flags --features unstable-float" +flags="$flags --features unstable-float" # We need to specifically skip tests for musl-math-sys on systems that can't # build musl since otherwise `--all` will activate it. 
case "$target" in # Can't build at all on MSVC, WASM, or thumb - *windows-msvc*) extra_flags="$extra_flags --exclude musl-math-sys" ;; - *wasm*) extra_flags="$extra_flags --exclude musl-math-sys" ;; - *thumb*) extra_flags="$extra_flags --exclude musl-math-sys" ;; + *windows-msvc*) flags="$flags --exclude musl-math-sys" ;; + *wasm*) flags="$flags --exclude musl-math-sys" ;; + *thumb*) flags="$flags --exclude musl-math-sys" ;; # We can build musl on MinGW but running tests gets a stack overflow *windows-gnu*) ;; @@ -38,7 +40,7 @@ case "$target" in *powerpc64le*) ;; # Everything else gets musl enabled - *) extra_flags="$extra_flags --features libm-test/build-musl" ;; + *) flags="$flags --features libm-test/build-musl" ;; esac # Configure which targets test against MPFR @@ -50,17 +52,17 @@ case "$target" in # Targets that aren't cross compiled work fine # FIXME(ci): we should be able to enable aarch64 Linux here once GHA # support rolls out. - x86_64*) extra_flags="$extra_flags --features libm-test/build-mpfr" ;; - i686*) extra_flags="$extra_flags --features libm-test/build-mpfr" ;; - i586*) extra_flags="$extra_flags --features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross" ;; + x86_64*) flags="$flags --features libm-test/build-mpfr" ;; + i686*) flags="$flags --features libm-test/build-mpfr" ;; + i586*) flags="$flags --features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross" ;; # Apple aarch64 is native - aarch64*apple*) extra_flags="$extra_flags --features libm-test/build-mpfr" ;; + aarch64*apple*) flags="$flags --features libm-test/build-mpfr" ;; esac # FIXME: `STATUS_DLL_NOT_FOUND` testing macros on CI. # case "$target" in - *windows-gnu) extra_flags="$extra_flags --exclude libm-macros" ;; + *windows-gnu) flags="$flags --exclude libm-macros" ;; esac # Make sure we can build with overriding features. @@ -76,13 +78,31 @@ if [ "${BUILD_ONLY:-}" = "1" ]; then exit fi -# Otherwise, run the test suite. 
- -cmd="cargo test --all --target $target $extra_flags" +flags="$flags --all --target $target" +cmd="cargo test $flags" +profile="--profile" + +# If nextest is available, use that +command -v cargo-nextest && nextest=1 || nextest=0 +if [ "$nextest" = "1" ]; then + # Workaround for https://github.com/nextest-rs/nextest/issues/2066 + if [ -f /.dockerenv ]; then + cfg_file="/tmp/nextest-config.toml" + echo "[store]" >> "$cfg_file" + echo "dir = \"$CARGO_TARGET_DIR/nextest\"" >> "$cfg_file" + cfg_flag="--config-file $cfg_file" + fi + + cmd="cargo nextest run ${cfg_flag:-} $flags" + profile="--cargo-profile" +fi # Test once without intrinsics $cmd +# Run doctests if they were excluded by nextest +[ "$nextest" = "1" ] && cargo test --doc $flags + # Exclude the macros and utile crates from the rest of the tests to save CI # runtime, they shouldn't have anything feature- or opt-level-dependent. cmd="$cmd --exclude util --exclude libm-macros" @@ -93,10 +113,10 @@ $cmd --features unstable-intrinsics --benches # Test the same in release mode, which also increases coverage. Also ensure # the soft float routines are checked. -$cmd --profile release-checked -$cmd --profile release-checked --features force-soft-floats -$cmd --profile release-checked --features unstable-intrinsics -$cmd --profile release-checked --features unstable-intrinsics --benches +$cmd "$profile" release-checked +$cmd "$profile" release-checked --features force-soft-floats +$cmd "$profile" release-checked --features unstable-intrinsics +$cmd "$profile" release-checked --features unstable-intrinsics --benches # Ensure that the routines do not panic. 
ENSURE_NO_PANIC=1 cargo build -p libm --target "$target" --no-default-features --release From 6d8b8503ae8bc1c127ffbddae96061c709976d56 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 14 Jan 2025 03:24:14 +0000 Subject: [PATCH 1074/1459] Change `.yml` files to the canonical extension `.yaml` --- libm/.github/workflows/{main.yml => main.yaml} | 0 libm/.github/workflows/{publish.yml => publish.yaml} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename libm/.github/workflows/{main.yml => main.yaml} (100%) rename libm/.github/workflows/{publish.yml => publish.yaml} (100%) diff --git a/libm/.github/workflows/main.yml b/libm/.github/workflows/main.yaml similarity index 100% rename from libm/.github/workflows/main.yml rename to libm/.github/workflows/main.yaml diff --git a/libm/.github/workflows/publish.yml b/libm/.github/workflows/publish.yaml similarity index 100% rename from libm/.github/workflows/publish.yml rename to libm/.github/workflows/publish.yaml From e116e0cee75f4cc5b729400a322fa10be719ba35 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 14 Jan 2025 07:46:20 +0000 Subject: [PATCH 1075/1459] Slightly restructure `ci/calculate-exhaustive-matrix.py` Change this script into a generic CI utility that we will be able to expand in the future. 
--- libm/.github/workflows/main.yaml | 3 +- ...culate-exhaustive-matrix.py => ci-util.py} | 30 +++++++++++++++---- 2 files changed, 26 insertions(+), 7 deletions(-) rename libm/ci/{calculate-exhaustive-matrix.py => ci-util.py} (87%) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index 30976d472..40b67c4c2 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -7,7 +7,6 @@ on: env: CARGO_TERM_COLOR: always - CARGO_TERM_VERBOSE: true RUSTDOCFLAGS: -Dwarnings RUSTFLAGS: -Dwarnings RUST_BACKTRACE: full @@ -202,7 +201,7 @@ jobs: - name: Fetch pull request ref run: git fetch origin "$GITHUB_REF:$GITHUB_REF" if: github.event_name == 'pull_request' - - run: python3 ci/calculate-exhaustive-matrix.py >> "$GITHUB_OUTPUT" + - run: python3 ci/ci-util.py generate-matrix >> "$GITHUB_OUTPUT" id: script extensive: diff --git a/libm/ci/calculate-exhaustive-matrix.py b/libm/ci/ci-util.py similarity index 87% rename from libm/ci/calculate-exhaustive-matrix.py rename to libm/ci/ci-util.py index 8b42f9389..733ec26fa 100755 --- a/libm/ci/calculate-exhaustive-matrix.py +++ b/libm/ci/ci-util.py @@ -1,18 +1,30 @@ #!/usr/bin/env python3 -"""Calculate which exhaustive tests should be run as part of CI. +"""Utilities for CI. This dynamically prepares a list of routines that had a source file change based on git history. """ +import json import subprocess as sp import sys -import json from dataclasses import dataclass +from inspect import cleandoc from os import getenv from pathlib import Path from typing import TypedDict +USAGE = cleandoc( + """ + usage: + + ./ci/ci-util.py + + SUBCOMMAND: + generate-matrix Calculate a matrix of which functions had source change, + print that as JSON object. 
+ """ +) REPO_ROOT = Path(__file__).parent.parent GIT = ["git", "-C", REPO_ROOT] @@ -139,9 +151,17 @@ def eprint(*args, **kwargs): def main(): - ctx = Context() - output = ctx.make_workflow_output() - print(f"matrix={output}") + match sys.argv[1:]: + case ["generate-matrix"]: + ctx = Context() + output = ctx.make_workflow_output() + print(f"matrix={output}") + case ["--help" | "-h"]: + print(USAGE) + exit() + case _: + eprint(USAGE) + exit(1) if __name__ == "__main__": From 9e98cfd9f95e2c352318d18e0ec4a70a63afc4c7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 15 Jan 2025 00:54:55 +0000 Subject: [PATCH 1076/1459] Pass --max-fail to nextest so it doesn't fail fast --- libm/ci/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 08ffaa81c..bb749b72a 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -93,7 +93,7 @@ if [ "$nextest" = "1" ]; then cfg_flag="--config-file $cfg_file" fi - cmd="cargo nextest run ${cfg_flag:-} $flags" + cmd="cargo nextest run ${cfg_flag:-} --max-fail=10 $flags" profile="--cargo-profile" fi From 6fb6eaba4c27388ceac9558f0343d280d6378997 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 15 Jan 2025 01:05:38 +0000 Subject: [PATCH 1077/1459] Add an override for a recent failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Failed on i686: ──── STDERR: libm-test::bench/random y1f/crate thread 'main' panicked at crates/libm-test/benches/random.rs:76:65: called `Result::unwrap()` on an `Err` value: ynf Caused by: 0: input: (213, 109.15641) (0x000000d5, 0x42da5015) expected: -3.3049217e38 0xff78a27a actual: -inf 0xff800000 1: mismatched infinities --- libm/crates/libm-test/src/precision.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 15913fe6d..0b3fe89be 100644 --- a/libm/crates/libm-test/src/precision.rs +++ 
b/libm/crates/libm-test/src/precision.rs @@ -454,6 +454,13 @@ impl MaybeOverride<(i32, f32)> for SpecialCase { XFAIL } + // `ynf(213, 109.15641) = -inf` with our library, should be finite. + (_, BaseName::Yn) + if input.0 > 200 && !expected.is_infinite() && actual.is_infinite() => + { + XFAIL + } + _ => None, } } From d1d275d7d5266a1e9f712355976b682ff03b05bd Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 15 Dec 2024 09:11:37 +0000 Subject: [PATCH 1078/1459] Use a C-safe return type for `__rust_[ui]128_*` overflowing intrinsics Most of our Rust-specific overflowing intrinsics currently return `(i128, bool)`, which is not guaranteed to have a stable ABI. Switch to returning the overflow via a mutable parameter and only directly returning the integer result. `__rust_i128_mulo` now matches the function signature of `__muloti4`, but they do not share the same ABI on Windows so we cannot easily deduplicate them. --- src/int/addsub.rs | 24 ++++++++++++++++-------- src/int/mul.rs | 12 ++++++++---- testcrate/tests/addsub.rs | 18 ++++++++++-------- testcrate/tests/mul.rs | 9 +++++---- 4 files changed, 39 insertions(+), 24 deletions(-) diff --git a/src/int/addsub.rs b/src/int/addsub.rs index e95590d84..1f84e8eb1 100644 --- a/src/int/addsub.rs +++ b/src/int/addsub.rs @@ -66,31 +66,39 @@ intrinsics! 
{ AddSub::add(a,b) } - pub extern "C" fn __rust_i128_addo(a: i128, b: i128) -> (i128, bool) { - a.addo(b) + pub extern "C" fn __rust_i128_addo(a: i128, b: i128, oflow: &mut i32) -> i128 { + let (add, o) = a.addo(b); + *oflow = o.into(); + add } pub extern "C" fn __rust_u128_add(a: u128, b: u128) -> u128 { AddSub::add(a,b) } - pub extern "C" fn __rust_u128_addo(a: u128, b: u128) -> (u128, bool) { - a.addo(b) + pub extern "C" fn __rust_u128_addo(a: u128, b: u128, oflow: &mut i32) -> u128 { + let (add, o) = a.addo(b); + *oflow = o.into(); + add } pub extern "C" fn __rust_i128_sub(a: i128, b: i128) -> i128 { AddSub::sub(a,b) } - pub extern "C" fn __rust_i128_subo(a: i128, b: i128) -> (i128, bool) { - a.subo(b) + pub extern "C" fn __rust_i128_subo(a: i128, b: i128, oflow: &mut i32) -> i128 { + let (sub, o) = a.subo(b); + *oflow = o.into(); + sub } pub extern "C" fn __rust_u128_sub(a: u128, b: u128) -> u128 { AddSub::sub(a,b) } - pub extern "C" fn __rust_u128_subo(a: u128, b: u128) -> (u128, bool) { - a.subo(b) + pub extern "C" fn __rust_u128_subo(a: u128, b: u128, oflow: &mut i32) -> u128 { + let (sub, o) = a.subo(b); + *oflow = o.into(); + sub } } diff --git a/src/int/mul.rs b/src/int/mul.rs index e0093a725..040c69342 100644 --- a/src/int/mul.rs +++ b/src/int/mul.rs @@ -128,11 +128,15 @@ intrinsics! 
{ mul } - pub extern "C" fn __rust_i128_mulo(a: i128, b: i128) -> (i128, bool) { - i128_overflowing_mul(a, b) + pub extern "C" fn __rust_i128_mulo(a: i128, b: i128, oflow: &mut i32) -> i128 { + let (mul, o) = i128_overflowing_mul(a, b); + *oflow = o.into(); + mul } - pub extern "C" fn __rust_u128_mulo(a: u128, b: u128) -> (u128, bool) { - a.mulo(b) + pub extern "C" fn __rust_u128_mulo(a: u128, b: u128, oflow: &mut i32) -> u128 { + let (mul, o) = a.mulo(b); + *oflow = o.into(); + mul } } diff --git a/testcrate/tests/addsub.rs b/testcrate/tests/addsub.rs index 284a2bf5a..3c0e20f77 100644 --- a/testcrate/tests/addsub.rs +++ b/testcrate/tests/addsub.rs @@ -44,20 +44,22 @@ mod int_addsub { use compiler_builtins::int::addsub::{$fn_add, $fn_sub}; fuzz_2(N, |x: $i, y: $i| { - let add0 = x.overflowing_add(y); - let sub0 = x.overflowing_sub(y); - let add1: ($i, bool) = $fn_add(x, y); - let sub1: ($i, bool) = $fn_sub(x, y); - if add0.0 != add1.0 || add0.1 != add1.1 { + let (add0, add_o0)= x.overflowing_add(y); + let (sub0, sub_o0)= x.overflowing_sub(y); + let mut add_o1 = 0; + let mut sub_o1 = 0; + let add1: $i = $fn_add(x, y, &mut add_o1); + let sub1: $i = $fn_sub(x, y, &mut sub_o1); + if add0 != add1 || i32::from(add_o0) != add_o1 { panic!( "{}({}, {}): std: {:?}, builtins: {:?}", - stringify!($fn_add), x, y, add0, add1 + stringify!($fn_add), x, y, (add0, add_o0) , (add1, add_o1) ); } - if sub0.0 != sub1.0 || sub0.1 != sub1.1 { + if sub0 != sub1 || i32::from(sub_o0) != sub_o1 { panic!( "{}({}, {}): std: {:?}, builtins: {:?}", - stringify!($fn_sub), x, y, sub0, sub1 + stringify!($fn_sub), x, y, (sub0, sub_o0) , (sub1, sub_o1) ); } }); diff --git a/testcrate/tests/mul.rs b/testcrate/tests/mul.rs index 449d19480..2113b177d 100644 --- a/testcrate/tests/mul.rs +++ b/testcrate/tests/mul.rs @@ -73,9 +73,10 @@ mod int_overflowing_mul { use compiler_builtins::int::mul::{__rust_i128_mulo, __rust_u128_mulo}; fuzz_2(N, |x: u128, y: u128| { + let mut o1 = 0; let (mul0, o0) = 
x.overflowing_mul(y); - let (mul1, o1) = __rust_u128_mulo(x, y); - if mul0 != mul1 || o0 != o1 { + let mul1 = __rust_u128_mulo(x, y, &mut o1); + if mul0 != mul1 || i32::from(o0) != o1 { panic!( "__rust_u128_mulo({}, {}): std: ({}, {}), builtins: ({}, {})", x, y, mul0, o0, mul1, o1 @@ -84,8 +85,8 @@ mod int_overflowing_mul { let x = x as i128; let y = y as i128; let (mul0, o0) = x.overflowing_mul(y); - let (mul1, o1) = __rust_i128_mulo(x, y); - if mul0 != mul1 || o0 != o1 { + let mul1 = __rust_i128_mulo(x, y, &mut o1); + if mul0 != mul1 || i32::from(o0) != o1 { panic!( "__rust_i128_mulo({}, {}): std: ({}, {}), builtins: ({}, {})", x, y, mul0, o0, mul1, o1 From 6e1d8d4fb8f6a62d2266d3fd5d3b713353ad6905 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 15 Jan 2025 03:53:29 +0000 Subject: [PATCH 1079/1459] chore: release v0.1.143 --- CHANGELOG.md | 6 ++++++ Cargo.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 305f2790f..3c21dc2d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.143](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.142...compiler_builtins-v0.1.143) - 2025-01-15 + +### Other + +- Use a C-safe return type for `__rust_[ui]128_*` overflowing intrinsics + ## [0.1.142](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.141...compiler_builtins-v0.1.142) - 2025-01-07 ### Other diff --git a/Cargo.toml b/Cargo.toml index 943adc4c1..4d5bbe4a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.142" +version = "0.1.143" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 
d4abaf4efafee62b33259b782449c615a8e9b574 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 15 Jan 2025 05:27:14 +0000 Subject: [PATCH 1080/1459] Eliminate the use of `public_test_dep!` Replace `public_test_dep!` by placing optionally public items into new modules, then controlling what is exported with the `public-test-deps` feature. This is nicer for automatic formatting and diagnostics. This is a reland of 2e2a9255 ("Eliminate the use of `public_test_dep!`"), which was reverted in 47e50fd2 ('Revert "Eliminate the use of..."') due to a bug exposed at [1]. This was fixed in [2], so the cleanup should be able to be applied again. [1]: https://github.com/rust-lang/rust/pull/128691 [2]: https://github.com/rust-lang/rust/pull/135278 --- src/float/mod.rs | 195 +---------- src/float/traits.rs | 189 +++++++++++ src/int/leading_zeros.rs | 227 ++++++------- src/int/mod.rs | 424 +----------------------- src/int/specialized_div_rem/delegate.rs | 4 +- src/int/trailing_zeros.rs | 69 ++-- src/int/traits.rs | 411 +++++++++++++++++++++++ src/macros.rs | 16 - 8 files changed, 763 insertions(+), 772 deletions(-) create mode 100644 src/float/traits.rs create mode 100644 src/int/traits.rs diff --git a/src/float/mod.rs b/src/float/mod.rs index 6ee55950e..41b308626 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -1,7 +1,3 @@ -use core::ops; - -use crate::int::{DInt, Int, MinInt}; - pub mod add; pub mod cmp; pub mod conv; @@ -10,192 +6,11 @@ pub mod extend; pub mod mul; pub mod pow; pub mod sub; +pub(crate) mod traits; pub mod trunc; -/// Wrapper to extract the integer type half of the float's size -pub(crate) type HalfRep = <::Int as DInt>::H; - -public_test_dep! 
{ -/// Trait for some basic operations on floats -#[allow(dead_code)] -pub(crate) trait Float: - Copy - + core::fmt::Debug - + PartialEq - + PartialOrd - + ops::AddAssign - + ops::MulAssign - + ops::Add - + ops::Sub - + ops::Div - + ops::Rem -{ - /// A uint of the same width as the float - type Int: Int; - - /// A int of the same width as the float - type SignedInt: Int + MinInt; - - /// An int capable of containing the exponent bits plus a sign bit. This is signed. - type ExpInt: Int; - - const ZERO: Self; - const ONE: Self; - - /// The bitwidth of the float type. - const BITS: u32; - - /// The bitwidth of the significand. - const SIG_BITS: u32; - - /// The bitwidth of the exponent. - const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1; - - /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite - /// representation. - /// - /// This is in the rightmost position, use `EXP_MASK` for the shifted value. - const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1; - - /// The exponent bias value. - const EXP_BIAS: u32 = Self::EXP_SAT >> 1; - - /// A mask for the sign bit. - const SIGN_MASK: Self::Int; - - /// A mask for the significand. - const SIG_MASK: Self::Int; - - /// The implicit bit of the float format. - const IMPLICIT_BIT: Self::Int; - - /// A mask for the exponent. - const EXP_MASK: Self::Int; - - /// Returns `self` transmuted to `Self::Int` - fn to_bits(self) -> Self::Int; - - /// Returns `self` transmuted to `Self::SignedInt` - fn to_bits_signed(self) -> Self::SignedInt; - - /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be - /// represented in multiple different ways. This method returns `true` if two NaNs are - /// compared. - fn eq_repr(self, rhs: Self) -> bool; - - /// Returns true if the sign is negative - fn is_sign_negative(self) -> bool; - - /// Returns the exponent, not adjusting for bias. 
- fn exp(self) -> Self::ExpInt; - - /// Returns the significand with no implicit bit (or the "fractional" part) - fn frac(self) -> Self::Int; - - /// Returns the significand with implicit bit - fn imp_frac(self) -> Self::Int; - - /// Returns a `Self::Int` transmuted back to `Self` - fn from_bits(a: Self::Int) -> Self; - - /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. - fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self; - - fn abs(self) -> Self { - let abs_mask = !Self::SIGN_MASK ; - Self::from_bits(self.to_bits() & abs_mask) - } - - /// Returns (normalized exponent, normalized significand) - fn normalize(significand: Self::Int) -> (i32, Self::Int); - - /// Returns if `self` is subnormal - fn is_subnormal(self) -> bool; -} -} - -macro_rules! float_impl { - ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { - impl Float for $ty { - type Int = $ity; - type SignedInt = $sity; - type ExpInt = $expty; - - const ZERO: Self = 0.0; - const ONE: Self = 1.0; - - const BITS: u32 = $bits; - const SIG_BITS: u32 = $significand_bits; - - const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); - const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1; - const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS; - const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK); - - fn to_bits(self) -> Self::Int { - self.to_bits() - } - fn to_bits_signed(self) -> Self::SignedInt { - self.to_bits() as Self::SignedInt - } - fn eq_repr(self, rhs: Self) -> bool { - #[cfg(feature = "mangled-names")] - fn is_nan(x: $ty) -> bool { - // When using mangled-names, the "real" compiler-builtins might not have the - // necessary builtin (__unordtf2) to test whether `f128` is NaN. 
- // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin - // x is NaN if all the bits of the exponent are set and the significand is non-0 - x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0 - } - #[cfg(not(feature = "mangled-names"))] - fn is_nan(x: $ty) -> bool { - x.is_nan() - } - if is_nan(self) && is_nan(rhs) { - true - } else { - self.to_bits() == rhs.to_bits() - } - } - fn is_sign_negative(self) -> bool { - self.is_sign_negative() - } - fn exp(self) -> Self::ExpInt { - ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt - } - fn frac(self) -> Self::Int { - self.to_bits() & Self::SIG_MASK - } - fn imp_frac(self) -> Self::Int { - self.frac() | Self::IMPLICIT_BIT - } - fn from_bits(a: Self::Int) -> Self { - Self::from_bits(a) - } - fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self { - Self::from_bits( - ((negative as Self::Int) << (Self::BITS - 1)) - | ((exponent << Self::SIG_BITS) & Self::EXP_MASK) - | (significand & Self::SIG_MASK), - ) - } - fn normalize(significand: Self::Int) -> (i32, Self::Int) { - let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); - ( - 1i32.wrapping_sub(shift as i32), - significand << shift as Self::Int, - ) - } - fn is_subnormal(self) -> bool { - (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO - } - } - }; -} +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use traits::{Float, HalfRep}; -#[cfg(f16_enabled)] -float_impl!(f16, u16, i16, i8, 16, 10); -float_impl!(f32, u32, i32, i16, 32, 23); -float_impl!(f64, u64, i64, i16, 64, 52); -#[cfg(f128_enabled)] -float_impl!(f128, u128, i128, i16, 128, 112); +#[cfg(feature = "public-test-deps")] +pub use traits::{Float, HalfRep}; diff --git a/src/float/traits.rs b/src/float/traits.rs new file mode 100644 index 000000000..8ccaa7bcb --- /dev/null +++ b/src/float/traits.rs @@ -0,0 +1,189 @@ +use core::ops; + +use crate::int::{DInt, Int, MinInt}; + +/// Wrapper to 
extract the integer type half of the float's size +pub type HalfRep = <::Int as DInt>::H; + +/// Trait for some basic operations on floats +#[allow(dead_code)] +pub trait Float: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::MulAssign + + ops::Add + + ops::Sub + + ops::Div + + ops::Rem +{ + /// A uint of the same width as the float + type Int: Int; + + /// A int of the same width as the float + type SignedInt: Int + MinInt; + + /// An int capable of containing the exponent bits plus a sign bit. This is signed. + type ExpInt: Int; + + const ZERO: Self; + const ONE: Self; + + /// The bitwidth of the float type. + const BITS: u32; + + /// The bitwidth of the significand. + const SIG_BITS: u32; + + /// The bitwidth of the exponent. + const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1; + + /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite + /// representation. + /// + /// This is in the rightmost position, use `EXP_MASK` for the shifted value. + const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1; + + /// The exponent bias value. + const EXP_BIAS: u32 = Self::EXP_SAT >> 1; + + /// A mask for the sign bit. + const SIGN_MASK: Self::Int; + + /// A mask for the significand. + const SIG_MASK: Self::Int; + + /// The implicit bit of the float format. + const IMPLICIT_BIT: Self::Int; + + /// A mask for the exponent. + const EXP_MASK: Self::Int; + + /// Returns `self` transmuted to `Self::Int` + fn to_bits(self) -> Self::Int; + + /// Returns `self` transmuted to `Self::SignedInt` + fn to_bits_signed(self) -> Self::SignedInt; + + /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be + /// represented in multiple different ways. This method returns `true` if two NaNs are + /// compared. + fn eq_repr(self, rhs: Self) -> bool; + + /// Returns true if the sign is negative + fn is_sign_negative(self) -> bool; + + /// Returns the exponent, not adjusting for bias. 
+ fn exp(self) -> Self::ExpInt; + + /// Returns the significand with no implicit bit (or the "fractional" part) + fn frac(self) -> Self::Int; + + /// Returns the significand with implicit bit + fn imp_frac(self) -> Self::Int; + + /// Returns a `Self::Int` transmuted back to `Self` + fn from_bits(a: Self::Int) -> Self; + + /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. + fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self; + + fn abs(self) -> Self { + let abs_mask = !Self::SIGN_MASK; + Self::from_bits(self.to_bits() & abs_mask) + } + + /// Returns (normalized exponent, normalized significand) + fn normalize(significand: Self::Int) -> (i32, Self::Int); + + /// Returns if `self` is subnormal + fn is_subnormal(self) -> bool; +} + +macro_rules! float_impl { + ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { + impl Float for $ty { + type Int = $ity; + type SignedInt = $sity; + type ExpInt = $expty; + + const ZERO: Self = 0.0; + const ONE: Self = 1.0; + + const BITS: u32 = $bits; + const SIG_BITS: u32 = $significand_bits; + + const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); + const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1; + const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS; + const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK); + + fn to_bits(self) -> Self::Int { + self.to_bits() + } + fn to_bits_signed(self) -> Self::SignedInt { + self.to_bits() as Self::SignedInt + } + fn eq_repr(self, rhs: Self) -> bool { + #[cfg(feature = "mangled-names")] + fn is_nan(x: $ty) -> bool { + // When using mangled-names, the "real" compiler-builtins might not have the + // necessary builtin (__unordtf2) to test whether `f128` is NaN. 
+ // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin + // x is NaN if all the bits of the exponent are set and the significand is non-0 + x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0 + } + #[cfg(not(feature = "mangled-names"))] + fn is_nan(x: $ty) -> bool { + x.is_nan() + } + if is_nan(self) && is_nan(rhs) { + true + } else { + self.to_bits() == rhs.to_bits() + } + } + fn is_sign_negative(self) -> bool { + self.is_sign_negative() + } + fn exp(self) -> Self::ExpInt { + ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt + } + fn frac(self) -> Self::Int { + self.to_bits() & Self::SIG_MASK + } + fn imp_frac(self) -> Self::Int { + self.frac() | Self::IMPLICIT_BIT + } + fn from_bits(a: Self::Int) -> Self { + Self::from_bits(a) + } + fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self { + Self::from_bits( + ((negative as Self::Int) << (Self::BITS - 1)) + | ((exponent << Self::SIG_BITS) & Self::EXP_MASK) + | (significand & Self::SIG_MASK), + ) + } + fn normalize(significand: Self::Int) -> (i32, Self::Int) { + let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); + ( + 1i32.wrapping_sub(shift as i32), + significand << shift as Self::Int, + ) + } + fn is_subnormal(self) -> bool { + (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO + } + } + }; +} + +#[cfg(f16_enabled)] +float_impl!(f16, u16, i16, i8, 16, 10); +float_impl!(f32, u32, i32, i16, 32, 23); +float_impl!(f64, u64, i64, i16, 64, 52); +#[cfg(f128_enabled)] +float_impl!(f128, u128, i128, i16, 128, 112); diff --git a/src/int/leading_zeros.rs b/src/int/leading_zeros.rs index 1fee9fcf5..ba735aa74 100644 --- a/src/int/leading_zeros.rs +++ b/src/int/leading_zeros.rs @@ -3,135 +3,138 @@ // adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`. // Compilers will insert the check for zero in cases where it is needed. 
-use crate::int::{CastInto, Int}; +#[cfg(feature = "public-test-deps")] +pub use implementation::{leading_zeros_default, leading_zeros_riscv}; +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use implementation::{leading_zeros_default, leading_zeros_riscv}; -public_test_dep! { -/// Returns the number of leading binary zeros in `x`. -#[allow(dead_code)] -pub(crate) fn leading_zeros_default>(x: T) -> usize { - // The basic idea is to test if the higher bits of `x` are zero and bisect the number - // of leading zeros. It is possible for all branches of the bisection to use the same - // code path by conditionally shifting the higher parts down to let the next bisection - // step work on the higher or lower parts of `x`. Instead of starting with `z == 0` - // and adding to the number of zeros, it is slightly faster to start with - // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros, - // because it simplifies the final bisection step. - let mut x = x; - // the number of potential leading zeros - let mut z = T::BITS as usize; - // a temporary - let mut t: T; +mod implementation { + use crate::int::{CastInto, Int}; - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { - t = x >> 32; + /// Returns the number of leading binary zeros in `x`. + #[allow(dead_code)] + pub fn leading_zeros_default>(x: T) -> usize { + // The basic idea is to test if the higher bits of `x` are zero and bisect the number + // of leading zeros. It is possible for all branches of the bisection to use the same + // code path by conditionally shifting the higher parts down to let the next bisection + // step work on the higher or lower parts of `x`. Instead of starting with `z == 0` + // and adding to the number of zeros, it is slightly faster to start with + // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros, + // because it simplifies the final bisection step. 
+ let mut x = x; + // the number of potential leading zeros + let mut z = T::BITS as usize; + // a temporary + let mut t: T; + + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + t = x >> 32; + if t != T::ZERO { + z -= 32; + x = t; + } + } + if T::BITS >= 32 { + t = x >> 16; + if t != T::ZERO { + z -= 16; + x = t; + } + } + const { assert!(T::BITS >= 16) }; + t = x >> 8; if t != T::ZERO { - z -= 32; + z -= 8; x = t; } - } - if T::BITS >= 32 { - t = x >> 16; + t = x >> 4; if t != T::ZERO { - z -= 16; + z -= 4; x = t; } - } - const { assert!(T::BITS >= 16) }; - t = x >> 8; - if t != T::ZERO { - z -= 8; - x = t; - } - t = x >> 4; - if t != T::ZERO { - z -= 4; - x = t; - } - t = x >> 2; - if t != T::ZERO { - z -= 2; - x = t; - } - // the last two bisections are combined into one conditional - t = x >> 1; - if t != T::ZERO { - z - 2 - } else { - z - x.cast() - } + t = x >> 2; + if t != T::ZERO { + z -= 2; + x = t; + } + // the last two bisections are combined into one conditional + t = x >> 1; + if t != T::ZERO { + z - 2 + } else { + z - x.cast() + } - // We could potentially save a few cycles by using the LUT trick from - // "https://embeddedgurus.com/state-space/2014/09/ - // fast-deterministic-and-portable-counting-leading-zeros/". - // However, 256 bytes for a LUT is too large for embedded use cases. We could remove - // the last 3 bisections and use this 16 byte LUT for the rest of the work: - //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]; - //z -= LUT[x] as usize; - //z - // However, it ends up generating about the same number of instructions. When benchmarked - // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO - // execution effects. Changing to using a LUT and branching is risky for smaller cores. -} -} + // We could potentially save a few cycles by using the LUT trick from + // "https://embeddedgurus.com/state-space/2014/09/ + // fast-deterministic-and-portable-counting-leading-zeros/". 
+ // However, 256 bytes for a LUT is too large for embedded use cases. We could remove + // the last 3 bisections and use this 16 byte LUT for the rest of the work: + //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]; + //z -= LUT[x] as usize; + //z + // However, it ends up generating about the same number of instructions. When benchmarked + // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO + // execution effects. Changing to using a LUT and branching is risky for smaller cores. + } -// The above method does not compile well on RISC-V (because of the lack of predicated -// instructions), producing code with many branches or using an excessively long -// branchless solution. This method takes advantage of the set-if-less-than instruction on -// RISC-V that allows `(x >= power-of-two) as usize` to be branchless. + // The above method does not compile well on RISC-V (because of the lack of predicated + // instructions), producing code with many branches or using an excessively long + // branchless solution. This method takes advantage of the set-if-less-than instruction on + // RISC-V that allows `(x >= power-of-two) as usize` to be branchless. -public_test_dep! { -/// Returns the number of leading binary zeros in `x`. -#[allow(dead_code)] -pub(crate) fn leading_zeros_riscv>(x: T) -> usize { - let mut x = x; - // the number of potential leading zeros - let mut z = T::BITS; - // a temporary - let mut t: u32; + /// Returns the number of leading binary zeros in `x`. + #[allow(dead_code)] + pub fn leading_zeros_riscv>(x: T) -> usize { + let mut x = x; + // the number of potential leading zeros + let mut z = T::BITS; + // a temporary + let mut t: u32; - // RISC-V does not have a set-if-greater-than-or-equal instruction and - // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is - // still the most optimal method. 
A conditional set can only be turned into a single - // immediate instruction if `x` is compared with an immediate `imm` (that can fit into - // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the - // right). If we try to save an instruction by using `x < imm` for each bisection, we - // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, - // but the immediate will never fit into 12 bits and never save an instruction. - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { - // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise - // `t` is set to 0. - t = ((x >= (T::ONE << 32)) as u32) << 5; - // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the - // next step to process. + // RISC-V does not have a set-if-greater-than-or-equal instruction and + // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is + // still the most optimal method. A conditional set can only be turned into a single + // immediate instruction if `x` is compared with an immediate `imm` (that can fit into + // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the + // right). If we try to save an instruction by using `x < imm` for each bisection, we + // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, + // but the immediate will never fit into 12 bits and never save an instruction. + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise + // `t` is set to 0. + t = ((x >= (T::ONE << 32)) as u32) << 5; + // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the + // next step to process. 
+ x >>= t; + // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential + // leading zeros + z -= t; + } + if T::BITS >= 32 { + t = ((x >= (T::ONE << 16)) as u32) << 4; + x >>= t; + z -= t; + } + const { assert!(T::BITS >= 16) }; + t = ((x >= (T::ONE << 8)) as u32) << 3; x >>= t; - // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential - // leading zeros z -= t; - } - if T::BITS >= 32 { - t = ((x >= (T::ONE << 16)) as u32) << 4; + t = ((x >= (T::ONE << 4)) as u32) << 2; + x >>= t; + z -= t; + t = ((x >= (T::ONE << 2)) as u32) << 1; x >>= t; z -= t; + t = (x >= (T::ONE << 1)) as u32; + x >>= t; + z -= t; + // All bits except the LSB are guaranteed to be zero for this final bisection step. + // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. + z as usize - x.cast() } - const { assert!(T::BITS >= 16) }; - t = ((x >= (T::ONE << 8)) as u32) << 3; - x >>= t; - z -= t; - t = ((x >= (T::ONE << 4)) as u32) << 2; - x >>= t; - z -= t; - t = ((x >= (T::ONE << 2)) as u32) << 1; - x >>= t; - z -= t; - t = (x >= (T::ONE << 1)) as u32; - x >>= t; - z -= t; - // All bits except the LSB are guaranteed to be zero for this final bisection step. - // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. - z as usize - x.cast() -} } intrinsics! { diff --git a/src/int/mod.rs b/src/int/mod.rs index c0d5a6715..1f1be711b 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -1,5 +1,3 @@ -use core::ops; - mod specialized_div_rem; pub mod addsub; @@ -10,425 +8,13 @@ pub mod mul; pub mod sdiv; pub mod shift; pub mod trailing_zeros; +mod traits; pub mod udiv; pub use big::{i256, u256}; -public_test_dep! { -/// Minimal integer implementations needed on all integer types, including wide integers. 
-#[allow(dead_code)] -pub(crate) trait MinInt: Copy - + core::fmt::Debug - + ops::BitOr - + ops::Not - + ops::Shl -{ - - /// Type with the same width but other signedness - type OtherSign: MinInt; - /// Unsigned version of Self - type UnsignedInt: MinInt; - - /// If `Self` is a signed integer - const SIGNED: bool; - - /// The bitwidth of the int type - const BITS: u32; - - const ZERO: Self; - const ONE: Self; - const MIN: Self; - const MAX: Self; -} -} - -public_test_dep! { -/// Trait for some basic operations on integers -#[allow(dead_code)] -pub(crate) trait Int: MinInt - + PartialEq - + PartialOrd - + ops::AddAssign - + ops::SubAssign - + ops::BitAndAssign - + ops::BitOrAssign - + ops::BitXorAssign - + ops::ShlAssign - + ops::ShrAssign - + ops::Add - + ops::Sub - + ops::Mul - + ops::Div - + ops::Shr - + ops::BitXor - + ops::BitAnd -{ - /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing - /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, - /// 112,119,120,125,126,127]. - const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(::BITS); - - /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. - const FUZZ_NUM: usize = { - let log2 = (::BITS - 1).count_ones() as usize; - if log2 == 3 { - // case for u8 - 6 - } else { - // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate - // boundaries. - 8 + (4 * (log2 - 4)) - } - }; - - fn unsigned(self) -> Self::UnsignedInt; - fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; - fn unsigned_abs(self) -> Self::UnsignedInt; - - fn from_bool(b: bool) -> Self; - - /// Prevents the need for excessive conversions between signed and unsigned - fn logical_shr(self, other: u32) -> Self; - - /// Absolute difference between two integers. 
- fn abs_diff(self, other: Self) -> Self::UnsignedInt; - - // copied from primitive integers, but put in a trait - fn is_zero(self) -> bool; - fn wrapping_neg(self) -> Self; - fn wrapping_add(self, other: Self) -> Self; - fn wrapping_mul(self, other: Self) -> Self; - fn wrapping_sub(self, other: Self) -> Self; - fn wrapping_shl(self, other: u32) -> Self; - fn wrapping_shr(self, other: u32) -> Self; - fn rotate_left(self, other: u32) -> Self; - fn overflowing_add(self, other: Self) -> (Self, bool); - fn leading_zeros(self) -> u32; - fn ilog2(self) -> u32; -} -} - -pub(crate) const fn make_fuzz_lengths(bits: u32) -> [u8; 20] { - let mut v = [0u8; 20]; - v[0] = 0; - v[1] = 1; - v[2] = 2; // important for parity and the iX::MIN case when reversed - let mut i = 3; - - // No need for any more until the byte boundary, because there should be no algorithms - // that are sensitive to anything not next to byte boundaries after 2. We also scale - // in powers of two, which is important to prevent u128 corner tests from getting too - // big. - let mut l = 8; - loop { - if l >= ((bits / 2) as u8) { - break; - } - // get both sides of the byte boundary - v[i] = l - 1; - i += 1; - v[i] = l; - i += 1; - l *= 2; - } - - if bits != 8 { - // add the lower side of the middle boundary - v[i] = ((bits / 2) - 1) as u8; - i += 1; - } - - // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS - // boundary because of algorithms that split the high part up. We reverse the scaling - // as we go to Self::BITS. - let mid = i; - let mut j = 1; - loop { - v[i] = (bits as u8) - (v[mid - j]) - 1; - if j == mid { - break; - } - i += 1; - j += 1; - } - v -} - -macro_rules! 
int_impl_common { - ($ty:ty) => { - fn from_bool(b: bool) -> Self { - b as $ty - } - - fn logical_shr(self, other: u32) -> Self { - Self::from_unsigned(self.unsigned().wrapping_shr(other)) - } - - fn is_zero(self) -> bool { - self == Self::ZERO - } - - fn wrapping_neg(self) -> Self { - ::wrapping_neg(self) - } - - fn wrapping_add(self, other: Self) -> Self { - ::wrapping_add(self, other) - } - - fn wrapping_mul(self, other: Self) -> Self { - ::wrapping_mul(self, other) - } - fn wrapping_sub(self, other: Self) -> Self { - ::wrapping_sub(self, other) - } - - fn wrapping_shl(self, other: u32) -> Self { - ::wrapping_shl(self, other) - } - - fn wrapping_shr(self, other: u32) -> Self { - ::wrapping_shr(self, other) - } - - fn rotate_left(self, other: u32) -> Self { - ::rotate_left(self, other) - } - - fn overflowing_add(self, other: Self) -> (Self, bool) { - ::overflowing_add(self, other) - } - - fn leading_zeros(self) -> u32 { - ::leading_zeros(self) - } - - fn ilog2(self) -> u32 { - ::ilog2(self) - } - }; -} - -macro_rules! 
int_impl { - ($ity:ty, $uty:ty) => { - impl MinInt for $uty { - type OtherSign = $ity; - type UnsignedInt = $uty; - - const BITS: u32 = ::ZERO.count_zeros(); - const SIGNED: bool = Self::MIN != Self::ZERO; - - const ZERO: Self = 0; - const ONE: Self = 1; - const MIN: Self = ::MIN; - const MAX: Self = ::MAX; - } - - impl Int for $uty { - fn unsigned(self) -> $uty { - self - } - - // It makes writing macros easier if this is implemented for both signed and unsigned - #[allow(clippy::wrong_self_convention)] - fn from_unsigned(me: $uty) -> Self { - me - } - - fn unsigned_abs(self) -> Self { - self - } - - fn abs_diff(self, other: Self) -> Self { - self.abs_diff(other) - } - - int_impl_common!($uty); - } - - impl MinInt for $ity { - type OtherSign = $uty; - type UnsignedInt = $uty; - - const BITS: u32 = ::ZERO.count_zeros(); - const SIGNED: bool = Self::MIN != Self::ZERO; - - const ZERO: Self = 0; - const ONE: Self = 1; - const MIN: Self = ::MIN; - const MAX: Self = ::MAX; - } - - impl Int for $ity { - fn unsigned(self) -> $uty { - self as $uty - } - - fn from_unsigned(me: $uty) -> Self { - me as $ity - } - - fn unsigned_abs(self) -> Self::UnsignedInt { - self.unsigned_abs() - } - - fn abs_diff(self, other: Self) -> $uty { - self.abs_diff(other) - } - - int_impl_common!($ity); - } - }; -} - -int_impl!(isize, usize); -int_impl!(i8, u8); -int_impl!(i16, u16); -int_impl!(i32, u32); -int_impl!(i64, u64); -int_impl!(i128, u128); - -public_test_dep! { -/// Trait for integers twice the bit width of another integer. This is implemented for all -/// primitives except for `u8`, because there is not a smaller primitive. 
-pub(crate) trait DInt: MinInt { - /// Integer that is half the bit width of the integer this trait is implemented for - type H: HInt; - - /// Returns the low half of `self` - fn lo(self) -> Self::H; - /// Returns the high half of `self` - fn hi(self) -> Self::H; - /// Returns the low and high halves of `self` as a tuple - fn lo_hi(self) -> (Self::H, Self::H) { - (self.lo(), self.hi()) - } - /// Constructs an integer using lower and higher half parts - fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { - lo.zero_widen() | hi.widen_hi() - } -} -} - -public_test_dep! { -/// Trait for integers half the bit width of another integer. This is implemented for all -/// primitives except for `u128`, because it there is not a larger primitive. -pub(crate) trait HInt: Int { - /// Integer that is double the bit width of the integer this trait is implemented for - type D: DInt + MinInt; - - // NB: some of the below methods could have default implementations (e.g. `widen_hi`), but for - // unknown reasons this can cause infinite recursion when optimizations are disabled. See - // for context. - - /// Widens (using default extension) the integer to have double bit width - fn widen(self) -> Self::D; - /// Widens (zero extension only) the integer to have double bit width. This is needed to get - /// around problems with associated type bounds (such as `Int`) being unstable - fn zero_widen(self) -> Self::D; - /// Widens the integer to have double bit width and shifts the integer into the higher bits - fn widen_hi(self) -> Self::D; - /// Widening multiplication with zero widening. This cannot overflow. - fn zero_widen_mul(self, rhs: Self) -> Self::D; - /// Widening multiplication. This cannot overflow. - fn widen_mul(self, rhs: Self) -> Self::D; -} -} - -macro_rules! 
impl_d_int { - ($($X:ident $D:ident),*) => { - $( - impl DInt for $D { - type H = $X; - - fn lo(self) -> Self::H { - self as $X - } - fn hi(self) -> Self::H { - (self >> <$X as MinInt>::BITS) as $X - } - } - )* - }; -} - -macro_rules! impl_h_int { - ($($H:ident $uH:ident $X:ident),*) => { - $( - impl HInt for $H { - type D = $X; - - fn widen(self) -> Self::D { - self as $X - } - fn zero_widen(self) -> Self::D { - (self as $uH) as $X - } - fn zero_widen_mul(self, rhs: Self) -> Self::D { - self.zero_widen().wrapping_mul(rhs.zero_widen()) - } - fn widen_mul(self, rhs: Self) -> Self::D { - self.widen().wrapping_mul(rhs.widen()) - } - fn widen_hi(self) -> Self::D { - (self as $X) << ::BITS - } - } - )* - }; -} - -impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); -impl_h_int!( - u8 u8 u16, - u16 u16 u32, - u32 u32 u64, - u64 u64 u128, - i8 u8 i16, - i16 u16 i32, - i32 u32 i64, - i64 u64 i128 -); - -public_test_dep! { -/// Trait to express (possibly lossy) casting of integers -pub(crate) trait CastInto: Copy { - fn cast(self) -> T; -} - -pub(crate) trait CastFrom:Copy { - fn cast_from(value: T) -> Self; -} -} - -impl + Copy> CastFrom for T { - fn cast_from(value: U) -> Self { - value.cast() - } -} - -macro_rules! 
cast_into { - ($ty:ty) => { - cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); - }; - ($ty:ty; $($into:ty),*) => {$( - impl CastInto<$into> for $ty { - fn cast(self) -> $into { - self as $into - } - } - )*}; -} +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; -cast_into!(usize); -cast_into!(isize); -cast_into!(u8); -cast_into!(i8); -cast_into!(u16); -cast_into!(i16); -cast_into!(u32); -cast_into!(i32); -cast_into!(u64); -cast_into!(i64); -cast_into!(u128); -cast_into!(i128); +#[cfg(feature = "public-test-deps")] +pub use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; diff --git a/src/int/specialized_div_rem/delegate.rs b/src/int/specialized_div_rem/delegate.rs index 330c6e4f8..f5c6e5023 100644 --- a/src/int/specialized_div_rem/delegate.rs +++ b/src/int/specialized_div_rem/delegate.rs @@ -185,7 +185,6 @@ macro_rules! impl_delegate { }; } -public_test_dep! { /// Returns `n / d` and sets `*rem = n % d`. /// /// This specialization exists because: @@ -195,7 +194,7 @@ public_test_dep! { /// delegate algorithm strategy the only reasonably fast way to perform `u128` division. // used on SPARC #[allow(dead_code)] -pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { +pub fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { use super::*; let duo_lo = duo as u64; let duo_hi = (duo >> 64) as u64; @@ -316,4 +315,3 @@ pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { } } } -} diff --git a/src/int/trailing_zeros.rs b/src/int/trailing_zeros.rs index cea366b07..dbc0cce9f 100644 --- a/src/int/trailing_zeros.rs +++ b/src/int/trailing_zeros.rs @@ -1,44 +1,49 @@ -use crate::int::{CastInto, Int}; +#[cfg(feature = "public-test-deps")] +pub use implementation::trailing_zeros; +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use implementation::trailing_zeros; -public_test_dep! 
{ -/// Returns number of trailing binary zeros in `x`. -#[allow(dead_code)] -pub(crate) fn trailing_zeros + CastInto + CastInto>(x: T) -> usize { - let mut x = x; - let mut r: u32 = 0; - let mut t: u32; +mod implementation { + use crate::int::{CastInto, Int}; - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { - r += ((CastInto::::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0 - x >>= r; // remove 32 zero bits - } + /// Returns number of trailing binary zeros in `x`. + #[allow(dead_code)] + pub fn trailing_zeros + CastInto + CastInto>(x: T) -> usize { + let mut x = x; + let mut r: u32 = 0; + let mut t: u32; - if T::BITS >= 32 { - t = ((CastInto::::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0 - r += t; - x >>= t; // x = [0 - 0xFFFF] + higher garbage bits - } + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + r += ((CastInto::::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0 + x >>= r; // remove 32 zero bits + } - const { assert!(T::BITS >= 16) }; - t = ((CastInto::::cast(x) == 0) as u32) << 3; - x >>= t; // x = [0 - 0xFF] + higher garbage bits - r += t; + if T::BITS >= 32 { + t = ((CastInto::::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0 + r += t; + x >>= t; // x = [0 - 0xFFFF] + higher garbage bits + } - let mut x: u8 = x.cast(); + const { assert!(T::BITS >= 16) }; + t = ((CastInto::::cast(x) == 0) as u32) << 3; + x >>= t; // x = [0 - 0xFF] + higher garbage bits + r += t; - t = (((x & 0x0F) == 0) as u32) << 2; - x >>= t; // x = [0 - 0xF] + higher garbage bits - r += t; + let mut x: u8 = x.cast(); - t = (((x & 0x3) == 0) as u32) << 1; - x >>= t; // x = [0 - 0x3] + higher garbage bits - r += t; + t = (((x & 0x0F) == 0) as u32) << 2; + x >>= t; // x = [0 - 0xF] + higher garbage bits + r += t; - x &= 3; + t = (((x & 0x3) == 0) as u32) << 1; + x >>= t; // x = [0 - 0x3] + higher garbage bits + r += t; - r as usize + ((2 - (x >> 1) as usize) & (((x & 1) 
== 0) as usize).wrapping_neg()) -} + x &= 3; + + r as usize + ((2 - (x >> 1) as usize) & (((x & 1) == 0) as usize).wrapping_neg()) + } } intrinsics! { diff --git a/src/int/traits.rs b/src/int/traits.rs new file mode 100644 index 000000000..9b079e2aa --- /dev/null +++ b/src/int/traits.rs @@ -0,0 +1,411 @@ +use core::ops; + +/// Minimal integer implementations needed on all integer types, including wide integers. +#[allow(dead_code)] +pub trait MinInt: + Copy + + core::fmt::Debug + + ops::BitOr + + ops::Not + + ops::Shl +{ + /// Type with the same width but other signedness + type OtherSign: MinInt; + /// Unsigned version of Self + type UnsignedInt: MinInt; + + /// If `Self` is a signed integer + const SIGNED: bool; + + /// The bitwidth of the int type + const BITS: u32; + + const ZERO: Self; + const ONE: Self; + const MIN: Self; + const MAX: Self; +} + +/// Trait for some basic operations on integers +#[allow(dead_code)] +pub trait Int: + MinInt + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::SubAssign + + ops::BitAndAssign + + ops::BitOrAssign + + ops::BitXorAssign + + ops::ShlAssign + + ops::ShrAssign + + ops::Add + + ops::Sub + + ops::Mul + + ops::Div + + ops::Shr + + ops::BitXor + + ops::BitAnd +{ + /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing + /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, + /// 112,119,120,125,126,127]. + const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(::BITS); + + /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. + const FUZZ_NUM: usize = { + let log2 = (::BITS - 1).count_ones() as usize; + if log2 == 3 { + // case for u8 + 6 + } else { + // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate + // boundaries. 
+ 8 + (4 * (log2 - 4)) + } + }; + + fn unsigned(self) -> Self::UnsignedInt; + fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; + fn unsigned_abs(self) -> Self::UnsignedInt; + + fn from_bool(b: bool) -> Self; + + /// Prevents the need for excessive conversions between signed and unsigned + fn logical_shr(self, other: u32) -> Self; + + /// Absolute difference between two integers. + fn abs_diff(self, other: Self) -> Self::UnsignedInt; + + // copied from primitive integers, but put in a trait + fn is_zero(self) -> bool; + fn wrapping_neg(self) -> Self; + fn wrapping_add(self, other: Self) -> Self; + fn wrapping_mul(self, other: Self) -> Self; + fn wrapping_sub(self, other: Self) -> Self; + fn wrapping_shl(self, other: u32) -> Self; + fn wrapping_shr(self, other: u32) -> Self; + fn rotate_left(self, other: u32) -> Self; + fn overflowing_add(self, other: Self) -> (Self, bool); + fn leading_zeros(self) -> u32; + fn ilog2(self) -> u32; +} + +pub(crate) const fn make_fuzz_lengths(bits: u32) -> [u8; 20] { + let mut v = [0u8; 20]; + v[0] = 0; + v[1] = 1; + v[2] = 2; // important for parity and the iX::MIN case when reversed + let mut i = 3; + + // No need for any more until the byte boundary, because there should be no algorithms + // that are sensitive to anything not next to byte boundaries after 2. We also scale + // in powers of two, which is important to prevent u128 corner tests from getting too + // big. + let mut l = 8; + loop { + if l >= ((bits / 2) as u8) { + break; + } + // get both sides of the byte boundary + v[i] = l - 1; + i += 1; + v[i] = l; + i += 1; + l *= 2; + } + + if bits != 8 { + // add the lower side of the middle boundary + v[i] = ((bits / 2) - 1) as u8; + i += 1; + } + + // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS + // boundary because of algorithms that split the high part up. We reverse the scaling + // as we go to Self::BITS. 
+ let mid = i; + let mut j = 1; + loop { + v[i] = (bits as u8) - (v[mid - j]) - 1; + if j == mid { + break; + } + i += 1; + j += 1; + } + v +} + +macro_rules! int_impl_common { + ($ty:ty) => { + fn from_bool(b: bool) -> Self { + b as $ty + } + + fn logical_shr(self, other: u32) -> Self { + Self::from_unsigned(self.unsigned().wrapping_shr(other)) + } + + fn is_zero(self) -> bool { + self == Self::ZERO + } + + fn wrapping_neg(self) -> Self { + ::wrapping_neg(self) + } + + fn wrapping_add(self, other: Self) -> Self { + ::wrapping_add(self, other) + } + + fn wrapping_mul(self, other: Self) -> Self { + ::wrapping_mul(self, other) + } + fn wrapping_sub(self, other: Self) -> Self { + ::wrapping_sub(self, other) + } + + fn wrapping_shl(self, other: u32) -> Self { + ::wrapping_shl(self, other) + } + + fn wrapping_shr(self, other: u32) -> Self { + ::wrapping_shr(self, other) + } + + fn rotate_left(self, other: u32) -> Self { + ::rotate_left(self, other) + } + + fn overflowing_add(self, other: Self) -> (Self, bool) { + ::overflowing_add(self, other) + } + + fn leading_zeros(self) -> u32 { + ::leading_zeros(self) + } + + fn ilog2(self) -> u32 { + ::ilog2(self) + } + }; +} + +macro_rules! 
int_impl { + ($ity:ty, $uty:ty) => { + impl MinInt for $uty { + type OtherSign = $ity; + type UnsignedInt = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $uty { + fn unsigned(self) -> $uty { + self + } + + // It makes writing macros easier if this is implemented for both signed and unsigned + #[allow(clippy::wrong_self_convention)] + fn from_unsigned(me: $uty) -> Self { + me + } + + fn unsigned_abs(self) -> Self { + self + } + + fn abs_diff(self, other: Self) -> Self { + self.abs_diff(other) + } + + int_impl_common!($uty); + } + + impl MinInt for $ity { + type OtherSign = $uty; + type UnsignedInt = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $ity { + fn unsigned(self) -> $uty { + self as $uty + } + + fn from_unsigned(me: $uty) -> Self { + me as $ity + } + + fn unsigned_abs(self) -> Self::UnsignedInt { + self.unsigned_abs() + } + + fn abs_diff(self, other: Self) -> $uty { + self.abs_diff(other) + } + + int_impl_common!($ity); + } + }; +} + +int_impl!(isize, usize); +int_impl!(i8, u8); +int_impl!(i16, u16); +int_impl!(i32, u32); +int_impl!(i64, u64); +int_impl!(i128, u128); + +/// Trait for integers twice the bit width of another integer. This is implemented for all +/// primitives except for `u8`, because there is not a smaller primitive. 
+pub trait DInt: MinInt { + /// Integer that is half the bit width of the integer this trait is implemented for + type H: HInt; + + /// Returns the low half of `self` + fn lo(self) -> Self::H; + /// Returns the high half of `self` + fn hi(self) -> Self::H; + /// Returns the low and high halves of `self` as a tuple + fn lo_hi(self) -> (Self::H, Self::H) { + (self.lo(), self.hi()) + } + /// Constructs an integer using lower and higher half parts + fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { + lo.zero_widen() | hi.widen_hi() + } +} + +/// Trait for integers half the bit width of another integer. This is implemented for all +/// primitives except for `u128`, because it there is not a larger primitive. +pub trait HInt: Int { + /// Integer that is double the bit width of the integer this trait is implemented for + type D: DInt + MinInt; + + // NB: some of the below methods could have default implementations (e.g. `widen_hi`), but for + // unknown reasons this can cause infinite recursion when optimizations are disabled. See + // for context. + + /// Widens (using default extension) the integer to have double bit width + fn widen(self) -> Self::D; + /// Widens (zero extension only) the integer to have double bit width. This is needed to get + /// around problems with associated type bounds (such as `Int`) being unstable + fn zero_widen(self) -> Self::D; + /// Widens the integer to have double bit width and shifts the integer into the higher bits + fn widen_hi(self) -> Self::D; + /// Widening multiplication with zero widening. This cannot overflow. + fn zero_widen_mul(self, rhs: Self) -> Self::D; + /// Widening multiplication. This cannot overflow. + fn widen_mul(self, rhs: Self) -> Self::D; +} + +macro_rules! impl_d_int { + ($($X:ident $D:ident),*) => { + $( + impl DInt for $D { + type H = $X; + + fn lo(self) -> Self::H { + self as $X + } + fn hi(self) -> Self::H { + (self >> <$X as MinInt>::BITS) as $X + } + } + )* + }; +} + +macro_rules! 
impl_h_int { + ($($H:ident $uH:ident $X:ident),*) => { + $( + impl HInt for $H { + type D = $X; + + fn widen(self) -> Self::D { + self as $X + } + fn zero_widen(self) -> Self::D { + (self as $uH) as $X + } + fn zero_widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen().wrapping_mul(rhs.zero_widen()) + } + fn widen_mul(self, rhs: Self) -> Self::D { + self.widen().wrapping_mul(rhs.widen()) + } + fn widen_hi(self) -> Self::D { + (self as $X) << ::BITS + } + } + )* + }; +} + +impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); +impl_h_int!( + u8 u8 u16, + u16 u16 u32, + u32 u32 u64, + u64 u64 u128, + i8 u8 i16, + i16 u16 i32, + i32 u32 i64, + i64 u64 i128 +); + +/// Trait to express (possibly lossy) casting of integers +pub trait CastInto: Copy { + fn cast(self) -> T; +} + +pub trait CastFrom: Copy { + fn cast_from(value: T) -> Self; +} + +impl + Copy> CastFrom for T { + fn cast_from(value: U) -> Self { + value.cast() + } +} + +macro_rules! cast_into { + ($ty:ty) => { + cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); + }; + ($ty:ty; $($into:ty),*) => {$( + impl CastInto<$into> for $ty { + fn cast(self) -> $into { + self as $into + } + } + )*}; +} + +cast_into!(usize); +cast_into!(isize); +cast_into!(u8); +cast_into!(i8); +cast_into!(u16); +cast_into!(i16); +cast_into!(u32); +cast_into!(i32); +cast_into!(u64); +cast_into!(i64); +cast_into!(u128); +cast_into!(i128); diff --git a/src/macros.rs b/src/macros.rs index f51e49e98..91606d42b 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -1,21 +1,5 @@ //! Macros shared throughout the compiler-builtins implementation -/// Changes the visibility to `pub` if feature "public-test-deps" is set -#[cfg(not(feature = "public-test-deps"))] -macro_rules! 
public_test_dep { - ($(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*) => { - $(#[$($meta)*])* pub(crate) $ident $($tokens)* - }; -} - -/// Changes the visibility to `pub` if feature "public-test-deps" is set -#[cfg(feature = "public-test-deps")] -macro_rules! public_test_dep { - {$(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*} => { - $(#[$($meta)*])* pub $ident $($tokens)* - }; -} - /// The "main macro" used for defining intrinsics. /// /// The compiler-builtins library is super platform-specific with tons of crazy From 09f78cdccf3f22dbfb15dbbb3d1552ddb7e33258 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 15 Jan 2025 21:19:09 +0000 Subject: [PATCH 1081/1459] chore: release v0.1.144 --- CHANGELOG.md | 6 ++++++ Cargo.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c21dc2d2..a1f95d178 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.144](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.143...compiler_builtins-v0.1.144) - 2025-01-15 + +### Other + +- Eliminate the use of `public_test_dep!` + ## [0.1.143](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.142...compiler_builtins-v0.1.143) - 2025-01-15 ### Other diff --git a/Cargo.toml b/Cargo.toml index 4d5bbe4a9..0b0b7c365 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.143" +version = "0.1.144" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From c6c12c72a315d756983baa38f4d4c463d9be5600 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 7 Jan 2025 06:28:04 +0000 Subject: [PATCH 
1082/1459] Replace `HasDomain` to enable multi-argument edge case and domain tests This also allows reusing the same generator logic between logspace tests and extensive tests, so it comes with a nice bit of cleanup. Changes: * Make the generator part of `CheckCtx` since a `Generator` and `CheckCtx` are almost always passed together. * Rename `domain_logspace` to `spaced` since this no longer only operates within a domain and we may want to handle integer spacing. * Domain is now calculated at runtime rather than using traits, which is much easier to work with. * With the above, domains for multidimensional functions are added. * The extensive test generator code has been combined with the domain_logspace generator code. With this, the domain tests have just become a subset of extensive tests. These were renamed to "quickspace" since, technically, the extensive tests are also "domain" or "domain logspace" tests. * Edge case generators now handle functions with multiple inputs. * The test runners can be significantly cleaned up and deduplicated.
--- libm/crates/libm-test/benches/random.rs | 4 +- .../crates/libm-test/examples/plot_domains.rs | 28 +- libm/crates/libm-test/src/domain.rs | 303 ++++++++++-------- libm/crates/libm-test/src/gen.rs | 3 +- .../libm-test/src/gen/domain_logspace.rs | 31 -- libm/crates/libm-test/src/gen/edge_cases.rs | 185 +++++++++-- libm/crates/libm-test/src/gen/random.rs | 26 +- .../src/gen/{extensive.rs => spaced.rs} | 93 +++--- libm/crates/libm-test/src/run_cfg.rs | 60 +++- .../libm-test/tests/compare_built_musl.rs | 132 ++------ libm/crates/libm-test/tests/multiprecision.rs | 142 ++------ .../crates/libm-test/tests/z_extensive/run.rs | 24 +- 12 files changed, 525 insertions(+), 506 deletions(-) delete mode 100644 libm/crates/libm-test/src/gen/domain_logspace.rs rename libm/crates/libm-test/src/gen/{extensive.rs => spaced.rs} (76%) diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index e79002277..dcc7c1aca 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -4,7 +4,7 @@ use std::time::Duration; use criterion::{Criterion, criterion_main}; use libm_test::gen::random; use libm_test::gen::random::RandomInput; -use libm_test::{CheckBasis, CheckCtx, MathOp, TupleCall}; +use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, TupleCall}; /// Benchmark with this many items to get a variety const BENCH_ITER_ITEMS: usize = if cfg!(feature = "short-benchmarks") { 50 } else { 500 }; @@ -52,7 +52,7 @@ where { let name = Op::NAME; - let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl); + let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl, GeneratorKind::Random); let benchvec: Vec<_> = random::get_test_cases::(&ctx).take(BENCH_ITER_ITEMS).collect(); diff --git a/libm/crates/libm-test/examples/plot_domains.rs b/libm/crates/libm-test/examples/plot_domains.rs index 626511245..fb7b854df 100644 --- a/libm/crates/libm-test/examples/plot_domains.rs +++ 
b/libm/crates/libm-test/examples/plot_domains.rs @@ -12,9 +12,9 @@ use std::path::Path; use std::process::Command; use std::{env, fs}; -use libm_test::domain::HasDomain; -use libm_test::gen::{domain_logspace, edge_cases}; -use libm_test::{CheckBasis, CheckCtx, MathOp, op}; +use libm_test::gen::spaced::SpacedInput; +use libm_test::gen::{edge_cases, spaced}; +use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, op}; const JL_PLOT: &str = "examples/plot_file.jl"; @@ -52,23 +52,13 @@ fn main() { /// Run multiple generators for a single operator. fn plot_one_operator(out_dir: &Path, config: &mut String) where - Op: MathOp + HasDomain, + Op: MathOp, + Op::RustArgs: SpacedInput, { - let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr); - plot_one_generator( - out_dir, - &ctx, - "logspace", - config, - domain_logspace::get_test_cases::(&ctx), - ); - plot_one_generator( - out_dir, - &ctx, - "edge_cases", - config, - edge_cases::get_test_cases::(&ctx), - ); + let mut ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr, GeneratorKind::QuickSpaced); + plot_one_generator(out_dir, &ctx, "logspace", config, spaced::get_test_cases::(&ctx).0); + ctx.gen_kind = GeneratorKind::EdgeCases; + plot_one_generator(out_dir, &ctx, "edge_cases", config, edge_cases::get_test_cases::(&ctx)); } /// Plot the output of a single generator. diff --git a/libm/crates/libm-test/src/domain.rs b/libm/crates/libm-test/src/domain.rs index 68b91bf02..5d650c00a 100644 --- a/libm/crates/libm-test/src/domain.rs +++ b/libm/crates/libm-test/src/domain.rs @@ -1,11 +1,13 @@ //! Traits and operations related to bounds of a function. use std::fmt; -use std::ops::{self, Bound}; +use std::ops::Bound; -use crate::{Float, FloatExt}; +use libm::support::Int; -/// Representation of a function's domain. +use crate::{BaseName, Float, FloatExt, Identifier}; + +/// Representation of a single dimension of a function's domain. 
#[derive(Clone, Debug)] pub struct Domain { /// Start of the region for which a function is defined (ignoring poles). @@ -39,56 +41,131 @@ impl Domain { } } +/// A value that may be any float type or any integer type. +#[derive(Clone, Debug)] +pub enum EitherPrim { + Float(F), + Int(I), +} + +impl EitherPrim { + pub fn unwrap_float(self) -> F { + match self { + EitherPrim::Float(f) => f, + EitherPrim::Int(_) => panic!("expected float; got {self:?}"), + } + } + + pub fn unwrap_int(self) -> I { + match self { + EitherPrim::Float(_) => panic!("expected int; got {self:?}"), + EitherPrim::Int(i) => i, + } + } +} + +/// Convenience 1-dimensional float domains. impl Domain { /// x ∈ ℝ - pub const UNBOUNDED: Self = + const UNBOUNDED: Self = Self { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None }; /// x ∈ ℝ >= 0 - pub const POSITIVE: Self = + const POSITIVE: Self = Self { start: Bound::Included(F::ZERO), end: Bound::Unbounded, check_points: None }; /// x ∈ ℝ > 0 - pub const STRICTLY_POSITIVE: Self = + const STRICTLY_POSITIVE: Self = Self { start: Bound::Excluded(F::ZERO), end: Bound::Unbounded, check_points: None }; + /// Wrap in the float variant of [`EitherPrim`]. + const fn into_prim_float(self) -> EitherPrim> { + EitherPrim::Float(self) + } +} + +/// Convenience 1-dimensional integer domains. +impl Domain { + /// x ∈ ℝ + const UNBOUNDED_INT: Self = + Self { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None }; + + /// Wrap in the int variant of [`EitherPrim`]. + const fn into_prim_int(self) -> EitherPrim, Self> { + EitherPrim::Int(self) + } +} + +/// Multidimensional domains, represented as an array of 1-D domains. 
+impl EitherPrim, Domain> { + /// x ∈ ℝ + const UNBOUNDED1: [Self; 1] = + [Domain { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None } + .into_prim_float()]; + + /// {x1, x2} ∈ ℝ + const UNBOUNDED2: [Self; 2] = + [Domain::UNBOUNDED.into_prim_float(), Domain::UNBOUNDED.into_prim_float()]; + + /// {x1, x2, x3} ∈ ℝ + const UNBOUNDED3: [Self; 3] = [ + Domain::UNBOUNDED.into_prim_float(), + Domain::UNBOUNDED.into_prim_float(), + Domain::UNBOUNDED.into_prim_float(), + ]; + + /// {x1, x2} ∈ ℝ, one float and one int + const UNBOUNDED_F_I: [Self; 2] = + [Domain::UNBOUNDED.into_prim_float(), Domain::UNBOUNDED_INT.into_prim_int()]; + + /// x ∈ ℝ >= 0 + const POSITIVE: [Self; 1] = [Domain::POSITIVE.into_prim_float()]; + + /// x ∈ ℝ > 0 + const STRICTLY_POSITIVE: [Self; 1] = [Domain::STRICTLY_POSITIVE.into_prim_float()]; + /// Used for versions of `asin` and `acos`. - pub const INVERSE_TRIG_PERIODIC: Self = Self { + const INVERSE_TRIG_PERIODIC: [Self; 1] = [Domain { start: Bound::Included(F::NEG_ONE), end: Bound::Included(F::ONE), check_points: None, - }; + } + .into_prim_float()]; /// Domain for `acosh` - pub const ACOSH: Self = - Self { start: Bound::Included(F::ONE), end: Bound::Unbounded, check_points: None }; + const ACOSH: [Self; 1] = + [Domain { start: Bound::Included(F::ONE), end: Bound::Unbounded, check_points: None } + .into_prim_float()]; /// Domain for `atanh` - pub const ATANH: Self = Self { + const ATANH: [Self; 1] = [Domain { start: Bound::Excluded(F::NEG_ONE), end: Bound::Excluded(F::ONE), check_points: None, - }; + } + .into_prim_float()]; /// Domain for `sin`, `cos`, and `tan` - pub const TRIG: Self = Self { - // TODO + const TRIG: [Self; 1] = [Domain { + // Trig functions have special behavior at fractions of π. 
check_points: Some(|| Box::new([-F::PI, -F::FRAC_PI_2, F::FRAC_PI_2, F::PI].into_iter())), - ..Self::UNBOUNDED - }; + ..Domain::UNBOUNDED + } + .into_prim_float()]; /// Domain for `log` in various bases - pub const LOG: Self = Self::STRICTLY_POSITIVE; + const LOG: [Self; 1] = Self::STRICTLY_POSITIVE; /// Domain for `log1p` i.e. `log(1 + x)` - pub const LOG1P: Self = - Self { start: Bound::Excluded(F::NEG_ONE), end: Bound::Unbounded, check_points: None }; + const LOG1P: [Self; 1] = + [Domain { start: Bound::Excluded(F::NEG_ONE), end: Bound::Unbounded, check_points: None } + .into_prim_float()]; /// Domain for `sqrt` - pub const SQRT: Self = Self::POSITIVE; + const SQRT: [Self; 1] = Self::POSITIVE; /// Domain for `gamma` - pub const GAMMA: Self = Self { + const GAMMA: [Self; 1] = [Domain { check_points: Some(|| { // Negative integers are asymptotes Box::new((0..u8::MAX).map(|scale| { @@ -100,122 +177,84 @@ impl Domain { })) }), // Whether or not gamma is defined for negative numbers is implementation dependent - ..Self::UNBOUNDED - }; + ..Domain::UNBOUNDED + } + .into_prim_float()]; /// Domain for `loggamma` - pub const LGAMMA: Self = Self::STRICTLY_POSITIVE; -} + const LGAMMA: [Self; 1] = Self::STRICTLY_POSITIVE; -/// Implement on `op::*` types to indicate how they are bounded. -pub trait HasDomain -where - T: Copy + fmt::Debug + ops::Add + ops::Sub + PartialOrd + 'static, -{ - const DOMAIN: Domain; + /// Domain for `jn` and `yn`. + // FIXME: the domain should provide some sort of "reasonable range" so we don't actually test + // the entire system unbounded. + const BESSEL_N: [Self; 2] = + [Domain::UNBOUNDED_INT.into_prim_int(), Domain::UNBOUNDED.into_prim_float()]; } -/// Implement [`HasDomain`] for both the `f32` and `f64` variants of a function. -macro_rules! impl_has_domain { - ($($fn_name:ident => $domain:expr;)*) => { - paste::paste! 
{ - $( - // Implement for f64 functions - impl HasDomain for $crate::op::$fn_name::Routine { - const DOMAIN: Domain = Domain::::$domain; - } - - // Implement for f32 functions - impl HasDomain for $crate::op::[< $fn_name f >]::Routine { - const DOMAIN: Domain = Domain::::$domain; - } - )* - } +/// Get the domain for a given function. +pub fn get_domain( + id: Identifier, + argnum: usize, +) -> EitherPrim, Domain> { + let x = match id.base_name() { + BaseName::Acos => &EitherPrim::INVERSE_TRIG_PERIODIC[..], + BaseName::Acosh => &EitherPrim::ACOSH[..], + BaseName::Asin => &EitherPrim::INVERSE_TRIG_PERIODIC[..], + BaseName::Asinh => &EitherPrim::UNBOUNDED1[..], + BaseName::Atan => &EitherPrim::UNBOUNDED1[..], + BaseName::Atan2 => &EitherPrim::UNBOUNDED2[..], + BaseName::Cbrt => &EitherPrim::UNBOUNDED1[..], + BaseName::Atanh => &EitherPrim::ATANH[..], + BaseName::Ceil => &EitherPrim::UNBOUNDED1[..], + BaseName::Cosh => &EitherPrim::UNBOUNDED1[..], + BaseName::Copysign => &EitherPrim::UNBOUNDED2[..], + BaseName::Cos => &EitherPrim::TRIG[..], + BaseName::Exp => &EitherPrim::UNBOUNDED1[..], + BaseName::Erf => &EitherPrim::UNBOUNDED1[..], + BaseName::Erfc => &EitherPrim::UNBOUNDED1[..], + BaseName::Expm1 => &EitherPrim::UNBOUNDED1[..], + BaseName::Exp10 => &EitherPrim::UNBOUNDED1[..], + BaseName::Exp2 => &EitherPrim::UNBOUNDED1[..], + BaseName::Frexp => &EitherPrim::UNBOUNDED1[..], + BaseName::Fabs => &EitherPrim::UNBOUNDED1[..], + BaseName::Fdim => &EitherPrim::UNBOUNDED2[..], + BaseName::Floor => &EitherPrim::UNBOUNDED1[..], + BaseName::Fma => &EitherPrim::UNBOUNDED3[..], + BaseName::Fmax => &EitherPrim::UNBOUNDED2[..], + BaseName::Fmin => &EitherPrim::UNBOUNDED2[..], + BaseName::Fmod => &EitherPrim::UNBOUNDED2[..], + BaseName::Hypot => &EitherPrim::UNBOUNDED2[..], + BaseName::Ilogb => &EitherPrim::UNBOUNDED1[..], + BaseName::J0 => &EitherPrim::UNBOUNDED1[..], + BaseName::J1 => &EitherPrim::UNBOUNDED1[..], + BaseName::Jn => &EitherPrim::BESSEL_N[..], + BaseName::Ldexp => 
&EitherPrim::UNBOUNDED_F_I[..], + BaseName::Lgamma => &EitherPrim::LGAMMA[..], + BaseName::LgammaR => &EitherPrim::LGAMMA[..], + BaseName::Log => &EitherPrim::LOG[..], + BaseName::Log10 => &EitherPrim::LOG[..], + BaseName::Log1p => &EitherPrim::LOG1P[..], + BaseName::Log2 => &EitherPrim::LOG[..], + BaseName::Modf => &EitherPrim::UNBOUNDED1[..], + BaseName::Nextafter => &EitherPrim::UNBOUNDED2[..], + BaseName::Pow => &EitherPrim::UNBOUNDED2[..], + BaseName::Remainder => &EitherPrim::UNBOUNDED2[..], + BaseName::Remquo => &EitherPrim::UNBOUNDED2[..], + BaseName::Rint => &EitherPrim::UNBOUNDED1[..], + BaseName::Round => &EitherPrim::UNBOUNDED1[..], + BaseName::Scalbn => &EitherPrim::UNBOUNDED_F_I[..], + BaseName::Sin => &EitherPrim::TRIG[..], + BaseName::Sincos => &EitherPrim::TRIG[..], + BaseName::Sinh => &EitherPrim::UNBOUNDED1[..], + BaseName::Sqrt => &EitherPrim::SQRT[..], + BaseName::Tan => &EitherPrim::TRIG[..], + BaseName::Tanh => &EitherPrim::UNBOUNDED1[..], + BaseName::Tgamma => &EitherPrim::GAMMA[..], + BaseName::Trunc => &EitherPrim::UNBOUNDED1[..], + BaseName::Y0 => &EitherPrim::UNBOUNDED1[..], + BaseName::Y1 => &EitherPrim::UNBOUNDED1[..], + BaseName::Yn => &EitherPrim::BESSEL_N[..], }; -} - -// Tie functions together with their domains. -impl_has_domain! 
{ - acos => INVERSE_TRIG_PERIODIC; - acosh => ACOSH; - asin => INVERSE_TRIG_PERIODIC; - asinh => UNBOUNDED; - atan => UNBOUNDED; - atanh => ATANH; - cbrt => UNBOUNDED; - ceil => UNBOUNDED; - cos => TRIG; - cosh => UNBOUNDED; - erf => UNBOUNDED; - erfc => UNBOUNDED; - exp => UNBOUNDED; - exp10 => UNBOUNDED; - exp2 => UNBOUNDED; - expm1 => UNBOUNDED; - fabs => UNBOUNDED; - floor => UNBOUNDED; - frexp => UNBOUNDED; - ilogb => UNBOUNDED; - j0 => UNBOUNDED; - j1 => UNBOUNDED; - lgamma => LGAMMA; - log => LOG; - log10 => LOG; - log1p => LOG1P; - log2 => LOG; - modf => UNBOUNDED; - rint => UNBOUNDED; - round => UNBOUNDED; - sin => TRIG; - sincos => TRIG; - sinh => UNBOUNDED; - sqrt => SQRT; - tan => TRIG; - tanh => UNBOUNDED; - tgamma => GAMMA; - trunc => UNBOUNDED; - y0 => UNBOUNDED; - y1 => UNBOUNDED; -} - -/* Manual implementations, these functions don't follow `foo`->`foof` naming */ - -impl HasDomain for crate::op::lgammaf_r::Routine { - const DOMAIN: Domain = Domain::::LGAMMA; -} - -impl HasDomain for crate::op::lgamma_r::Routine { - const DOMAIN: Domain = Domain::::LGAMMA; -} - -/* Not all `f16` and `f128` functions exist yet so we can't easily use the macros. 
*/ - -#[cfg(f16_enabled)] -impl HasDomain for crate::op::fabsf16::Routine { - const DOMAIN: Domain = Domain::::UNBOUNDED; -} - -#[cfg(f128_enabled)] -impl HasDomain for crate::op::fabsf128::Routine { - const DOMAIN: Domain = Domain::::UNBOUNDED; -} - -#[cfg(f16_enabled)] -impl HasDomain for crate::op::fdimf16::Routine { - const DOMAIN: Domain = Domain::::UNBOUNDED; -} - -#[cfg(f128_enabled)] -impl HasDomain for crate::op::fdimf128::Routine { - const DOMAIN: Domain = Domain::::UNBOUNDED; -} - -#[cfg(f16_enabled)] -impl HasDomain for crate::op::truncf16::Routine { - const DOMAIN: Domain = Domain::::UNBOUNDED; -} -#[cfg(f128_enabled)] -impl HasDomain for crate::op::truncf128::Routine { - const DOMAIN: Domain = Domain::::UNBOUNDED; + x[argnum].clone() } diff --git a/libm/crates/libm-test/src/gen.rs b/libm/crates/libm-test/src/gen.rs index e2bfcdf34..e0a7f5766 100644 --- a/libm/crates/libm-test/src/gen.rs +++ b/libm/crates/libm-test/src/gen.rs @@ -1,9 +1,8 @@ //! Different generators that can create random or systematic bit patterns. -pub mod domain_logspace; pub mod edge_cases; -pub mod extensive; pub mod random; +pub mod spaced; /// A wrapper to turn any iterator into an `ExactSizeIterator`. Asserts the final result to ensure /// the provided size was correct. diff --git a/libm/crates/libm-test/src/gen/domain_logspace.rs b/libm/crates/libm-test/src/gen/domain_logspace.rs deleted file mode 100644 index c6963ad43..000000000 --- a/libm/crates/libm-test/src/gen/domain_logspace.rs +++ /dev/null @@ -1,31 +0,0 @@ -//! A generator that produces logarithmically spaced values within domain bounds. - -use std::ops::RangeInclusive; - -use libm::support::{IntTy, MinInt}; - -use crate::domain::HasDomain; -use crate::op::OpITy; -use crate::run_cfg::{GeneratorKind, iteration_count}; -use crate::{CheckCtx, MathOp, logspace}; - -/// Create a range of logarithmically spaced inputs within a function's domain. 
-/// -/// This allows us to get reasonably thorough coverage without wasting time on values that are -/// NaN or out of range. Random tests will still cover values that are excluded here. -pub fn get_test_cases(ctx: &CheckCtx) -> impl Iterator -where - Op: MathOp + HasDomain, - IntTy: TryFrom, - RangeInclusive>: Iterator, -{ - let domain = Op::DOMAIN; - let ntests = iteration_count(ctx, GeneratorKind::Domain, 0); - - // We generate logspaced inputs within a specific range, excluding values that are out of - // range in order to make iterations useful (random tests still cover the full range). - let start = domain.range_start(); - let end = domain.range_end(); - let steps = OpITy::::try_from(ntests).unwrap_or(OpITy::::MAX); - logspace(start, end, steps).0.map(|v| (v,)) -} diff --git a/libm/crates/libm-test/src/gen/edge_cases.rs b/libm/crates/libm-test/src/gen/edge_cases.rs index 1f27c1467..d4014bdb3 100644 --- a/libm/crates/libm-test/src/gen/edge_cases.rs +++ b/libm/crates/libm-test/src/gen/edge_cases.rs @@ -1,20 +1,28 @@ //! A generator that checks a handful of cases near infinities, zeros, asymptotes, and NaNs. -use libm::support::Float; +use libm::support::{Float, Int}; -use crate::domain::HasDomain; +use crate::domain::get_domain; +use crate::gen::KnownSize; use crate::run_cfg::{check_near_count, check_point_count}; -use crate::{CheckCtx, FloatExt, MathOp}; +use crate::{CheckCtx, FloatExt, MathOp, test_log}; + +/// Generate a sequence of edge cases, e.g. numbers near zeroes and infiniteis. +pub trait EdgeCaseInput { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator + Send; +} /// Create a list of values around interesting points (infinities, zeroes, NaNs). 
-pub fn get_test_cases(ctx: &CheckCtx) -> impl Iterator +fn float_edge_cases( + ctx: &CheckCtx, + argnum: usize, +) -> (impl Iterator + Clone, u64) where - Op: MathOp + HasDomain, - F: Float, + Op: MathOp, { let mut ret = Vec::new(); let values = &mut ret; - let domain = Op::DOMAIN; + let domain = get_domain::<_, i8>(ctx.fn_ident, argnum).unwrap_float(); let domain_start = domain.range_start(); let domain_end = domain.range_end(); @@ -22,17 +30,17 @@ where let near_points = check_near_count(ctx); // Check near some notable constants - count_up(F::ONE, near_points, values); - count_up(F::ZERO, near_points, values); - count_up(F::NEG_ONE, near_points, values); - count_down(F::ONE, near_points, values); - count_down(F::ZERO, near_points, values); - count_down(F::NEG_ONE, near_points, values); - values.push(F::NEG_ZERO); + count_up(Op::FTy::ONE, near_points, values); + count_up(Op::FTy::ZERO, near_points, values); + count_up(Op::FTy::NEG_ONE, near_points, values); + count_down(Op::FTy::ONE, near_points, values); + count_down(Op::FTy::ZERO, near_points, values); + count_down(Op::FTy::NEG_ONE, near_points, values); + values.push(Op::FTy::NEG_ZERO); // Check values near the extremes - count_up(F::NEG_INFINITY, near_points, values); - count_down(F::INFINITY, near_points, values); + count_up(Op::FTy::NEG_INFINITY, near_points, values); + count_down(Op::FTy::INFINITY, near_points, values); count_down(domain_end, near_points, values); count_up(domain_start, near_points, values); count_down(domain_start, near_points, values); @@ -40,8 +48,8 @@ where count_down(domain_end, near_points, values); // Check some special values that aren't included in the above ranges - values.push(F::NAN); - values.extend(F::consts().iter()); + values.push(Op::FTy::NAN); + values.extend(Op::FTy::consts().iter()); // Check around asymptotes if let Some(f) = domain.check_points { @@ -56,7 +64,18 @@ where values.sort_by_key(|x| x.to_bits()); values.dedup_by_key(|x| x.to_bits()); - 
ret.into_iter().map(|v| (v,)) + let count = ret.len().try_into().unwrap(); + + test_log(&format!( + "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {count} edge cases", + gen_kind = ctx.gen_kind, + basis = ctx.basis, + fn_ident = ctx.fn_ident, + arg = argnum + 1, + args = ctx.input_count(), + )); + + (ret.into_iter(), count) } /// Add `AROUND` values starting at and including `x` and counting up. Uses the smallest possible @@ -84,3 +103,131 @@ fn count_down(mut x: F, points: u64, values: &mut Vec) { count += 1; } } + +/// Create a list of values around interesting integer points (min, zero, max). +pub fn int_edge_cases( + ctx: &CheckCtx, + _argnum: usize, +) -> (impl Iterator + Clone, u64) { + let mut values = Vec::new(); + let near_points = check_near_count(ctx); + + for up_from in [I::MIN, I::ZERO] { + let mut x = up_from; + for _ in 0..near_points { + values.push(x); + x += I::ONE; + } + } + + for down_from in [I::ZERO, I::MAX] { + let mut x = down_from; + for _ in 0..near_points { + values.push(x); + x -= I::ONE; + } + } + + values.sort(); + values.dedup(); + let len = values.len().try_into().unwrap(); + (values.into_iter(), len) +} + +macro_rules! 
impl_edge_case_input { + ($fty:ty) => { + impl EdgeCaseInput for ($fty,) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + let (iter0, steps0) = float_edge_cases::(ctx, 0); + let iter0 = iter0.map(|v| (v,)); + KnownSize::new(iter0, steps0) + } + } + + impl EdgeCaseInput for ($fty, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + let (iter0, steps0) = float_edge_cases::(ctx, 0); + let (iter1, steps1) = float_edge_cases::(ctx, 1); + let iter = + iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); + let count = steps0.checked_mul(steps1).unwrap(); + KnownSize::new(iter, count) + } + } + + impl EdgeCaseInput for ($fty, $fty, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + let (iter0, steps0) = float_edge_cases::(ctx, 0); + let (iter1, steps1) = float_edge_cases::(ctx, 1); + let (iter2, steps2) = float_edge_cases::(ctx, 2); + + let iter = iter0 + .flat_map(move |first| iter1.clone().map(move |second| (first, second))) + .flat_map(move |(first, second)| { + iter2.clone().map(move |third| (first, second, third)) + }); + let count = steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap(); + + KnownSize::new(iter, count) + } + } + + impl EdgeCaseInput for (i32, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + let (iter0, steps0) = int_edge_cases(ctx, 0); + let (iter1, steps1) = float_edge_cases::(ctx, 1); + + let iter = + iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); + let count = steps0.checked_mul(steps1).unwrap(); + + KnownSize::new(iter, count) + } + } + + impl EdgeCaseInput for ($fty, i32) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + let (iter0, steps0) = float_edge_cases::(ctx, 0); + let (iter1, steps1) = int_edge_cases(ctx, 1); + + let iter = + iter0.flat_map(move |first| 
iter1.clone().map(move |second| (first, second))); + let count = steps0.checked_mul(steps1).unwrap(); + + KnownSize::new(iter, count) + } + } + }; +} + +#[cfg(f16_enabled)] +impl_edge_case_input!(f16); +impl_edge_case_input!(f32); +impl_edge_case_input!(f64); +#[cfg(f128_enabled)] +impl_edge_case_input!(f128); + +pub fn get_test_cases( + ctx: &CheckCtx, +) -> impl ExactSizeIterator + use<'_, Op> +where + Op: MathOp, + Op::RustArgs: EdgeCaseInput, +{ + Op::RustArgs::get_cases(ctx) +} diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs index 6b08e560d..56c39981a 100644 --- a/libm/crates/libm-test/src/gen/random.rs +++ b/libm/crates/libm-test/src/gen/random.rs @@ -9,8 +9,8 @@ use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; use super::KnownSize; +use crate::CheckCtx; use crate::run_cfg::{int_range, iteration_count}; -use crate::{CheckCtx, GeneratorKind}; pub(crate) const SEED_ENV: &str = "LIBM_SEED"; @@ -52,7 +52,7 @@ macro_rules! impl_random_input { ($fty:ty) => { impl RandomInput for ($fty,) { fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { - let count = iteration_count(ctx, GeneratorKind::Random, 0); + let count = iteration_count(ctx, 0); let iter = random_floats(count).map(|f: $fty| (f,)); KnownSize::new(iter, count) } @@ -60,8 +60,8 @@ macro_rules! impl_random_input { impl RandomInput for ($fty, $fty) { fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { - let count0 = iteration_count(ctx, GeneratorKind::Random, 0); - let count1 = iteration_count(ctx, GeneratorKind::Random, 1); + let count0 = iteration_count(ctx, 0); + let count1 = iteration_count(ctx, 1); let iter = random_floats(count0) .flat_map(move |f1: $fty| random_floats(count1).map(move |f2: $fty| (f1, f2))); KnownSize::new(iter, count0 * count1) @@ -70,9 +70,9 @@ macro_rules! 
impl_random_input { impl RandomInput for ($fty, $fty, $fty) { fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { - let count0 = iteration_count(ctx, GeneratorKind::Random, 0); - let count1 = iteration_count(ctx, GeneratorKind::Random, 1); - let count2 = iteration_count(ctx, GeneratorKind::Random, 2); + let count0 = iteration_count(ctx, 0); + let count1 = iteration_count(ctx, 1); + let count2 = iteration_count(ctx, 2); let iter = random_floats(count0).flat_map(move |f1: $fty| { random_floats(count1).flat_map(move |f2: $fty| { random_floats(count2).map(move |f3: $fty| (f1, f2, f3)) @@ -84,9 +84,9 @@ macro_rules! impl_random_input { impl RandomInput for (i32, $fty) { fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { - let count0 = iteration_count(ctx, GeneratorKind::Random, 0); - let count1 = iteration_count(ctx, GeneratorKind::Random, 1); - let range0 = int_range(ctx, GeneratorKind::Random, 0); + let count0 = iteration_count(ctx, 0); + let count1 = iteration_count(ctx, 1); + let range0 = int_range(ctx, 0); let iter = random_ints(count0, range0) .flat_map(move |f1: i32| random_floats(count1).map(move |f2: $fty| (f1, f2))); KnownSize::new(iter, count0 * count1) @@ -95,9 +95,9 @@ macro_rules! 
impl_random_input { impl RandomInput for ($fty, i32) { fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { - let count0 = iteration_count(ctx, GeneratorKind::Random, 0); - let count1 = iteration_count(ctx, GeneratorKind::Random, 1); - let range1 = int_range(ctx, GeneratorKind::Random, 1); + let count0 = iteration_count(ctx, 0); + let count1 = iteration_count(ctx, 1); + let range1 = int_range(ctx, 1); let iter = random_floats(count0).flat_map(move |f1: $fty| { random_ints(count1, range1.clone()).map(move |f2: i32| (f1, f2)) }); diff --git a/libm/crates/libm-test/src/gen/extensive.rs b/libm/crates/libm-test/src/gen/spaced.rs similarity index 76% rename from libm/crates/libm-test/src/gen/extensive.rs rename to libm/crates/libm-test/src/gen/spaced.rs index fb709e546..bea3f4c7e 100644 --- a/libm/crates/libm-test/src/gen/extensive.rs +++ b/libm/crates/libm-test/src/gen/spaced.rs @@ -3,23 +3,23 @@ use std::ops::RangeInclusive; use libm::support::{Float, MinInt}; -use crate::domain::HasDomain; +use crate::domain::get_domain; use crate::op::OpITy; use crate::run_cfg::{int_range, iteration_count}; -use crate::{CheckCtx, GeneratorKind, MathOp, linear_ints, logspace}; +use crate::{CheckCtx, MathOp, linear_ints, logspace}; -/// Generate a sequence of inputs that either cover the domain in completeness (for smaller float +/// Generate a sequence of inputs that eiher cover the domain in completeness (for smaller float /// types and single argument functions) or provide evenly spaced inputs across the domain with /// approximately `u32::MAX` total iterations. -pub trait ExtensiveInput { +pub trait SpacedInput { fn get_cases(ctx: &CheckCtx) -> (impl Iterator + Send, u64); } /// Construct an iterator from `logspace` and also calculate the total number of steps expected /// for that iterator. 
fn logspace_steps( - start: Op::FTy, - end: Op::FTy, + ctx: &CheckCtx, + argnum: usize, max_steps: u64, ) -> (impl Iterator + Clone, u64) where @@ -28,6 +28,11 @@ where u64: TryFrom, Error: fmt::Debug>, RangeInclusive>: Iterator, { + // i8 is a dummy type here, it can be any integer. + let domain = get_domain::(ctx.fn_ident, argnum).unwrap_float(); + let start = domain.range_start(); + let end = domain.range_end(); + let max_steps = OpITy::::try_from(max_steps).unwrap_or(OpITy::::MAX); let (iter, steps) = logspace(start, end, max_steps); @@ -76,15 +81,14 @@ where (F::Int::MIN..=F::Int::MAX).map(|bits| F::from_bits(bits)) } -macro_rules! impl_extensive_input { +macro_rules! impl_spaced_input { ($fty:ty) => { - impl ExtensiveInput for ($fty,) + impl SpacedInput for ($fty,) where Op: MathOp, - Op: HasDomain, { fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { - let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0); + let max_steps0 = iteration_count(ctx, 0); // `f16` and `f32` can have exhaustive tests. match value_count::() { Some(steps0) if steps0 <= max_steps0 => { @@ -93,9 +97,7 @@ macro_rules! impl_extensive_input { (EitherIter::A(iter0), steps0) } _ => { - let start = Op::DOMAIN.range_start(); - let end = Op::DOMAIN.range_end(); - let (iter0, steps0) = logspace_steps::(start, end, max_steps0); + let (iter0, steps0) = logspace_steps::(ctx, 0, max_steps0); let iter0 = iter0.map(|v| (v,)); (EitherIter::B(iter0), steps0) } @@ -103,13 +105,13 @@ macro_rules! impl_extensive_input { } } - impl ExtensiveInput for ($fty, $fty) + impl SpacedInput for ($fty, $fty) where Op: MathOp, { fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { - let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0); - let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1); + let max_steps0 = iteration_count(ctx, 0); + let max_steps1 = iteration_count(ctx, 1); // `f16` can have exhaustive tests. 
match value_count::() { Some(count) if count <= max_steps0 && count <= max_steps1 => { @@ -118,10 +120,8 @@ macro_rules! impl_extensive_input { (EitherIter::A(iter), count.checked_mul(count).unwrap()) } _ => { - let start = <$fty>::NEG_INFINITY; - let end = <$fty>::INFINITY; - let (iter0, steps0) = logspace_steps::(start, end, max_steps0); - let (iter1, steps1) = logspace_steps::(start, end, max_steps1); + let (iter0, steps0) = logspace_steps::(ctx, 0, max_steps0); + let (iter1, steps1) = logspace_steps::(ctx, 1, max_steps1); let iter = iter0.flat_map(move |first| { iter1.clone().map(move |second| (first, second)) }); @@ -132,14 +132,14 @@ macro_rules! impl_extensive_input { } } - impl ExtensiveInput for ($fty, $fty, $fty) + impl SpacedInput for ($fty, $fty, $fty) where Op: MathOp, { fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { - let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0); - let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1); - let max_steps2 = iteration_count(ctx, GeneratorKind::Extensive, 2); + let max_steps0 = iteration_count(ctx, 0); + let max_steps1 = iteration_count(ctx, 1); + let max_steps2 = iteration_count(ctx, 2); // `f16` can be exhaustive tested if `LIBM_EXTENSIVE_TESTS` is incresed. match value_count::() { Some(count) @@ -153,12 +153,9 @@ macro_rules! impl_extensive_input { (EitherIter::A(iter), count.checked_pow(3).unwrap()) } _ => { - let start = <$fty>::NEG_INFINITY; - let end = <$fty>::INFINITY; - - let (iter0, steps0) = logspace_steps::(start, end, max_steps0); - let (iter1, steps1) = logspace_steps::(start, end, max_steps1); - let (iter2, steps2) = logspace_steps::(start, end, max_steps2); + let (iter0, steps0) = logspace_steps::(ctx, 0, max_steps0); + let (iter1, steps1) = logspace_steps::(ctx, 1, max_steps1); + let (iter2, steps2) = logspace_steps::(ctx, 2, max_steps2); let iter = iter0 .flat_map(move |first| iter1.clone().map(move |second| (first, second))) @@ -174,14 +171,14 @@ macro_rules! 
impl_extensive_input { } } - impl ExtensiveInput for (i32, $fty) + impl SpacedInput for (i32, $fty) where Op: MathOp, { fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { - let range0 = int_range(ctx, GeneratorKind::Extensive, 0); - let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0); - let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1); + let range0 = int_range(ctx, 0); + let max_steps0 = iteration_count(ctx, 0); + let max_steps1 = iteration_count(ctx, 1); match value_count::() { Some(count1) if count1 <= max_steps1 => { let (iter0, steps0) = linear_ints(range0, max_steps0); @@ -190,11 +187,8 @@ macro_rules! impl_extensive_input { (EitherIter::A(iter), steps0.checked_mul(count1).unwrap()) } _ => { - let start = <$fty>::NEG_INFINITY; - let end = <$fty>::INFINITY; - let (iter0, steps0) = linear_ints(range0, max_steps0); - let (iter1, steps1) = logspace_steps::(start, end, max_steps1); + let (iter1, steps1) = logspace_steps::(ctx, 1, max_steps1); let iter = iter0.flat_map(move |first| { iter1.clone().map(move |second| (first, second)) @@ -207,14 +201,14 @@ macro_rules! impl_extensive_input { } } - impl ExtensiveInput for ($fty, i32) + impl SpacedInput for ($fty, i32) where Op: MathOp, { fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { - let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0); - let range1 = int_range(ctx, GeneratorKind::Extensive, 1); - let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1); + let max_steps0 = iteration_count(ctx, 0); + let range1 = int_range(ctx, 1); + let max_steps1 = iteration_count(ctx, 1); match value_count::() { Some(count0) if count0 <= max_steps0 => { let (iter1, steps1) = linear_ints(range1, max_steps1); @@ -224,10 +218,7 @@ macro_rules! 
impl_extensive_input { (EitherIter::A(iter), count0.checked_mul(steps1).unwrap()) } _ => { - let start = <$fty>::NEG_INFINITY; - let end = <$fty>::INFINITY; - - let (iter0, steps0) = logspace_steps::(start, end, max_steps0); + let (iter0, steps0) = logspace_steps::(ctx, 0, max_steps0); let (iter1, steps1) = linear_ints(range1, max_steps1); let iter = iter0.flat_map(move |first| { @@ -244,11 +235,11 @@ macro_rules! impl_extensive_input { } #[cfg(f16_enabled)] -impl_extensive_input!(f16); -impl_extensive_input!(f32); -impl_extensive_input!(f64); +impl_spaced_input!(f16); +impl_spaced_input!(f32); +impl_spaced_input!(f64); #[cfg(f128_enabled)] -impl_extensive_input!(f128); +impl_spaced_input!(f128); /// Create a test case iterator for extensive inputs. Also returns the total test case count. pub fn get_test_cases( @@ -256,7 +247,7 @@ pub fn get_test_cases( ) -> (impl Iterator + Send + use<'_, Op>, u64) where Op: MathOp, - Op::RustArgs: ExtensiveInput, + Op::RustArgs: SpacedInput, { Op::RustArgs::get_cases(ctx) } diff --git a/libm/crates/libm-test/src/run_cfg.rs b/libm/crates/libm-test/src/run_cfg.rs index 4a52091fe..6763de8bc 100644 --- a/libm/crates/libm-test/src/run_cfg.rs +++ b/libm/crates/libm-test/src/run_cfg.rs @@ -39,11 +39,12 @@ pub struct CheckCtx { pub base_name_str: &'static str, /// Source of truth for tests. pub basis: CheckBasis, + pub gen_kind: GeneratorKind, } impl CheckCtx { /// Create a new check context, using the default ULP for the function. - pub fn new(fn_ident: Identifier, basis: CheckBasis) -> Self { + pub fn new(fn_ident: Identifier, basis: CheckBasis, gen_kind: GeneratorKind) -> Self { let mut ret = Self { ulp: 0, fn_ident, @@ -51,10 +52,16 @@ impl CheckCtx { base_name: fn_ident.base_name(), base_name_str: fn_ident.base_name().as_str(), basis, + gen_kind, }; ret.ulp = crate::default_ulp(&ret); ret } + + /// The number of input arguments for this function. 
+ pub fn input_count(&self) -> usize { + self.fn_ident.math_op().rust_sig.args.len() + } } /// Possible items to test against @@ -66,11 +73,13 @@ pub enum CheckBasis { Mpfr, } -/// The different kinds of generators that provide test input. +/// The different kinds of generators that provide test input, which account for input pattern +/// and quantity. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum GeneratorKind { - Domain, + EdgeCases, Extensive, + QuickSpaced, Random, } @@ -155,7 +164,7 @@ impl TestEnv { } /// The number of iterations to run for a given test. -pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -> u64 { +pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { let t_env = TestEnv::from_env(ctx); // Ideally run 5M tests @@ -185,10 +194,13 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) - // Run fewer random tests than domain tests. let random_iter_count = domain_iter_count / 100; - let mut total_iterations = match gen_kind { - GeneratorKind::Domain => domain_iter_count, + let mut total_iterations = match ctx.gen_kind { + GeneratorKind::QuickSpaced => domain_iter_count, GeneratorKind::Random => random_iter_count, GeneratorKind::Extensive => *EXTENSIVE_MAX_ITERATIONS, + GeneratorKind::EdgeCases => { + unimplemented!("edge case tests shoudn't need `iteration_count`") + } }; // FMA has a huge domain but is reasonably fast to run, so increase iterations. 
@@ -213,16 +225,18 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) - }; let total = ntests.pow(t_env.input_count.try_into().unwrap()); - let seed_msg = match gen_kind { - GeneratorKind::Domain | GeneratorKind::Extensive => String::new(), + let seed_msg = match ctx.gen_kind { + GeneratorKind::QuickSpaced | GeneratorKind::Extensive => String::new(), GeneratorKind::Random => { format!(" using `{SEED_ENV}={}`", str::from_utf8(SEED.as_slice()).unwrap()) } + GeneratorKind::EdgeCases => unreachable!(), }; test_log(&format!( "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {ntests} iterations \ ({total} total){seed_msg}", + gen_kind = ctx.gen_kind, basis = ctx.basis, fn_ident = ctx.fn_ident, arg = argnum + 1, @@ -233,7 +247,7 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) - } /// Some tests require that an integer be kept within reasonable limits; generate that here. -pub fn int_range(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -> RangeInclusive { +pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive { let t_env = TestEnv::from_env(ctx); if !matches!(ctx.base_name, BaseName::Jn | BaseName::Yn) { @@ -252,22 +266,42 @@ pub fn int_range(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -> Rang let extensive_range = (-0xfff)..=0xfffff; - match gen_kind { + match ctx.gen_kind { GeneratorKind::Extensive => extensive_range, - GeneratorKind::Domain | GeneratorKind::Random => non_extensive_range, + GeneratorKind::QuickSpaced | GeneratorKind::Random => non_extensive_range, + GeneratorKind::EdgeCases => extensive_range, } } /// For domain tests, limit how many asymptotes or specified check points we test. 
pub fn check_point_count(ctx: &CheckCtx) -> usize { + assert_eq!( + ctx.gen_kind, + GeneratorKind::EdgeCases, + "check_point_count is intended for edge case tests" + ); let t_env = TestEnv::from_env(ctx); if t_env.slow_platform || !cfg!(optimizations_enabled) { 4 } else { 10 } } /// When validating points of interest (e.g. asymptotes, inflection points, extremes), also check /// this many surrounding values. -pub fn check_near_count(_ctx: &CheckCtx) -> u64 { - if cfg!(optimizations_enabled) { 100 } else { 10 } +pub fn check_near_count(ctx: &CheckCtx) -> u64 { + assert_eq!( + ctx.gen_kind, + GeneratorKind::EdgeCases, + "check_near_count is intended for edge case tests" + ); + if cfg!(optimizations_enabled) { + // Taper based on the number of inputs. + match ctx.input_count() { + 1 | 2 => 100, + 3 => 50, + x => panic!("unexpected argument count {x}"), + } + } else { + 10 + } } /// Check whether extensive actions should be run or skipped. diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 836c425a5..f540a0b15 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -9,150 +9,78 @@ // There are some targets we can't build musl for #![cfg(feature = "build-musl")] -use libm_test::domain::HasDomain; -use libm_test::gen::random::RandomInput; -use libm_test::gen::{domain_logspace, edge_cases, random}; -use libm_test::{CheckBasis, CheckCtx, CheckOutput, MathOp, TupleCall}; +use libm_test::gen::{edge_cases, random, spaced}; +use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall}; -macro_rules! musl_rand_tests { - ( - fn_name: $fn_name:ident, - attrs: [$($attr:meta),*], - ) => { - paste::paste! 
{ - #[test] - $(#[$attr])* - fn [< musl_random_ $fn_name >]() { - test_one_random::(musl_math_sys::$fn_name); - } - } - }; -} - -fn test_one_random(musl_fn: Op::CFn) -where - Op: MathOp, - Op::RustArgs: RandomInput, -{ - let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl); - let cases = random::get_test_cases::(&ctx); +const BASIS: CheckBasis = CheckBasis::Musl; +fn musl_runner( + ctx: &CheckCtx, + cases: impl Iterator, + musl_fn: Op::CFn, +) { for input in cases { let musl_res = input.call(musl_fn); let crate_res = input.call(Op::ROUTINE); - crate_res.validate(musl_res, input, &ctx).unwrap(); + crate_res.validate(musl_res, input, ctx).unwrap(); } } -libm_macros::for_each_function! { - callback: musl_rand_tests, - // Musl does not support `f16` and `f128` on all platforms. - skip: [ - copysignf128, - copysignf16, - fabsf128, - fabsf16, - fdimf128, - fdimf16, - truncf128, - truncf16, - ], - attributes: [ - #[cfg_attr(x86_no_sse, ignore)] // FIXME(correctness): wrong result on i586 - [exp10, exp10f, exp2, exp2f, rint] - ], -} - /// Test against musl with generators from a domain. -macro_rules! musl_domain_tests { +macro_rules! musl_tests { ( fn_name: $fn_name:ident, attrs: [$($attr:meta),*], ) => { paste::paste! 
{ + #[test] + $(#[$attr])* + fn [< musl_random_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Random); + let cases = random::get_test_cases::<::RustArgs>(&ctx); + musl_runner::(&ctx, cases, musl_math_sys::$fn_name); + } + #[test] $(#[$attr])* fn [< musl_edge_case_ $fn_name >]() { type Op = libm_test::op::$fn_name::Routine; - domain_test_runner::( - edge_cases::get_test_cases::, - musl_math_sys::$fn_name, - ); + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::EdgeCases); + let cases = edge_cases::get_test_cases::(&ctx); + musl_runner::(&ctx, cases, musl_math_sys::$fn_name); } #[test] $(#[$attr])* - fn [< musl_logspace_ $fn_name >]() { + fn [< musl_quickspace_ $fn_name >]() { type Op = libm_test::op::$fn_name::Routine; - domain_test_runner::( - domain_logspace::get_test_cases::, - musl_math_sys::$fn_name, - ); + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::QuickSpaced); + let cases = spaced::get_test_cases::(&ctx).0; + musl_runner::(&ctx, cases, musl_math_sys::$fn_name); } } }; } -/// Test a single routine against domaine-aware inputs. -fn domain_test_runner(gen: impl FnOnce(&CheckCtx) -> I, musl_fn: Op::CFn) -where - Op: MathOp, - Op: HasDomain, - I: Iterator, -{ - let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl); - let cases = gen(&ctx); - - for input in cases { - let musl_res = input.call(musl_fn); - let crate_res = input.call(Op::ROUTINE); - - crate_res.validate(musl_res, input, &ctx).unwrap(); - } -} - libm_macros::for_each_function! 
{ - callback: musl_domain_tests, + callback: musl_tests, attributes: [], skip: [ - // Functions with multiple inputs - atan2, - atan2f, - copysign, - copysignf, - copysignf16, - copysignf128, - fdim, - fdimf, - fma, - fmaf, - fmax, - fmaxf, - fmin, - fminf, - fmod, - fmodf, - hypot, - hypotf, + // TODO integer inputs jn, jnf, ldexp, ldexpf, - nextafter, - nextafterf, - pow, - powf, - remainder, - remainderf, - remquo, - remquof, scalbn, scalbnf, yn, ynf, // Not provided by musl + copysignf128, + copysignf16, fabsf128, fabsf16, fdimf128, diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 123abfdaf..761ca1f85 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -2,151 +2,69 @@ #![cfg(feature = "build-mpfr")] -use libm_test::domain::HasDomain; -use libm_test::gen::random::RandomInput; -use libm_test::gen::{domain_logspace, edge_cases, random}; +use libm_test::gen::{edge_cases, random, spaced}; use libm_test::mpfloat::MpOp; -use libm_test::{CheckBasis, CheckCtx, CheckOutput, MathOp, OpFTy, OpRustFn, OpRustRet, TupleCall}; +use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall}; -/// Test against MPFR with random inputs. -macro_rules! mp_rand_tests { - ( - fn_name: $fn_name:ident, - attrs: [$($attr:meta),*], - ) => { - paste::paste! 
{ - #[test] - $(#[$attr])* - fn [< mp_random_ $fn_name >]() { - test_one_random::(); - } - } - }; -} +const BASIS: CheckBasis = CheckBasis::Mpfr; -/// Test a single routine with random inputs -fn test_one_random() -where - Op: MathOp + MpOp, - Op::RustArgs: RandomInput, -{ +fn mp_runner(ctx: &CheckCtx, cases: impl Iterator) { let mut mp_vals = Op::new_mp(); - let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr); - let cases = random::get_test_cases::(&ctx); - for input in cases { let mp_res = Op::run(&mut mp_vals, input); let crate_res = input.call(Op::ROUTINE); - crate_res.validate(mp_res, input, &ctx).unwrap(); + crate_res.validate(mp_res, input, ctx).unwrap(); } } -libm_macros::for_each_function! { - callback: mp_rand_tests, - attributes: [ - // Also an assertion failure on i686: at `MPFR_ASSERTN (! mpfr_erangeflag_p ())` - #[ignore = "large values are infeasible in MPFR"] - [jn, jnf, yn, ynf], - ], - skip: [ - // FIXME: test needed, see - // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392 - nextafter, - nextafterf, - ], -} - -/// Test against MPFR with generators from a domain. -macro_rules! mp_domain_tests { +macro_rules! mp_tests { ( fn_name: $fn_name:ident, attrs: [$($attr:meta),*], ) => { paste::paste! 
{ + #[test] + $(#[$attr])* + fn [< mp_random_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Random); + let cases = random::get_test_cases::<::RustArgs>(&ctx); + mp_runner::(&ctx, cases); + } + #[test] $(#[$attr])* fn [< mp_edge_case_ $fn_name >]() { type Op = libm_test::op::$fn_name::Routine; - domain_test_runner::(edge_cases::get_test_cases::); + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::EdgeCases); + let cases = edge_cases::get_test_cases::(&ctx); + mp_runner::(&ctx, cases); } #[test] $(#[$attr])* - fn [< mp_logspace_ $fn_name >]() { + fn [< mp_quickspace_ $fn_name >]() { type Op = libm_test::op::$fn_name::Routine; - domain_test_runner::(domain_logspace::get_test_cases::); + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::QuickSpaced); + let cases = spaced::get_test_cases::(&ctx).0; + mp_runner::(&ctx, cases); } } }; } -/// Test a single routine against domaine-aware inputs. -fn domain_test_runner(gen: impl FnOnce(&CheckCtx) -> I) -where - // Complicated generics... - // The operation must take a single float argument (unary only) - Op: MathOp::FTy,)>, - // It must also support multiprecision operations - Op: MpOp, - // And it must have a domain specified - Op: HasDomain, - // The single float argument tuple must be able to call the `RustFn` and return `RustRet` - (OpFTy,): TupleCall, Output = OpRustRet>, - I: Iterator, -{ - let mut mp_vals = Op::new_mp(); - let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr); - let cases = gen(&ctx); - - for input in cases { - let mp_res = Op::run(&mut mp_vals, input); - let crate_res = input.call(Op::ROUTINE); - - crate_res.validate(mp_res, input, &ctx).unwrap(); - } -} - libm_macros::for_each_function! { - callback: mp_domain_tests, - attributes: [], + callback: mp_tests, + attributes: [ + // Also an assertion failure on i686: at `MPFR_ASSERTN (! 
mpfr_erangeflag_p ())` + #[ignore = "large values are infeasible in MPFR"] + [jn, jnf, yn, ynf], + ], skip: [ - // Functions with multiple inputs - atan2, - atan2f, - copysign, - copysignf, - copysignf16, - copysignf128, - fdim, - fdimf, - fdimf16, - fdimf128, - fma, - fmaf, - fmax, - fmaxf, - fmin, - fminf, - fmod, - fmodf, - hypot, - hypotf, - jn, - jnf, - ldexp, - ldexpf, + // FIXME: test needed, see + // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392 nextafter, nextafterf, - pow, - powf, - remainder, - remainderf, - remquo, - remquof, - scalbn, - scalbnf, - yn, - ynf, ], } diff --git a/libm/crates/libm-test/tests/z_extensive/run.rs b/libm/crates/libm-test/tests/z_extensive/run.rs index 7ee967851..a323c9110 100644 --- a/libm/crates/libm-test/tests/z_extensive/run.rs +++ b/libm/crates/libm-test/tests/z_extensive/run.rs @@ -6,13 +6,18 @@ use std::sync::atomic::{AtomicU64, Ordering}; use std::time::Duration; use indicatif::{ProgressBar, ProgressStyle}; -use libm_test::gen::extensive::{self, ExtensiveInput}; +use libm_test::gen::spaced; use libm_test::mpfloat::MpOp; use libm_test::{ - CheckBasis, CheckCtx, CheckOutput, MathOp, TestResult, TupleCall, skip_extensive_test, + CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TestResult, TupleCall, + skip_extensive_test, }; use libtest_mimic::{Arguments, Trial}; use rayon::prelude::*; +use spaced::SpacedInput; + +const BASIS: CheckBasis = CheckBasis::Mpfr; +const GEN_KIND: GeneratorKind = GeneratorKind::Extensive; /// Run the extensive test suite. 
pub fn run() { @@ -62,10 +67,10 @@ fn register_all_tests() -> Vec { fn register_single_test(all: &mut Vec) where Op: MathOp + MpOp, - Op::RustArgs: ExtensiveInput + Send, + Op::RustArgs: SpacedInput + Send, { let test_name = format!("mp_extensive_{}", Op::NAME); - let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr); + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GEN_KIND); let skip = skip_extensive_test(&ctx); let runner = move || { @@ -73,7 +78,7 @@ where panic!("extensive tests should be run with --release"); } - let res = run_single_test::(); + let res = run_single_test::(&ctx); let e = match res { Ok(()) => return Ok(()), Err(e) => e, @@ -91,18 +96,17 @@ where } /// Test runner for a signle routine. -fn run_single_test() -> TestResult +fn run_single_test(ctx: &CheckCtx) -> TestResult where Op: MathOp + MpOp, - Op::RustArgs: ExtensiveInput + Send, + Op::RustArgs: SpacedInput + Send, { // Small delay before printing anything so other output from the runner has a chance to flush. std::thread::sleep(Duration::from_millis(500)); eprintln!(); let completed = AtomicU64::new(0); - let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr); - let (ref mut cases, total) = extensive::get_test_cases::(&ctx); + let (ref mut cases, total) = spaced::get_test_cases::(ctx); let pb = Progress::new(Op::NAME, total); let test_single_chunk = |mp_vals: &mut Op::MpTy, input_vec: Vec| -> TestResult { @@ -110,7 +114,7 @@ where // Test the input. 
let mp_res = Op::run(mp_vals, input); let crate_res = input.call(Op::ROUTINE); - crate_res.validate(mp_res, input, &ctx)?; + crate_res.validate(mp_res, input, ctx)?; let completed = completed.fetch_add(1, Ordering::Relaxed) + 1; pb.update(completed, input); From 6c20ebf2529ee70c45adec83cec4b161026bac0f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 21:10:12 +0000 Subject: [PATCH 1083/1459] Adjust precision and add xfails based on new tests --- libm/crates/libm-test/src/precision.rs | 149 +++++++++++++++++++++++-- libm/src/math/support/float_traits.rs | 7 +- 2 files changed, 143 insertions(+), 13 deletions(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 0b3fe89be..3cb5e420f 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -102,6 +102,15 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { } } + if cfg!(target_arch = "x86") { + match ctx.fn_ident { + // Input `fma(0.999999999999999, 1.0000000000000013, 0.0) = 1.0000000000000002` is + // incorrect on i586 and i686. + Id::Fma => ulp = 1, + _ => (), + } + } + // In some cases, our implementation is less accurate than musl on i586. 
if cfg!(x86_no_sse) { match ctx.fn_ident { @@ -370,59 +379,129 @@ fn maybe_check_nan_bits(actual: F, expected: F, ctx: &CheckCtx) -> Opt impl MaybeOverride<(f16, f16)> for SpecialCase { fn check_float( input: (f16, f16), - _actual: F, + actual: F, expected: F, _ulp: &mut u32, ctx: &CheckCtx, ) -> Option { - maybe_skip_binop_nan(input, expected, ctx) + binop_common(input, actual, expected, ctx) } } impl MaybeOverride<(f32, f32)> for SpecialCase { fn check_float( input: (f32, f32), - _actual: F, + actual: F, expected: F, _ulp: &mut u32, ctx: &CheckCtx, ) -> Option { - maybe_skip_binop_nan(input, expected, ctx) + if ctx.base_name == BaseName::Fmin + && input.0.biteq(f32::NEG_ZERO) + && input.1.biteq(f32::ZERO) + && expected.biteq(F::NEG_ZERO) + && actual.biteq(F::ZERO) + { + return XFAIL; + } + + binop_common(input, actual, expected, ctx) + } + + fn check_int( + _input: (f32, f32), + actual: I, + expected: I, + ctx: &CheckCtx, + ) -> Option { + remquo_common(actual, expected, ctx) } } impl MaybeOverride<(f64, f64)> for SpecialCase { fn check_float( input: (f64, f64), - _actual: F, + actual: F, expected: F, _ulp: &mut u32, ctx: &CheckCtx, ) -> Option { - maybe_skip_binop_nan(input, expected, ctx) + if ctx.base_name == BaseName::Fmin + && input.0.biteq(f64::NEG_ZERO) + && input.1.biteq(f64::ZERO) + && expected.biteq(F::ZERO) + && actual.biteq(F::NEG_ZERO) + { + return XFAIL; + } + + binop_common(input, actual, expected, ctx) + } + + fn check_int( + _input: (f64, f64), + actual: I, + expected: I, + ctx: &CheckCtx, + ) -> Option { + remquo_common(actual, expected, ctx) + } +} + +fn remquo_common(actual: I, expected: I, ctx: &CheckCtx) -> Option { + // FIXME: Our MPFR implementation disagrees with musl and may need to be updated. 
+ if ctx.basis == CheckBasis::Mpfr + && ctx.base_name == BaseName::Remquo + && expected == I::MIN + && actual == I::ZERO + { + return XFAIL; } + + None } #[cfg(f128_enabled)] impl MaybeOverride<(f128, f128)> for SpecialCase { fn check_float( input: (f128, f128), - _actual: F, + actual: F, expected: F, _ulp: &mut u32, ctx: &CheckCtx, ) -> Option { - maybe_skip_binop_nan(input, expected, ctx) + binop_common(input, actual, expected, ctx) } } -/// Musl propagates NaNs if one is provided as the input, but we return the other input. // F1 and F2 are always the same type, this is just to please generics -fn maybe_skip_binop_nan( +fn binop_common( input: (F1, F1), + actual: F2, expected: F2, ctx: &CheckCtx, ) -> Option { + /* FIXME(#439): we do not compare signed zeros */ + + if ctx.base_name == BaseName::Fmin + && input.0.biteq(F1::NEG_ZERO) + && input.1.biteq(F1::ZERO) + && expected.biteq(F2::NEG_ZERO) + && actual.biteq(F2::ZERO) + { + return XFAIL; + } + + if ctx.base_name == BaseName::Fmax + && input.0.biteq(F1::NEG_ZERO) + && input.1.biteq(F1::ZERO) + && expected.biteq(F2::ZERO) + && actual.biteq(F2::NEG_ZERO) + { + return XFAIL; + } + + // Musl propagates NaNs if one is provided as the input, but we return the other input. 
match (&ctx.basis, ctx.base_name) { (Musl, BaseName::Fmin | BaseName::Fmax) if (input.0.is_nan() || input.1.is_nan()) && expected.is_nan() => @@ -509,7 +588,53 @@ fn bessel_prec_dropoff( None } -impl MaybeOverride<(f32, f32, f32)> for SpecialCase {} -impl MaybeOverride<(f64, f64, f64)> for SpecialCase {} impl MaybeOverride<(f32, i32)> for SpecialCase {} impl MaybeOverride<(f64, i32)> for SpecialCase {} + +impl MaybeOverride<(f32, f32, f32)> for SpecialCase { + fn check_float( + input: (f32, f32, f32), + actual: F, + expected: F, + _ulp: &mut u32, + ctx: &CheckCtx, + ) -> Option { + ternop_common(input, actual, expected, ctx) + } +} +impl MaybeOverride<(f64, f64, f64)> for SpecialCase { + fn check_float( + input: (f64, f64, f64), + actual: F, + expected: F, + _ulp: &mut u32, + ctx: &CheckCtx, + ) -> Option { + ternop_common(input, actual, expected, ctx) + } +} + +// F1 and F2 are always the same type, this is just to please generics +fn ternop_common( + input: (F1, F1, F1), + actual: F2, + expected: F2, + ctx: &CheckCtx, +) -> Option { + // FIXME(fma): 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result + // of fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the + // exact result". Our implementation returns the wrong sign: + // fma(5e-324, -5e-324, 0.0) = 0.0 (should be -0.0) + if ctx.base_name == BaseName::Fma + && (input.0.is_sign_negative() ^ input.1.is_sign_negative()) + && input.0 != F1::ZERO + && input.1 != F1::ZERO + && input.2.biteq(F1::ZERO) + && expected.biteq(F2::NEG_ZERO) + && actual.biteq(F2::ZERO) + { + return XFAIL; + } + + None +} diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 3aa0d844a..647f4f5e2 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -93,9 +93,14 @@ pub trait Float: /// Returns true if the value is +inf or -inf. 
fn is_infinite(self) -> bool; - /// Returns true if the sign is negative. + /// Returns true if the sign is negative. Extracts the sign bit regardless of zero or NaN. fn is_sign_negative(self) -> bool; + /// Returns true if the sign is positive. Extracts the sign bit regardless of zero or NaN. + fn is_sign_positive(self) -> bool { + !self.is_sign_negative() + } + /// Returns if `self` is subnormal fn is_subnormal(self) -> bool { (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO From b234447f21cb933e735f211e1f0431c679de393a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 22:48:03 +0000 Subject: [PATCH 1084/1459] Increase the CI timeout --- libm/.github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index 40b67c4c2..99a32a82e 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -14,7 +14,7 @@ env: jobs: test: name: Build and test - timeout-minutes: 25 + timeout-minutes: 40 strategy: fail-fast: false matrix: From bc48f016e6ef16cf3a3981ecb03d30fd9795011c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 16 Jan 2025 07:30:51 +0000 Subject: [PATCH 1085/1459] Provide a way to override iteration count Benchmarks need a way to limit how many iterations get run. Introduce a way to inject this information here. --- libm/crates/libm-test/src/run_cfg.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/libm/crates/libm-test/src/run_cfg.rs b/libm/crates/libm-test/src/run_cfg.rs index 6763de8bc..3e91101f6 100644 --- a/libm/crates/libm-test/src/run_cfg.rs +++ b/libm/crates/libm-test/src/run_cfg.rs @@ -40,6 +40,8 @@ pub struct CheckCtx { /// Source of truth for tests. pub basis: CheckBasis, pub gen_kind: GeneratorKind, /// If specified, this value will override the value returned by [`iteration_count`]. 
+ pub override_iterations: Option, } impl CheckCtx { @@ -53,6 +55,7 @@ impl CheckCtx { base_name_str: fn_ident.base_name().as_str(), basis, gen_kind, + override_iterations: None, }; ret.ulp = crate::default_ulp(&ret); ret @@ -62,6 +65,10 @@ impl CheckCtx { pub fn input_count(&self) -> usize { self.fn_ident.math_op().rust_sig.args.len() } + + pub fn override_iterations(&mut self, count: u64) { + self.override_iterations = Some(count) + } } /// Possible items to test against @@ -71,6 +78,8 @@ pub enum CheckBasis { Musl, /// Check against infinite precision (MPFR). Mpfr, + /// Benchmarks or other times when this is not relevant. + None, } /// The different kinds of generators that provide test input, which account for input pattern @@ -216,6 +225,12 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { total_iterations = 800; } + let mut overridden = false; + if let Some(count) = ctx.override_iterations { + total_iterations = count; + overridden = true; + } + // Adjust for the number of inputs let ntests = match t_env.input_count { 1 => total_iterations, @@ -223,6 +238,7 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { 3 => (total_iterations as f64).cbrt().ceil() as u64, _ => panic!("test has more than three arguments"), }; + let total = ntests.pow(t_env.input_count.try_into().unwrap()); let seed_msg = match ctx.gen_kind { @@ -235,12 +251,13 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { test_log(&format!( "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {ntests} iterations \ - ({total} total){seed_msg}", + ({total} total){seed_msg}{omsg}", gen_kind = ctx.gen_kind, basis = ctx.basis, fn_ident = ctx.fn_ident, arg = argnum + 1, args = t_env.input_count, + omsg = if overridden { " (overridden)" } else { "" } )); ntests From ea00a144727678fc9594cf4fa68f33a90830df43 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 16 Jan 2025 07:30:38 +0000 Subject: [PATCH 1086/1459] Add benchmarks using iai-callgrind Running 
walltime benchmarks in CI is notoriously unstable. Introduce benchmarks that instead use instruction count and other more reproducible metrics, using `iai-callgrind` [1], which we are able to run in CI with a high degree of reproducibility. Inputs to this benchmark are a logspace sweep, which gives an approximation for real-world use, but may fail to indicate outlier cases. [1]: https://github.com/iai-callgrind/iai-callgrind --- libm/Cargo.toml | 4 + libm/crates/libm-test/Cargo.toml | 9 ++ libm/crates/libm-test/benches/icount.rs | 175 ++++++++++++++++++++++++ libm/crates/libm-test/src/lib.rs | 3 +- libm/crates/libm-test/src/op.rs | 2 + 5 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 libm/crates/libm-test/benches/icount.rs diff --git a/libm/Cargo.toml b/libm/Cargo.toml index f84f3eac6..18d89997d 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -73,3 +73,7 @@ debug-assertions = true inherits = "release" lto = "fat" overflow-checks = true + +[profile.bench] +# Required for iai-callgrind +debug = true diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index d3f18ab3e..3a1ba8796 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -20,6 +20,9 @@ build-musl = ["dep:musl-math-sys"] # Enable report generation without bringing in more dependencies by default benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] +# Enable icount benchmarks (requires iai-callgrind and valgrind) +icount = ["dep:iai-callgrind"] + # Run with a reduced set of benchmarks, such as for CI short-benchmarks = [] @@ -27,6 +30,7 @@ short-benchmarks = [] anyhow = "1.0.90" az = { version = "1.2.1", optional = true } gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false, features = ["mpfr"] } +iai-callgrind = { version = "0.14.0", optional = true } indicatif = { version = "0.17.9", default-features = false } libm = { path = "../..", features = ["unstable-public-internals"] } 
libm-macros = { path = "../libm-macros" } @@ -48,6 +52,11 @@ rand = { version = "0.8.5", optional = true } criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] } libtest-mimic = "0.8.1" +[[bench]] +name = "icount" +harness = false +required-features = ["icount"] + [[bench]] name = "random" harness = false diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs new file mode 100644 index 000000000..3a66249e8 --- /dev/null +++ b/libm/crates/libm-test/benches/icount.rs @@ -0,0 +1,175 @@ +//! Benchmarks that use `iai-cachegrind` to be reasonably CI-stable. + +use std::hint::black_box; + +use iai_callgrind::{library_benchmark, library_benchmark_group, main}; +use libm_test::gen::spaced; +use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op}; + +const BENCH_ITER_ITEMS: u64 = 500; + +macro_rules! icount_benches { + ( + fn_name: $fn_name:ident, + attrs: [$($_attr:meta),*], + ) => { + paste::paste! { + // Construct benchmark inputs from the logspace generator. + fn [< setup_ $fn_name >]() -> Vec> { + type Op = op::$fn_name::Routine; + let mut ctx = CheckCtx::new( + Op::IDENTIFIER, + CheckBasis::None, + GeneratorKind::QuickSpaced + ); + ctx.override_iterations(BENCH_ITER_ITEMS); + let ret = spaced::get_test_cases::(&ctx).0.collect::>(); + println!("operation {}, {} steps", Op::NAME, ret.len()); + ret + } + + // Run benchmarks with the above inputs. + #[library_benchmark] + #[bench::logspace([< setup_ $fn_name >]())] + fn [< icount_bench_ $fn_name >](cases: Vec>) { + type Op = op::$fn_name::Routine; + let f = black_box(Op::ROUTINE); + for input in cases.iter().copied() { + input.call(f); + } + } + + library_benchmark_group!( + name = [< icount_bench_ $fn_name _group >]; + benchmarks = [< icount_bench_ $fn_name >] + ); + } + }; +} + +libm_macros::for_each_function! 
{ + callback: icount_benches, +} + +main!( + library_benchmark_groups = icount_bench_acos_group, + icount_bench_acosf_group, + icount_bench_acosh_group, + icount_bench_acoshf_group, + icount_bench_asin_group, + icount_bench_asinf_group, + icount_bench_asinh_group, + icount_bench_asinhf_group, + icount_bench_atan2_group, + icount_bench_atan2f_group, + icount_bench_atan_group, + icount_bench_atanf_group, + icount_bench_atanh_group, + icount_bench_atanhf_group, + icount_bench_cbrt_group, + icount_bench_cbrtf_group, + icount_bench_ceil_group, + icount_bench_ceilf_group, + icount_bench_copysign_group, + icount_bench_copysignf128_group, + icount_bench_copysignf16_group, + icount_bench_copysignf_group, + icount_bench_cos_group, + icount_bench_cosf_group, + icount_bench_cosh_group, + icount_bench_coshf_group, + icount_bench_erf_group, + icount_bench_erfc_group, + icount_bench_erfcf_group, + icount_bench_erff_group, + icount_bench_exp10_group, + icount_bench_exp10f_group, + icount_bench_exp2_group, + icount_bench_exp2f_group, + icount_bench_exp_group, + icount_bench_expf_group, + icount_bench_expm1_group, + icount_bench_expm1f_group, + icount_bench_fabs_group, + icount_bench_fabsf128_group, + icount_bench_fabsf16_group, + icount_bench_fabsf_group, + icount_bench_fdim_group, + icount_bench_fdimf128_group, + icount_bench_fdimf16_group, + icount_bench_fdimf_group, + icount_bench_floor_group, + icount_bench_floorf_group, + icount_bench_fma_group, + icount_bench_fmaf_group, + icount_bench_fmax_group, + icount_bench_fmaxf_group, + icount_bench_fmin_group, + icount_bench_fminf_group, + icount_bench_fmod_group, + icount_bench_fmodf_group, + icount_bench_frexp_group, + icount_bench_frexpf_group, + icount_bench_hypot_group, + icount_bench_hypotf_group, + icount_bench_ilogb_group, + icount_bench_ilogbf_group, + icount_bench_j0_group, + icount_bench_j0f_group, + icount_bench_j1_group, + icount_bench_j1f_group, + icount_bench_jn_group, + icount_bench_jnf_group, + 
icount_bench_ldexp_group, + icount_bench_ldexpf_group, + icount_bench_lgamma_group, + icount_bench_lgamma_r_group, + icount_bench_lgammaf_group, + icount_bench_lgammaf_r_group, + icount_bench_log10_group, + icount_bench_log10f_group, + icount_bench_log1p_group, + icount_bench_log1pf_group, + icount_bench_log2_group, + icount_bench_log2f_group, + icount_bench_log_group, + icount_bench_logf_group, + icount_bench_modf_group, + icount_bench_modff_group, + icount_bench_nextafter_group, + icount_bench_nextafterf_group, + icount_bench_pow_group, + icount_bench_powf_group, + icount_bench_remainder_group, + icount_bench_remainderf_group, + icount_bench_remquo_group, + icount_bench_remquof_group, + icount_bench_rint_group, + icount_bench_rintf_group, + icount_bench_round_group, + icount_bench_roundf_group, + icount_bench_scalbn_group, + icount_bench_scalbnf_group, + icount_bench_sin_group, + icount_bench_sinf_group, + icount_bench_sinh_group, + icount_bench_sinhf_group, + icount_bench_sqrt_group, + icount_bench_sqrtf_group, + icount_bench_tan_group, + icount_bench_tanf_group, + icount_bench_tanh_group, + icount_bench_tanhf_group, + icount_bench_tgamma_group, + icount_bench_tgammaf_group, + icount_bench_trunc_group, + icount_bench_truncf128_group, + icount_bench_truncf16_group, + icount_bench_truncf_group, + icount_bench_y0_group, + icount_bench_y0f_group, + icount_bench_y1_group, + icount_bench_y1f_group, + icount_bench_yn_group, + icount_bench_ynf_group, +); diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index cb89f1c8b..b90423c1b 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -24,7 +24,8 @@ pub use f8_impl::f8; pub use libm::support::{Float, Int, IntTy, MinInt}; pub use num::{FloatExt, linear_ints, logspace}; pub use op::{ - BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustFn, OpRustRet, Ty, + BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustArgs, OpRustFn, 
OpRustRet, + Ty, }; pub use precision::{MaybeOverride, SpecialCase, default_ulp}; use run_cfg::EXTENSIVE_MAX_ITERATIONS; diff --git a/libm/crates/libm-test/src/op.rs b/libm/crates/libm-test/src/op.rs index 8329d3424..239c9a3e1 100644 --- a/libm/crates/libm-test/src/op.rs +++ b/libm/crates/libm-test/src/op.rs @@ -100,6 +100,8 @@ pub type OpCFn = ::CFn; pub type OpCRet = ::CRet; /// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types). pub type OpRustFn = ::RustFn; +/// Access the associated `RustArgs` type from an op (helper to avoid ambiguous associated types). +pub type OpRustArgs = ::RustArgs; /// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types). pub type OpRustRet = ::RustRet; From b5699dd1ef318f9f22d78b67416fd3625e0915af Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 16 Jan 2025 07:31:34 +0000 Subject: [PATCH 1087/1459] Run iai-callgrind benchmarks in CI Add support in `ci-util.py` for finding the most recent baseline and downloading it, which new tests can then be compared against. Arbitrarily select nightly-2025-01-16 as the rustc version to pin to in benchmarks. 
--- libm/.github/workflows/main.yaml | 58 ++++++++++- libm/ci/ci-util.py | 159 +++++++++++++++++++++++++++++-- 2 files changed, 207 insertions(+), 10 deletions(-) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index 99a32a82e..9face9311 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -10,6 +10,7 @@ env: RUSTDOCFLAGS: -Dwarnings RUSTFLAGS: -Dwarnings RUST_BACKTRACE: full + BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducable results jobs: test: @@ -147,19 +148,70 @@ jobs: benchmarks: name: Benchmarks runs-on: ubuntu-24.04 + timeout-minutes: 20 steps: - uses: actions/checkout@master - - name: Install Rust - run: rustup update nightly --no-self-update && rustup default nightly + - uses: taiki-e/install-action@cargo-binstall + + - name: Set up dependencies + run: | + rustup update "$BENCHMARK_RUSTC" --no-self-update + rustup default "$BENCHMARK_RUSTC" + # Install the version of iai-callgrind-runner that is specified in Cargo.toml + iai_version="$(cargo metadata --format-version=1 --features icount | + jq -r '.packages[] | select(.name == "iai-callgrind").version')" + cargo binstall -y iai-callgrind-runner --version "$iai_version" + sudo apt-get install valgrind + - uses: Swatinem/rust-cache@v2 - name: Download musl source run: ./ci/download-musl.sh - - run: | + + - name: Run icount benchmarks + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -eux + iai_home="iai-home" + # Download the baseline from master + ./ci/ci-util.py locate-baseline --download --extract + + # Run iai-callgrind benchmarks + cargo bench --no-default-features \ + --features unstable,unstable-float,icount \ + --bench icount \ + -- \ + --save-baseline=default \ + --home "$(pwd)/$iai_home" \ + --regression='ir=5.0' \ + --save-summary + # NB: iai-callgrind should exit on error but does not, so we inspect the sumary + # for errors. 
See https://github.com/iai-callgrind/iai-callgrind/issues/337 + ./ci/ci-util.py check-regressions "$iai_home" + + # Name and tar the new baseline + name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}" + echo "BASELINE_NAME=$name" >> "$GITHUB_ENV" + tar cJf "$name.tar.xz" "$iai_home" + + - name: Upload the benchmark baseline + uses: actions/upload-artifact@v4 + with: + name: ${{ env.BASELINE_NAME }} + path: ${{ env.BASELINE_NAME }}.tar.xz + + - name: Run wall time benchmarks + run: | # Always use the same seed for benchmarks. Ideally we should switch to a # non-random generator. export LIBM_SEED=benchesbenchesbenchesbencheswoo! cargo bench --all --features libm-test/short-benchmarks,libm-test/build-musl + - name: Print test logs if available + if: always() + run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi + shell: bash + msrv: name: Check MSRV runs-on: ubuntu-24.04 diff --git a/libm/ci/ci-util.py b/libm/ci/ci-util.py index 733ec26fa..1ec69b002 100755 --- a/libm/ci/ci-util.py +++ b/libm/ci/ci-util.py @@ -9,6 +9,7 @@ import subprocess as sp import sys from dataclasses import dataclass +from glob import glob, iglob from inspect import cleandoc from os import getenv from pathlib import Path @@ -18,16 +19,33 @@ """ usage: - ./ci/ci-util.py + ./ci/ci-util.py [flags] - SUBCOMMAND: - generate-matrix Calculate a matrix of which functions had source change, - print that as JSON object. + COMMAND: + generate-matrix + Calculate a matrix of which functions had source change, print that as + a JSON object. + + locate-baseline [--download] [--extract] + Locate the most recent benchmark baseline available in CI and, if flags + specify, download and extract it. Never exits with nonzero status if + downloading fails. + + Note that `--extract` will overwrite files in `iai-home`. + + check-regressions [iai-home] + Check `iai-home` (or `iai-home` if unspecified) for `summary.json` + files and see if there are any regressions. 
This is used as a workaround + for `iai-callgrind` not exiting with error status; see + . """ ) REPO_ROOT = Path(__file__).parent.parent GIT = ["git", "-C", REPO_ROOT] +DEFAULT_BRANCH = "master" +WORKFLOW_NAME = "CI" # Workflow that generates the benchmark artifacts +ARTIFACT_GLOB = "baseline-icount*" # Don't run exhaustive tests if these files change, even if they contaiin a function # definition. @@ -40,6 +58,11 @@ TYPES = ["f16", "f32", "f64", "f128"] +def eprint(*args, **kwargs): + """Print to stderr.""" + print(*args, file=sys.stderr, **kwargs) + + class FunctionDef(TypedDict): """Type for an entry in `function-definitions.json`""" @@ -145,9 +168,125 @@ def make_workflow_output(self) -> str: return output -def eprint(*args, **kwargs): - """Print to stderr.""" - print(*args, file=sys.stderr, **kwargs) +def locate_baseline(flags: list[str]) -> None: + """Find the most recent baseline from CI, download it if specified. + + This returns rather than erroring, even if the `gh` commands fail. This is to avoid + erroring in CI if the baseline is unavailable (artifact time limit exceeded, first + run on the branch, etc). 
+ """ + + download = False + extract = False + + while len(flags) > 0: + match flags[0]: + case "--download": + download = True + case "--extract": + extract = True + case _: + eprint(USAGE) + exit(1) + flags = flags[1:] + + if extract and not download: + eprint("cannot extract without downloading") + exit(1) + + try: + # Locate the most recent job to complete with success on our branch + latest_job = sp.check_output( + [ + "gh", + "run", + "list", + "--limit=1", + "--status=success", + f"--branch={DEFAULT_BRANCH}", + "--json=databaseId,url,headSha,conclusion,createdAt," + "status,workflowDatabaseId,workflowName", + f'--jq=select(.[].workflowName == "{WORKFLOW_NAME}")', + ], + text=True, + ) + eprint(f"latest: '{latest_job}'") + except sp.CalledProcessError as e: + eprint(f"failed to run github command: {e}") + return + + try: + latest = json.loads(latest_job)[0] + eprint("latest job: ", json.dumps(latest, indent=4)) + except json.JSONDecodeError as e: + eprint(f"failed to decode json '{latest_job}', {e}") + return + + if not download: + eprint("--download not specified, returning") + return + + job_id = latest.get("databaseId") + if job_id is None: + eprint("skipping download step") + return + + sp.run( + ["gh", "run", "download", str(job_id), f"--pattern={ARTIFACT_GLOB}"], + check=False, + ) + + if not extract: + eprint("skipping extraction step") + return + + # Find the baseline with the most recent timestamp. GH downloads the files to e.g. + # `some-dirname/some-dirname.tar.xz`, so just glob the whole thing together. 
+ candidate_baselines = glob(f"{ARTIFACT_GLOB}/{ARTIFACT_GLOB}") + if len(candidate_baselines) == 0: + eprint("no possible baseline directories found") + return + + candidate_baselines.sort(reverse=True) + baseline_archive = candidate_baselines[0] + eprint(f"extracting {baseline_archive}") + sp.run(["tar", "xJvf", baseline_archive], check=True) + eprint("baseline extracted successfully") + + +def check_iai_regressions(iai_home: str | None | Path): + """Find regressions in iai summary.json files, exit with failure if any are + found. + """ + if iai_home is None: + iai_home = "iai-home" + iai_home = Path(iai_home) + + found_summaries = False + regressions = [] + for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True): + found_summaries = True + with open(iai_home / summary_path, "r") as f: + summary = json.load(f) + + summary_regs = [] + run = summary["callgrind_summary"]["callgrind_run"] + name_entry = {"name": f"{summary["function_name"]}.{summary["id"]}"} + + for segment in run["segments"]: + summary_regs.extend(segment["regressions"]) + + summary_regs.extend(run["total"]["regressions"]) + + regressions.extend(name_entry | reg for reg in summary_regs) + + if not found_summaries: + eprint(f"did not find any summary.json files within {iai_home}") + exit(1) + + if len(regressions) > 0: + eprint("Found regressions:", json.dumps(regressions, indent=4)) + exit(1) def main(): @@ -156,6 +295,12 @@ def main(): ctx = Context() output = ctx.make_workflow_output() print(f"matrix={output}") + case ["locate-baseline", *flags]: + locate_baseline(flags) + case ["check-regressions"]: + check_iai_regressions(None) + case ["check-regressions", iai_home]: + check_iai_regressions(iai_home) case ["--help" | "-h"]: print(USAGE) exit() From 1b891584228ff58f31867770fa7e9c474f624796 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 16 Jan 2025 07:31:07 +0000 Subject: [PATCH 1088/1459] Reduce the warm up and measurement time for `short-benchmarks` The icount 
benchmarks are what we will be relying on in CI more than the existing benchmarks. There isn't much reason to keep these around, but there isn't much point in dropping them either. So, just reduce the runtime. --- libm/crates/libm-test/benches/random.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index dcc7c1aca..888161265 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -151,8 +151,8 @@ pub fn musl_random() { // about the same time as other tests. if cfg!(feature = "short-benchmarks") { criterion = criterion - .warm_up_time(Duration::from_millis(500)) - .measurement_time(Duration::from_millis(1000)); + .warm_up_time(Duration::from_millis(200)) + .measurement_time(Duration::from_millis(600)); } criterion = criterion.configure_from_args(); From b9c35dddfcfb1ef3affc3ed908c52714756d5750 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 16 Jan 2025 09:47:00 +0000 Subject: [PATCH 1089/1459] Add an xfail for recent ynf failures This failed a couple of times recently in CI, once on i686 and once on aarch64-apple: thread 'main' panicked at crates/libm-test/benches/random.rs:76:65: called `Result::unwrap()` on an `Err` value: ynf Caused by: 0: input: (681, 509.90924) (0x000002a9, 0x43fef462) expected: -3.2161271e38 0xff71f45b actual: -inf 0xff800000 1: mismatched infinities thread 'main' panicked at crates/libm-test/benches/random.rs:76:65: called `Result::unwrap()` on an `Err` value: ynf Caused by: 0: input: (132, 50.46604) (0x00000084, 0x4249dd3a) expected: -3.3364996e38 0xff7b02a5 actual: -inf 0xff800000 1: mismatched infinities Add a new override to account for this. 
--- libm/crates/libm-test/src/precision.rs | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 3cb5e420f..9d17ab8cc 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -4,6 +4,7 @@ use core::f32; use CheckBasis::{Mpfr, Musl}; +use libm::support::CastFrom; use {BaseName as Bn, Identifier as Id}; use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult}; @@ -524,7 +525,7 @@ impl MaybeOverride<(i32, f32)> for SpecialCase { ctx: &CheckCtx, ) -> Option { match (&ctx.basis, ctx.base_name) { - (Musl, _) => bessel_prec_dropoff(input, ulp, ctx), + (Musl, _) => bessel_prec_dropoff(input, actual, expected, ulp, ctx), // We return +0.0, MPFR returns -0.0 (Mpfr, BaseName::Jn | BaseName::Yn) @@ -554,7 +555,7 @@ impl MaybeOverride<(i32, f64)> for SpecialCase { ctx: &CheckCtx, ) -> Option { match (&ctx.basis, ctx.base_name) { - (Musl, _) => bessel_prec_dropoff(input, ulp, ctx), + (Musl, _) => bessel_prec_dropoff(input, actual, expected, ulp, ctx), // We return +0.0, MPFR returns -0.0 (Mpfr, BaseName::Jn | BaseName::Yn) @@ -569,8 +570,10 @@ impl MaybeOverride<(i32, f64)> for SpecialCase { } /// Our bessel functions blow up with large N values -fn bessel_prec_dropoff( - input: (i32, F), +fn bessel_prec_dropoff( + input: (i32, F1), + actual: F2, + expected: F2, ulp: &mut u32, ctx: &CheckCtx, ) -> Option { @@ -585,6 +588,17 @@ fn bessel_prec_dropoff( } } + // Values near infinity sometimes get cut off for us. `ynf(681, 509.90924) = -inf` but should + // be -3.2161271e38. 
+ if ctx.fn_ident == Identifier::Ynf + && !expected.is_infinite() + && actual.is_infinite() + && (expected.abs().to_bits().abs_diff(actual.abs().to_bits()) + < F2::Int::cast_from(1_000_000u32)) + { + return XFAIL; + } + None } From eff6c9104f5e26754343e3b9a89606aa03c0009a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 16 Jan 2025 20:30:47 +0000 Subject: [PATCH 1090/1459] Remove the limit for querying a baseline `--limit=1` seems to apply before `jq` filtering, meaning our `WORKFLOW_NAME` ("CI") workflow may not appear in the input to the jq query. Removing `--limit` provides a default amount of inputs that jq can then filter from, so this works better. --- libm/ci/ci-util.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/libm/ci/ci-util.py b/libm/ci/ci-util.py index 1ec69b002..7a9f1bd2b 100755 --- a/libm/ci/ci-util.py +++ b/libm/ci/ci-util.py @@ -201,22 +201,24 @@ def locate_baseline(flags: list[str]) -> None: "gh", "run", "list", - "--limit=1", "--status=success", f"--branch={DEFAULT_BRANCH}", "--json=databaseId,url,headSha,conclusion,createdAt," "status,workflowDatabaseId,workflowName", - f'--jq=select(.[].workflowName == "{WORKFLOW_NAME}")', + # Return the first array element matching our workflow name. NB: cannot + # just use `--limit=1`, jq filtering happens after limiting. We also + # cannot just use `--workflow` because GH gets confused from + # different file names in history. 
+ f'--jq=[.[] | select(.workflowName == "{WORKFLOW_NAME}")][0]', ], text=True, ) - eprint(f"latest: '{latest_job}'") except sp.CalledProcessError as e: eprint(f"failed to run github command: {e}") return try: - latest = json.loads(latest_job)[0] + latest = json.loads(latest_job) eprint("latest job: ", json.dumps(latest, indent=4)) except json.JSONDecodeError as e: eprint(f"failed to decode json '{latest_job}', {e}") From d08896e69651ce64ae3238aa45883b78efa0186c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 16 Jan 2025 19:50:29 +0000 Subject: [PATCH 1091/1459] Switch to the arm-linux runner and enable MPFR The free arm64 Linux runners are now available [1]. Switch to using this image in CI, and enable tests against MPFR since this is now a native platform. [1]: https://github.blog/changelog/2025-01-16-linux-arm64-hosted-runners-now-available-for-free-in-public-repositories-public-preview/ --- libm/.github/workflows/main.yaml | 2 +- libm/ci/run.sh | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index 9face9311..8c0ff237d 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -23,7 +23,7 @@ jobs: - target: aarch64-apple-darwin os: macos-15 - target: aarch64-unknown-linux-gnu - os: ubuntu-24.04 + os: ubuntu-24.04-arm - target: aarch64-pc-windows-msvc os: windows-2025 build_only: 1 # Can't run on x86 hosts diff --git a/libm/ci/run.sh b/libm/ci/run.sh index bb749b72a..6b2f07ab2 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -49,14 +49,12 @@ case "$target" in *windows-msvc*) ;; # FIXME: MinGW should be able to build MPFR, but setup in CI is nontrivial. *windows-gnu*) ;; - # Targets that aren't cross compiled work fine - # FIXME(ci): we should be able to enable aarch64 Linux here once GHA - # support rolls out. 
- x86_64*) flags="$flags --features libm-test/build-mpfr" ;; - i686*) flags="$flags --features libm-test/build-mpfr" ;; - i586*) flags="$flags --features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross" ;; - # Apple aarch64 is native + # Targets that aren't cross compiled in CI work fine aarch64*apple*) flags="$flags --features libm-test/build-mpfr" ;; + aarch64*linux*) flags="$flags --features libm-test/build-mpfr" ;; + i586*) flags="$flags --features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross" ;; + i686*) flags="$flags --features libm-test/build-mpfr" ;; + x86_64*) flags="$flags --features libm-test/build-mpfr" ;; esac # FIXME: `STATUS_DLL_NOT_FOUND` testing macros on CI. From 71c1811a1a032b9ed507e52e80e06a745c57e47f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 21 Jan 2025 07:44:13 +0000 Subject: [PATCH 1092/1459] Run icount benchmarks once with softfloat and once with hardfloat These benchmarks are fast to run, so the time cost here is pretty minimal. Running softfloat benchmarks just ensures that we don't e.g. test the performance of `_mm_sqrt_ss` rather than our implementation, and running without softfloat gives us a way to see the effect of arch intrinsics. 
--- libm/.github/workflows/main.yaml | 24 +-------------- libm/ci/bench-icount.sh | 53 ++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 23 deletions(-) create mode 100755 libm/ci/bench-icount.sh diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index 8c0ff237d..f9d3a5a15 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -170,29 +170,7 @@ jobs: - name: Run icount benchmarks env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - set -eux - iai_home="iai-home" - # Download the baseline from master - ./ci/ci-util.py locate-baseline --download --extract - - # Run iai-callgrind benchmarks - cargo bench --no-default-features \ - --features unstable,unstable-float,icount \ - --bench icount \ - -- \ - --save-baseline=default \ - --home "$(pwd)/$iai_home" \ - --regression='ir=5.0' \ - --save-summary - # NB: iai-callgrind should exit on error but does not, so we inspect the sumary - # for errors. See https://github.com/iai-callgrind/iai-callgrind/issues/337 - ./ci/ci-util.py check-regressions "$iai_home" - - # Name and tar the new baseline - name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}" - echo "BASELINE_NAME=$name" >> "$GITHUB_ENV" - tar cJf "$name.tar.xz" "$iai_home" + run: ./ci/bench-icount.sh - name: Upload the benchmark baseline uses: actions/upload-artifact@v4 diff --git a/libm/ci/bench-icount.sh b/libm/ci/bench-icount.sh new file mode 100755 index 000000000..40b3ac95c --- /dev/null +++ b/libm/ci/bench-icount.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +set -eux + +iai_home="iai-home" + +# Download the baseline from master +./ci/ci-util.py locate-baseline --download --extract + +# Run benchmarks once +function run_icount_benchmarks() { + cargo_args=( + "--bench" "icount" + "--no-default-features" + "--features" "unstable,unstable-float,icount" + ) + + iai_args=( + "--home" "$(pwd)/$iai_home" + "--regression=ir=5.0" + "--save-summary" + ) + + # Parse `cargo_arg0 
cargo_arg1 -- iai_arg0 iai_arg1` syntax + parsing_iai_args=0 + while [ "$#" -gt 0 ]; do + if [ "$parsing_iai_args" == "1" ]; then + iai_args+=("$1") + elif [ "$1" == "--" ]; then + parsing_iai_args=1 + else + cargo_args+=("$1") + fi + + shift + done + + # Run iai-callgrind benchmarks + cargo bench "${cargo_args[@]}" -- "${iai_args[@]}" + + # NB: iai-callgrind should exit on error but does not, so we inspect the sumary + # for errors. See https://github.com/iai-callgrind/iai-callgrind/issues/337 + ./ci/ci-util.py check-regressions --home "$iai_home" || true +} + +# Run once with softfloats, once with arch instructions enabled +run_icount_benchmarks --features force-soft-floats -- --save-baseline=softfloat +run_icount_benchmarks -- --save-baseline=hardfloat + +# Name and tar the new baseline +name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}" +echo "BASELINE_NAME=$name" >>"$GITHUB_ENV" +tar cJf "$name.tar.xz" "$iai_home" From 1d01a060cf3c1bfb38899314148dc4ca8f2eff7d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 21 Jan 2025 07:45:44 +0000 Subject: [PATCH 1093/1459] Run wall time benchmarks with `--features force-soft-floats` Similar to changes for `icount` benchmarks, this ensures we aren't testing the throughput of architecture instructions. --- libm/.github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index f9d3a5a15..f019c73f8 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -183,7 +183,7 @@ jobs: # Always use the same seed for benchmarks. Ideally we should switch to a # non-random generator. export LIBM_SEED=benchesbenchesbenchesbencheswoo! 
- cargo bench --all --features libm-test/short-benchmarks,libm-test/build-musl + cargo bench --all --features short-benchmarks,build-musl,force-soft-floats - name: Print test logs if available if: always() From a3eb9646f1ee2ee8e34fca65b1358ae71580d59d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 21 Jan 2025 07:47:41 +0000 Subject: [PATCH 1094/1459] Add a way to ignore benchmark regression checks Introduce a way to ignore the results of icount regression tests, by specifying `allow-regressions` in the pull request body. This should apply to both pull requests and the merges based on them, since `gh pr view` automatically handles both. --- libm/.github/workflows/main.yaml | 1 + libm/ci/bench-icount.sh | 7 +++- libm/ci/ci-util.py | 63 ++++++++++++++++++++++++++------ 3 files changed, 59 insertions(+), 12 deletions(-) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index f019c73f8..7693de655 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -170,6 +170,7 @@ jobs: - name: Run icount benchmarks env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} run: ./ci/bench-icount.sh - name: Upload the benchmark baseline diff --git a/libm/ci/bench-icount.sh b/libm/ci/bench-icount.sh index 40b3ac95c..3a2155f50 100755 --- a/libm/ci/bench-icount.sh +++ b/libm/ci/bench-icount.sh @@ -40,7 +40,12 @@ function run_icount_benchmarks() { # NB: iai-callgrind should exit on error but does not, so we inspect the sumary # for errors. See https://github.com/iai-callgrind/iai-callgrind/issues/337 - ./ci/ci-util.py check-regressions --home "$iai_home" || true + if [ -n "${PR_NUMBER:-}" ]; then + # If this is for a pull request, ignore regressions if specified. 
+ ./ci/ci-util.py check-regressions --home "$iai_home" --allow-pr-override "$PR_NUMBER" + else + ./ci/ci-util.py check-regressions --home "$iai_home" || true + fi } # Run once with softfloats, once with arch instructions enabled diff --git a/libm/ci/ci-util.py b/libm/ci/ci-util.py index 7a9f1bd2b..7464fd425 100755 --- a/libm/ci/ci-util.py +++ b/libm/ci/ci-util.py @@ -33,11 +33,14 @@ Note that `--extract` will overwrite files in `iai-home`. - check-regressions [iai-home] + check-regressions [--home iai-home] [--allow-pr-override pr_number] Check `iai-home` (or `iai-home` if unspecified) for `summary.json` files and see if there are any regressions. This is used as a workaround for `iai-callgrind` not exiting with error status; see . + + If `--allow-pr-override` is specified, the regression check will not exit + with failure if any line in the PR starts with `allow-regressions`. """ ) @@ -46,6 +49,8 @@ DEFAULT_BRANCH = "master" WORKFLOW_NAME = "CI" # Workflow that generates the benchmark artifacts ARTIFACT_GLOB = "baseline-icount*" +# Place this in a PR body to skip regression checks (must be at the start of a line). +REGRESSION_DIRECTIVE = "ci: allow-regressions" # Don't run exhaustive tests if these files change, even if they contaiin a function # definition. @@ -256,12 +261,26 @@ def locate_baseline(flags: list[str]) -> None: eprint("baseline extracted successfully") -def check_iai_regressions(iai_home: str | None | Path): +def check_iai_regressions(args: list[str]): """Find regressions in iai summary.json files, exit with failure if any are found. 
""" - if iai_home is None: - iai_home = "iai-home" + + iai_home = "iai-home" + pr_number = False + + while len(args) > 0: + match args: + case ["--home", home, *rest]: + iai_home = home + args = rest + case ["--allow-pr-override", pr_num, *rest]: + pr_number = pr_num + args = rest + case _: + eprint(USAGE) + exit(1) + iai_home = Path(iai_home) found_summaries = False @@ -286,9 +305,33 @@ def check_iai_regressions(iai_home: str | None | Path): eprint(f"did not find any summary.json files within {iai_home}") exit(1) - if len(regressions) > 0: - eprint("Found regressions:", json.dumps(regressions, indent=4)) - exit(1) + if len(regressions) == 0: + eprint("No regressions found") + return + + eprint("Found regressions:", json.dumps(regressions, indent=4)) + + if pr_number is not None: + pr_info = sp.check_output( + [ + "gh", + "pr", + "view", + str(pr_number), + "--json=number,commits,body,createdAt", + "--jq=.commits |= map(.oid)", + ], + text=True, + ) + pr = json.loads(pr_info) + eprint("PR info:", json.dumps(pr, indent=4)) + + lines = pr["body"].splitlines() + if any(line.startswith(REGRESSION_DIRECTIVE) for line in lines): + eprint("PR allows regressions, returning") + return + + exit(1) def main(): @@ -299,10 +342,8 @@ def main(): print(f"matrix={output}") case ["locate-baseline", *flags]: locate_baseline(flags) - case ["check-regressions"]: - check_iai_regressions(None) - case ["check-regressions", iai_home]: - check_iai_regressions(iai_home) + case ["check-regressions", *args]: + check_iai_regressions(args) case ["--help" | "-h"]: print(USAGE) exit() From e7d17f78dd3a49bdbe4d4d546dd40fd4e040d56f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 21 Jan 2025 07:52:26 +0000 Subject: [PATCH 1095/1459] Ignore files relevant to benchmarking --- libm/.gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libm/.gitignore b/libm/.gitignore index 4e9c9c03d..a447c34cd 100644 --- a/libm/.gitignore +++ b/libm/.gitignore @@ -6,3 +6,7 @@ target Cargo.lock musl/ 
**.tar.gz + +# Benchmark cache +iai-home +baseline-* From ab375231663bfba221d8849f2243515d3f7c2815 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 22 Jan 2025 00:58:33 +0000 Subject: [PATCH 1096/1459] Remove trailing whitespace in scripts, run JuliaFormatter --- libm/ci/bench-icount.sh | 2 +- libm/ci/run.sh | 4 +- libm/crates/libm-test/examples/plot_file.jl | 52 +++++++++++++-------- 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/libm/ci/bench-icount.sh b/libm/ci/bench-icount.sh index 3a2155f50..4d93e257a 100755 --- a/libm/ci/bench-icount.sh +++ b/libm/ci/bench-icount.sh @@ -7,7 +7,7 @@ iai_home="iai-home" # Download the baseline from master ./ci/ci-util.py locate-baseline --download --extract -# Run benchmarks once +# Run benchmarks once function run_icount_benchmarks() { cargo_args=( "--bench" "icount" diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 6b2f07ab2..296986d97 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -90,7 +90,7 @@ if [ "$nextest" = "1" ]; then echo "dir = \"$CARGO_TARGET_DIR/nextest\"" >> "$cfg_file" cfg_flag="--config-file $cfg_file" fi - + cmd="cargo nextest run ${cfg_flag:-} --max-fail=10 $flags" profile="--cargo-profile" fi @@ -111,7 +111,7 @@ $cmd --features unstable-intrinsics --benches # Test the same in release mode, which also increases coverage. Also ensure # the soft float routines are checked. 
-$cmd "$profile" release-checked +$cmd "$profile" release-checked $cmd "$profile" release-checked --features force-soft-floats $cmd "$profile" release-checked --features unstable-intrinsics $cmd "$profile" release-checked --features unstable-intrinsics --benches diff --git a/libm/crates/libm-test/examples/plot_file.jl b/libm/crates/libm-test/examples/plot_file.jl index 14a128303..acffd9756 100644 --- a/libm/crates/libm-test/examples/plot_file.jl +++ b/libm/crates/libm-test/examples/plot_file.jl @@ -13,7 +13,7 @@ using CairoMakie using TOML function main()::Nothing - CairoMakie.activate!(px_per_unit=10) + CairoMakie.activate!(px_per_unit = 10) config_path = ARGS[1] cfg = Dict() @@ -75,15 +75,25 @@ function plot_one( gen_x = map((v) -> parse(Float32, v), inputs) do_plot( - fig, gen_x, func, xlims[1], xlims[2], + fig, + gen_x, + func, + xlims[1], + xlims[2], "$fn_name $gen_name (linear scale)", - lin_out_file, false, + lin_out_file, + false, ) do_plot( - fig, gen_x, func, xlims_log[1], xlims_log[2], + fig, + gen_x, + func, + xlims_log[1], + xlims_log[2], "$fn_name $gen_name (log scale)", - log_out_file, true, + log_out_file, + true, ) end @@ -97,7 +107,7 @@ function do_plot( title::String, out_file::String, logscale::Bool, -)::Nothing where F<:AbstractFloat +)::Nothing where {F<:AbstractFloat} println("plotting $title") # `gen_x` is the values the generator produces. `actual_x` is for plotting a @@ -116,32 +126,36 @@ function do_plot( actual_x = LinRange(input_min, input_max, steps) xscale = identity end - + gen_y = @. func(gen_x) actual_y = @. 
func(actual_x) - ax = Axis(fig[1, 1], xscale=xscale, title=title) + ax = Axis(fig[1, 1], xscale = xscale, title = title) lines!( - ax, actual_x, actual_y, color=(:lightblue, 0.6), - linewidth=6.0, label="true function", + ax, + actual_x, + actual_y, + color = (:lightblue, 0.6), + linewidth = 6.0, + label = "true function", ) scatter!( - ax, gen_x, gen_y, color=(:darkblue, 0.9), - markersize=markersize, label="checked inputs", + ax, + gen_x, + gen_y, + color = (:darkblue, 0.9), + markersize = markersize, + label = "checked inputs", ) - axislegend(ax, position=:rb, framevisible=false) + axislegend(ax, position = :rb, framevisible = false) save(out_file, fig) delete!(ax) end "Apply a function, returning the default if there is a domain error" -function map_or( - input::AbstractFloat, - f::Function, - default::Any -)::Union{AbstractFloat,Any} +function map_or(input::AbstractFloat, f::Function, default::Any)::Union{AbstractFloat,Any} try return f(input) catch @@ -151,7 +165,7 @@ end # Operations for logarithms that are symmetric about 0 C = 10 -symlog10(x::Number) = sign(x) * (log10(1 + abs(x)/(10^C))) +symlog10(x::Number) = sign(x) * (log10(1 + abs(x) / (10^C))) sympow10(x::Number) = (10^C) * (10^x - 1) main() From c7a8d75903d730413a506762b53c6bfeab8ca4ea Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 22 Jan 2025 02:35:29 +0000 Subject: [PATCH 1097/1459] Add a retry to the musl download This download has occasionally been failing in CI recently. Add a retry so this is less likely to cause the workflow to fail. 
--- libm/ci/download-musl.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/ci/download-musl.sh b/libm/ci/download-musl.sh index 039e96157..8a8c58550 100755 --- a/libm/ci/download-musl.sh +++ b/libm/ci/download-musl.sh @@ -7,7 +7,7 @@ fname=musl-1.2.5.tar.gz sha=a9a118bbe84d8764da0ea0d28b3ab3fae8477fc7e4085d90102b8596fc7c75e4 mkdir musl -curl -L "https://musl.libc.org/releases/$fname" -O +curl -L "https://musl.libc.org/releases/$fname" -O --retry 5 case "$(uname -s)" in MINGW*) From daae8671fca0598dce32be3aeee824b386f9e4a8 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 15 Jan 2025 11:34:17 +0000 Subject: [PATCH 1098/1459] Don't set `opt_level` in the musl build script `cc` automatically reads this from Cargo's `OPT_LEVEL` variable so we don't need to set it explicitly. Remove this so running in a debugger makes more sense. --- libm/crates/musl-math-sys/build.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/libm/crates/musl-math-sys/build.rs b/libm/crates/musl-math-sys/build.rs index 03deb4ff0..d75748159 100644 --- a/libm/crates/musl-math-sys/build.rs +++ b/libm/crates/musl-math-sys/build.rs @@ -151,7 +151,6 @@ fn build_musl_math(cfg: &Config) { .flag_if_supported("-ffreestanding") .flag_if_supported("-nostdinc") .define("_ALL_SOURCE", "1") - .opt_level(3) .define( "ROOT_INCLUDE_FEATURES", Some(musl_dir.join("include/features.h").to_str().unwrap()), From 5e0adae8c4b5acfc29b9a400982e02d581beb557 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 22 Jan 2025 05:29:36 +0000 Subject: [PATCH 1099/1459] Enable `force-soft-floats` for extensive tests Any architecture-specific float operations are likely to consist of only a few instructions, but the softfloat implementations are much more complex. Ensure this is what gets tested. 
--- libm/.github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index 7693de655..89c5facef 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -270,7 +270,7 @@ jobs: fi LIBM_EXTENSIVE_TESTS="$CHANGED" cargo t \ - --features build-mpfr,unstable \ + --features build-mpfr,unstable,force-soft-floats \ --profile release-checked \ -- extensive - name: Print test logs if available From 53495faefdd1b32922992eb7c6e7cf68a8ea85ec Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 12 Jan 2025 11:45:40 +0000 Subject: [PATCH 1100/1459] Port the most recent version of Musl's `sqrt` as a generic algorithm Musl commit 97e9b73d59 ("math: new software sqrt") adds a new algorithm using Goldschmidt division. Port this algorithm to Rust and make it generic, which shows a notable performance improvement over the existing algorithm. This also allows adding square root routines for `f16` and `f128`. 
--- libm/etc/function-definitions.json | 2 + libm/src/math/generic/mod.rs | 2 + libm/src/math/generic/sqrt.rs | 419 ++++++++++++++++++++++++++++ libm/src/math/sqrt.rs | 252 +---------------- libm/src/math/sqrtf.rs | 133 +-------- libm/src/math/support/int_traits.rs | 2 + libm/src/math/support/macros.rs | 22 +- libm/src/math/support/mod.rs | 11 + 8 files changed, 450 insertions(+), 393 deletions(-) create mode 100644 libm/src/math/generic/sqrt.rs diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index dbaac931c..9f7c8ab25 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -704,6 +704,7 @@ "src/libm_helper.rs", "src/math/arch/i686.rs", "src/math/arch/wasm32.rs", + "src/math/generic/sqrt.rs", "src/math/sqrt.rs" ], "type": "f64" @@ -712,6 +713,7 @@ "sources": [ "src/math/arch/i686.rs", "src/math/arch/wasm32.rs", + "src/math/generic/sqrt.rs", "src/math/sqrtf.rs" ], "type": "f32" diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index 2b068d6c5..3b5a2c3ef 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -1,9 +1,11 @@ mod copysign; mod fabs; mod fdim; +mod sqrt; mod trunc; pub use copysign::copysign; pub use fabs::fabs; pub use fdim::fdim; +pub use sqrt::sqrt; pub use trunc::trunc; diff --git a/libm/src/math/generic/sqrt.rs b/libm/src/math/generic/sqrt.rs new file mode 100644 index 000000000..a2e054f3c --- /dev/null +++ b/libm/src/math/generic/sqrt.rs @@ -0,0 +1,419 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/sqrt.c. Ported to generic Rust algorithm in 2025, TG. */ + +//! Generic square root algorithm. +//! +//! This routine operates around `m_u2`, a U.2 (fixed point with two integral bits) mantissa +//! within the range [1, 4). A table lookup provides an initial estimate, then goldschmidt +//! iterations at various widths are used to approach the real values. +//! +//! 
For the iterations, `r` is a U0 number that approaches `1/sqrt(m_u2)`, and `s` is a U2 number +//! that approaches `sqrt(m_u2)`. Recall that m_u2 ∈ [1, 4). +//! +//! With Newton-Raphson iterations, this would be: +//! +//! - `w = r * r w ~ 1 / m` +//! - `u = 3 - m * w u ~ 3 - m * w = 3 - m / m = 2` +//! - `r = r * u / 2 r ~ r` +//! +//! (Note that the righthand column does not show anything analytically meaningful (i.e. r ~ r), +//! since the value of performing one iteration is in reducing the error representable by `~`). +//! +//! Instead of Newton-Raphson iterations, Goldschmidt iterations are used to calculate +//! `s = m * r`: +//! +//! - `s = m * r s ~ m / sqrt(m)` +//! - `u = 3 - s * r u ~ 3 - (m / sqrt(m)) * (1 / sqrt(m)) = 3 - m / m = 2` +//! - `r = r * u / 2 r ~ r` +//! - `s = s * u / 2 s ~ s` +//! +//! The above is precise because it uses the original value `m`. There is also a faster version +//! that performs fewer steps but does not use `m`: +//! +//! - `u = 3 - s * r u ~ 3 - 1` +//! - `r = r * u / 2 r ~ r` +//! - `s = s * u / 2 s ~ s` +//! +//! Rounding errors accumulate faster with the second version, so it is only used for subsequent +//! iterations within the same width integer. The first version is always used for the first +//! iteration at a new width in order to avoid this accumulation. +//! +//! Goldschmidt has the advantage over Newton-Raphson that `sqrt(x)` and `1/sqrt(x)` are +//! computed at the same time, i.e. there is no need to calculate `1/sqrt(x)` and invert it. + +use super::super::support::{IntTy, cold_path, raise_invalid}; +use super::super::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt}; + +pub fn sqrt(x: F) -> F +where + F: Float + SqrtHelper, + F::Int: HInt, + F::Int: From, + F::Int: From, + F::Int: CastInto, + F::Int: CastInto, + u32: CastInto, +{ + let zero = IntTy::::ZERO; + let one = IntTy::::ONE; + + let mut ix = x.to_bits(); + + // Top is the exponent and sign, which may or may not be shifted. 
If the float fits into a + // `u32`, we can get by without paying shifting costs. + let noshift = F::BITS <= u32::BITS; + let (mut top, special_case) = if noshift { + let exp_lsb = one << F::SIG_BITS; + let special_case = ix.wrapping_sub(exp_lsb) >= F::EXP_MASK - exp_lsb; + (Exp::NoShift(()), special_case) + } else { + let top = u32::cast_from(ix >> F::SIG_BITS); + let special_case = top.wrapping_sub(1) >= F::EXP_MAX - 1; + (Exp::Shifted(top), special_case) + }; + + // Handle NaN, zero, and out of domain (<= 0) + if special_case { + cold_path(); + + // +/-0 + if ix << 1 == zero { + return x; + } + + // Positive infinity + if ix == F::EXP_MASK { + return x; + } + + // NaN or negative + if ix > F::EXP_MASK { + return raise_invalid(x); + } + + // Normalize subnormals by multiplying by 1.0 << SIG_BITS (e.g. 0x1p52 for doubles). + let scaled = x * F::from_parts(false, (F::SIG_BITS + F::EXP_BIAS) as i32, zero); + ix = scaled.to_bits(); + match top { + Exp::Shifted(ref mut v) => { + *v = scaled.exp().unsigned(); + *v = (*v).wrapping_sub(F::SIG_BITS); + } + Exp::NoShift(()) => { + ix = ix.wrapping_sub((F::SIG_BITS << F::SIG_BITS).cast()); + } + } + } + + // Reduce arguments such that `x = 4^e * m`: + // + // - m_u2 ∈ [1, 4), a fixed point U2.BITS number + // - 2^e is the exponent part of the result + let (m_u2, exp) = match top { + Exp::Shifted(top) => { + // We now know `x` is positive, so `top` is just its (biased) exponent + let mut e = top; + // Construct a fixed point representation of the mantissa. 
+ let mut m_u2 = (ix | F::IMPLICIT_BIT) << F::EXP_BITS; + let even = (e & 1) != 0; + if even { + m_u2 >>= 1; + } + e = (e.wrapping_add(F::EXP_MAX >> 1)) >> 1; + (m_u2, Exp::Shifted(e)) + } + Exp::NoShift(()) => { + let even = ix & (one << F::SIG_BITS) != zero; + + // Exponent part of the return value + let mut e_noshift = ix >> 1; + // ey &= (F::EXP_MASK << 2) >> 2; // clear the top exponent bit (result = 1.0) + e_noshift += (F::EXP_MASK ^ (F::SIGN_MASK >> 1)) >> 1; + e_noshift &= F::EXP_MASK; + + let m1 = (ix << F::EXP_BITS) | F::SIGN_MASK; + let m0 = (ix << (F::EXP_BITS - 1)) & !F::SIGN_MASK; + let m_u2 = if even { m0 } else { m1 }; + + (m_u2, Exp::NoShift(e_noshift)) + } + }; + + // Extract the top 6 bits of the significand with the lowest bit of the exponent. + let i = usize::cast_from(ix >> (F::SIG_BITS - 6)) & 0b1111111; + + // Start with an initial guess for `r = 1 / sqrt(m)` from the table, and shift `m` as an + // initial value for `s = sqrt(m)`. See the module documentation for details. + let r1_u0: F::ISet1 = F::ISet1::cast_from(RSQRT_TAB[i]) << (F::ISet1::BITS - 16); + let s1_u2: F::ISet1 = ((m_u2) >> (F::BITS - F::ISet1::BITS)).cast(); + + // Perform iterations, if any, at quarter width (used for `f128`). + let (r1_u0, _s1_u2) = goldschmidt::(r1_u0, s1_u2, F::SET1_ROUNDS, false); + + // Widen values and perform iterations at half width (used for `f64` and `f128`). + let r2_u0: F::ISet2 = F::ISet2::from(r1_u0) << (F::ISet2::BITS - F::ISet1::BITS); + let s2_u2: F::ISet2 = ((m_u2) >> (F::BITS - F::ISet2::BITS)).cast(); + let (r2_u0, _s2_u2) = goldschmidt::(r2_u0, s2_u2, F::SET2_ROUNDS, false); + + // Perform final iterations at full width (used for all float types). + let r_u0: F::Int = F::Int::from(r2_u0) << (F::BITS - F::ISet2::BITS); + let s_u2: F::Int = m_u2; + let (_r_u0, s_u2) = goldschmidt::(r_u0, s_u2, F::FINAL_ROUNDS, true); + + // Shift back to mantissa position. 
+ let mut m = s_u2 >> (F::EXP_BITS - 2); + + // The musl source includes the following comment (with literals replaced): + // + // > s < sqrt(m) < s + 0x1.09p-SIG_BITS + // > compute nearest rounded result: the nearest result to SIG_BITS bits is either s or + // > s+0x1p-SIG_BITS, we can decide by comparing (2^SIG_BITS s + 0.5)^2 to 2^(2*SIG_BITS) m. + // + // Expanding this with , with `SIG_BITS = p` and adjusting based on the operations done to + // `d0` and `d1`: + // + // - `2^(2p)m ≟ ((2^p)m + 0.5)^2` + // - `2^(2p)m ≟ 2^(2p)m^2 + (2^p)m + 0.25` + // - `2^(2p)m - m^2 ≟ (2^(2p) - 1)m^2 + (2^p)m + 0.25` + // - `(1 - 2^(2p))m + m^2 ≟ (1 - 2^(2p))m^2 + (1 - 2^p)m + 0.25` (?) + // + // I do not follow how the rounding bit is extracted from this comparison with the below + // operations. In any case, the algorithm is well tested. + + // The value needed to shift `m_u2` by to create `m*2^(2p)`. `2p = 2 * F::SIG_BITS`, + // `F::BITS - 2` accounts for the offset that `m_u2` already has. + let shift = 2 * F::SIG_BITS - (F::BITS - 2); + + // `2^(2p)m - m^2` + let d0 = (m_u2 << shift).wrapping_sub(m.wrapping_mul(m)); + // `m - 2^(2p)m + m^2` + let d1 = m.wrapping_sub(d0); + m += d1 >> (F::BITS - 1); + m &= F::SIG_MASK; + + match exp { + Exp::Shifted(e) => m |= IntTy::::cast_from(e) << F::SIG_BITS, + Exp::NoShift(e) => m |= e, + }; + + let mut y = F::from_bits(m); + + // FIXME(f16): the fenv math does not work for `f16` + if F::BITS > 16 { + // Handle rounding and inexact. `(m + 1)^2 == 2^shift m` is exact; for all other cases, add + // a tiny value to cause fenv effects. + let d2 = d1.wrapping_add(m).wrapping_add(one); + let mut tiny = if d2 == zero { + cold_path(); + zero + } else { + F::IMPLICIT_BIT + }; + + tiny |= (d1 ^ d2) & F::SIGN_MASK; + let t = F::from_bits(tiny); + y = y + t; + } + + y +} + +/// Multiply at the wider integer size, returning the high half. 
+fn wmulh(a: I, b: I) -> I { + a.widen_mul(b).hi() +} + +/// Perform `count` goldschmidt iterations, returning `(r_u0, s_u?)`. +/// +/// - `r_u0` is the reciprocal `r ~ 1 / sqrt(m)`, as U0. +/// - `s_u2` is the square root, `s ~ sqrt(m)`, as U2. +/// - `count` is the number of iterations to perform. +/// - `final_set` should be true if this is the last round (same-sized integer). If so, the +/// returned `s` will be U3, for later shifting. Otherwise, the returned `s` is U2. +/// +/// Note that performance relies on the optimizer being able to unroll these loops (reasonably +/// trivial, `count` is a constant when called). +#[inline] +fn goldschmidt(mut r_u0: I, mut s_u2: I, count: u32, final_set: bool) -> (I, I) +where + F: SqrtHelper, + I: HInt + From, +{ + let three_u2 = I::from(0b11u8) << (I::BITS - 2); + let mut u_u0 = r_u0; + + for i in 0..count { + // First iteration: `s = m*r` (`u_u0 = r_u0` set above) + // Subsequent iterations: `s=s*u/2` + s_u2 = wmulh(s_u2, u_u0); + + // Perform `s /= 2` if: + // + // 1. This is not the first iteration (the first iteration is `s = m*r`)... + // 2. ... and this is not the last set of iterations + // 3. ... or, if this is the last set, it is not the last iteration + // + // This step is not performed for the final iteration because the shift is combined with + // a later shift (moving `s` into the mantissa). + if i > 0 && (!final_set || i + 1 < count) { + s_u2 <<= 1; + } + + // u = 3 - s*r + let d_u2 = wmulh(s_u2, r_u0); + u_u0 = three_u2.wrapping_sub(d_u2); + + // r = r*u/2 + r_u0 = wmulh(r_u0, u_u0) << 1; + } + + (r_u0, s_u2) +} + +/// Representation of whether we shift the exponent into a `u32`, or modify it in place to save +/// the shift operations. +enum Exp { + /// The exponent has been shifted to a `u32` and is LSB-aligned. + Shifted(u32), + /// The exponent is in its natural position in integer repr. + NoShift(T), +} + +/// Size-specific constants related to the square root routine. 
+pub trait SqrtHelper: Float { + /// Integer for the first set of rounds. If unused, set to the same type as the next set. + type ISet1: HInt + Into + CastFrom + From; + /// Integer for the second set of rounds. If unused, set to the same type as the next set. + type ISet2: HInt + From + From; + + /// Number of rounds at `ISet1`. + const SET1_ROUNDS: u32 = 0; + /// Number of rounds at `ISet2`. + const SET2_ROUNDS: u32 = 0; + /// Number of rounds at `Self::Int`. + const FINAL_ROUNDS: u32; +} + +impl SqrtHelper for f32 { + type ISet1 = u32; // unused + type ISet2 = u32; // unused + + const FINAL_ROUNDS: u32 = 3; +} + +impl SqrtHelper for f64 { + type ISet1 = u32; // unused + type ISet2 = u32; + + const SET2_ROUNDS: u32 = 2; + const FINAL_ROUNDS: u32 = 2; +} + +/// A U0.16 representation of `1/sqrt(x)`. +/// +// / The index is a 7-bit number consisting of a single exponent bit and 6 bits of significand. +#[rustfmt::skip] +static RSQRT_TAB: [u16; 128] = [ + 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43, + 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b, + 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1, + 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430, + 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59, + 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925, + 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479, + 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040, + 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234, + 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2, + 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1, + 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192, + 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f, + 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4, + 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59, + 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 
0xb6d0, 0xb617, 0xb560, +]; + +#[cfg(test)] +mod tests { + use super::*; + + /// Test against edge cases from https://en.cppreference.com/w/cpp/numeric/math/sqrt + fn spec_test() + where + F: Float + SqrtHelper, + F::Int: HInt, + F::Int: From, + F::Int: From, + F::Int: CastInto, + F::Int: CastInto, + u32: CastInto, + { + // Not Asserted: FE_INVALID exception is raised if argument is negative. + assert!(sqrt(F::NEG_ONE).is_nan()); + assert!(sqrt(F::NAN).is_nan()); + for f in [F::ZERO, F::NEG_ZERO, F::INFINITY].iter().copied() { + assert_biteq!(sqrt(f), f); + } + } + + #[test] + fn sanity_check_f32() { + assert_biteq!(sqrt(100.0f32), 10.0); + assert_biteq!(sqrt(4.0f32), 2.0); + } + + #[test] + fn spec_tests_f32() { + spec_test::(); + } + + #[test] + #[allow(clippy::approx_constant)] + fn conformance_tests_f32() { + let cases = [ + (f32::PI, 0x3fe2dfc5_u32), + (10000.0f32, 0x42c80000_u32), + (f32::from_bits(0x0000000f), 0x1b2f456f_u32), + (f32::INFINITY, f32::INFINITY.to_bits()), + ]; + + for (input, output) in cases { + assert_biteq!( + sqrt(input), + f32::from_bits(output), + "input: {input:?} ({:#018x})", + input.to_bits() + ); + } + } + + #[test] + fn sanity_check_f64() { + assert_biteq!(sqrt(100.0f64), 10.0); + assert_biteq!(sqrt(4.0f64), 2.0); + } + + #[test] + fn spec_tests_f64() { + spec_test::(); + } + + #[test] + #[allow(clippy::approx_constant)] + fn conformance_tests_f64() { + let cases = [ + (f64::PI, 0x3ffc5bf891b4ef6a_u64), + (10000.0, 0x4059000000000000_u64), + (f64::from_bits(0x0000000f), 0x1e7efbdeb14f4eda_u64), + (f64::INFINITY, f64::INFINITY.to_bits()), + ]; + + for (input, output) in cases { + assert_biteq!( + sqrt(input), + f64::from_bits(output), + "input: {input:?} ({:#018x})", + input.to_bits() + ); + } + } +} diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 2fd7070b1..0e1d0cd2c 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -1,83 +1,3 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */ -/* - * 
==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* sqrt(x) - * Return correctly rounded sqrt. - * ------------------------------------------ - * | Use the hardware sqrt if you have one | - * ------------------------------------------ - * Method: - * Bit by bit method using integer arithmetic. (Slow, but portable) - * 1. Normalization - * Scale x to y in [1,4) with even powers of 2: - * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then - * sqrt(x) = 2^k * sqrt(y) - * 2. Bit by bit computation - * Let q = sqrt(y) truncated to i bit after binary point (q = 1), - * i 0 - * i+1 2 - * s = 2*q , and y = 2 * ( y - q ). (1) - * i i i i - * - * To compute q from q , one checks whether - * i+1 i - * - * -(i+1) 2 - * (q + 2 ) <= y. (2) - * i - * -(i+1) - * If (2) is false, then q = q ; otherwise q = q + 2 . - * i+1 i i+1 i - * - * With some algebraic manipulation, it is not difficult to see - * that (2) is equivalent to - * -(i+1) - * s + 2 <= y (3) - * i i - * - * The advantage of (3) is that s and y can be computed by - * i i - * the following recurrence formula: - * if (3) is false - * - * s = s , y = y ; (4) - * i+1 i i+1 i - * - * otherwise, - * -i -(i+1) - * s = s + 2 , y = y - s - 2 (5) - * i+1 i i+1 i i - * - * One may easily use induction to prove (4) and (5). - * Note. Since the left hand side of (3) contain only i+2 bits, - * it does not necessary to do a full (53-bit) comparison - * in (3). - * 3. Final rounding - * After generating the 53 bits result, we compute one more bit. 
- * Together with the remainder, we can decide whether the - * result is exact, bigger than 1/2ulp, or less than 1/2ulp - * (it will never equal to 1/2ulp). - * The rounding mode can be detected by checking whether - * huge + tiny is equal to huge, and whether huge - tiny is - * equal to huge for some floating point number "huge" and "tiny". - * - * Special cases: - * sqrt(+-0) = +-0 ... exact - * sqrt(inf) = inf - * sqrt(-ve) = NaN ... with invalid signal - * sqrt(NaN) = NaN ... with invalid signal for signaling NaN - */ - -use core::f64; - /// The square root of `x` (f64). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrt(x: f64) -> f64 { @@ -90,175 +10,5 @@ pub fn sqrt(x: f64) -> f64 { args: x, } - use core::num::Wrapping; - - const TINY: f64 = 1.0e-300; - - let mut z: f64; - let sign: Wrapping = Wrapping(0x80000000); - let mut ix0: i32; - let mut s0: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: Wrapping; - let mut t1: Wrapping; - let mut s1: Wrapping; - let mut ix1: Wrapping; - let mut q1: Wrapping; - - ix0 = (x.to_bits() >> 32) as i32; - ix1 = Wrapping(x.to_bits() as u32); - - /* take care of Inf and NaN */ - if (ix0 & 0x7ff00000) == 0x7ff00000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ - } - /* take care of zero */ - if ix0 <= 0 { - if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix0 < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } - } - /* normalize x */ - m = ix0 >> 20; - if m == 0 { - /* subnormal x */ - while ix0 == 0 { - m -= 21; - ix0 |= (ix1 >> 11).0 as i32; - ix1 <<= 21; - } - i = 0; - while (ix0 & 0x00100000) == 0 { - i += 1; - ix0 <<= 1; - } - m -= i - 1; - ix0 |= (ix1 >> (32 - i) as usize).0 as i32; - ix1 = ix1 << i as usize; - } - m -= 1023; /* unbias exponent */ - ix0 = (ix0 & 0x000fffff) | 0x00100000; - if (m & 1) == 1 { - /* odd m, double x to make it even */ - ix0 *= 2; - ix0 += ((ix1 & 
sign) >> 31).0 as i32; - ix1 += ix1; - } - m >>= 1; /* m = [m/2] */ - - /* generate sqrt(x) bit by bit */ - ix0 *= 2; - ix0 += ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - q = 0; /* [q,q1] = sqrt(x) */ - q1 = Wrapping(0); - s0 = 0; - s1 = Wrapping(0); - r = Wrapping(0x00200000); /* r = moving bit from right to left */ - - while r != Wrapping(0) { - t = s0 + r.0 as i32; - if t <= ix0 { - s0 = t + r.0 as i32; - ix0 -= t; - q += r.0 as i32; - } - ix0 *= 2; - ix0 += ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } - - r = sign; - while r != Wrapping(0) { - t1 = s1 + r; - t = s0; - if t < ix0 || (t == ix0 && t1 <= ix1) { - s1 = t1 + r; - if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { - s0 += 1; - } - ix0 -= t; - if ix1 < t1 { - ix0 -= 1; - } - ix1 -= t1; - q1 += r; - } - ix0 *= 2; - ix0 += ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } - - /* use floating add to find out rounding direction */ - if (ix0 as u32 | ix1.0) != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if q1.0 == 0xffffffff { - q1 = Wrapping(0); - q += 1; - } else if z > 1.0 { - if q1.0 == 0xfffffffe { - q += 1; - } - q1 += Wrapping(2); - } else { - q1 += q1 & Wrapping(1); - } - } - } - ix0 = (q >> 1) + 0x3fe00000; - ix1 = q1 >> 1; - if (q & 1) == 1 { - ix1 |= sign; - } - ix0 += m << 20; - f64::from_bits(((ix0 as u64) << 32) | ix1.0 as u64) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(sqrt(100.0), 10.0); - assert_eq!(sqrt(4.0), 2.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt - #[test] - fn spec_tests() { - // Not Asserted: FE_INVALID exception is raised if argument is negative. 
- assert!(sqrt(-1.0).is_nan()); - assert!(sqrt(f64::NAN).is_nan()); - for f in [0.0, -0.0, f64::INFINITY].iter().copied() { - assert_eq!(sqrt(f), f); - } - } - - #[test] - #[allow(clippy::approx_constant)] - fn conformance_tests() { - let values = [3.14159265359, 10000.0, f64::from_bits(0x0000000f), f64::INFINITY]; - let results = [ - 4610661241675116657u64, - 4636737291354636288u64, - 2197470602079456986u64, - 9218868437227405312u64, - ]; - - for i in 0..values.len() { - let bits = f64::to_bits(sqrt(values[i])); - assert_eq!(results[i], bits); - } - } + super::generic::sqrt(x) } diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index 319335163..2e69a4b66 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -1,18 +1,3 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - /// The square root of `x` (f32). 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrtf(x: f32) -> f32 { @@ -25,121 +10,5 @@ pub fn sqrtf(x: f32) -> f32 { args: x, } - const TINY: f32 = 1.0e-30; - - let mut z: f32; - let sign: i32 = 0x80000000u32 as i32; - let mut ix: i32; - let mut s: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: u32; - - ix = x.to_bits() as i32; - - /* take care of Inf and NaN */ - if (ix as u32 & 0x7f800000) == 0x7f800000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ - } - - /* take care of zero */ - if ix <= 0 { - if (ix & !sign) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } - } - - /* normalize x */ - m = ix >> 23; - if m == 0 { - /* subnormal x */ - i = 0; - while ix & 0x00800000 == 0 { - ix <<= 1; - i = i + 1; - } - m -= i - 1; - } - m -= 127; /* unbias exponent */ - ix = (ix & 0x007fffff) | 0x00800000; - if m & 1 == 1 { - /* odd m, double x to make it even */ - ix += ix; - } - m >>= 1; /* m = [m/2] */ - - /* generate sqrt(x) bit by bit */ - ix += ix; - q = 0; - s = 0; - r = 0x01000000; /* r = moving bit from right to left */ - - while r != 0 { - t = s + r as i32; - if t <= ix { - s = t + r as i32; - ix -= t; - q += r as i32; - } - ix += ix; - r >>= 1; - } - - /* use floating add to find out rounding direction */ - if ix != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if z > 1.0 { - q += 2; - } else { - q += q & 1; - } - } - } - - ix = (q >> 1) + 0x3f000000; - ix += m << 23; - f32::from_bits(ix as u32) -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(sqrtf(100.0), 10.0); - assert_eq!(sqrtf(4.0), 2.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt - #[test] - fn spec_tests() { - // 
Not Asserted: FE_INVALID exception is raised if argument is negative. - assert!(sqrtf(-1.0).is_nan()); - assert!(sqrtf(f32::NAN).is_nan()); - for f in [0.0, -0.0, f32::INFINITY].iter().copied() { - assert_eq!(sqrtf(f), f); - } - } - - #[test] - #[allow(clippy::approx_constant)] - fn conformance_tests() { - let values = [3.14159265359f32, 10000.0f32, f32::from_bits(0x0000000f), f32::INFINITY]; - let results = [1071833029u32, 1120403456u32, 456082799u32, 2139095040u32]; - - for i in 0..values.len() { - let bits = f32::to_bits(sqrtf(values[i])); - assert_eq!(results[i], bits); - } - } + super::generic::sqrt(x) } diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index db799c030..cf19762e8 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -55,10 +55,12 @@ pub trait Int: + ops::BitAnd + cmp::Ord + CastFrom + + CastFrom + CastFrom + CastFrom + CastFrom + CastInto + + CastInto + CastInto + CastInto + CastInto diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs index 076fdf1f7..c9a36c0db 100644 --- a/libm/src/math/support/macros.rs +++ b/libm/src/math/support/macros.rs @@ -110,19 +110,21 @@ macro_rules! hf64 { /// Assert `F::biteq` with better messages. #[cfg(test)] macro_rules! 
assert_biteq { - ($left:expr, $right:expr, $($arg:tt)*) => {{ - let bits = ($left.to_bits() * 0).leading_zeros(); // hack to get the width from the value + ($left:expr, $right:expr, $($tt:tt)*) => {{ + let l = $left; + let r = $right; + let bits = (l.to_bits() - l.to_bits()).leading_zeros(); // hack to get the width from the value assert!( - $left.biteq($right), - "\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})", - l = $left, - lb = $left.to_bits(), - r = $right, - rb = $right.to_bits(), - width = ((bits / 4) + 2) as usize + l.biteq(r), + "{}\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})", + format_args!($($tt)*), + lb = l.to_bits(), + rb = r.to_bits(), + width = ((bits / 4) + 2) as usize, + ); }}; ($left:expr, $right:expr $(,)?) => { - assert_biteq!($left, $right,) + assert_biteq!($left, $right, "") }; } diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index e2f4e0e98..b4a57a34e 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -10,3 +10,14 @@ pub(crate) use float_traits::{f32_from_bits, f64_from_bits}; #[allow(unused_imports)] pub use hex_float::{hf32, hf64}; pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; + +/// Hint to the compiler that the current path is cold. +pub fn cold_path() { + #[cfg(intrinsics_enabled)] + core::intrinsics::cold_path(); +} + +/// Return `x`, first raising `FE_INVALID`. 
+pub fn raise_invalid(x: F) -> F { + (x - x) / (x - x) +} From d5a502dbde2047973ed3b383d0062ac4ab2e7f8a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 15 Jan 2025 11:49:28 +0000 Subject: [PATCH 1101/1459] Copy the u256 implementation from compiler_builtins --- libm/src/math/support/big.rs | 302 +++++++++++++++++++++++++++++ libm/src/math/support/big/tests.rs | 110 +++++++++++ libm/src/math/support/mod.rs | 1 + 3 files changed, 413 insertions(+) create mode 100644 libm/src/math/support/big.rs create mode 100644 libm/src/math/support/big/tests.rs diff --git a/libm/src/math/support/big.rs b/libm/src/math/support/big.rs new file mode 100644 index 000000000..e0f5e5263 --- /dev/null +++ b/libm/src/math/support/big.rs @@ -0,0 +1,302 @@ +//! Integers used for wide operations, larger than `u128`. + +#![allow(unused)] + +#[cfg(test)] +mod tests; + +use core::{fmt, ops}; + +use super::{DInt, HInt, Int, MinInt}; + +const WORD_LO_MASK: u64 = 0x00000000ffffffff; +const WORD_HI_MASK: u64 = 0xffffffff00000000; +const WORD_FULL_MASK: u64 = 0xffffffffffffffff; +const U128_LO_MASK: u128 = u64::MAX as u128; +const U128_HI_MASK: u128 = (u64::MAX as u128) << 64; + +/// A 256-bit unsigned integer represented as 4 64-bit limbs. +/// +/// Each limb is a native-endian number, but the array is little-limb-endian. +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +pub struct u256(pub [u64; 4]); + +impl u256 { + pub const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX]); + + /// Reinterpret as a signed integer + pub fn signed(self) -> i256 { + i256(self.0) + } +} + +/// A 256-bit signed integer represented as 4 64-bit limbs. +/// +/// Each limb is a native-endian number, but the array is little-limb-endian. 
+#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +pub struct i256(pub [u64; 4]); + +impl i256 { + /// Reinterpret as an unsigned integer + pub fn unsigned(self) -> u256 { + u256(self.0) + } +} + +impl MinInt for u256 { + type OtherSign = i256; + + type Unsigned = u256; + + const SIGNED: bool = false; + const BITS: u32 = 256; + const ZERO: Self = Self([0u64; 4]); + const ONE: Self = Self([1, 0, 0, 0]); + const MIN: Self = Self([0u64; 4]); + const MAX: Self = Self([u64::MAX; 4]); +} + +impl MinInt for i256 { + type OtherSign = u256; + + type Unsigned = u256; + + const SIGNED: bool = false; + const BITS: u32 = 256; + const ZERO: Self = Self([0u64; 4]); + const ONE: Self = Self([1, 0, 0, 0]); + const MIN: Self = Self([0, 0, 0, 1 << 63]); + const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX << 1]); +} + +macro_rules! impl_common { + ($ty:ty) => { + impl ops::BitOr for $ty { + type Output = Self; + + fn bitor(mut self, rhs: Self) -> Self::Output { + self.0[0] |= rhs.0[0]; + self.0[1] |= rhs.0[1]; + self.0[2] |= rhs.0[2]; + self.0[3] |= rhs.0[3]; + self + } + } + + impl ops::Not for $ty { + type Output = Self; + + fn not(self) -> Self::Output { + Self([!self.0[0], !self.0[1], !self.0[2], !self.0[3]]) + } + } + + impl ops::Shl for $ty { + type Output = Self; + + fn shl(self, rhs: u32) -> Self::Output { + unimplemented!("only used to meet trait bounds") + } + } + }; +} + +impl_common!(i256); +impl_common!(u256); + +impl ops::Shr for u256 { + type Output = Self; + + fn shr(self, rhs: u32) -> Self::Output { + assert!(rhs < Self::BITS, "attempted to shift right with overflow"); + + if rhs == 0 { + return self; + } + + let mut ret = self; + let byte_shift = rhs / 64; + let bit_shift = rhs % 64; + + for idx in 0..4 { + let base_idx = idx + byte_shift as usize; + + // FIXME(msrv): could be let...else. 
+ let base = match ret.0.get(base_idx) { + Some(v) => v, + None => { + ret.0[idx] = 0; + continue; + } + }; + + let mut new_val = base >> bit_shift; + + if let Some(new) = ret.0.get(base_idx + 1) { + new_val |= new.overflowing_shl(64 - bit_shift).0; + } + + ret.0[idx] = new_val; + } + + ret + } +} + +macro_rules! word { + (1, $val:expr) => { + (($val >> (32 * 3)) & Self::from(WORD_LO_MASK)) as u64 + }; + (2, $val:expr) => { + (($val >> (32 * 2)) & Self::from(WORD_LO_MASK)) as u64 + }; + (3, $val:expr) => { + (($val >> (32 * 1)) & Self::from(WORD_LO_MASK)) as u64 + }; + (4, $val:expr) => { + (($val >> (32 * 0)) & Self::from(WORD_LO_MASK)) as u64 + }; +} + +impl HInt for u128 { + type D = u256; + + fn widen(self) -> Self::D { + let w0 = self & u128::from(u64::MAX); + let w1 = (self >> u64::BITS) & u128::from(u64::MAX); + u256([w0 as u64, w1 as u64, 0, 0]) + } + + fn zero_widen(self) -> Self::D { + self.widen() + } + + fn zero_widen_mul(self, rhs: Self) -> Self::D { + let product11: u64 = word!(1, self) * word!(1, rhs); + let product12: u64 = word!(1, self) * word!(2, rhs); + let product13: u64 = word!(1, self) * word!(3, rhs); + let product14: u64 = word!(1, self) * word!(4, rhs); + let product21: u64 = word!(2, self) * word!(1, rhs); + let product22: u64 = word!(2, self) * word!(2, rhs); + let product23: u64 = word!(2, self) * word!(3, rhs); + let product24: u64 = word!(2, self) * word!(4, rhs); + let product31: u64 = word!(3, self) * word!(1, rhs); + let product32: u64 = word!(3, self) * word!(2, rhs); + let product33: u64 = word!(3, self) * word!(3, rhs); + let product34: u64 = word!(3, self) * word!(4, rhs); + let product41: u64 = word!(4, self) * word!(1, rhs); + let product42: u64 = word!(4, self) * word!(2, rhs); + let product43: u64 = word!(4, self) * word!(3, rhs); + let product44: u64 = word!(4, self) * word!(4, rhs); + + let sum0: u128 = u128::from(product44); + let sum1: u128 = u128::from(product34) + u128::from(product43); + let sum2: u128 = 
u128::from(product24) + u128::from(product33) + u128::from(product42); + let sum3: u128 = u128::from(product14) + + u128::from(product23) + + u128::from(product32) + + u128::from(product41); + let sum4: u128 = u128::from(product13) + u128::from(product22) + u128::from(product31); + let sum5: u128 = u128::from(product12) + u128::from(product21); + let sum6: u128 = u128::from(product11); + + let r0: u128 = + (sum0 & u128::from(WORD_FULL_MASK)) + ((sum1 & u128::from(WORD_LO_MASK)) << 32); + let r1: u128 = (sum0 >> 64) + + ((sum1 >> 32) & u128::from(WORD_FULL_MASK)) + + (sum2 & u128::from(WORD_FULL_MASK)) + + ((sum3 << 32) & u128::from(WORD_HI_MASK)); + + let (lo, carry) = r0.overflowing_add(r1 << 64); + let hi = (r1 >> 64) + + (sum1 >> 96) + + (sum2 >> 64) + + (sum3 >> 32) + + sum4 + + (sum5 << 32) + + (sum6 << 64) + + u128::from(carry); + + u256([ + (lo & U128_LO_MASK) as u64, + ((lo >> 64) & U128_LO_MASK) as u64, + (hi & U128_LO_MASK) as u64, + ((hi >> 64) & U128_LO_MASK) as u64, + ]) + } + + fn widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen_mul(rhs) + } + + fn widen_hi(self) -> Self::D { + self.widen() << ::BITS + } +} + +impl HInt for i128 { + type D = i256; + + fn widen(self) -> Self::D { + let mut ret = self.unsigned().zero_widen().signed(); + if self.is_negative() { + ret.0[2] = u64::MAX; + ret.0[3] = u64::MAX; + } + ret + } + + fn zero_widen(self) -> Self::D { + self.unsigned().zero_widen().signed() + } + + fn zero_widen_mul(self, rhs: Self) -> Self::D { + self.unsigned().zero_widen_mul(rhs.unsigned()).signed() + } + + fn widen_mul(self, rhs: Self) -> Self::D { + unimplemented!("signed i128 widening multiply is not used") + } + + fn widen_hi(self) -> Self::D { + self.widen() << ::BITS + } +} + +impl DInt for u256 { + type H = u128; + + fn lo(self) -> Self::H { + let mut tmp = [0u8; 16]; + tmp[..8].copy_from_slice(&self.0[0].to_le_bytes()); + tmp[8..].copy_from_slice(&self.0[1].to_le_bytes()); + u128::from_le_bytes(tmp) + } + + fn hi(self) -> Self::H 
{ + let mut tmp = [0u8; 16]; + tmp[..8].copy_from_slice(&self.0[2].to_le_bytes()); + tmp[8..].copy_from_slice(&self.0[3].to_le_bytes()); + u128::from_le_bytes(tmp) + } +} + +impl DInt for i256 { + type H = i128; + + fn lo(self) -> Self::H { + let mut tmp = [0u8; 16]; + tmp[..8].copy_from_slice(&self.0[0].to_le_bytes()); + tmp[8..].copy_from_slice(&self.0[1].to_le_bytes()); + i128::from_le_bytes(tmp) + } + + fn hi(self) -> Self::H { + let mut tmp = [0u8; 16]; + tmp[..8].copy_from_slice(&self.0[2].to_le_bytes()); + tmp[8..].copy_from_slice(&self.0[3].to_le_bytes()); + i128::from_le_bytes(tmp) + } +} diff --git a/libm/src/math/support/big/tests.rs b/libm/src/math/support/big/tests.rs new file mode 100644 index 000000000..f95f82973 --- /dev/null +++ b/libm/src/math/support/big/tests.rs @@ -0,0 +1,110 @@ +extern crate std; +use std::string::String; +use std::vec::Vec; +use std::{eprintln, format}; + +use super::{HInt, MinInt, i256, u256}; + +const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff; + +/// Print a `u256` as hex since we can't add format implementations +fn hexu(v: u256) -> String { + format!("0x{:016x}{:016x}{:016x}{:016x}", v.0[3], v.0[2], v.0[1], v.0[0]) +} + +#[test] +fn widen_u128() { + assert_eq!(u128::MAX.widen(), u256([u64::MAX, u64::MAX, 0, 0])); + assert_eq!(LOHI_SPLIT.widen(), u256([u64::MAX, 0xaaaaaaaaaaaaaaaa, 0, 0])); +} + +#[test] +fn widen_i128() { + assert_eq!((-1i128).widen(), u256::MAX.signed()); + assert_eq!( + (LOHI_SPLIT as i128).widen(), + i256([u64::MAX, 0xaaaaaaaaaaaaaaaa, u64::MAX, u64::MAX]) + ); + assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen()); +} + +#[test] +fn widen_mul_u128() { + let tests = [ + (u128::MAX / 2, 2_u128, u256([u64::MAX - 1, u64::MAX, 0, 0])), + (u128::MAX, 2_u128, u256([u64::MAX - 1, u64::MAX, 1, 0])), + (u128::MAX, u128::MAX, u256([1, 0, u64::MAX - 1, u64::MAX])), + (u128::MIN, u128::MIN, u256::ZERO), + (1234, 0, u256::ZERO), + (0, 1234, u256::ZERO), + ]; + + let mut errors = Vec::new(); 
+ for (i, (a, b, exp)) in tests.iter().copied().enumerate() { + let res = a.widen_mul(b); + let res_z = a.zero_widen_mul(b); + assert_eq!(res, res_z); + if res != exp { + errors.push((i, a, b, exp, res)); + } + } + + for (i, a, b, exp, res) in &errors { + eprintln!("FAILURE ({i}): {a:#034x} * {b:#034x} = {} got {}", hexu(*exp), hexu(*res)); + } + assert!(errors.is_empty()); +} + +#[test] +fn not_u128() { + assert_eq!(!u256::ZERO, u256::MAX); +} + +#[test] +fn shr_u128() { + let only_low = [1, u16::MAX.into(), u32::MAX.into(), u64::MAX.into(), u128::MAX]; + + let mut errors = Vec::new(); + + for a in only_low { + for perturb in 0..10 { + let a = a.saturating_add(perturb); + for shift in 0..128 { + let res = a.widen() >> shift; + let expected = (a >> shift).widen(); + if res != expected { + errors.push((a.widen(), shift, res, expected)); + } + } + } + } + + let check = [ + (u256::MAX, 1, u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1])), + (u256::MAX, 5, u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 5])), + (u256::MAX, 63, u256([u64::MAX, u64::MAX, u64::MAX, 1])), + (u256::MAX, 64, u256([u64::MAX, u64::MAX, u64::MAX, 0])), + (u256::MAX, 65, u256([u64::MAX, u64::MAX, u64::MAX >> 1, 0])), + (u256::MAX, 127, u256([u64::MAX, u64::MAX, 1, 0])), + (u256::MAX, 128, u256([u64::MAX, u64::MAX, 0, 0])), + (u256::MAX, 129, u256([u64::MAX, u64::MAX >> 1, 0, 0])), + (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])), + (u256::MAX, 192, u256([u64::MAX, 0, 0, 0])), + (u256::MAX, 193, u256([u64::MAX >> 1, 0, 0, 0])), + (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])), + (u256::MAX, 254, u256([0b11, 0, 0, 0])), + (u256::MAX, 255, u256([1, 0, 0, 0])), + ]; + + for (input, shift, expected) in check { + let res = input >> shift; + if res != expected { + errors.push((input, shift, res, expected)); + } + } + + for (a, b, res, expected) in &errors { + eprintln!("FAILURE: {} >> {b} = {} got {}", hexu(*a), hexu(*expected), hexu(*res),); + } + assert!(errors.is_empty()); +} diff --git 
a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index b4a57a34e..ddfc2e3e0 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -1,5 +1,6 @@ #[macro_use] pub mod macros; +mod big; mod float_traits; mod hex_float; mod int_traits; From ad9c639aa0b74f240ec1616b35cb79a9fd37896b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 21 Jan 2025 23:59:07 +0000 Subject: [PATCH 1102/1459] Add `sqrtf16` and `sqrtf128` Use the generic algorithms to provide implementations for these routines. --- libm/crates/libm-macros/src/shared.rs | 4 +- libm/crates/libm-test/benches/icount.rs | 2 + libm/crates/libm-test/benches/random.rs | 2 + .../libm-test/tests/compare_built_musl.rs | 2 + libm/crates/util/src/main.rs | 2 + libm/etc/function-definitions.json | 14 +++ libm/etc/function-list.txt | 2 + libm/src/math/generic/sqrt.rs | 92 +++++++++++++++++++ libm/src/math/mod.rs | 4 + libm/src/math/sqrtf128.rs | 5 + libm/src/math/sqrtf16.rs | 5 + 11 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 libm/src/math/sqrtf128.rs create mode 100644 libm/src/math/sqrtf16.rs diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index 608381962..d17bc6ffc 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ b/libm/crates/libm-macros/src/shared.rs @@ -9,7 +9,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F16, Signature { args: &[Ty::F16], returns: &[Ty::F16] }, None, - &["fabsf16", "truncf16"], + &["fabsf16", "sqrtf16", "truncf16"], ), ( // `fn(f32) -> f32` @@ -40,7 +40,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F128, Signature { args: &[Ty::F128], returns: &[Ty::F128] }, None, - &["fabsf128", "truncf128"], + &["fabsf128", "sqrtf128", "truncf128"], ), ( // `(f16, f16) -> f16` diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs index 3a66249e8..c8451f88c 100644 --- 
a/libm/crates/libm-test/benches/icount.rs +++ b/libm/crates/libm-test/benches/icount.rs @@ -155,6 +155,8 @@ main!( icount_bench_sinh_group, icount_bench_sinhf_group, icount_bench_sqrt_group, + icount_bench_sqrtf128_group, + icount_bench_sqrtf16_group, icount_bench_sqrtf_group, icount_bench_tan_group, icount_bench_tanf_group, diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 888161265..026841202 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -123,6 +123,8 @@ libm_macros::for_each_function! { | fabsf16 | fdimf128 | fdimf16 + | sqrtf16 + | sqrtf128 | truncf128 | truncf16 => (false, None), diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index f540a0b15..24703f273 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -87,5 +87,7 @@ libm_macros::for_each_function! 
{ fdimf16, truncf128, truncf16, + sqrtf16, + sqrtf128, ], } diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index b979c60ad..cd68d9afd 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -90,6 +90,8 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { | fabsf16 | fdimf128 | fdimf16 + | sqrtf128 + | sqrtf16 | truncf128 | truncf16 => None, _ => Some(musl_math_sys::MACRO_FN_NAME) diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 9f7c8ab25..2d0af3bcf 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -718,6 +718,20 @@ ], "type": "f32" }, + "sqrtf128": { + "sources": [ + "src/math/generic/sqrt.rs", + "src/math/sqrtf128.rs" + ], + "type": "f128" + }, + "sqrtf16": { + "sources": [ + "src/math/generic/sqrt.rs", + "src/math/sqrtf16.rs" + ], + "type": "f16" + }, "tan": { "sources": [ "src/libm_helper.rs", diff --git a/libm/etc/function-list.txt b/libm/etc/function-list.txt index 7f96a4362..47c34ab90 100644 --- a/libm/etc/function-list.txt +++ b/libm/etc/function-list.txt @@ -105,6 +105,8 @@ sinh sinhf sqrt sqrtf +sqrtf128 +sqrtf16 tan tanf tanh diff --git a/libm/src/math/generic/sqrt.rs b/libm/src/math/generic/sqrt.rs index a2e054f3c..c892f9997 100644 --- a/libm/src/math/generic/sqrt.rs +++ b/libm/src/math/generic/sqrt.rs @@ -294,6 +294,14 @@ pub trait SqrtHelper: Float { const FINAL_ROUNDS: u32; } +#[cfg(f16_enabled)] +impl SqrtHelper for f16 { + type ISet1 = u16; // unused + type ISet2 = u16; // unused + + const FINAL_ROUNDS: u32 = 2; +} + impl SqrtHelper for f32 { type ISet1 = u32; // unused type ISet2 = u32; // unused @@ -309,6 +317,16 @@ impl SqrtHelper for f64 { const FINAL_ROUNDS: u32 = 2; } +#[cfg(f128_enabled)] +impl SqrtHelper for f128 { + type ISet1 = u32; + type ISet2 = u64; + + const SET1_ROUNDS: u32 = 1; + const SET2_ROUNDS: u32 = 2; + const FINAL_ROUNDS: u32 = 2; +} + /// A U0.16 representation of `1/sqrt(x)`. 
/// // / The index is a 7-bit number consisting of a single exponent bit and 6 bits of significand. @@ -355,6 +373,42 @@ mod tests { } } + #[test] + #[cfg(f16_enabled)] + fn sanity_check_f16() { + assert_biteq!(sqrt(100.0f16), 10.0); + assert_biteq!(sqrt(4.0f16), 2.0); + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(); + } + + #[test] + #[cfg(f16_enabled)] + #[allow(clippy::approx_constant)] + fn conformance_tests_f16() { + let cases = [ + (f16::PI, 0x3f17_u16), + // 10_000.0, using a hex literal for MSRV hack (Rust < 1.67 checks literal widths as + // part of the AST, so the `cfg` is irrelevant here). + (f16::from_bits(0x70e2), 0x5640_u16), + (f16::from_bits(0x0000000f), 0x13bf_u16), + (f16::INFINITY, f16::INFINITY.to_bits()), + ]; + + for (input, output) in cases { + assert_biteq!( + sqrt(input), + f16::from_bits(output), + "input: {input:?} ({:#018x})", + input.to_bits() + ); + } + } + #[test] fn sanity_check_f32() { assert_biteq!(sqrt(100.0f32), 10.0); @@ -416,4 +470,42 @@ mod tests { ); } } + + #[test] + #[cfg(f128_enabled)] + fn sanity_check_f128() { + assert_biteq!(sqrt(100.0f128), 10.0); + assert_biteq!(sqrt(4.0f128), 2.0); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(); + } + + #[test] + #[cfg(f128_enabled)] + #[allow(clippy::approx_constant)] + fn conformance_tests_f128() { + let cases = [ + (f128::PI, 0x3fffc5bf891b4ef6aa79c3b0520d5db9_u128), + // 10_000.0, see `f16` for reasoning. 
+ ( + f128::from_bits(0x400c3880000000000000000000000000), + 0x40059000000000000000000000000000_u128, + ), + (f128::from_bits(0x0000000f), 0x1fc9efbdeb14f4ed9b17ae807907e1e9_u128), + (f128::INFINITY, f128::INFINITY.to_bits()), + ]; + + for (input, output) in cases { + assert_biteq!( + sqrt(input), + f128::from_bits(output), + "input: {input:?} ({:#018x})", + input.to_bits() + ); + } + } } diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 03adb6be1..3684025a6 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -344,11 +344,13 @@ cfg_if! { mod copysignf16; mod fabsf16; mod fdimf16; + mod sqrtf16; mod truncf16; pub use self::copysignf16::copysignf16; pub use self::fabsf16::fabsf16; pub use self::fdimf16::fdimf16; + pub use self::sqrtf16::sqrtf16; pub use self::truncf16::truncf16; } } @@ -358,11 +360,13 @@ cfg_if! { mod copysignf128; mod fabsf128; mod fdimf128; + mod sqrtf128; mod truncf128; pub use self::copysignf128::copysignf128; pub use self::fabsf128::fabsf128; pub use self::fdimf128::fdimf128; + pub use self::sqrtf128::sqrtf128; pub use self::truncf128::truncf128; } } diff --git a/libm/src/math/sqrtf128.rs b/libm/src/math/sqrtf128.rs new file mode 100644 index 000000000..eaef6ae0c --- /dev/null +++ b/libm/src/math/sqrtf128.rs @@ -0,0 +1,5 @@ +/// The square root of `x` (f128). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf128(x: f128) -> f128 { + return super::generic::sqrt(x); +} diff --git a/libm/src/math/sqrtf16.rs b/libm/src/math/sqrtf16.rs new file mode 100644 index 000000000..549bf902c --- /dev/null +++ b/libm/src/math/sqrtf16.rs @@ -0,0 +1,5 @@ +/// The square root of `x` (f16). 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf16(x: f16) -> f16 { + return super::generic::sqrt(x); +} From ab81e7b3036aed047e2925d64f09998874acf615 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 11:58:17 +0000 Subject: [PATCH 1103/1459] Shift then mask, rather than mask then shift This may allow for small optimizations with larger float types since `u32` math can be used after shifting. LLVM may be already getting this anyway. --- libm/src/math/support/float_traits.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 647f4f5e2..2e467111f 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -1,6 +1,6 @@ use core::{fmt, mem, ops}; -use super::int_traits::{CastFrom, CastInto, Int, MinInt}; +use super::int_traits::{CastFrom, Int, MinInt}; /// Trait for some basic operations on floats #[allow(dead_code)] @@ -108,7 +108,7 @@ pub trait Float: /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero. fn exp(self) -> i32 { - ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS).cast() + (u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_MAX).signed() } /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero. From 3734276346df879d12a267e4d5c001f20f7c63d7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 22 Jan 2025 07:13:45 +0000 Subject: [PATCH 1104/1459] Make `Float::exp` return an unsigned integer `exp` does not perform any form of unbiasing, so there isn't any reason it should be signed. Change this. Additionally, add `EPSILON` to the `Float` trait. 
--- libm/crates/libm-test/src/f8_impl.rs | 2 ++ libm/src/math/generic/sqrt.rs | 2 +- libm/src/math/support/float_traits.rs | 8 +++++--- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/libm/crates/libm-test/src/f8_impl.rs b/libm/crates/libm-test/src/f8_impl.rs index 299553d20..96b783924 100644 --- a/libm/crates/libm-test/src/f8_impl.rs +++ b/libm/crates/libm-test/src/f8_impl.rs @@ -30,6 +30,8 @@ impl Float for f8 { const INFINITY: Self = Self(0b0_1111_000); const NEG_INFINITY: Self = Self(0b1_1111_000); const NAN: Self = Self(0b0_1111_100); + // FIXME: incorrect values + const EPSILON: Self = Self::ZERO; const PI: Self = Self::ZERO; const NEG_PI: Self = Self::ZERO; const FRAC_PI_2: Self = Self::ZERO; diff --git a/libm/src/math/generic/sqrt.rs b/libm/src/math/generic/sqrt.rs index c892f9997..22ee93f57 100644 --- a/libm/src/math/generic/sqrt.rs +++ b/libm/src/math/generic/sqrt.rs @@ -96,7 +96,7 @@ where ix = scaled.to_bits(); match top { Exp::Shifted(ref mut v) => { - *v = scaled.exp().unsigned(); + *v = scaled.exp(); *v = (*v).wrapping_sub(F::SIG_BITS); } Exp::NoShift(()) => { diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 2e467111f..1abb7c4de 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -34,6 +34,7 @@ pub trait Float: const NAN: Self; const MAX: Self; const MIN: Self; + const EPSILON: Self; const PI: Self; const NEG_PI: Self; const FRAC_PI_2: Self; @@ -107,13 +108,13 @@ pub trait Float: } /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero. - fn exp(self) -> i32 { - (u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_MAX).signed() + fn exp(self) -> u32 { + u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_MAX } /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero. 
fn exp_unbiased(self) -> i32 { - self.exp() - (Self::EXP_BIAS as i32) + self.exp().signed() - (Self::EXP_BIAS as i32) } /// Returns the significand with no implicit bit (or the "fractional" part) @@ -180,6 +181,7 @@ macro_rules! float_impl { const MAX: Self = -Self::MIN; // Sign bit set, saturated mantissa, saturated exponent with last bit zeroed const MIN: Self = $from_bits(Self::Int::MAX & !(1 << Self::SIG_BITS)); + const EPSILON: Self = <$ty>::EPSILON; const PI: Self = core::$ty::consts::PI; const NEG_PI: Self = -Self::PI; From a0393c4d6d4ccad00613b72a010191090fcdb08b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 11:17:03 +0000 Subject: [PATCH 1105/1459] Add a generic version of `ceil` Additionally, make use of this version to implement `ceil` and `ceilf`. Musl's `ceilf` algorithm seems to work better for all versions of the functions. Testing with a generic version of musl's `ceil` routine showed the following regressions: icount::icount_bench_ceil_group::icount_bench_ceil logspace:setup_ceil() Performance has regressed: Instructions (14064 > 13171) regressed by +6.78005% (>+5.00000) Baselines: softfloat|softfloat Instructions: 14064|13171 (+6.78005%) [+1.06780x] L1 Hits: 16697|15803 (+5.65715%) [+1.05657x] L2 Hits: 0|0 (No change) RAM Hits: 7|8 (-12.5000%) [-1.14286x] Total read+write: 16704|15811 (+5.64797%) [+1.05648x] Estimated Cycles: 16942|16083 (+5.34104%) [+1.05341x] icount::icount_bench_ceilf_group::icount_bench_ceilf logspace:setup_ceilf() Performance has regressed: Instructions (14732 > 9901) regressed by +48.7931% (>+5.00000) Baselines: softfloat|softfloat Instructions: 14732|9901 (+48.7931%) [+1.48793x] L1 Hits: 17494|12611 (+38.7202%) [+1.38720x] L2 Hits: 0|0 (No change) RAM Hits: 6|6 (No change) Total read+write: 17500|12617 (+38.7018%) [+1.38702x] Estimated Cycles: 17704|12821 (+38.0860%) [+1.38086x] --- libm/src/math/ceil.rs | 42 +---------------- libm/src/math/ceilf.rs | 51 +------------------- 
libm/src/math/generic/ceil.rs | 87 +++++++++++++++++++++++++++++++++++ libm/src/math/generic/mod.rs | 2 + 4 files changed, 91 insertions(+), 91 deletions(-) create mode 100644 libm/src/math/generic/ceil.rs diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index 398bfee47..535f434ac 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -1,8 +1,3 @@ -#![allow(unreachable_code)] -use core::f64; - -const TOINT: f64 = 1. / f64::EPSILON; - /// Ceil (f64) /// /// Finds the nearest integer greater than or equal to `x`. @@ -15,40 +10,5 @@ pub fn ceil(x: f64) -> f64 { args: x, } - let u: u64 = x.to_bits(); - let e: i64 = ((u >> 52) & 0x7ff) as i64; - let y: f64; - - if e >= 0x3ff + 52 || x == 0. { - return x; - } - // y = int(x) - x, where int(x) is an integer neighbor of x - y = if (u >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x }; - // special case because of non-nearest rounding modes - if e < 0x3ff { - force_eval!(y); - return if (u >> 63) != 0 { -0. } else { 1. }; - } - if y < 0. { x + y + 1. } else { x + y } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(ceil(1.1), 2.0); - assert_eq!(ceil(2.9), 3.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil - #[test] - fn spec_tests() { - // Not Asserted: that the current rounding mode has no effect. - assert!(ceil(f64::NAN).is_nan()); - for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY].iter().copied() { - assert_eq!(ceil(f), f); - } - } + super::generic::ceil(x) } diff --git a/libm/src/math/ceilf.rs b/libm/src/math/ceilf.rs index 9e8e78e3e..66d44189c 100644 --- a/libm/src/math/ceilf.rs +++ b/libm/src/math/ceilf.rs @@ -1,5 +1,3 @@ -use core::f32; - /// Ceil (f32) /// /// Finds the nearest integer greater than or equal to `x`. 
@@ -11,52 +9,5 @@ pub fn ceilf(x: f32) -> f32 { args: x, } - let mut ui = x.to_bits(); - let e = (((ui >> 23) & 0xff).wrapping_sub(0x7f)) as i32; - - if e >= 23 { - return x; - } - if e >= 0 { - let m = 0x007fffff >> e; - if (ui & m) == 0 { - return x; - } - force_eval!(x + f32::from_bits(0x7b800000)); - if ui >> 31 == 0 { - ui += m; - } - ui &= !m; - } else { - force_eval!(x + f32::from_bits(0x7b800000)); - if ui >> 31 != 0 { - return -0.0; - } else if ui << 1 != 0 { - return 1.0; - } - } - f32::from_bits(ui) -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(ceilf(1.1), 2.0); - assert_eq!(ceilf(2.9), 3.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil - #[test] - fn spec_tests() { - // Not Asserted: that the current rounding mode has no effect. - assert!(ceilf(f32::NAN).is_nan()); - for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY].iter().copied() { - assert_eq!(ceilf(f), f); - } - } + super::generic::ceil(x) } diff --git a/libm/src/math/generic/ceil.rs b/libm/src/math/generic/ceil.rs new file mode 100644 index 000000000..d16d06572 --- /dev/null +++ b/libm/src/math/generic/ceil.rs @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/ceilf.c */ + +//! Generic `ceil` algorithm. +//! +//! Note that this uses the algorithm from musl's `ceilf` rather than `ceil` or `ceill` because +//! performance seems to be better (based on icount) and it does not seem to experience rounding +//! errors on i386. + +use super::super::{Float, Int, IntTy, MinInt}; + +pub fn ceil(x: F) -> F { + let zero = IntTy::::ZERO; + + let mut ix = x.to_bits(); + let e = x.exp_unbiased(); + + // If the represented value has no fractional part, no truncation is needed. 
+ if e >= F::SIG_BITS as i32 { + return x; + } + + if e >= 0 { + // |x| >= 1.0 + + let m = F::SIG_MASK >> e.unsigned(); + if (ix & m) == zero { + // Portion to be masked is already zero; no adjustment needed. + return x; + } + + // Otherwise, raise an inexact exception. + force_eval!(x + F::MAX); + if x.is_sign_positive() { + ix += m; + } + ix &= !m; + } else { + // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0). + force_eval!(x + F::MAX); + + if x.is_sign_negative() { + // -1.0 < x <= -0.0; rounding up goes toward -0.0. + return F::NEG_ZERO; + } else if ix << 1 != zero { + // 0.0 < x < 1.0; rounding up goes toward +1.0. + return F::ONE; + } + } + + F::from_bits(ix) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Test against https://en.cppreference.com/w/cpp/numeric/math/ceil + fn spec_test() { + // Not Asserted: that the current rounding mode has no effect. + for f in [F::ZERO, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY].iter().copied() { + assert_biteq!(ceil(f), f); + } + } + + #[test] + fn sanity_check_f32() { + assert_eq!(ceil(1.1f32), 2.0); + assert_eq!(ceil(2.9f32), 3.0); + } + + #[test] + fn spec_tests_f32() { + spec_test::(); + } + + #[test] + fn sanity_check_f64() { + assert_eq!(ceil(1.1f64), 2.0); + assert_eq!(ceil(2.9f64), 3.0); + } + + #[test] + fn spec_tests_f64() { + spec_test::(); + } +} diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index 3b5a2c3ef..f8bb9fa6a 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -1,9 +1,11 @@ +mod ceil; mod copysign; mod fabs; mod fdim; mod sqrt; mod trunc; +pub use ceil::ceil; pub use copysign::copysign; pub use fabs::fabs; pub use fdim::fdim; From 0cb45501be6d087eb06fcaf44a2b1e6b81ffa063 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 11:17:17 +0000 Subject: [PATCH 1106/1459] Add `ceilf16` and `ceilf128` Use the generic algorithms to provide implementations for these routines. 
--- libm/crates/libm-macros/src/shared.rs | 4 ++-- libm/crates/libm-test/benches/icount.rs | 2 ++ libm/crates/libm-test/benches/random.rs | 4 +++- libm/crates/libm-test/src/mpfloat.rs | 4 ++++ .../libm-test/tests/compare_built_musl.rs | 2 ++ libm/crates/util/src/main.rs | 4 +++- libm/etc/function-definitions.json | 20 +++++++++++++++++-- libm/etc/function-list.txt | 2 ++ libm/src/math/ceilf128.rs | 7 +++++++ libm/src/math/ceilf16.rs | 7 +++++++ libm/src/math/generic/ceil.rs | 14 +++++++++++++ libm/src/math/mod.rs | 4 ++++ 12 files changed, 68 insertions(+), 6 deletions(-) create mode 100644 libm/src/math/ceilf128.rs create mode 100644 libm/src/math/ceilf16.rs diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index d17bc6ffc..e7d3d18d9 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ b/libm/crates/libm-macros/src/shared.rs @@ -9,7 +9,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F16, Signature { args: &[Ty::F16], returns: &[Ty::F16] }, None, - &["fabsf16", "sqrtf16", "truncf16"], + &["ceilf16", "fabsf16", "sqrtf16", "truncf16"], ), ( // `fn(f32) -> f32` @@ -40,7 +40,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F128, Signature { args: &[Ty::F128], returns: &[Ty::F128] }, None, - &["fabsf128", "sqrtf128", "truncf128"], + &["ceilf128", "fabsf128", "sqrtf128", "truncf128"], ), ( // `(f16, f16) -> f16` diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs index c8451f88c..84be3d524 100644 --- a/libm/crates/libm-test/benches/icount.rs +++ b/libm/crates/libm-test/benches/icount.rs @@ -69,6 +69,8 @@ main!( icount_bench_cbrt_group, icount_bench_cbrtf_group, icount_bench_ceil_group, + icount_bench_ceilf128_group, + icount_bench_ceilf16_group, icount_bench_ceilf_group, icount_bench_copysign_group, icount_bench_copysignf128_group, diff --git a/libm/crates/libm-test/benches/random.rs 
b/libm/crates/libm-test/benches/random.rs index 026841202..511e26d91 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -117,7 +117,9 @@ libm_macros::for_each_function! { exp10 | exp10f | exp2 | exp2f => (true, Some(musl_math_sys::MACRO_FN_NAME)), // Musl does not provide `f16` and `f128` functions - copysignf128 + ceilf128 + | ceilf16 + | copysignf128 | copysignf16 | fabsf128 | fabsf16 diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 9d95356d3..bbd19dbb0 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -137,6 +137,8 @@ libm_macros::for_each_function! { // Most of these need a manual implementation ceil, ceilf, + ceilf128, + ceilf16, copysign, copysignf, copysignf128, @@ -237,12 +239,14 @@ impl_no_round! { #[cfg(f16_enabled)] impl_no_round! { fabsf16 => abs_mut; + ceilf16 => ceil_mut; truncf16 => trunc_mut; } #[cfg(f128_enabled)] impl_no_round! { fabsf128 => abs_mut; + ceilf128 => ceil_mut; truncf128 => trunc_mut; } diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 24703f273..e13acf3de 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -79,6 +79,8 @@ libm_macros::for_each_function! 
{ ynf, // Not provided by musl + ceilf128, + ceilf16, copysignf128, copysignf16, fabsf128, diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index cd68d9afd..810919339 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -84,7 +84,9 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { emit_types: [CFn, RustFn, RustArgs], extra: (basis, op, inputs), fn_extra: match MACRO_FN_NAME { - copysignf128 + ceilf128 + | ceilf16 + | copysignf128 | copysignf16 | fabsf128 | fabsf16 diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 2d0af3bcf..c75152f63 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -109,17 +109,33 @@ "src/libm_helper.rs", "src/math/arch/i586.rs", "src/math/arch/wasm32.rs", - "src/math/ceil.rs" + "src/math/ceil.rs", + "src/math/generic/ceil.rs" ], "type": "f64" }, "ceilf": { "sources": [ "src/math/arch/wasm32.rs", - "src/math/ceilf.rs" + "src/math/ceilf.rs", + "src/math/generic/ceil.rs" ], "type": "f32" }, + "ceilf128": { + "sources": [ + "src/math/ceilf128.rs", + "src/math/generic/ceil.rs" + ], + "type": "f128" + }, + "ceilf16": { + "sources": [ + "src/math/ceilf16.rs", + "src/math/generic/ceil.rs" + ], + "type": "f16" + }, "copysign": { "sources": [ "src/libm_helper.rs", diff --git a/libm/etc/function-list.txt b/libm/etc/function-list.txt index 47c34ab90..337e7e434 100644 --- a/libm/etc/function-list.txt +++ b/libm/etc/function-list.txt @@ -17,6 +17,8 @@ cbrt cbrtf ceil ceilf +ceilf128 +ceilf16 copysign copysignf copysignf128 diff --git a/libm/src/math/ceilf128.rs b/libm/src/math/ceilf128.rs new file mode 100644 index 000000000..89980858e --- /dev/null +++ b/libm/src/math/ceilf128.rs @@ -0,0 +1,7 @@ +/// Ceil (f128) +/// +/// Finds the nearest integer greater than or equal to `x`. 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ceilf128(x: f128) -> f128 { + super::generic::ceil(x) +} diff --git a/libm/src/math/ceilf16.rs b/libm/src/math/ceilf16.rs new file mode 100644 index 000000000..2af67eff0 --- /dev/null +++ b/libm/src/math/ceilf16.rs @@ -0,0 +1,7 @@ +/// Ceil (f16) +/// +/// Finds the nearest integer greater than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ceilf16(x: f16) -> f16 { + super::generic::ceil(x) +} diff --git a/libm/src/math/generic/ceil.rs b/libm/src/math/generic/ceil.rs index d16d06572..34261faf7 100644 --- a/libm/src/math/generic/ceil.rs +++ b/libm/src/math/generic/ceil.rs @@ -63,6 +63,14 @@ mod tests { } } + /* Skipping f16 / f128 "sanity_check"s due to rejected literal lexing at MSRV */ + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(); + } + #[test] fn sanity_check_f32() { assert_eq!(ceil(1.1f32), 2.0); @@ -84,4 +92,10 @@ mod tests { fn spec_tests_f64() { spec_test::(); } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(); + } } diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 3684025a6..5228e78b7 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -341,12 +341,14 @@ pub use self::truncf::truncf; cfg_if! { if #[cfg(f16_enabled)] { + mod ceilf16; mod copysignf16; mod fabsf16; mod fdimf16; mod sqrtf16; mod truncf16; + pub use self::ceilf16::ceilf16; pub use self::copysignf16::copysignf16; pub use self::fabsf16::fabsf16; pub use self::fdimf16::fdimf16; @@ -357,12 +359,14 @@ cfg_if! { cfg_if! 
{ if #[cfg(f128_enabled)] { + mod ceilf128; mod copysignf128; mod fabsf128; mod fdimf128; mod sqrtf128; mod truncf128; + pub use self::ceilf128::ceilf128; pub use self::copysignf128::copysignf128; pub use self::fabsf128::fabsf128; pub use self::fdimf128::fdimf128; From 6463f4fa5afe45ef33ecae0486ae89df5a07e977 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 13:27:22 +0000 Subject: [PATCH 1107/1459] Add a generic version of `floor` Additionally, make use of this version to implement `floor` and `floorf`. Similar to `ceil`, musl's `floorf` routine seems to work better for all float widths than the `floor` algorithm. Trying with the `floor` (`f64`) algorithm produced the following regressions: icount::icount_bench_floor_group::icount_bench_floor logspace:setup_floor() Performance has regressed: Instructions (14064 > 13171) regressed by +6.78005% (>+5.00000) Baselines: softfloat|softfloat Instructions: 14064|13171 (+6.78005%) [+1.06780x] L1 Hits: 16821|15802 (+6.44855%) [+1.06449x] L2 Hits: 0|0 (No change) RAM Hits: 8|9 (-11.1111%) [-1.12500x] Total read+write: 16829|15811 (+6.43856%) [+1.06439x] Estimated Cycles: 17101|16117 (+6.10535%) [+1.06105x] icount::icount_bench_floorf128_group::icount_bench_floorf128 logspace:setup_floorf128() Baselines: softfloat|softfloat Instructions: 166868|N/A (*********) L1 Hits: 221429|N/A (*********) L2 Hits: 1|N/A (*********) RAM Hits: 34|N/A (*********) Total read+write: 221464|N/A (*********) Estimated Cycles: 222624|N/A (*********) icount::icount_bench_floorf16_group::icount_bench_floorf16 logspace:setup_floorf16() Baselines: softfloat|softfloat Instructions: 143029|N/A (*********) L1 Hits: 176517|N/A (*********) L2 Hits: 1|N/A (*********) RAM Hits: 13|N/A (*********) Total read+write: 176531|N/A (*********) Estimated Cycles: 176977|N/A (*********) icount::icount_bench_floorf_group::icount_bench_floorf logspace:setup_floorf() Performance has regressed: Instructions (14732 > 10441) regressed by +41.0976% (>+5.00000) 
Baselines: softfloat|softfloat Instructions: 14732|10441 (+41.0976%) [+1.41098x] L1 Hits: 17616|13027 (+35.2268%) [+1.35227x] L2 Hits: 0|0 (No change) RAM Hits: 8|6 (+33.3333%) [+1.33333x] Total read+write: 17624|13033 (+35.2260%) [+1.35226x] Estimated Cycles: 17896|13237 (+35.1968%) [+1.35197x] --- libm/etc/function-definitions.json | 6 +- libm/src/math/floor.rs | 41 +---------- libm/src/math/floorf.rs | 52 +------------- libm/src/math/generic/floor.rs | 106 +++++++++++++++++++++++++++++ libm/src/math/generic/mod.rs | 2 + 5 files changed, 114 insertions(+), 93 deletions(-) create mode 100644 libm/src/math/generic/floor.rs diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index c75152f63..6a865f427 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -336,14 +336,16 @@ "src/libm_helper.rs", "src/math/arch/i586.rs", "src/math/arch/wasm32.rs", - "src/math/floor.rs" + "src/math/floor.rs", + "src/math/generic/floor.rs" ], "type": "f64" }, "floorf": { "sources": [ "src/math/arch/wasm32.rs", - "src/math/floorf.rs" + "src/math/floorf.rs", + "src/math/generic/floor.rs" ], "type": "f32" }, diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index 2823bf44d..b4f02abc4 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -1,8 +1,3 @@ -#![allow(unreachable_code)] -use core::f64; - -const TOINT: f64 = 1. / f64::EPSILON; - /// Floor (f64) /// /// Finds the nearest integer less than or equal to `x`. @@ -15,39 +10,5 @@ pub fn floor(x: f64) -> f64 { args: x, } - let ui = x.to_bits(); - let e = ((ui >> 52) & 0x7ff) as i32; - - if (e >= 0x3ff + 52) || (x == 0.) { - return x; - } - /* y = int(x) - x, where int(x) is an integer neighbor of x */ - let y = if (ui >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x }; - /* special case because of non-nearest rounding modes */ - if e < 0x3ff { - force_eval!(y); - return if (ui >> 63) != 0 { -1. } else { 0. }; - } - if y > 0. 
{ x + y - 1. } else { x + y } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(floor(1.1), 1.0); - assert_eq!(floor(2.9), 2.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor - #[test] - fn spec_tests() { - // Not Asserted: that the current rounding mode has no effect. - assert!(floor(f64::NAN).is_nan()); - for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY].iter().copied() { - assert_eq!(floor(f), f); - } - } + return super::generic::floor(x); } diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs index 23a18c0f7..16957b7f3 100644 --- a/libm/src/math/floorf.rs +++ b/libm/src/math/floorf.rs @@ -1,5 +1,3 @@ -use core::f32; - /// Floor (f32) /// /// Finds the nearest integer less than or equal to `x`. @@ -11,53 +9,5 @@ pub fn floorf(x: f32) -> f32 { args: x, } - let mut ui = x.to_bits(); - let e = (((ui >> 23) as i32) & 0xff) - 0x7f; - - if e >= 23 { - return x; - } - if e >= 0 { - let m: u32 = 0x007fffff >> e; - if (ui & m) == 0 { - return x; - } - force_eval!(x + f32::from_bits(0x7b800000)); - if ui >> 31 != 0 { - ui += m; - } - ui &= !m; - } else { - force_eval!(x + f32::from_bits(0x7b800000)); - if ui >> 31 == 0 { - ui = 0; - } else if ui << 1 != 0 { - return -1.0; - } - } - f32::from_bits(ui) -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(floorf(0.5), 0.0); - assert_eq!(floorf(1.1), 1.0); - assert_eq!(floorf(2.9), 2.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor - #[test] - fn spec_tests() { - // Not Asserted: that the current rounding mode has no effect. 
- assert!(floorf(f32::NAN).is_nan()); - for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY].iter().copied() { - assert_eq!(floorf(f), f); - } - } + return super::generic::floor(x); } diff --git a/libm/src/math/generic/floor.rs b/libm/src/math/generic/floor.rs new file mode 100644 index 000000000..6754c08f8 --- /dev/null +++ b/libm/src/math/generic/floor.rs @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: MIT + * origin: musl src/math/floor.c */ + +//! Generic `floor` algorithm. +//! +//! Note that this uses the algorithm from musl's `floorf` rather than `floor` or `floorl` because +//! performance seems to be better (based on icount) and it does not seem to experience rounding +//! errors on i386. + +use super::super::{Float, Int, IntTy, MinInt}; + +pub fn floor(x: F) -> F { + let zero = IntTy::::ZERO; + + let mut ix = x.to_bits(); + let e = x.exp_unbiased(); + + // If the represented value has no fractional part, no truncation is needed. + if e >= F::SIG_BITS as i32 { + return x; + } + + if e >= 0 { + // |x| >= 1.0 + + let m = F::SIG_MASK >> e.unsigned(); + if ix & m == zero { + // Portion to be masked is already zero; no adjustment needed. + return x; + } + + // Otherwise, raise an inexact exception. + force_eval!(x + F::MAX); + + if x.is_sign_negative() { + ix += m; + } + + ix &= !m; + F::from_bits(ix) + } else { + // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0). + force_eval!(x + F::MAX); + + if x.is_sign_positive() { + // 0.0 <= x < 1.0; rounding down goes toward +0.0. + F::ZERO + } else if ix << 1 != zero { + // -1.0 < x < 0.0; rounding down goes toward -1.0. + F::NEG_ONE + } else { + // -0.0 remains unchanged + x + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Test against https://en.cppreference.com/w/cpp/numeric/math/floor + fn spec_test() { + // Not Asserted: that the current rounding mode has no effect. 
+ for f in [F::ZERO, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY].iter().copied() { + assert_biteq!(floor(f), f); + } + } + + /* Skipping f16 / f128 "sanity_check"s due to rejected literal lexing at MSRV */ + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(); + } + + #[test] + fn sanity_check_f32() { + assert_eq!(floor(0.5f32), 0.0); + assert_eq!(floor(1.1f32), 1.0); + assert_eq!(floor(2.9f32), 2.0); + } + + #[test] + fn spec_tests_f32() { + spec_test::(); + } + + #[test] + fn sanity_check_f64() { + assert_eq!(floor(1.1f64), 1.0); + assert_eq!(floor(2.9f64), 2.0); + } + + #[test] + fn spec_tests_f64() { + spec_test::(); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(); + } +} diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index f8bb9fa6a..b08a77d5d 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -2,6 +2,7 @@ mod ceil; mod copysign; mod fabs; mod fdim; +mod floor; mod sqrt; mod trunc; @@ -9,5 +10,6 @@ pub use ceil::ceil; pub use copysign::copysign; pub use fabs::fabs; pub use fdim::fdim; +pub use floor::floor; pub use sqrt::sqrt; pub use trunc::trunc; From 677e871af78c8f6270c8e260dcdff2addab9311c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 13:33:24 +0000 Subject: [PATCH 1108/1459] Add `floorf16` and `floorf128` Use the generic algorithms to provide implementations for these routines. 
--- .../crates/compiler-builtins-smoke-test/src/lib.rs | 2 ++ libm/crates/libm-macros/src/shared.rs | 4 ++-- libm/crates/libm-test/benches/icount.rs | 2 ++ libm/crates/libm-test/benches/random.rs | 4 +++- libm/crates/libm-test/src/mpfloat.rs | 4 ++++ libm/crates/libm-test/tests/compare_built_musl.rs | 2 ++ libm/crates/util/src/main.rs | 2 ++ libm/etc/function-definitions.json | 14 ++++++++++++++ libm/etc/function-list.txt | 2 ++ libm/src/math/floorf128.rs | 7 +++++++ libm/src/math/floorf16.rs | 7 +++++++ libm/src/math/mod.rs | 4 ++++ 12 files changed, 51 insertions(+), 3 deletions(-) create mode 100644 libm/src/math/floorf128.rs create mode 100644 libm/src/math/floorf16.rs diff --git a/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/libm/crates/compiler-builtins-smoke-test/src/lib.rs index b9521eb07..4834ba256 100644 --- a/libm/crates/compiler-builtins-smoke-test/src/lib.rs +++ b/libm/crates/compiler-builtins-smoke-test/src/lib.rs @@ -97,6 +97,8 @@ no_mangle! { fdimf16(x: f16, y: f16) -> f16; floor(x: f64) -> f64; floorf(x: f32) -> f32; + floorf128(x: f128) -> f128; + floorf16(x: f16) -> f16; fma(x: f64, y: f64, z: f64) -> f64; fmaf(x: f32, y: f32, z: f32) -> f32; fmax(x: f64, y: f64) -> f64; diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index e7d3d18d9..64623658d 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ b/libm/crates/libm-macros/src/shared.rs @@ -9,7 +9,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F16, Signature { args: &[Ty::F16], returns: &[Ty::F16] }, None, - &["ceilf16", "fabsf16", "sqrtf16", "truncf16"], + &["ceilf16", "fabsf16", "floorf16", "sqrtf16", "truncf16"], ), ( // `fn(f32) -> f32` @@ -40,7 +40,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F128, Signature { args: &[Ty::F128], returns: &[Ty::F128] }, None, - &["ceilf128", "fabsf128", "sqrtf128", "truncf128"], + &["ceilf128", "fabsf128", "floorf128", "sqrtf128", 
"truncf128"], ), ( // `(f16, f16) -> f16` diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs index 84be3d524..eae63619c 100644 --- a/libm/crates/libm-test/benches/icount.rs +++ b/libm/crates/libm-test/benches/icount.rs @@ -101,6 +101,8 @@ main!( icount_bench_fdimf16_group, icount_bench_fdimf_group, icount_bench_floor_group, + icount_bench_floorf128_group, + icount_bench_floorf16_group, icount_bench_floorf_group, icount_bench_fma_group, icount_bench_fmaf_group, diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 511e26d91..bd7b35971 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -125,8 +125,10 @@ libm_macros::for_each_function! { | fabsf16 | fdimf128 | fdimf16 - | sqrtf16 + | floorf128 + | floorf16 | sqrtf128 + | sqrtf16 | truncf128 | truncf16 => (false, None), diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index bbd19dbb0..53fade7d0 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -148,6 +148,8 @@ libm_macros::for_each_function! { fabsf128, fabsf16,floor, floorf, + floorf128, + floorf16, fmod, fmodf, frexp, @@ -240,6 +242,7 @@ impl_no_round! { impl_no_round! { fabsf16 => abs_mut; ceilf16 => ceil_mut; + floorf16 => floor_mut; truncf16 => trunc_mut; } @@ -247,6 +250,7 @@ impl_no_round! { impl_no_round! { fabsf128 => abs_mut; ceilf128 => ceil_mut; + floorf128 => floor_mut; truncf128 => trunc_mut; } diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index e13acf3de..335496fce 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -87,6 +87,8 @@ libm_macros::for_each_function! 
{ fabsf16, fdimf128, fdimf16, + floorf128, + floorf16, truncf128, truncf16, sqrtf16, diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index 810919339..988c01d07 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -92,6 +92,8 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { | fabsf16 | fdimf128 | fdimf16 + | floorf128 + | floorf16 | sqrtf128 | sqrtf16 | truncf128 diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 6a865f427..eef176fb5 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -349,6 +349,20 @@ ], "type": "f32" }, + "floorf128": { + "sources": [ + "src/math/floorf128.rs", + "src/math/generic/floor.rs" + ], + "type": "f128" + }, + "floorf16": { + "sources": [ + "src/math/floorf16.rs", + "src/math/generic/floor.rs" + ], + "type": "f16" + }, "fma": { "sources": [ "src/libm_helper.rs", diff --git a/libm/etc/function-list.txt b/libm/etc/function-list.txt index 337e7e434..3bb895f4a 100644 --- a/libm/etc/function-list.txt +++ b/libm/etc/function-list.txt @@ -49,6 +49,8 @@ fdimf128 fdimf16 floor floorf +floorf128 +floorf16 fma fmaf fmax diff --git a/libm/src/math/floorf128.rs b/libm/src/math/floorf128.rs new file mode 100644 index 000000000..9a9fe4151 --- /dev/null +++ b/libm/src/math/floorf128.rs @@ -0,0 +1,7 @@ +/// Floor (f128) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf128(x: f128) -> f128 { + return super::generic::floor(x); +} diff --git a/libm/src/math/floorf16.rs b/libm/src/math/floorf16.rs new file mode 100644 index 000000000..f9b868e04 --- /dev/null +++ b/libm/src/math/floorf16.rs @@ -0,0 +1,7 @@ +/// Floor (f16) +/// +/// Finds the nearest integer less than or equal to `x`. 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf16(x: f16) -> f16 { + return super::generic::floor(x); +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 5228e78b7..68d201524 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -345,6 +345,7 @@ cfg_if! { mod copysignf16; mod fabsf16; mod fdimf16; + mod floorf16; mod sqrtf16; mod truncf16; @@ -352,6 +353,7 @@ cfg_if! { pub use self::copysignf16::copysignf16; pub use self::fabsf16::fabsf16; pub use self::fdimf16::fdimf16; + pub use self::floorf16::floorf16; pub use self::sqrtf16::sqrtf16; pub use self::truncf16::truncf16; } @@ -363,6 +365,7 @@ cfg_if! { mod copysignf128; mod fabsf128; mod fdimf128; + mod floorf128; mod sqrtf128; mod truncf128; @@ -370,6 +373,7 @@ cfg_if! { pub use self::copysignf128::copysignf128; pub use self::fabsf128::fabsf128; pub use self::fdimf128::fdimf128; + pub use self::floorf128::floorf128; pub use self::sqrtf128::sqrtf128; pub use self::truncf128::truncf128; } From cb4984e525922c0494a99f3bdd29b243e25a672a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 22 Jan 2025 08:48:02 +0000 Subject: [PATCH 1109/1459] Adjust `ceil` style to be more similar to `floor` --- libm/crates/compiler-builtins-smoke-test/src/lib.rs | 2 ++ libm/src/math/generic/ceil.rs | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/libm/crates/compiler-builtins-smoke-test/src/lib.rs index 4834ba256..1a7aa983e 100644 --- a/libm/crates/compiler-builtins-smoke-test/src/lib.rs +++ b/libm/crates/compiler-builtins-smoke-test/src/lib.rs @@ -67,6 +67,8 @@ no_mangle! 
{ cbrtf(x: f32) -> f32; ceil(x: f64) -> f64; ceilf(x: f32) -> f32; + ceilf128(x: f128) -> f128; + ceilf16(x: f16) -> f16; copysign(x: f64, y: f64) -> f64; copysignf(x: f32, y: f32) -> f32; copysignf128(x: f128, y: f128) -> f128; diff --git a/libm/src/math/generic/ceil.rs b/libm/src/math/generic/ceil.rs index 34261faf7..971a4d3d8 100644 --- a/libm/src/math/generic/ceil.rs +++ b/libm/src/math/generic/ceil.rs @@ -31,24 +31,28 @@ pub fn ceil(x: F) -> F { // Otherwise, raise an inexact exception. force_eval!(x + F::MAX); + if x.is_sign_positive() { ix += m; } + ix &= !m; + F::from_bits(ix) } else { // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0). force_eval!(x + F::MAX); if x.is_sign_negative() { // -1.0 < x <= -0.0; rounding up goes toward -0.0. - return F::NEG_ZERO; + F::NEG_ZERO } else if ix << 1 != zero { // 0.0 < x < 1.0; rounding up goes toward +1.0. - return F::ONE; + F::ONE + } else { + // +0.0 remains unchanged + x } } - - F::from_bits(ix) } #[cfg(test)] From 6444d5875449a00e3dc69ca8eb0f9f73908ecaa6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 22 Jan 2025 09:06:26 +0000 Subject: [PATCH 1110/1459] Add a generic version of `rint` Use this to implement `rint` and `rintf`. 
--- libm/etc/function-definitions.json | 2 + libm/src/math/generic/mod.rs | 2 + libm/src/math/generic/rint.rs | 72 ++++++++++++++++++++++++++++++ libm/src/math/rint.rs | 48 +------------------- libm/src/math/rintf.rs | 48 +------------------- 5 files changed, 78 insertions(+), 94 deletions(-) create mode 100644 libm/src/math/generic/rint.rs diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index eef176fb5..86b088292 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -654,6 +654,7 @@ "src/libm_helper.rs", "src/math/arch/aarch64.rs", "src/math/arch/wasm32.rs", + "src/math/generic/rint.rs", "src/math/rint.rs" ], "type": "f64" @@ -662,6 +663,7 @@ "sources": [ "src/math/arch/aarch64.rs", "src/math/arch/wasm32.rs", + "src/math/generic/rint.rs", "src/math/rintf.rs" ], "type": "f32" diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index b08a77d5d..d3df650e1 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -3,6 +3,7 @@ mod copysign; mod fabs; mod fdim; mod floor; +mod rint; mod sqrt; mod trunc; @@ -11,5 +12,6 @@ pub use copysign::copysign; pub use fabs::fabs; pub use fdim::fdim; pub use floor::floor; +pub use rint::rint; pub use sqrt::sqrt; pub use trunc::trunc; diff --git a/libm/src/math/generic/rint.rs b/libm/src/math/generic/rint.rs new file mode 100644 index 000000000..80ba1faac --- /dev/null +++ b/libm/src/math/generic/rint.rs @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/rint.c */ + +use super::super::Float; + +pub fn rint(x: F) -> F { + let toint = F::ONE / F::EPSILON; + let e = x.exp(); + let positive = x.is_sign_positive(); + + // On i386 `force_eval!` must be used to force rounding via storage to memory. Otherwise, + // the excess precission from x87 would cause an incorrect final result. 
+ let use_force = cfg!(x86_no_sse) && F::BITS == 32 || F::BITS == 64; + + if e >= F::EXP_BIAS + F::SIG_BITS { + // No fractional part; exact result can be returned. + x + } else { + // Apply a net-zero adjustment that nudges `y` in the direction of the rounding mode. + let y = if positive { + let tmp = if use_force { force_eval!(x) } else { x } + toint; + (if use_force { force_eval!(tmp) } else { tmp } - toint) + } else { + let tmp = if use_force { force_eval!(x) } else { x } - toint; + (if use_force { force_eval!(tmp) } else { tmp } + toint) + }; + + if y == F::ZERO { + // A zero result takes the sign of the input. + if positive { F::ZERO } else { F::NEG_ZERO } + } else { + y + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn zeroes_f32() { + assert_biteq!(rint(0.0_f32), 0.0_f32); + assert_biteq!(rint(-0.0_f32), -0.0_f32); + } + + #[test] + fn sanity_check_f32() { + assert_biteq!(rint(-1.0_f32), -1.0); + assert_biteq!(rint(2.8_f32), 3.0); + assert_biteq!(rint(-0.5_f32), -0.0); + assert_biteq!(rint(0.5_f32), 0.0); + assert_biteq!(rint(-1.5_f32), -2.0); + assert_biteq!(rint(1.5_f32), 2.0); + } + + #[test] + fn zeroes_f64() { + assert_biteq!(rint(0.0_f64), 0.0_f64); + assert_biteq!(rint(-0.0_f64), -0.0_f64); + } + + #[test] + fn sanity_check_f64() { + assert_biteq!(rint(-1.0_f64), -1.0); + assert_biteq!(rint(2.8_f64), 3.0); + assert_biteq!(rint(-0.5_f64), -0.0); + assert_biteq!(rint(0.5_f64), 0.0); + assert_biteq!(rint(-1.5_f64), -2.0); + assert_biteq!(rint(1.5_f64), 2.0); + } +} diff --git a/libm/src/math/rint.rs b/libm/src/math/rint.rs index c9ea6402e..592583aa5 100644 --- a/libm/src/math/rint.rs +++ b/libm/src/math/rint.rs @@ -9,51 +9,5 @@ pub fn rint(x: f64) -> f64 { args: x, } - let one_over_e = 1.0 / f64::EPSILON; - let as_u64: u64 = x.to_bits(); - let exponent: u64 = (as_u64 >> 52) & 0x7ff; - let is_positive = (as_u64 >> 63) == 0; - if exponent >= 0x3ff + 52 { - x - } else { - let ans = if is_positive { - #[cfg(all(target_arch = "x86", 
not(target_feature = "sse2")))] - let x = force_eval!(x); - let xplusoneovere = x + one_over_e; - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let xplusoneovere = force_eval!(xplusoneovere); - xplusoneovere - one_over_e - } else { - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let x = force_eval!(x); - let xminusoneovere = x - one_over_e; - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let xminusoneovere = force_eval!(xminusoneovere); - xminusoneovere + one_over_e - }; - - if ans == 0.0 { if is_positive { 0.0 } else { -0.0 } } else { ans } - } -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use super::rint; - - #[test] - fn negative_zero() { - assert_eq!(rint(-0.0_f64).to_bits(), (-0.0_f64).to_bits()); - } - - #[test] - fn sanity_check() { - assert_eq!(rint(-1.0), -1.0); - assert_eq!(rint(2.8), 3.0); - assert_eq!(rint(-0.5), -0.0); - assert_eq!(rint(0.5), 0.0); - assert_eq!(rint(-1.5), -2.0); - assert_eq!(rint(1.5), 2.0); - } + super::generic::rint(x) } diff --git a/libm/src/math/rintf.rs b/libm/src/math/rintf.rs index 33b5b3dde..56666df11 100644 --- a/libm/src/math/rintf.rs +++ b/libm/src/math/rintf.rs @@ -9,51 +9,5 @@ pub fn rintf(x: f32) -> f32 { args: x, } - let one_over_e = 1.0 / f32::EPSILON; - let as_u32: u32 = x.to_bits(); - let exponent: u32 = (as_u32 >> 23) & 0xff; - let is_positive = (as_u32 >> 31) == 0; - if exponent >= 0x7f + 23 { - x - } else { - let ans = if is_positive { - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let x = force_eval!(x); - let xplusoneovere = x + one_over_e; - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let xplusoneovere = force_eval!(xplusoneovere); - xplusoneovere - one_over_e - } else { - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let x = force_eval!(x); - let xminusoneovere = x - one_over_e; - 
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let xminusoneovere = force_eval!(xminusoneovere); - xminusoneovere + one_over_e - }; - - if ans == 0.0 { if is_positive { 0.0 } else { -0.0 } } else { ans } - } -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use super::rintf; - - #[test] - fn negative_zero() { - assert_eq!(rintf(-0.0_f32).to_bits(), (-0.0_f32).to_bits()); - } - - #[test] - fn sanity_check() { - assert_eq!(rintf(-1.0), -1.0); - assert_eq!(rintf(2.8), 3.0); - assert_eq!(rintf(-0.5), -0.0); - assert_eq!(rintf(0.5), 0.0); - assert_eq!(rintf(-1.5), -2.0); - assert_eq!(rintf(1.5), 2.0); - } + super::generic::rint(x) } From 4572374776015b2ad324781227f4a05af63ac378 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 22 Jan 2025 10:58:29 +0000 Subject: [PATCH 1111/1459] Add `rintf16` and `rintf128` Use the generic algorithms to provide implementations for these routines. 
--- .../crates/compiler-builtins-smoke-test/src/lib.rs | 2 ++ libm/crates/libm-macros/src/shared.rs | 4 ++-- libm/crates/libm-test/benches/icount.rs | 2 ++ libm/crates/libm-test/benches/random.rs | 2 ++ libm/crates/libm-test/src/mpfloat.rs | 8 ++++++-- libm/crates/libm-test/tests/compare_built_musl.rs | 6 ++++-- libm/crates/util/src/main.rs | 2 ++ libm/etc/function-definitions.json | 14 ++++++++++++++ libm/etc/function-list.txt | 2 ++ libm/src/math/mod.rs | 4 ++++ libm/src/math/rint.rs | 1 + libm/src/math/rintf.rs | 1 + libm/src/math/rintf128.rs | 5 +++++ libm/src/math/rintf16.rs | 5 +++++ 14 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 libm/src/math/rintf128.rs create mode 100644 libm/src/math/rintf16.rs diff --git a/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/libm/crates/compiler-builtins-smoke-test/src/lib.rs index 1a7aa983e..ccd0642a2 100644 --- a/libm/crates/compiler-builtins-smoke-test/src/lib.rs +++ b/libm/crates/compiler-builtins-smoke-test/src/lib.rs @@ -145,6 +145,8 @@ no_mangle! 
{ remquof(x: f32, y: f32 | q: &mut c_int) -> f32; rint(x: f64) -> f64; rintf(x: f32) -> f32; + rintf128(x: f128) -> f128; + rintf16(x: f16) -> f16; round(x: f64) -> f64; roundf(x: f32) -> f32; scalbn(x: f64, y: c_int) -> f64; diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index 64623658d..80bd3e907 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ b/libm/crates/libm-macros/src/shared.rs @@ -9,7 +9,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F16, Signature { args: &[Ty::F16], returns: &[Ty::F16] }, None, - &["ceilf16", "fabsf16", "floorf16", "sqrtf16", "truncf16"], + &["ceilf16", "fabsf16", "floorf16", "rintf16", "sqrtf16", "truncf16"], ), ( // `fn(f32) -> f32` @@ -40,7 +40,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F128, Signature { args: &[Ty::F128], returns: &[Ty::F128] }, None, - &["ceilf128", "fabsf128", "floorf128", "sqrtf128", "truncf128"], + &["ceilf128", "fabsf128", "floorf128", "rintf128", "sqrtf128", "truncf128"], ), ( // `(f16, f16) -> f16` diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs index eae63619c..84f953262 100644 --- a/libm/crates/libm-test/benches/icount.rs +++ b/libm/crates/libm-test/benches/icount.rs @@ -149,6 +149,8 @@ main!( icount_bench_remquo_group, icount_bench_remquof_group, icount_bench_rint_group, + icount_bench_rintf128_group, + icount_bench_rintf16_group, icount_bench_rintf_group, icount_bench_round_group, icount_bench_roundf_group, diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index bd7b35971..4d050e817 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -127,6 +127,8 @@ libm_macros::for_each_function! 
{ | fdimf16 | floorf128 | floorf16 + | rintf128 + | rintf16 | sqrtf128 | sqrtf16 | truncf128 diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 53fade7d0..a404f227b 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -170,6 +170,8 @@ libm_macros::for_each_function! { remquof, rint, rintf, + rintf128, + rintf16, round, roundf, scalbn, @@ -240,17 +242,19 @@ impl_no_round! { #[cfg(f16_enabled)] impl_no_round! { - fabsf16 => abs_mut; ceilf16 => ceil_mut; + fabsf16 => abs_mut; floorf16 => floor_mut; + rintf16 => round_even_mut; // FIXME: respect rounding mode truncf16 => trunc_mut; } #[cfg(f128_enabled)] impl_no_round! { - fabsf128 => abs_mut; ceilf128 => ceil_mut; + fabsf128 => abs_mut; floorf128 => floor_mut; + rintf128 => round_even_mut; // FIXME: respect rounding mode truncf128 => trunc_mut; } diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 335496fce..f009816c9 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -89,9 +89,11 @@ libm_macros::for_each_function! 
{ fdimf16, floorf128, floorf16, + rintf128, + rintf16, + sqrtf128, + sqrtf16, truncf128, truncf16, - sqrtf16, - sqrtf128, ], } diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index 988c01d07..41d995b3b 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -94,6 +94,8 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { | fdimf16 | floorf128 | floorf16 + | rintf128 + | rintf16 | sqrtf128 | sqrtf16 | truncf128 diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 86b088292..d3810b940 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -668,6 +668,20 @@ ], "type": "f32" }, + "rintf128": { + "sources": [ + "src/math/generic/rint.rs", + "src/math/rintf128.rs" + ], + "type": "f128" + }, + "rintf16": { + "sources": [ + "src/math/generic/rint.rs", + "src/math/rintf16.rs" + ], + "type": "f16" + }, "round": { "sources": [ "src/libm_helper.rs", diff --git a/libm/etc/function-list.txt b/libm/etc/function-list.txt index 3bb895f4a..41bb4e06b 100644 --- a/libm/etc/function-list.txt +++ b/libm/etc/function-list.txt @@ -97,6 +97,8 @@ remquo remquof rint rintf +rintf128 +rintf16 round roundf scalbn diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 68d201524..53d06974c 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -346,6 +346,7 @@ cfg_if! { mod fabsf16; mod fdimf16; mod floorf16; + mod rintf16; mod sqrtf16; mod truncf16; @@ -354,6 +355,7 @@ cfg_if! { pub use self::fabsf16::fabsf16; pub use self::fdimf16::fdimf16; pub use self::floorf16::floorf16; + pub use self::rintf16::rintf16; pub use self::sqrtf16::sqrtf16; pub use self::truncf16::truncf16; } @@ -366,6 +368,7 @@ cfg_if! { mod fabsf128; mod fdimf128; mod floorf128; + mod rintf128; mod sqrtf128; mod truncf128; @@ -374,6 +377,7 @@ cfg_if! 
{ pub use self::fabsf128::fabsf128; pub use self::fdimf128::fdimf128; pub use self::floorf128::floorf128; + pub use self::rintf128::rintf128; pub use self::sqrtf128::sqrtf128; pub use self::truncf128::truncf128; } diff --git a/libm/src/math/rint.rs b/libm/src/math/rint.rs index 592583aa5..f409ec282 100644 --- a/libm/src/math/rint.rs +++ b/libm/src/math/rint.rs @@ -1,3 +1,4 @@ +/// Round `x` to the nearest integer, breaking ties toward even. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn rint(x: f64) -> f64 { select_implementation! { diff --git a/libm/src/math/rintf.rs b/libm/src/math/rintf.rs index 56666df11..5e9f5f718 100644 --- a/libm/src/math/rintf.rs +++ b/libm/src/math/rintf.rs @@ -1,3 +1,4 @@ +/// Round `x` to the nearest integer, breaking ties toward even. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn rintf(x: f32) -> f32 { select_implementation! { diff --git a/libm/src/math/rintf128.rs b/libm/src/math/rintf128.rs new file mode 100644 index 000000000..6b16fcd84 --- /dev/null +++ b/libm/src/math/rintf128.rs @@ -0,0 +1,5 @@ +/// Round `x` to the nearest integer, breaking ties toward even. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn rintf128(x: f128) -> f128 { + super::generic::rint(x) +} diff --git a/libm/src/math/rintf16.rs b/libm/src/math/rintf16.rs new file mode 100644 index 000000000..84d792561 --- /dev/null +++ b/libm/src/math/rintf16.rs @@ -0,0 +1,5 @@ +/// Round `x` to the nearest integer, breaking ties toward even. 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn rintf16(x: f16) -> f16 { + super::generic::rint(x) +} From 4af368bd18ea183005eb2ccfc841db0a81e55384 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 22 Jan 2025 21:31:06 +0000 Subject: [PATCH 1112/1459] Add the ability to parse hex, binary, and float hex with util --- libm/crates/util/src/main.rs | 94 ++++++++++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 5 deletions(-) diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index 41d995b3b..23aed06c0 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -5,10 +5,12 @@ use std::any::type_name; use std::env; +use std::num::ParseIntError; use std::str::FromStr; #[cfg(feature = "build-mpfr")] use az::Az; +use libm::support::{hf32, hf64}; #[cfg(feature = "build-mpfr")] use libm_test::mpfloat::MpOp; use libm_test::{MathOp, TupleCall}; @@ -238,21 +240,103 @@ impl_parse_tuple_via_rug!(f16); impl_parse_tuple_via_rug!(f128); /// Try to parse the number, printing a nice message on failure. -fn parse(input: &[&str], idx: usize) -> F { +fn parse(input: &[&str], idx: usize) -> T { let s = input[idx]; - s.parse().unwrap_or_else(|_| panic!("invalid {} input '{s}'", type_name::())) + + let msg = || format!("invalid {} input '{s}'", type_name::()); + + if s.starts_with("0x") { + return T::from_str_radix(s, 16).unwrap_or_else(|_| panic!("{}", msg())); + } + + if s.starts_with("0b") { + return T::from_str_radix(s, 2).unwrap_or_else(|_| panic!("{}", msg())); + } + + s.parse().unwrap_or_else(|_| panic!("{}", msg())) } /// Try to parse the float type going via `rug`, for `f16` and `f128` which don't yet implement /// `FromStr`. 
#[cfg(feature = "build-mpfr")] -fn parse_rug(input: &[&str], idx: usize) -> F +fn parse_rug(input: &[&str], idx: usize) -> F where + F: libm_test::Float + FromStrRadix, rug::Float: az::Cast, { let s = input[idx]; - let x = - rug::Float::parse(s).unwrap_or_else(|_| panic!("invalid {} input '{s}'", type_name::())); + + let msg = || format!("invalid {} input '{s}'", type_name::()); + + if s.starts_with("0x") { + return F::from_str_radix(s, 16).unwrap_or_else(|_| panic!("{}", msg())); + } + + if s.starts_with("0b") { + return F::from_str_radix(s, 2).unwrap_or_else(|_| panic!("{}", msg())); + } + + let x = rug::Float::parse(s).unwrap_or_else(|_| panic!("{}", msg())); let x = rug::Float::with_val(F::BITS, x); x.az() } + +trait FromStrRadix: Sized { + fn from_str_radix(s: &str, radix: u32) -> Result; +} + +impl FromStrRadix for i32 { + fn from_str_radix(s: &str, radix: u32) -> Result { + let s = strip_radix_prefix(s, radix); + i32::from_str_radix(s, radix) + } +} + +#[cfg(f16_enabled)] +impl FromStrRadix for f16 { + fn from_str_radix(s: &str, radix: u32) -> Result { + let s = strip_radix_prefix(s, radix); + u16::from_str_radix(s, radix).map(Self::from_bits) + } +} + +impl FromStrRadix for f32 { + fn from_str_radix(s: &str, radix: u32) -> Result { + if radix == 16 && s.contains("p") { + // Parse as hex float + return Ok(hf32(s)); + } + + let s = strip_radix_prefix(s, radix); + u32::from_str_radix(s, radix).map(Self::from_bits) + } +} + +impl FromStrRadix for f64 { + fn from_str_radix(s: &str, radix: u32) -> Result { + if s.contains("p") { + return Ok(hf64(s)); + } + + let s = strip_radix_prefix(s, radix); + u64::from_str_radix(s, radix).map(Self::from_bits) + } +} + +#[cfg(f128_enabled)] +impl FromStrRadix for f128 { + fn from_str_radix(s: &str, radix: u32) -> Result { + let s = strip_radix_prefix(s, radix); + u128::from_str_radix(s, radix).map(Self::from_bits) + } +} + +fn strip_radix_prefix(s: &str, radix: u32) -> &str { + if radix == 16 { + s.strip_prefix("0x").unwrap() 
+ } else if radix == 2 { + s.strip_prefix("0b").unwrap() + } else { + s + } +} From ccd5801a1569baea749489d9f6aa498633f03ddd Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 22 Jan 2025 23:01:29 +0000 Subject: [PATCH 1113/1459] Fix the parsing of three-item tuples in `util` --- libm/crates/util/src/main.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index 23aed06c0..889823d2e 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -146,8 +146,8 @@ macro_rules! impl_parse_tuple { impl ParseTuple for ($ty, $ty, $ty) { fn parse(input: &[&str]) -> Self { - assert_eq!(input.len(), 2, "expected three arguments, got {input:?}"); - (parse(input, 0), parse(input, 1), parse(input, 3)) + assert_eq!(input.len(), 3, "expected three arguments, got {input:?}"); + (parse(input, 0), parse(input, 1), parse(input, 2)) } } }; @@ -187,8 +187,8 @@ macro_rules! impl_parse_tuple_via_rug { impl ParseTuple for ($ty, $ty, $ty) { fn parse(input: &[&str]) -> Self { - assert_eq!(input.len(), 2, "expected three arguments, got {input:?}"); - (parse_rug(input, 0), parse_rug(input, 1), parse_rug(input, 3)) + assert_eq!(input.len(), 3, "expected three arguments, got {input:?}"); + (parse_rug(input, 0), parse_rug(input, 1), parse_rug(input, 2)) } } }; From 5b5bccae84fa36fbd7a49a58ff841574674b54d1 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 22 Jan 2025 22:41:49 +0000 Subject: [PATCH 1114/1459] Add `hf16!` and `hf128!` Expand the existing hex float functions and macros with versions that work with `f16` and `f128`. 
--- libm/src/math/support/hex_float.rs | 246 ++++++++++++++++++++++++++++- libm/src/math/support/macros.rs | 22 +++ libm/src/math/support/mod.rs | 4 + 3 files changed, 266 insertions(+), 6 deletions(-) diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs index 1666c6153..949f21a57 100644 --- a/libm/src/math/support/hex_float.rs +++ b/libm/src/math/support/hex_float.rs @@ -4,6 +4,12 @@ use super::{f32_from_bits, f64_from_bits}; +/// Construct a 16-bit float from hex float representation (C-style) +#[cfg(f16_enabled)] +pub const fn hf16(s: &str) -> f16 { + f16::from_bits(parse_any(s, 16, 10) as u16) +} + /// Construct a 32-bit float from hex float representation (C-style) pub const fn hf32(s: &str) -> f32 { f32_from_bits(parse_any(s, 32, 23) as u32) @@ -14,6 +20,12 @@ pub const fn hf64(s: &str) -> f64 { f64_from_bits(parse_any(s, 64, 52) as u64) } +/// Construct a 128-bit float from hex float representation (C-style) +#[cfg(f128_enabled)] +pub const fn hf128(s: &str) -> f128 { + f128::from_bits(parse_any(s, 128, 112)) +} + const fn parse_any(s: &str, bits: u32, sig_bits: u32) -> u128 { let exp_bits: u32 = bits - sig_bits - 1; let max_msb: i32 = (1 << (exp_bits - 1)) - 1; @@ -230,6 +242,57 @@ mod tests { } } + // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to + // hide them from the AST. + #[cfg(f16_enabled)] + macro_rules! 
f16_tests { + () => { + #[test] + fn test_f16() { + let checks = [ + ("0x.1234p+16", (0x1234 as f16).to_bits()), + ("0x1.234p+12", (0x1234 as f16).to_bits()), + ("0x12.34p+8", (0x1234 as f16).to_bits()), + ("0x123.4p+4", (0x1234 as f16).to_bits()), + ("0x1234p+0", (0x1234 as f16).to_bits()), + ("0x1234.p+0", (0x1234 as f16).to_bits()), + ("0x1234.0p+0", (0x1234 as f16).to_bits()), + ("0x1.ffcp+15", f16::MAX.to_bits()), + ("0x1.0p+1", 2.0f16.to_bits()), + ("0x1.0p+0", 1.0f16.to_bits()), + ("0x1.ffp+8", 0x5ffc), + ("+0x1.ffp+8", 0x5ffc), + ("0x1p+0", 0x3c00), + ("0x1.998p-4", 0x2e66), + ("0x1.9p+6", 0x5640), + ("0x0.0p0", 0.0f16.to_bits()), + ("-0x0.0p0", (-0.0f16).to_bits()), + ("0x1.0p0", 1.0f16.to_bits()), + ("0x1.998p-4", (0.1f16).to_bits()), + ("-0x1.998p-4", (-0.1f16).to_bits()), + ("0x0.123p-12", 0x0123), + ("0x1p-24", 0x0001), + ]; + for (s, exp) in checks { + println!("parsing {s}"); + let act = hf16(s).to_bits(); + assert_eq!( + act, exp, + "parsing {s}: {act:#06x} != {exp:#06x}\nact: {act:#018b}\nexp: {exp:#018b}" + ); + } + } + + #[test] + fn test_macros_f16() { + assert_eq!(hf16!("0x1.ffp+8").to_bits(), 0x5ffc_u16); + } + }; + } + + #[cfg(f16_enabled)] + f16_tests!(); + #[test] fn test_f32() { let checks = [ @@ -308,16 +371,67 @@ mod tests { } } - #[test] - fn test_f32_almost_extra_precision() { - // Exact maximum precision allowed - hf32("0x1.abcdeep+0"); + // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to + // hide them from the AST. + #[cfg(f128_enabled)] + macro_rules! 
f128_tests { + () => { + #[test] + fn test_f128() { + let checks = [ + ("0x.1234p+16", (0x1234 as f128).to_bits()), + ("0x1.234p+12", (0x1234 as f128).to_bits()), + ("0x12.34p+8", (0x1234 as f128).to_bits()), + ("0x123.4p+4", (0x1234 as f128).to_bits()), + ("0x1234p+0", (0x1234 as f128).to_bits()), + ("0x1234.p+0", (0x1234 as f128).to_bits()), + ("0x1234.0p+0", (0x1234 as f128).to_bits()), + ("0x1.ffffffffffffffffffffffffffffp+16383", f128::MAX.to_bits()), + ("0x1.0p+1", 2.0f128.to_bits()), + ("0x1.0p+0", 1.0f128.to_bits()), + ("0x1.ffep+8", 0x4007ffe0000000000000000000000000), + ("+0x1.ffep+8", 0x4007ffe0000000000000000000000000), + ("0x1p+0", 0x3fff0000000000000000000000000000), + ("0x1.999999999999999999999999999ap-4", 0x3ffb999999999999999999999999999a), + ("0x1.9p+6", 0x40059000000000000000000000000000), + ("0x0.0p0", 0.0f128.to_bits()), + ("-0x0.0p0", (-0.0f128).to_bits()), + ("0x1.0p0", 1.0f128.to_bits()), + ("0x1.999999999999999999999999999ap-4", (0.1f128).to_bits()), + ("-0x1.999999999999999999999999999ap-4", (-0.1f128).to_bits()), + ("0x0.abcdef0123456789abcdef012345p-16382", 0x0000abcdef0123456789abcdef012345), + ("0x1p-16494", 0x00000000000000000000000000000001), + ]; + for (s, exp) in checks { + println!("parsing {s}"); + let act = hf128(s).to_bits(); + assert_eq!( + act, exp, + "parsing {s}: {act:#034x} != {exp:#034x}\nact: {act:#0130b}\nexp: {exp:#0130b}" + ); + } + } + + #[test] + fn test_macros_f128() { + assert_eq!(hf128!("0x1.ffep+8").to_bits(), 0x4007ffe0000000000000000000000000_u128); + } + } } + #[cfg(f128_enabled)] + f128_tests!(); + #[test] fn test_macros() { - assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000u32); - assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000u64); + // FIXME(msrv): enable once parsing works + // #[cfg(f16_enabled)] + // assert_eq!(hf16!("0x1.ffp+8").to_bits(), 0x5ffc_u16); + assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000_u32); + assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000_u64); + 
// FIXME(msrv): enable once parsing works + // #[cfg(f128_enabled)] + // assert_eq!(hf128!("0x1.ffep+8").to_bits(), 0x4007ffe0000000000000000000000000_u128); } } @@ -328,6 +442,69 @@ mod tests_panicking { extern crate std; use super::*; + // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to + // hide them from the AST. + #[cfg(f16_enabled)] + macro_rules! f16_tests { + () => { + #[test] + fn test_f16_almost_extra_precision() { + // Exact maximum precision allowed + hf16("0x1.ffcp+0"); + } + + #[test] + #[should_panic(expected = "the value is too precise")] + fn test_f16_extra_precision() { + // One bit more than the above. + hf16("0x1.ffdp+0"); + } + + #[test] + #[should_panic(expected = "the value is too huge")] + fn test_f16_overflow() { + // One bit more than the above. + hf16("0x1p+16"); + } + + #[test] + fn test_f16_tiniest() { + let x = hf16("0x1.p-24"); + let y = hf16("0x0.001p-12"); + let z = hf16("0x0.8p-23"); + assert_eq!(x, y); + assert_eq!(x, z); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f16_too_tiny() { + hf16("0x1.p-25"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f16_also_too_tiny() { + hf16("0x0.8p-24"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f16_again_too_tiny() { + hf16("0x0.001p-13"); + } + }; + } + + #[cfg(f16_enabled)] + f16_tests!(); + + #[test] + fn test_f32_almost_extra_precision() { + // Exact maximum precision allowed + hf32("0x1.abcdeep+0"); + } + #[test] #[should_panic] fn test_f32_extra_precision2() { @@ -388,4 +565,61 @@ mod tests_panicking { // One bit more than the above. hf64("0x1.abcdabcdabcdf8p+0"); } + + // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to + // hide them from the AST. + #[cfg(f128_enabled)] + macro_rules! 
f128_tests { + () => { + #[test] + fn test_f128_almost_extra_precision() { + // Exact maximum precision allowed + hf128("0x1.ffffffffffffffffffffffffffffp+16383"); + } + + #[test] + #[should_panic(expected = "the value is too precise")] + fn test_f128_extra_precision() { + // One bit more than the above. + hf128("0x1.ffffffffffffffffffffffffffff8p+16383"); + } + + #[test] + #[should_panic(expected = "the value is too huge")] + fn test_f128_overflow() { + // One bit more than the above. + hf128("0x1p+16384"); + } + + #[test] + fn test_f128_tiniest() { + let x = hf128("0x1.p-16494"); + let y = hf128("0x0.0000000000000001p-16430"); + let z = hf128("0x0.8p-16493"); + assert_eq!(x, y); + assert_eq!(x, z); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f128_too_tiny() { + hf128("0x1.p-16495"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f128_again_too_tiny() { + hf128("0x0.0000000000000001p-16431"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f128_also_too_tiny() { + hf128("0x0.8p-16494"); + } + }; + } + + #[cfg(f128_enabled)] + f128_tests!(); } diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs index c9a36c0db..d8ba04cff 100644 --- a/libm/src/math/support/macros.rs +++ b/libm/src/math/support/macros.rs @@ -87,6 +87,17 @@ macro_rules! select_implementation { (@cfg $provided:meta; $ex:expr) => { #[cfg($provided)] $ex }; } +/// Construct a 16-bit float from hex float representation (C-style), guaranteed to +/// evaluate at compile time. +#[cfg(f16_enabled)] +#[allow(unused_macros)] +macro_rules! hf16 { + ($s:literal) => {{ + const X: f16 = $crate::math::support::hf16($s); + X + }}; +} + /// Construct a 32-bit float from hex float representation (C-style), guaranteed to /// evaluate at compile time. #[allow(unused_macros)] @@ -107,6 +118,17 @@ macro_rules! 
hf64 { }}; } +/// Construct a 128-bit float from hex float representation (C-style), guaranteed to +/// evaluate at compile time. +#[cfg(f128_enabled)] +#[allow(unused_macros)] +macro_rules! hf128 { + ($s:literal) => {{ + const X: f128 = $crate::math::support::hf128($s); + X + }}; +} + /// Assert `F::biteq` with better messages. #[cfg(test)] macro_rules! assert_biteq { diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index ddfc2e3e0..da9e2c9ed 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -8,6 +8,10 @@ mod int_traits; #[allow(unused_imports)] pub use float_traits::{Float, IntTy}; pub(crate) use float_traits::{f32_from_bits, f64_from_bits}; +#[cfg(f16_enabled)] +pub use hex_float::hf16; +#[cfg(f128_enabled)] +pub use hex_float::hf128; #[allow(unused_imports)] pub use hex_float::{hf32, hf64}; pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; From 48296bd61377ddd82b56e0d0e5be80acc00413a1 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 23 Jan 2025 05:08:10 +0000 Subject: [PATCH 1115/1459] Introduce XFAILs that assert failure Currently our XFAILs are open ended; we do not check that it actually fails, so we have no easy way of knowing that a previously-failing test starts passing. Introduce a new enum that we return from overrides to give us more flexibility here, including the ability to assert that expected failures happen. With the new enum, it is also possible to specify ULP via return value rather than passing a `&mut u32` parameter. This includes refactoring of `precision.rs` to be more accurate about where errors come from, if possible. 
Fixes: https://github.com/rust-lang/libm/issues/455 --- libm/crates/libm-test/src/precision.rs | 461 +++++++++++------------ libm/crates/libm-test/src/test_traits.rs | 60 ++- 2 files changed, 268 insertions(+), 253 deletions(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 9d17ab8cc..800425f12 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -118,13 +118,13 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { // FIXME(#401): these need to be correctly rounded but are not. Id::Fmaf => ulp = 1, Id::Fdim => ulp = 1, + Id::Round => ulp = 1, Id::Asinh => ulp = 3, Id::Asinhf => ulp = 3, Id::Exp10 | Id::Exp10f => ulp = 1_000_000, Id::Exp2 | Id::Exp2f => ulp = 10_000_000, Id::Log1p | Id::Log1pf => ulp = 2, - Id::Round => ulp = 1, Id::Tan => ulp = 2, _ => (), } @@ -133,12 +133,42 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { ulp } +/// Result of checking for possible overrides. +#[derive(Debug, Default)] +pub enum CheckAction { + /// The check should pass. Default case. + #[default] + AssertSuccess, + + /// Override the ULP for this check. + AssertWithUlp(u32), + + /// Failure is expected, ensure this is the case (xfail). Takes a contxt string to help trace + /// back exactly why we expect this to fail. + AssertFailure(&'static str), + + /// The override somehow validated the result, here it is. + Custom(TestResult), + + /// Disregard the output. + Skip, +} + /// Don't run further validation on this test case. -const SKIP: Option = Some(Ok(())); +const SKIP: CheckAction = CheckAction::Skip; -/// Return this to skip checks on a test that currently fails but shouldn't. Looks -/// the same as skip, but we keep them separate to better indicate purpose. -const XFAIL: Option = Some(Ok(())); +/// Return this to skip checks on a test that currently fails but shouldn't. Takes a description +/// of context. 
+const XFAIL: fn(&'static str) -> CheckAction = CheckAction::AssertFailure; + +/// Indicates that we expect a test to fail but we aren't asserting that it does (e.g. some results +/// within a range do actually pass). +/// +/// Same as `SKIP`, just indicates we have something to eventually fix. +const XFAIL_NOCHECK: CheckAction = CheckAction::Skip; + +/// By default, all tests should pass. +const DEFAULT: CheckAction = CheckAction::AssertSuccess; /// Allow overriding the outputs of specific test cases. /// @@ -158,19 +188,13 @@ pub trait MaybeOverride { _input: Input, _actual: F, _expected: F, - _ulp: &mut u32, _ctx: &CheckCtx, - ) -> Option { - None + ) -> CheckAction { + DEFAULT } - fn check_int( - _input: Input, - _actual: I, - _expected: I, - _ctx: &CheckCtx, - ) -> Option { - None + fn check_int(_input: Input, _actual: I, _expected: I, _ctx: &CheckCtx) -> CheckAction { + DEFAULT } } @@ -178,33 +202,35 @@ pub trait MaybeOverride { impl MaybeOverride<(f16,)> for SpecialCase {} impl MaybeOverride<(f32,)> for SpecialCase { - fn check_float( - input: (f32,), - actual: F, - expected: F, - _ulp: &mut u32, - ctx: &CheckCtx, - ) -> Option { - if ctx.base_name == BaseName::Expm1 && input.0 > 80.0 && actual.is_infinite() { + fn check_float(input: (f32,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction { + if ctx.base_name == BaseName::Expm1 + && !input.0.is_infinite() + && input.0 > 80.0 + && actual.is_infinite() + && !expected.is_infinite() + { // we return infinity but the number is representable - return XFAIL; - } - - if ctx.base_name == BaseName::Sinh && input.0.abs() > 80.0 && actual.is_nan() { - // we return some NaN that should be real values or infinite - return XFAIL; + if ctx.basis == CheckBasis::Musl { + return XFAIL_NOCHECK; + } + return XFAIL("expm1 representable numbers"); } - if ctx.base_name == BaseName::Acosh && input.0 < -1.0 { - // acoshf is undefined for x <= 1.0, but we return a random result at lower - // values. 
- return XFAIL; + if cfg!(x86_no_sse) + && ctx.base_name == BaseName::Exp2 + && !expected.is_infinite() + && actual.is_infinite() + { + // We return infinity when there is a representable value. Test input: 127.97238 + return XFAIL("586 exp2 representable numbers"); } - if ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR && input.0 < 0.0 - { - // loggamma should not be defined for x < 0, yet we both return results - return XFAIL; + if ctx.base_name == BaseName::Sinh && input.0.abs() > 80.0 && actual.is_nan() { + // we return some NaN that should be real values or infinite + if ctx.basis == CheckBasis::Musl { + return XFAIL_NOCHECK; + } + return XFAIL("sinh unexpected NaN"); } if (ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR) @@ -213,32 +239,18 @@ impl MaybeOverride<(f32,)> for SpecialCase { && !actual.is_infinite() { // This result should saturate but we return a finite value. - return XFAIL; + return XFAIL_NOCHECK; } if ctx.base_name == BaseName::J0 && input.0 < -1e34 { // Errors get huge close to -inf - return XFAIL; - } - - if cfg!(x86_no_sse) - && ctx.base_name == BaseName::Exp2 - && !expected.is_infinite() - && actual.is_infinite() - { - // We return infinity when there is a representable value. 
Test input: 127.97238 - return XFAIL; + return XFAIL_NOCHECK; } - maybe_check_nan_bits(actual, expected, ctx) + unop_common(input, actual, expected, ctx) } - fn check_int( - input: (f32,), - actual: I, - expected: I, - ctx: &CheckCtx, - ) -> Option> { + fn check_int(input: (f32,), actual: I, expected: I, ctx: &CheckCtx) -> CheckAction { // On MPFR for lgammaf_r, we set -1 as the integer result for negative infinity but MPFR // sets +1 if ctx.basis == CheckBasis::Mpfr @@ -246,37 +258,25 @@ impl MaybeOverride<(f32,)> for SpecialCase { && input.0 == f32::NEG_INFINITY && actual.abs() == expected.abs() { - XFAIL - } else { - None + return XFAIL("lgammar integer result"); } + + DEFAULT } } impl MaybeOverride<(f64,)> for SpecialCase { - fn check_float( - input: (f64,), - actual: F, - expected: F, - _ulp: &mut u32, - ctx: &CheckCtx, - ) -> Option { - if ctx.basis == CheckBasis::Musl { - if cfg!(target_arch = "x86") && ctx.base_name == BaseName::Acosh && input.0 < 1.0 { - // The function is undefined, both implementations return random results - return SKIP; - } - - if cfg!(x86_no_sse) - && ctx.base_name == BaseName::Ceil - && input.0 < 0.0 - && input.0 > -1.0 - && expected == F::ZERO - && actual == F::ZERO - { - // musl returns -0.0, we return +0.0 - return XFAIL; - } + fn check_float(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction { + if cfg!(x86_no_sse) + && ctx.base_name == BaseName::Ceil + && ctx.basis == CheckBasis::Musl + && input.0 < 0.0 + && input.0 > -1.0 + && expected == F::ZERO + && actual == F::ZERO + { + // musl returns -0.0, we return +0.0 + return XFAIL("i586 ceil signed zero"); } if cfg!(x86_no_sse) @@ -285,53 +285,37 @@ impl MaybeOverride<(f64,)> for SpecialCase { && (expected - actual).abs() > F::ZERO { // Our rounding mode is incorrect. - return XFAIL; - } - - if ctx.base_name == BaseName::Acosh && input.0 < 1.0 { - // The function is undefined for the inputs, musl and our libm both return - // random results. 
- return XFAIL; - } - - if ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR && input.0 < 0.0 - { - // loggamma should not be defined for x < 0, yet we both return results - return XFAIL; + return XFAIL("i586 rint rounding mode"); } - if ctx.base_name == BaseName::J0 && input.0 < -1e300 { - // Errors get huge close to -inf - return XFAIL; - } - - if (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor) - && cfg!(x86_no_sse) + if cfg!(x86_no_sse) + && (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor) && expected.eq_repr(F::NEG_ZERO) && actual.eq_repr(F::ZERO) { // FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0. // See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955 - return XFAIL; + return XFAIL("i586 ceil/floor signed zero"); } - if (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2) - && cfg!(x86_no_sse) + if cfg!(x86_no_sse) + && (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2) { // FIXME: i586 has very imprecise results with ULP > u32::MAX for these // operations so we can't reasonably provide a limit. 
- return XFAIL; + return XFAIL_NOCHECK; } - maybe_check_nan_bits(actual, expected, ctx) + if ctx.base_name == BaseName::J0 && input.0 < -1e300 { + // Errors get huge close to -inf + return XFAIL_NOCHECK; + } + + // maybe_check_nan_bits(actual, expected, ctx) + unop_common(input, actual, expected, ctx) } - fn check_int( - input: (f64,), - actual: I, - expected: I, - ctx: &CheckCtx, - ) -> Option> { + fn check_int(input: (f64,), actual: I, expected: I, ctx: &CheckCtx) -> CheckAction { // On MPFR for lgamma_r, we set -1 as the integer result for negative infinity but MPFR // sets +1 if ctx.basis == CheckBasis::Mpfr @@ -339,41 +323,68 @@ impl MaybeOverride<(f64,)> for SpecialCase { && input.0 == f64::NEG_INFINITY && actual.abs() == expected.abs() { - XFAIL - } else { - None + return XFAIL("lgammar integer result"); } + + DEFAULT } } #[cfg(f128_enabled)] impl MaybeOverride<(f128,)> for SpecialCase {} -/// Check NaN bits if the function requires it -fn maybe_check_nan_bits(actual: F, expected: F, ctx: &CheckCtx) -> Option { - if !(ctx.base_name == BaseName::Fabs || ctx.base_name == BaseName::Copysign) { - return None; - } +// F1 and F2 are always the same type, this is just to please generics +fn unop_common( + input: (F1,), + actual: F2, + expected: F2, + ctx: &CheckCtx, +) -> CheckAction { + if ctx.base_name == BaseName::Acosh + && input.0 < F1::NEG_ONE + && !(expected.is_nan() && actual.is_nan()) + { + // acoshf is undefined for x <= 1.0, but we return a random result at lower values. - // LLVM currently uses x87 instructions which quieten signalling NaNs to handle the i686 - // `extern "C"` `f32`/`f64` return ABI. - // LLVM issue - // Rust issue - if cfg!(target_arch = "x86") && ctx.basis == CheckBasis::Musl { - return SKIP; + if ctx.basis == CheckBasis::Musl { + return XFAIL_NOCHECK; + } + + return XFAIL("acoshf undefined"); } - // MPFR only has one NaN bitpattern; allow the default `.is_nan()` checks to validate. 
- if ctx.basis == CheckBasis::Mpfr { - return SKIP; + if (ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR) + && input.0 < F1::ZERO + && !input.0.is_infinite() + { + // loggamma should not be defined for x < 0, yet we both return results + return XFAIL_NOCHECK; } - // abs and copysign require signaling NaNs to be propagated, so verify bit equality. - if actual.to_bits() == expected.to_bits() { - SKIP - } else { - Some(Err(anyhow::anyhow!("NaNs have different bitpatterns"))) + // fabs and copysign must leave NaNs untouched. + if ctx.base_name == BaseName::Fabs && input.0.is_nan() { + // LLVM currently uses x87 instructions which quieten signalling NaNs to handle the i686 + // `extern "C"` `f32`/`f64` return ABI. + // LLVM issue + // Rust issue + if cfg!(target_arch = "x86") && ctx.basis == CheckBasis::Musl && actual.is_nan() { + return XFAIL_NOCHECK; + } + + // MPFR only has one NaN bitpattern; allow the default `.is_nan()` checks to validate. + if ctx.basis == CheckBasis::Mpfr { + return DEFAULT; + } + + // abs and copysign require signaling NaNs to be propagated, so verify bit equality. 
+ if actual.to_bits() == expected.to_bits() { + return CheckAction::Custom(Ok(())); + } else { + return CheckAction::Custom(Err(anyhow::anyhow!("NaNs have different bitpatterns"))); + } } + + DEFAULT } #[cfg(f16_enabled)] @@ -382,9 +393,8 @@ impl MaybeOverride<(f16, f16)> for SpecialCase { input: (f16, f16), actual: F, expected: F, - _ulp: &mut u32, ctx: &CheckCtx, - ) -> Option { + ) -> CheckAction { binop_common(input, actual, expected, ctx) } } @@ -394,18 +404,8 @@ impl MaybeOverride<(f32, f32)> for SpecialCase { input: (f32, f32), actual: F, expected: F, - _ulp: &mut u32, ctx: &CheckCtx, - ) -> Option { - if ctx.base_name == BaseName::Fmin - && input.0.biteq(f32::NEG_ZERO) - && input.1.biteq(f32::ZERO) - && expected.biteq(F::NEG_ZERO) - && actual.biteq(F::ZERO) - { - return XFAIL; - } - + ) -> CheckAction { binop_common(input, actual, expected, ctx) } @@ -414,7 +414,7 @@ impl MaybeOverride<(f32, f32)> for SpecialCase { actual: I, expected: I, ctx: &CheckCtx, - ) -> Option { + ) -> CheckAction { remquo_common(actual, expected, ctx) } } @@ -424,18 +424,8 @@ impl MaybeOverride<(f64, f64)> for SpecialCase { input: (f64, f64), actual: F, expected: F, - _ulp: &mut u32, ctx: &CheckCtx, - ) -> Option { - if ctx.base_name == BaseName::Fmin - && input.0.biteq(f64::NEG_ZERO) - && input.1.biteq(f64::ZERO) - && expected.biteq(F::ZERO) - && actual.biteq(F::NEG_ZERO) - { - return XFAIL; - } - + ) -> CheckAction { binop_common(input, actual, expected, ctx) } @@ -444,33 +434,19 @@ impl MaybeOverride<(f64, f64)> for SpecialCase { actual: I, expected: I, ctx: &CheckCtx, - ) -> Option { + ) -> CheckAction { remquo_common(actual, expected, ctx) } } -fn remquo_common(actual: I, expected: I, ctx: &CheckCtx) -> Option { - // FIXME: Our MPFR implementation disagrees with musl and may need to be updated. 
- if ctx.basis == CheckBasis::Mpfr - && ctx.base_name == BaseName::Remquo - && expected == I::MIN - && actual == I::ZERO - { - return XFAIL; - } - - None -} - #[cfg(f128_enabled)] impl MaybeOverride<(f128, f128)> for SpecialCase { fn check_float( input: (f128, f128), actual: F, expected: F, - _ulp: &mut u32, ctx: &CheckCtx, - ) -> Option { + ) -> CheckAction { binop_common(input, actual, expected, ctx) } } @@ -481,8 +457,17 @@ fn binop_common( actual: F2, expected: F2, ctx: &CheckCtx, -) -> Option { - /* FIXME(#439): we do not compare signed zeros */ +) -> CheckAction { + // MPFR only has one NaN bitpattern; allow the default `.is_nan()` checks to validate. Skip if + // the first input (magnitude source) is NaN and the output is also a NaN, or if the second + // input (sign source) is NaN. + if ctx.basis == CheckBasis::Mpfr + && ((input.0.is_nan() && actual.is_nan() && expected.is_nan()) || input.1.is_nan()) + { + return SKIP; + } + + /* FIXME(#439): our fmin and fmax do not compare signed zeros */ if ctx.base_name == BaseName::Fmin && input.0.biteq(F1::NEG_ZERO) @@ -490,7 +475,7 @@ fn binop_common( && expected.biteq(F2::NEG_ZERO) && actual.biteq(F2::ZERO) { - return XFAIL; + return XFAIL("fmin signed zeroes"); } if ctx.base_name == BaseName::Fmax @@ -499,21 +484,32 @@ fn binop_common( && expected.biteq(F2::ZERO) && actual.biteq(F2::NEG_ZERO) { - return XFAIL; + return XFAIL("fmax signed zeroes"); } // Musl propagates NaNs if one is provided as the input, but we return the other input. 
- match (&ctx.basis, ctx.base_name) { - (Musl, BaseName::Fmin | BaseName::Fmax) - if (input.0.is_nan() || input.1.is_nan()) && expected.is_nan() => - { - XFAIL - } + if (ctx.base_name == BaseName::Fmax || ctx.base_name == BaseName::Fmin) + && ctx.basis == Musl + && (input.0.is_nan() ^ input.1.is_nan()) + && expected.is_nan() + { + return XFAIL("fmax/fmin musl NaN"); + } - (Mpfr, BaseName::Copysign) if input.1.is_nan() => SKIP, + DEFAULT +} - _ => None, +fn remquo_common(actual: I, expected: I, ctx: &CheckCtx) -> CheckAction { + // FIXME: Our MPFR implementation disagrees with musl and may need to be updated. + if ctx.basis == CheckBasis::Mpfr + && ctx.base_name == BaseName::Remquo + && expected == I::MIN + && actual == I::ZERO + { + return XFAIL("remquo integer mismatch"); } + + DEFAULT } impl MaybeOverride<(i32, f32)> for SpecialCase { @@ -521,28 +517,19 @@ impl MaybeOverride<(i32, f32)> for SpecialCase { input: (i32, f32), actual: F, expected: F, - ulp: &mut u32, ctx: &CheckCtx, - ) -> Option { - match (&ctx.basis, ctx.base_name) { - (Musl, _) => bessel_prec_dropoff(input, actual, expected, ulp, ctx), - - // We return +0.0, MPFR returns -0.0 - (Mpfr, BaseName::Jn | BaseName::Yn) - if input.1 == f32::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO => - { - XFAIL - } - - // `ynf(213, 109.15641) = -inf` with our library, should be finite. - (_, BaseName::Yn) - if input.0 > 200 && !expected.is_infinite() && actual.is_infinite() => - { - XFAIL - } - - _ => None, + ) -> CheckAction { + // `ynf(213, 109.15641) = -inf` with our library, should be finite. 
+ if ctx.basis == Mpfr + && ctx.base_name == BaseName::Yn + && input.0 > 200 + && !expected.is_infinite() + && actual.is_infinite() + { + return XFAIL("ynf infinity mismatch"); } + + int_float_common(input, actual, expected, ctx) } } @@ -551,55 +538,51 @@ impl MaybeOverride<(i32, f64)> for SpecialCase { input: (i32, f64), actual: F, expected: F, - ulp: &mut u32, ctx: &CheckCtx, - ) -> Option { - match (&ctx.basis, ctx.base_name) { - (Musl, _) => bessel_prec_dropoff(input, actual, expected, ulp, ctx), - - // We return +0.0, MPFR returns -0.0 - (Mpfr, BaseName::Jn | BaseName::Yn) - if input.1 == f64::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO => - { - XFAIL - } - - _ => None, - } + ) -> CheckAction { + int_float_common(input, actual, expected, ctx) } } -/// Our bessel functions blow up with large N values -fn bessel_prec_dropoff( +fn int_float_common( input: (i32, F1), actual: F2, expected: F2, - ulp: &mut u32, ctx: &CheckCtx, -) -> Option { - if ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn { +) -> CheckAction { + if ctx.basis == Mpfr + && (ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn) + && input.1 == F1::NEG_INFINITY + && actual == F2::ZERO + && expected == F2::ZERO + { + return XFAIL("mpfr b"); + } + + // Our bessel functions blow up with large N values + if ctx.basis == Musl && (ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn) { if input.0 > 4000 { - return XFAIL; + return XFAIL_NOCHECK; } else if input.0 > 2000 { - // *ulp = 20_000; - *ulp = 20000; + return CheckAction::AssertWithUlp(20_000); } else if input.0 > 1000 { - *ulp = 4000; + return CheckAction::AssertWithUlp(4_000); } } // Values near infinity sometimes get cut off for us. `ynf(681, 509.90924) = -inf` but should // be -3.2161271e38. 
- if ctx.fn_ident == Identifier::Ynf + if ctx.basis == Musl + && ctx.fn_ident == Identifier::Ynf && !expected.is_infinite() && actual.is_infinite() && (expected.abs().to_bits().abs_diff(actual.abs().to_bits()) < F2::Int::cast_from(1_000_000u32)) { - return XFAIL; + return XFAIL_NOCHECK; } - None + DEFAULT } impl MaybeOverride<(f32, i32)> for SpecialCase {} @@ -610,9 +593,8 @@ impl MaybeOverride<(f32, f32, f32)> for SpecialCase { input: (f32, f32, f32), actual: F, expected: F, - _ulp: &mut u32, ctx: &CheckCtx, - ) -> Option { + ) -> CheckAction { ternop_common(input, actual, expected, ctx) } } @@ -621,9 +603,8 @@ impl MaybeOverride<(f64, f64, f64)> for SpecialCase { input: (f64, f64, f64), actual: F, expected: F, - _ulp: &mut u32, ctx: &CheckCtx, - ) -> Option { + ) -> CheckAction { ternop_common(input, actual, expected, ctx) } } @@ -634,7 +615,7 @@ fn ternop_common( actual: F2, expected: F2, ctx: &CheckCtx, -) -> Option { +) -> CheckAction { // FIXME(fma): 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result // of fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the // exact result". Our implementation returns the wrong sign: @@ -647,8 +628,8 @@ fn ternop_common( && expected.biteq(F2::NEG_ZERO) && actual.biteq(F2::ZERO) { - return XFAIL; + return XFAIL("fma sign"); } - None + DEFAULT } diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index 0a4baa2e3..a5806943e 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -8,8 +8,9 @@ use std::fmt; -use anyhow::{Context, bail, ensure}; +use anyhow::{Context, anyhow, bail, ensure}; +use crate::precision::CheckAction; use crate::{CheckCtx, Float, Int, MaybeOverride, SpecialCase, TestResult}; /// Trait for calling a function with a tuple as arguments. 
@@ -185,20 +186,34 @@ where Input: Hex + fmt::Debug, SpecialCase: MaybeOverride, { - if let Some(res) = SpecialCase::check_int(input, actual, expected, ctx) { - return res; - } + let (result, xfail_msg) = match SpecialCase::check_int(input, actual, expected, ctx) { + CheckAction::AssertSuccess => (actual == expected, None), + CheckAction::AssertFailure(msg) => (actual != expected, Some(msg)), + CheckAction::Custom(res) => return res, + CheckAction::Skip => return Ok(()), + CheckAction::AssertWithUlp(_) => panic!("ulp has no meaning for integer checks"), + }; + + let make_xfail_msg = || match xfail_msg { + Some(m) => format!( + "expected failure but test passed. Does an XFAIL need to be updated?\n\ + failed at: {m}", + ), + None => String::new(), + }; anyhow::ensure!( - actual == expected, + result, "\ \n input: {input:?} {ibits}\ \n expected: {expected:<22?} {expbits}\ \n actual: {actual:<22?} {actbits}\ + \n {msg}\ ", actbits = actual.hex(), expbits = expected.hex(), ibits = input.hex(), + msg = make_xfail_msg() ); Ok(()) @@ -246,15 +261,19 @@ where u32: TryFrom, SpecialCase: MaybeOverride, { + let mut assert_failure_msg = None; + // Create a wrapper function so we only need to `.with_context` once. - let inner = || -> TestResult { + let mut inner = || -> TestResult { let mut allowed_ulp = ctx.ulp; - // If the tested function requires a nonstandard test, run it here. 
- if let Some(res) = SpecialCase::check_float(input, actual, expected, &mut allowed_ulp, ctx) - { - return res; - } + match SpecialCase::check_float(input, actual, expected, ctx) { + CheckAction::AssertSuccess => (), + CheckAction::AssertFailure(msg) => assert_failure_msg = Some(msg), + CheckAction::Custom(res) => return res, + CheckAction::Skip => return Ok(()), + CheckAction::AssertWithUlp(ulp_override) => allowed_ulp = ulp_override, + }; // Check when both are NaNs if actual.is_nan() && expected.is_nan() { @@ -280,14 +299,29 @@ where let ulp_diff = act_bits.checked_sub(exp_bits).unwrap().abs(); let ulp_u32 = u32::try_from(ulp_diff) - .map_err(|e| anyhow::anyhow!("{e:?}: ulp of {ulp_diff} exceeds u32::MAX"))?; + .map_err(|e| anyhow!("{e:?}: ulp of {ulp_diff} exceeds u32::MAX"))?; ensure!(ulp_u32 <= allowed_ulp, "ulp {ulp_diff} > {allowed_ulp}",); Ok(()) }; - inner().with_context(|| { + let mut res = inner(); + + if let Some(msg) = assert_failure_msg { + // Invert `Ok` and `Err` if the test is an xfail. + if res.is_ok() { + let e = anyhow!( + "expected failure but test passed. Does an XFAIL need to be updated?\n\ + failed at: {msg}", + ); + res = Err(e) + } else { + res = Ok(()) + } + } + + res.with_context(|| { format!( "\ \n input: {input:?} {ibits}\ From fd78ea9a82889d9ed1f1202b960c944f11f974d2 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 23 Jan 2025 09:15:43 +0000 Subject: [PATCH 1116/1459] Change `from_parts` to take a `u32` exponent rather than `i32` Make things more consistent with other API that works with a bitwise representation of the exponent. That is, use `u32` when working with a bitwise (biased) representation, use `i32` when the bitwise representation has been adjusted for bias and may be negative. Every place this has been used so far has an `as i32`, so this change makes things cleaner anyway.
--- libm/src/math/generic/sqrt.rs | 2 +- libm/src/math/support/float_traits.rs | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/libm/src/math/generic/sqrt.rs b/libm/src/math/generic/sqrt.rs index 22ee93f57..c20c0f205 100644 --- a/libm/src/math/generic/sqrt.rs +++ b/libm/src/math/generic/sqrt.rs @@ -92,7 +92,7 @@ where } // Normalize subnormals by multiplying by 1.0 << SIG_BITS (e.g. 0x1p52 for doubles). - let scaled = x * F::from_parts(false, (F::SIG_BITS + F::EXP_BIAS) as i32, zero); + let scaled = x * F::from_parts(false, F::SIG_BITS + F::EXP_BIAS, zero); ix = scaled.to_bits(); match top { Exp::Shifted(ref mut v) => { diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 1abb7c4de..57e4aebec 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -131,11 +131,11 @@ pub trait Float: fn from_bits(a: Self::Int) -> Self; /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. 
- fn from_parts(negative: bool, exponent: i32, significand: Self::Int) -> Self { + fn from_parts(negative: bool, exponent: u32, significand: Self::Int) -> Self { let sign = if negative { Self::Int::ONE } else { Self::Int::ZERO }; Self::from_bits( (sign << (Self::BITS - 1)) - | (Self::Int::cast_from(exponent as u32 & Self::EXP_MAX) << Self::SIG_BITS) + | (Self::Int::cast_from(exponent & Self::EXP_MAX) << Self::SIG_BITS) | (significand & Self::SIG_MASK), ) } @@ -282,7 +282,7 @@ mod tests { assert_eq!(f16::from_bits(0x1).exp_unbiased(), -15); // `from_parts` - assert_biteq!(f16::from_parts(true, f16::EXP_BIAS as i32, 0), -1.0f16); + assert_biteq!(f16::from_parts(true, f16::EXP_BIAS, 0), -1.0f16); assert_biteq!(f16::from_parts(false, 0, 1), f16::from_bits(0x1)); } @@ -304,8 +304,8 @@ mod tests { assert_eq!(f32::from_bits(0x1).exp_unbiased(), -127); // `from_parts` - assert_biteq!(f32::from_parts(true, f32::EXP_BIAS as i32, 0), -1.0f32); - assert_biteq!(f32::from_parts(false, 10 + f32::EXP_BIAS as i32, 0), hf32!("0x1p10")); + assert_biteq!(f32::from_parts(true, f32::EXP_BIAS, 0), -1.0f32); + assert_biteq!(f32::from_parts(false, 10 + f32::EXP_BIAS, 0), hf32!("0x1p10")); assert_biteq!(f32::from_parts(false, 0, 1), f32::from_bits(0x1)); } @@ -327,8 +327,8 @@ mod tests { assert_eq!(f64::from_bits(0x1).exp_unbiased(), -1023); // `from_parts` - assert_biteq!(f64::from_parts(true, f64::EXP_BIAS as i32, 0), -1.0f64); - assert_biteq!(f64::from_parts(false, 10 + f64::EXP_BIAS as i32, 0), hf64!("0x1p10")); + assert_biteq!(f64::from_parts(true, f64::EXP_BIAS, 0), -1.0f64); + assert_biteq!(f64::from_parts(false, 10 + f64::EXP_BIAS, 0), hf64!("0x1p10")); assert_biteq!(f64::from_parts(false, 0, 1), f64::from_bits(0x1)); } @@ -351,7 +351,7 @@ mod tests { assert_eq!(f128::from_bits(0x1).exp_unbiased(), -16383); // `from_parts` - assert_biteq!(f128::from_parts(true, f128::EXP_BIAS as i32, 0), -1.0f128); + assert_biteq!(f128::from_parts(true, f128::EXP_BIAS, 0), -1.0f128); 
assert_biteq!(f128::from_parts(false, 0, 1), f128::from_bits(0x1)); } } From be9ba62af9022643fb9208c1e2e5e7b830499d22 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 23 Jan 2025 22:02:22 +0000 Subject: [PATCH 1117/1459] Add a generic version of `scalbn` This replaces the `f32` and `f64` versions of `scalbn` and `ldexp`. --- libm/crates/libm-test/src/mpfloat.rs | 3 +- libm/etc/function-definitions.json | 2 + libm/src/math/generic/mod.rs | 2 + libm/src/math/generic/scalbn.rs | 123 +++++++++++++++++++++++++++ libm/src/math/scalbn.rs | 33 +------ libm/src/math/scalbnf.rs | 29 +------ 6 files changed, 133 insertions(+), 59 deletions(-) create mode 100644 libm/src/math/generic/scalbn.rs diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index a404f227b..4ac70c2eb 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -158,7 +158,8 @@ libm_macros::for_each_function! { ilogbf, jn, jnf, - ldexp,ldexpf, + ldexp, + ldexpf, lgamma_r, lgammaf_r, modf, diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index d3810b940..bbb2b40f1 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -698,12 +698,14 @@ "scalbn": { "sources": [ "src/libm_helper.rs", + "src/math/generic/scalbn.rs", "src/math/scalbn.rs" ], "type": "f64" }, "scalbnf": { "sources": [ + "src/math/generic/scalbn.rs", "src/math/scalbnf.rs" ], "type": "f32" diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index d3df650e1..c7741cb46 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -4,6 +4,7 @@ mod fabs; mod fdim; mod floor; mod rint; +mod scalbn; mod sqrt; mod trunc; @@ -13,5 +14,6 @@ pub use fabs::fabs; pub use fdim::fdim; pub use floor::floor; pub use rint::rint; +pub use scalbn::scalbn; pub use sqrt::sqrt; pub use trunc::trunc; diff --git a/libm/src/math/generic/scalbn.rs b/libm/src/math/generic/scalbn.rs new 
file mode 100644 index 000000000..f036c15cc --- /dev/null +++ b/libm/src/math/generic/scalbn.rs @@ -0,0 +1,123 @@ +use super::super::{CastFrom, CastInto, Float, IntTy, MinInt}; + +/// Scale the exponent. +/// +/// From N3220: +/// +/// > The scalbn and scalbln functions compute `x * b^n`, where `b = FLT_RADIX` if the return type +/// > of the function is a standard floating type, or `b = 10` if the return type of the function +/// > is a decimal floating type. A range error occurs for some finite x, depending on n. +/// > +/// > [...] +/// > +/// > * `scalbn(±0, n)` returns `±0`. +/// > * `scalbn(x, 0)` returns `x`. +/// > * `scalbn(±∞, n)` returns `±∞`. +/// > +/// > If the calculation does not overflow or underflow, the returned value is exact and +/// > independent of the current rounding direction mode. +pub fn scalbn(mut x: F, mut n: i32) -> F +where + u32: CastInto, + F::Int: CastFrom, + F::Int: CastFrom, +{ + let zero = IntTy::::ZERO; + + // Bits including the implicit bit + let sig_total_bits = F::SIG_BITS + 1; + + // Maximum and minimum values when biased + let exp_max: i32 = F::EXP_BIAS as i32; + let exp_min = -(exp_max - 1); + + // 2 ^ Emax, where Emax is the maximum biased exponent value (1023 for f64) + let f_exp_max = F::from_parts(false, F::EXP_BIAS << 1, zero); + + // 2 ^ Emin, where Emin is the minimum biased exponent value (-1022 for f64) + let f_exp_min = F::from_parts(false, 1, zero); + + // 2 ^ sig_total_bits, representation of what can be accounted for with subnormals + let f_exp_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero); + + if n > exp_max { + x *= f_exp_max; + n -= exp_max; + if n > exp_max { + x *= f_exp_max; + n -= exp_max; + if n > exp_max { + n = exp_max; + } + } + } else if n < exp_min { + let mul = f_exp_min * f_exp_subnorm; + let add = (exp_max - 1) - sig_total_bits as i32; + + x *= mul; + n += add; + if n < exp_min { + x *= mul; + n += add; + if n < exp_min { + n = exp_min; + } + } + } + + x * 
F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero) +} + +#[cfg(test)] +mod tests { + use super::super::super::Int; + use super::*; + + // Tests against N3220 + fn spec_test() + where + u32: CastInto, + F::Int: CastFrom, + F::Int: CastFrom, + { + // `scalbn(±0, n)` returns `±0`. + assert_biteq!(scalbn(F::NEG_ZERO, 10), F::NEG_ZERO); + assert_biteq!(scalbn(F::NEG_ZERO, 0), F::NEG_ZERO); + assert_biteq!(scalbn(F::NEG_ZERO, -10), F::NEG_ZERO); + assert_biteq!(scalbn(F::ZERO, 10), F::ZERO); + assert_biteq!(scalbn(F::ZERO, 0), F::ZERO); + assert_biteq!(scalbn(F::ZERO, -10), F::ZERO); + + // `scalbn(x, 0)` returns `x`. + assert_biteq!(scalbn(F::MIN, 0), F::MIN); + assert_biteq!(scalbn(F::MAX, 0), F::MAX); + assert_biteq!(scalbn(F::INFINITY, 0), F::INFINITY); + assert_biteq!(scalbn(F::NEG_INFINITY, 0), F::NEG_INFINITY); + assert_biteq!(scalbn(F::ZERO, 0), F::ZERO); + assert_biteq!(scalbn(F::NEG_ZERO, 0), F::NEG_ZERO); + + // `scalbn(±∞, n)` returns `±∞`. + assert_biteq!(scalbn(F::INFINITY, 10), F::INFINITY); + assert_biteq!(scalbn(F::INFINITY, -10), F::INFINITY); + assert_biteq!(scalbn(F::NEG_INFINITY, 10), F::NEG_INFINITY); + assert_biteq!(scalbn(F::NEG_INFINITY, -10), F::NEG_INFINITY); + + // NaN should remain NaNs. 
+ assert!(scalbn(F::NAN, 10).is_nan()); + assert!(scalbn(F::NAN, 0).is_nan()); + assert!(scalbn(F::NAN, -10).is_nan()); + assert!(scalbn(-F::NAN, 10).is_nan()); + assert!(scalbn(-F::NAN, 0).is_nan()); + assert!(scalbn(-F::NAN, -10).is_nan()); + } + + #[test] + fn spec_test_f32() { + spec_test::(); + } + + #[test] + fn spec_test_f64() { + spec_test::(); + } +} diff --git a/libm/src/math/scalbn.rs b/libm/src/math/scalbn.rs index 00c455a10..f809dad51 100644 --- a/libm/src/math/scalbn.rs +++ b/libm/src/math/scalbn.rs @@ -1,33 +1,4 @@ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn scalbn(x: f64, mut n: i32) -> f64 { - let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 - let x1p53 = f64::from_bits(0x4340000000000000); // 0x1p53 === 2 ^ 53 - let x1p_1022 = f64::from_bits(0x0010000000000000); // 0x1p-1022 === 2 ^ (-1022) - - let mut y = x; - - if n > 1023 { - y *= x1p1023; - n -= 1023; - if n > 1023 { - y *= x1p1023; - n -= 1023; - if n > 1023 { - n = 1023; - } - } - } else if n < -1022 { - /* make sure final n < -53 to avoid double - rounding in the subnormal range */ - y *= x1p_1022 * x1p53; - n += 1022 - 53; - if n < -1022 { - y *= x1p_1022 * x1p53; - n += 1022 - 53; - if n < -1022 { - n = -1022; - } - } - } - y * f64::from_bits(((0x3ff + n) as u64) << 52) +pub fn scalbn(x: f64, n: i32) -> f64 { + super::generic::scalbn(x, n) } diff --git a/libm/src/math/scalbnf.rs b/libm/src/math/scalbnf.rs index 73f4bb57a..57e7ba76f 100644 --- a/libm/src/math/scalbnf.rs +++ b/libm/src/math/scalbnf.rs @@ -1,29 +1,4 @@ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn scalbnf(mut x: f32, mut n: i32) -> f32 { - let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 - let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 - let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24 - - if n > 127 { - x *= x1p127; - n -= 127; - if n > 127 { - x *= x1p127; - n -= 127; - if n > 127 { - n = 127; - } - } - } 
else if n < -126 { - x *= x1p_126 * x1p24; - n += 126 - 24; - if n < -126 { - x *= x1p_126 * x1p24; - n += 126 - 24; - if n < -126 { - n = -126; - } - } - } - x * f32::from_bits(((0x7f + n) as u32) << 23) +pub fn scalbnf(x: f32, n: i32) -> f32 { + super::generic::scalbn(x, n) } From e7f7efd7cf2fec3c1d8ce5c164aadcc5aea161e4 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 24 Jan 2025 01:49:23 +0000 Subject: [PATCH 1118/1459] Add a generic version of `round` This replaces `round` and `roundf`. --- libm/etc/function-definitions.json | 2 ++ libm/src/math/generic/mod.rs | 2 ++ libm/src/math/generic/round.rs | 46 ++++++++++++++++++++++++++++++ libm/src/math/round.rs | 27 ++---------------- libm/src/math/roundf.rs | 29 ++----------------- 5 files changed, 54 insertions(+), 52 deletions(-) create mode 100644 libm/src/math/generic/round.rs diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index bbb2b40f1..4aea45a07 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -685,12 +685,14 @@ "round": { "sources": [ "src/libm_helper.rs", + "src/math/generic/round.rs", "src/math/round.rs" ], "type": "f64" }, "roundf": { "sources": [ + "src/math/generic/round.rs", "src/math/roundf.rs" ], "type": "f32" diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index c7741cb46..1f557719f 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -4,6 +4,7 @@ mod fabs; mod fdim; mod floor; mod rint; +mod round; mod scalbn; mod sqrt; mod trunc; @@ -14,6 +15,7 @@ pub use fabs::fabs; pub use fdim::fdim; pub use floor::floor; pub use rint::rint; +pub use round::round; pub use scalbn::scalbn; pub use sqrt::sqrt; pub use trunc::trunc; diff --git a/libm/src/math/generic/round.rs b/libm/src/math/generic/round.rs new file mode 100644 index 000000000..fc9a1b675 --- /dev/null +++ b/libm/src/math/generic/round.rs @@ -0,0 +1,46 @@ +use super::super::{Float, MinInt}; +use 
super::{copysign, trunc}; + +pub fn round(x: F) -> F { + let f0p5 = F::from_parts(false, F::EXP_BIAS - 1, F::Int::ZERO); // 0.5 + let f0p25 = F::from_parts(false, F::EXP_BIAS - 2, F::Int::ZERO); // 0.25 + + trunc(x + copysign(f0p5 - f0p25 * F::EPSILON, x)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn zeroes_f32() { + assert_biteq!(round(0.0_f32), 0.0_f32); + assert_biteq!(round(-0.0_f32), -0.0_f32); + } + + #[test] + fn sanity_check_f32() { + assert_eq!(round(-1.0_f32), -1.0); + assert_eq!(round(2.8_f32), 3.0); + assert_eq!(round(-0.5_f32), -1.0); + assert_eq!(round(0.5_f32), 1.0); + assert_eq!(round(-1.5_f32), -2.0); + assert_eq!(round(1.5_f32), 2.0); + } + + #[test] + fn zeroes_f64() { + assert_biteq!(round(0.0_f64), 0.0_f64); + assert_biteq!(round(-0.0_f64), -0.0_f64); + } + + #[test] + fn sanity_check_f64() { + assert_eq!(round(-1.0_f64), -1.0); + assert_eq!(round(2.8_f64), 3.0); + assert_eq!(round(-0.5_f64), -1.0); + assert_eq!(round(0.5_f64), 1.0); + assert_eq!(round(-1.5_f64), -2.0); + assert_eq!(round(1.5_f64), 2.0); + } +} diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs index b81ebaa1d..36e0eb1f2 100644 --- a/libm/src/math/round.rs +++ b/libm/src/math/round.rs @@ -1,28 +1,5 @@ -use core::f64; - -use super::{copysign, trunc}; - +/// Round `x` to the nearest integer, breaking ties away from zero. 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn round(x: f64) -> f64 { - trunc(x + copysign(0.5 - 0.25 * f64::EPSILON, x)) -} - -#[cfg(test)] -mod tests { - use super::round; - - #[test] - fn negative_zero() { - assert_eq!(round(-0.0_f64).to_bits(), (-0.0_f64).to_bits()); - } - - #[test] - fn sanity_check() { - assert_eq!(round(-1.0), -1.0); - assert_eq!(round(2.8), 3.0); - assert_eq!(round(-0.5), -1.0); - assert_eq!(round(0.5), 1.0); - assert_eq!(round(-1.5), -2.0); - assert_eq!(round(1.5), 2.0); - } + super::generic::round(x) } diff --git a/libm/src/math/roundf.rs b/libm/src/math/roundf.rs index fb974bbfe..b5d7c9d69 100644 --- a/libm/src/math/roundf.rs +++ b/libm/src/math/roundf.rs @@ -1,30 +1,5 @@ -use core::f32; - -use super::{copysignf, truncf}; - +/// Round `x` to the nearest integer, breaking ties away from zero. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn roundf(x: f32) -> f32 { - truncf(x + copysignf(0.5 - 0.25 * f32::EPSILON, x)) -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use super::roundf; - - #[test] - fn negative_zero() { - assert_eq!(roundf(-0.0_f32).to_bits(), (-0.0_f32).to_bits()); - } - - #[test] - fn sanity_check() { - assert_eq!(roundf(-1.0), -1.0); - assert_eq!(roundf(2.8), 3.0); - assert_eq!(roundf(-0.5), -1.0); - assert_eq!(roundf(0.5), 1.0); - assert_eq!(roundf(-1.5), -2.0); - assert_eq!(roundf(1.5), 2.0); - } + super::generic::round(x) } From d1ed70891d29f454e8b80e235b3509f0a8d2009e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 24 Jan 2025 01:57:12 +0000 Subject: [PATCH 1119/1459] Add `roundf16` and `roundf128` --- libm/crates/libm-macros/src/shared.rs | 4 +-- libm/crates/libm-test/benches/random.rs | 2 ++ libm/crates/libm-test/src/mpfloat.rs | 4 +++ .../libm-test/tests/compare_built_musl.rs | 2 ++ libm/crates/util/src/main.rs | 2 ++ libm/etc/function-definitions.json | 14 
++++++++ libm/etc/function-list.txt | 2 ++ libm/src/math/generic/round.rs | 36 +++++++++++++++++++ libm/src/math/mod.rs | 4 +++ libm/src/math/roundf128.rs | 5 +++ libm/src/math/roundf16.rs | 5 +++ 11 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 libm/src/math/roundf128.rs create mode 100644 libm/src/math/roundf16.rs diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index 80bd3e907..b233e34f1 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ b/libm/crates/libm-macros/src/shared.rs @@ -9,7 +9,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F16, Signature { args: &[Ty::F16], returns: &[Ty::F16] }, None, - &["ceilf16", "fabsf16", "floorf16", "rintf16", "sqrtf16", "truncf16"], + &["ceilf16", "fabsf16", "floorf16", "rintf16", "roundf16", "sqrtf16", "truncf16"], ), ( // `fn(f32) -> f32` @@ -40,7 +40,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F128, Signature { args: &[Ty::F128], returns: &[Ty::F128] }, None, - &["ceilf128", "fabsf128", "floorf128", "rintf128", "sqrtf128", "truncf128"], + &["ceilf128", "fabsf128", "floorf128", "rintf128", "roundf128", "sqrtf128", "truncf128"], ), ( // `(f16, f16) -> f16` diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 4d050e817..d0ecd851e 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -129,6 +129,8 @@ libm_macros::for_each_function! { | floorf16 | rintf128 | rintf16 + | roundf128 + | roundf16 | sqrtf128 | sqrtf16 | truncf128 diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 4ac70c2eb..4422ab88d 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -175,6 +175,8 @@ libm_macros::for_each_function! 
{ rintf16, round, roundf, + roundf128, + roundf16, scalbn, scalbnf, sincos,sincosf, @@ -247,6 +249,7 @@ impl_no_round! { fabsf16 => abs_mut; floorf16 => floor_mut; rintf16 => round_even_mut; // FIXME: respect rounding mode + roundf16 => round_mut; truncf16 => trunc_mut; } @@ -256,6 +259,7 @@ impl_no_round! { fabsf128 => abs_mut; floorf128 => floor_mut; rintf128 => round_even_mut; // FIXME: respect rounding mode + roundf128 => round_mut; truncf128 => trunc_mut; } diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index f009816c9..0fc1b0df1 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -91,6 +91,8 @@ libm_macros::for_each_function! { floorf16, rintf128, rintf16, + roundf128, + roundf16, sqrtf128, sqrtf16, truncf128, diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index 889823d2e..aaedda6d1 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -98,6 +98,8 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { | floorf16 | rintf128 | rintf16 + | roundf128 + | roundf16 | sqrtf128 | sqrtf16 | truncf128 diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 4aea45a07..8c5903e93 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -697,6 +697,20 @@ ], "type": "f32" }, + "roundf128": { + "sources": [ + "src/math/generic/round.rs", + "src/math/roundf128.rs" + ], + "type": "f128" + }, + "roundf16": { + "sources": [ + "src/math/generic/round.rs", + "src/math/roundf16.rs" + ], + "type": "f16" + }, "scalbn": { "sources": [ "src/libm_helper.rs", diff --git a/libm/etc/function-list.txt b/libm/etc/function-list.txt index 41bb4e06b..0b6eed828 100644 --- a/libm/etc/function-list.txt +++ b/libm/etc/function-list.txt @@ -101,6 +101,8 @@ rintf128 rintf16 round roundf +roundf128 +roundf16 scalbn scalbnf sin diff --git 
a/libm/src/math/generic/round.rs b/libm/src/math/generic/round.rs index fc9a1b675..8b5138188 100644 --- a/libm/src/math/generic/round.rs +++ b/libm/src/math/generic/round.rs @@ -12,6 +12,24 @@ pub fn round(x: F) -> F { mod tests { use super::*; + #[test] + #[cfg(f16_enabled)] + fn zeroes_f16() { + assert_biteq!(round(0.0_f16), 0.0_f16); + assert_biteq!(round(-0.0_f16), -0.0_f16); + } + + #[test] + #[cfg(f16_enabled)] + fn sanity_check_f16() { + assert_eq!(round(-1.0_f16), -1.0); + assert_eq!(round(2.8_f16), 3.0); + assert_eq!(round(-0.5_f16), -1.0); + assert_eq!(round(0.5_f16), 1.0); + assert_eq!(round(-1.5_f16), -2.0); + assert_eq!(round(1.5_f16), 2.0); + } + #[test] fn zeroes_f32() { assert_biteq!(round(0.0_f32), 0.0_f32); @@ -43,4 +61,22 @@ mod tests { assert_eq!(round(-1.5_f64), -2.0); assert_eq!(round(1.5_f64), 2.0); } + + #[test] + #[cfg(f128_enabled)] + fn zeroes_f128() { + assert_biteq!(round(0.0_f128), 0.0_f128); + assert_biteq!(round(-0.0_f128), -0.0_f128); + } + + #[test] + #[cfg(f128_enabled)] + fn sanity_check_f128() { + assert_eq!(round(-1.0_f128), -1.0); + assert_eq!(round(2.8_f128), 3.0); + assert_eq!(round(-0.5_f128), -1.0); + assert_eq!(round(0.5_f128), 1.0); + assert_eq!(round(-1.5_f128), -2.0); + assert_eq!(round(1.5_f128), 2.0); + } } diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 53d06974c..8db17a02d 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -347,6 +347,7 @@ cfg_if! { mod fdimf16; mod floorf16; mod rintf16; + mod roundf16; mod sqrtf16; mod truncf16; @@ -356,6 +357,7 @@ cfg_if! { pub use self::fdimf16::fdimf16; pub use self::floorf16::floorf16; pub use self::rintf16::rintf16; + pub use self::roundf16::roundf16; pub use self::sqrtf16::sqrtf16; pub use self::truncf16::truncf16; } @@ -369,6 +371,7 @@ cfg_if! { mod fdimf128; mod floorf128; mod rintf128; + mod roundf128; mod sqrtf128; mod truncf128; @@ -378,6 +381,7 @@ cfg_if! 
{ pub use self::fdimf128::fdimf128; pub use self::floorf128::floorf128; pub use self::rintf128::rintf128; + pub use self::roundf128::roundf128; pub use self::sqrtf128::sqrtf128; pub use self::truncf128::truncf128; } diff --git a/libm/src/math/roundf128.rs b/libm/src/math/roundf128.rs new file mode 100644 index 000000000..fc3164929 --- /dev/null +++ b/libm/src/math/roundf128.rs @@ -0,0 +1,5 @@ +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf128(x: f128) -> f128 { + super::generic::round(x) +} diff --git a/libm/src/math/roundf16.rs b/libm/src/math/roundf16.rs new file mode 100644 index 000000000..8b356eaab --- /dev/null +++ b/libm/src/math/roundf16.rs @@ -0,0 +1,5 @@ +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf16(x: f16) -> f16 { + super::generic::round(x) +} From 2d2c45dea0f4960a6f68bea65a907c164a37307a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 24 Jan 2025 01:57:41 +0000 Subject: [PATCH 1120/1459] Remove an outdated note about precision --- libm/crates/libm-test/src/precision.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 800425f12..bed615882 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -13,9 +13,6 @@ use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult}; pub struct SpecialCase; /// ULP allowed to differ from the results returned by a test basis. -/// -/// Note that these results were obtained using 400M rounds of random inputs, which -/// is not a value used by default. pub fn default_ulp(ctx: &CheckCtx) -> u32 { // ULP compared to the infinite (MPFR) result. 
let mut ulp = match ctx.base_name { From 72956e35509e912b3b9e51611da19dd408819c80 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 24 Jan 2025 02:10:52 +0000 Subject: [PATCH 1121/1459] Add a generic version of `fmin` and `fmax` These can be used for `fmin`, `fminf`, `fmax`, and `fmaxf`. No changes to the implementation are made, so [1] is not fixed. [1]: https://github.com/rust-lang/libm/issues/439 --- libm/etc/function-definitions.json | 12 ++++++++---- libm/src/math/fmax.rs | 11 ++--------- libm/src/math/fmaxf.rs | 11 ++--------- libm/src/math/fmin.rs | 11 ++--------- libm/src/math/fminf.rs | 11 ++--------- libm/src/math/generic/fmax.rs | 14 ++++++++++++++ libm/src/math/generic/fmin.rs | 13 +++++++++++++ libm/src/math/generic/mod.rs | 4 ++++ 8 files changed, 47 insertions(+), 40 deletions(-) create mode 100644 libm/src/math/generic/fmax.rs create mode 100644 libm/src/math/generic/fmin.rs diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 8c5903e93..7ffe91ead 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -379,26 +379,30 @@ "fmax": { "sources": [ "src/libm_helper.rs", - "src/math/fmax.rs" + "src/math/fmax.rs", + "src/math/generic/fmax.rs" ], "type": "f64" }, "fmaxf": { "sources": [ - "src/math/fmaxf.rs" + "src/math/fmaxf.rs", + "src/math/generic/fmax.rs" ], "type": "f32" }, "fmin": { "sources": [ "src/libm_helper.rs", - "src/math/fmin.rs" + "src/math/fmin.rs", + "src/math/generic/fmin.rs" ], "type": "f64" }, "fminf": { "sources": [ - "src/math/fminf.rs" + "src/math/fminf.rs", + "src/math/generic/fmin.rs" ], "type": "f32" }, diff --git a/libm/src/math/fmax.rs b/libm/src/math/fmax.rs index 93c97bc61..d5d9b513b 100644 --- a/libm/src/math/fmax.rs +++ b/libm/src/math/fmax.rs @@ -1,12 +1,5 @@ +/// Return the greater of two arguments or, if either argument is NaN, the other argument. 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmax(x: f64, y: f64) -> f64 { - // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the - // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it - // is either x or y, canonicalized (this means results might differ among implementations). - // When either x or y is a signalingNaN, then the result is according to 6.2. - // - // Since we do not support sNaN in Rust yet, we do not need to handle them. - // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by - // multiplying by 1.0. Should switch to the `canonicalize` when it works. - (if x.is_nan() || x < y { y } else { x }) * 1.0 + super::generic::fmax(x, y) } diff --git a/libm/src/math/fmaxf.rs b/libm/src/math/fmaxf.rs index 607746647..3197d5cf2 100644 --- a/libm/src/math/fmaxf.rs +++ b/libm/src/math/fmaxf.rs @@ -1,12 +1,5 @@ +/// Return the greater of two arguments or, if either argument is NaN, the other argument. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmaxf(x: f32, y: f32) -> f32 { - // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the - // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it - // is either x or y, canonicalized (this means results might differ among implementations). - // When either x or y is a signalingNaN, then the result is according to 6.2. - // - // Since we do not support sNaN in Rust yet, we do not need to handle them. - // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by - // multiplying by 1.0. Should switch to the `canonicalize` when it works. 
- (if x.is_nan() || x < y { y } else { x }) * 1.0 + super::generic::fmax(x, y) } diff --git a/libm/src/math/fmin.rs b/libm/src/math/fmin.rs index ab1509f34..df8ff7c32 100644 --- a/libm/src/math/fmin.rs +++ b/libm/src/math/fmin.rs @@ -1,12 +1,5 @@ +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmin(x: f64, y: f64) -> f64 { - // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the - // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it - // is either x or y, canonicalized (this means results might differ among implementations). - // When either x or y is a signalingNaN, then the result is according to 6.2. - // - // Since we do not support sNaN in Rust yet, we do not need to handle them. - // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by - // multiplying by 1.0. Should switch to the `canonicalize` when it works. - (if y.is_nan() || x < y { x } else { y }) * 1.0 + super::generic::fmin(x, y) } diff --git a/libm/src/math/fminf.rs b/libm/src/math/fminf.rs index 0049e7117..b2cdfe89d 100644 --- a/libm/src/math/fminf.rs +++ b/libm/src/math/fminf.rs @@ -1,12 +1,5 @@ +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fminf(x: f32, y: f32) -> f32 { - // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the - // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it - // is either x or y, canonicalized (this means results might differ among implementations). - // When either x or y is a signalingNaN, then the result is according to 6.2. - // - // Since we do not support sNaN in Rust yet, we do not need to handle them. 
- // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by - // multiplying by 1.0. Should switch to the `canonicalize` when it works. - (if y.is_nan() || x < y { x } else { y }) * 1.0 + super::generic::fmin(x, y) } diff --git a/libm/src/math/generic/fmax.rs b/libm/src/math/generic/fmax.rs new file mode 100644 index 000000000..97803052b --- /dev/null +++ b/libm/src/math/generic/fmax.rs @@ -0,0 +1,14 @@ +use super::super::Float; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmax<F: Float>(x: F, y: F) -> F { + // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the + // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it + // is either x or y, canonicalized (this means results might differ among implementations). + // When either x or y is a signalingNaN, then the result is according to 6.2. + // + // Since we do not support sNaN in Rust yet, we do not need to handle them. + // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by + // multiplying by 1.0. Should switch to the `canonicalize` when it works. + (if x.is_nan() || x < y { y } else { x }) * F::ONE +} diff --git a/libm/src/math/generic/fmin.rs b/libm/src/math/generic/fmin.rs new file mode 100644 index 000000000..697f72004 --- /dev/null +++ b/libm/src/math/generic/fmin.rs @@ -0,0 +1,13 @@ +use super::super::Float; + +pub fn fmin<F: Float>(x: F, y: F) -> F { + // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the + // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it + // is either x or y, canonicalized (this means results might differ among implementations). + // When either x or y is a signalingNaN, then the result is according to 6.2. + // + // Since we do not support sNaN in Rust yet, we do not need to handle them.
+ // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by + // multiplying by 1.0. Should switch to the `canonicalize` when it works. + (if y.is_nan() || x < y { x } else { y }) * F::ONE +} diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index 1f557719f..819781a21 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -3,6 +3,8 @@ mod copysign; mod fabs; mod fdim; mod floor; +mod fmax; +mod fmin; mod rint; mod round; mod scalbn; @@ -14,6 +16,8 @@ pub use copysign::copysign; pub use fabs::fabs; pub use fdim::fdim; pub use floor::floor; +pub use fmax::fmax; +pub use fmin::fmin; pub use rint::rint; pub use round::round; pub use scalbn::scalbn; From c53bc4db2456bd4ea920eb73aeaf6a5db30048b9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 24 Jan 2025 02:57:35 +0000 Subject: [PATCH 1122/1459] Add `fminf16`, `fmaxf16`, `fminf128`, and `fmaxf128` --- libm/crates/libm-macros/src/shared.rs | 4 +-- libm/crates/libm-test/benches/random.rs | 4 +++ libm/crates/libm-test/src/mpfloat.rs | 4 +-- .../libm-test/tests/compare_built_musl.rs | 4 +++ libm/crates/util/src/main.rs | 4 +++ libm/etc/function-definitions.json | 28 +++++++++++++++++++ libm/etc/function-list.txt | 4 +++ libm/src/math/fmaxf128.rs | 5 ++++ libm/src/math/fmaxf16.rs | 5 ++++ libm/src/math/fminf128.rs | 5 ++++ libm/src/math/fminf16.rs | 5 ++++ libm/src/math/mod.rs | 8 ++++++ 12 files changed, 76 insertions(+), 4 deletions(-) create mode 100644 libm/src/math/fmaxf128.rs create mode 100644 libm/src/math/fmaxf16.rs create mode 100644 libm/src/math/fminf128.rs create mode 100644 libm/src/math/fminf16.rs diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index b233e34f1..fbe0702a6 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ b/libm/crates/libm-macros/src/shared.rs @@ -47,7 +47,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F16, Signature { args: 
&[Ty::F16, Ty::F16], returns: &[Ty::F16] }, None, - &["copysignf16", "fdimf16"], + &["copysignf16", "fdimf16", "fmaxf16", "fminf16"], ), ( // `(f32, f32) -> f32` @@ -90,7 +90,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F128, Signature { args: &[Ty::F128, Ty::F128], returns: &[Ty::F128] }, None, - &["copysignf128", "fdimf128"], + &["copysignf128", "fdimf128", "fmaxf128", "fminf128"], ), ( // `(f32, f32, f32) -> f32` diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index d0ecd851e..aac8379fd 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -127,6 +127,10 @@ libm_macros::for_each_function! { | fdimf16 | floorf128 | floorf16 + | fmaxf128 + | fmaxf16 + | fminf128 + | fminf16 | rintf128 | rintf16 | roundf128 diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 4422ab88d..da674c162 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -192,8 +192,8 @@ libm_macros::for_each_function! { fabs | fabsf => abs, fdim | fdimf | fdimf16 | fdimf128 => positive_diff, fma | fmaf => mul_add, - fmax | fmaxf => max, - fmin | fminf => min, + fmax | fmaxf | fmaxf16 | fmaxf128 => max, + fmin | fminf | fminf16 | fminf128 => min, lgamma | lgammaf => ln_gamma, log | logf => ln, log1p | log1pf => ln_1p, diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 0fc1b0df1..ca070e8f6 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -89,6 +89,10 @@ libm_macros::for_each_function! 
{ fdimf16, floorf128, floorf16, + fmaxf128, + fmaxf16, + fminf128, + fminf16, rintf128, rintf16, roundf128, diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index aaedda6d1..eb8e37589 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -96,6 +96,10 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { | fdimf16 | floorf128 | floorf16 + | fmaxf128 + | fmaxf16 + | fminf128 + | fminf16 | rintf128 | rintf16 | roundf128 diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 7ffe91ead..b6653295c 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -391,6 +391,20 @@ ], "type": "f32" }, + "fmaxf128": { + "sources": [ + "src/math/fmaxf128.rs", + "src/math/generic/fmax.rs" + ], + "type": "f128" + }, + "fmaxf16": { + "sources": [ + "src/math/fmaxf16.rs", + "src/math/generic/fmax.rs" + ], + "type": "f16" + }, "fmin": { "sources": [ "src/libm_helper.rs", @@ -406,6 +420,20 @@ ], "type": "f32" }, + "fminf128": { + "sources": [ + "src/math/fminf128.rs", + "src/math/generic/fmin.rs" + ], + "type": "f128" + }, + "fminf16": { + "sources": [ + "src/math/fminf16.rs", + "src/math/generic/fmin.rs" + ], + "type": "f16" + }, "fmod": { "sources": [ "src/libm_helper.rs", diff --git a/libm/etc/function-list.txt b/libm/etc/function-list.txt index 0b6eed828..25b92e58b 100644 --- a/libm/etc/function-list.txt +++ b/libm/etc/function-list.txt @@ -55,8 +55,12 @@ fma fmaf fmax fmaxf +fmaxf128 +fmaxf16 fmin fminf +fminf128 +fminf16 fmod fmodf frexp diff --git a/libm/src/math/fmaxf128.rs b/libm/src/math/fmaxf128.rs new file mode 100644 index 000000000..bace9ab53 --- /dev/null +++ b/libm/src/math/fmaxf128.rs @@ -0,0 +1,5 @@ +/// Return the greater of two arguments or, if either argument is NaN, the other argument. 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaxf128(x: f128, y: f128) -> f128 { + super::generic::fmax(x, y) +} diff --git a/libm/src/math/fmaxf16.rs b/libm/src/math/fmaxf16.rs new file mode 100644 index 000000000..fea15be8f --- /dev/null +++ b/libm/src/math/fmaxf16.rs @@ -0,0 +1,5 @@ +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaxf16(x: f16, y: f16) -> f16 { + super::generic::fmax(x, y) +} diff --git a/libm/src/math/fminf128.rs b/libm/src/math/fminf128.rs new file mode 100644 index 000000000..a9224c22a --- /dev/null +++ b/libm/src/math/fminf128.rs @@ -0,0 +1,5 @@ +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminf128(x: f128, y: f128) -> f128 { + super::generic::fmin(x, y) +} diff --git a/libm/src/math/fminf16.rs b/libm/src/math/fminf16.rs new file mode 100644 index 000000000..6d936be34 --- /dev/null +++ b/libm/src/math/fminf16.rs @@ -0,0 +1,5 @@ +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminf16(x: f16, y: f16) -> f16 { + super::generic::fmin(x, y) +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 8db17a02d..cb83b2587 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -346,6 +346,8 @@ cfg_if! { mod fabsf16; mod fdimf16; mod floorf16; + mod fmaxf16; + mod fminf16; mod rintf16; mod roundf16; mod sqrtf16; @@ -356,6 +358,8 @@ cfg_if! { pub use self::fabsf16::fabsf16; pub use self::fdimf16::fdimf16; pub use self::floorf16::floorf16; + pub use self::fmaxf16::fmaxf16; + pub use self::fminf16::fminf16; pub use self::rintf16::rintf16; pub use self::roundf16::roundf16; pub use self::sqrtf16::sqrtf16; @@ -370,6 +374,8 @@ cfg_if! 
{ mod fabsf128; mod fdimf128; mod floorf128; + mod fmaxf128; + mod fminf128; mod rintf128; mod roundf128; mod sqrtf128; @@ -380,6 +386,8 @@ cfg_if! { pub use self::fabsf128::fabsf128; pub use self::fdimf128::fdimf128; pub use self::floorf128::floorf128; + pub use self::fmaxf128::fmaxf128; + pub use self::fminf128::fminf128; pub use self::rintf128::rintf128; pub use self::roundf128::roundf128; pub use self::sqrtf128::sqrtf128; From e2e27a1dceb1e94c41ff59dee6d3eee69079bd5b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 24 Jan 2025 05:02:47 +0000 Subject: [PATCH 1123/1459] Add a generic version of `fmod` This can replace `fmod` and `fmodf`. As part of this change I was able to replace some of the `while` loops with `leading_zeros`. --- libm/etc/function-definitions.json | 6 +- libm/src/math/fmod.rs | 77 +------------------------ libm/src/math/fmodf.rs | 87 +---------------------------- libm/src/math/generic/fmod.rs | 84 ++++++++++++++++++++++++++++ libm/src/math/generic/mod.rs | 2 + libm/src/math/support/int_traits.rs | 2 + 6 files changed, 96 insertions(+), 162 deletions(-) create mode 100644 libm/src/math/generic/fmod.rs diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index b6653295c..866e9a439 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -437,13 +437,15 @@ "fmod": { "sources": [ "src/libm_helper.rs", - "src/math/fmod.rs" + "src/math/fmod.rs", + "src/math/generic/fmod.rs" ], "type": "f64" }, "fmodf": { "sources": [ - "src/math/fmodf.rs" + "src/math/fmodf.rs", + "src/math/generic/fmod.rs" ], "type": "f32" }, diff --git a/libm/src/math/fmod.rs b/libm/src/math/fmod.rs index b68e6b0ea..d9786b53d 100644 --- a/libm/src/math/fmod.rs +++ b/libm/src/math/fmod.rs @@ -1,78 +1,5 @@ +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmod(x: f64, y: f64) -> f64 { - let mut uxi = x.to_bits(); - let mut uyi = y.to_bits(); - let mut ex = ((uxi >> 52) & 0x7ff) as i64; - let mut ey = ((uyi >> 52) & 0x7ff) as i64; - let sx = uxi >> 63; - let mut i; - - if uyi << 1 == 0 || y.is_nan() || ex == 0x7ff { - return (x * y) / (x * y); - } - if uxi << 1 <= uyi << 1 { - if uxi << 1 == uyi << 1 { - return 0.0 * x; - } - return x; - } - - /* normalize x and y */ - if ex == 0 { - i = uxi << 12; - while i >> 63 == 0 { - ex -= 1; - i <<= 1; - } - uxi <<= -ex + 1; - } else { - uxi &= u64::MAX >> 12; - uxi |= 1 << 52; - } - if ey == 0 { - i = uyi << 12; - while i >> 63 == 0 { - ey -= 1; - i <<= 1; - } - uyi <<= -ey + 1; - } else { - uyi &= u64::MAX >> 12; - uyi |= 1 << 52; - } - - /* x mod y */ - while ex > ey { - i = uxi.wrapping_sub(uyi); - if i >> 63 == 0 { - if i == 0 { - return 0.0 * x; - } - uxi = i; - } - uxi <<= 1; - ex -= 1; - } - i = uxi.wrapping_sub(uyi); - if i >> 63 == 0 { - if i == 0 { - return 0.0 * x; - } - uxi = i; - } - while uxi >> 52 == 0 { - uxi <<= 1; - ex -= 1; - } - - /* scale result */ - if ex > 0 { - uxi -= 1 << 52; - uxi |= (ex as u64) << 52; - } else { - uxi >>= -ex + 1; - } - uxi |= sx << 63; - - f64::from_bits(uxi) + super::generic::fmod(x, y) } diff --git a/libm/src/math/fmodf.rs b/libm/src/math/fmodf.rs index 4de181957..4e95696e2 100644 --- a/libm/src/math/fmodf.rs +++ b/libm/src/math/fmodf.rs @@ -1,88 +1,5 @@ -use core::f32; - +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmodf(x: f32, y: f32) -> f32 { - let mut uxi = x.to_bits(); - let mut uyi = y.to_bits(); - let mut ex = ((uxi >> 23) & 0xff) as i32; - let mut ey = ((uyi >> 23) & 0xff) as i32; - let sx = uxi & 0x80000000; - let mut i; - - if uyi << 1 == 0 || y.is_nan() || ex == 0xff { - return (x * y) / (x * y); - } - - if uxi << 1 <= uyi << 1 { - if uxi << 1 == uyi << 1 { - return 0.0 * x; - } - - return x; - } - - /* normalize x and y */ - if ex == 0 { - i = uxi << 9; - while i >> 31 == 0 { - ex -= 1; - i <<= 1; - } - - uxi <<= -ex + 1; - } else { - uxi &= u32::MAX >> 9; - uxi |= 1 << 23; - } - - if ey == 0 { - i = uyi << 9; - while i >> 31 == 0 { - ey -= 1; - i <<= 1; - } - - uyi <<= -ey + 1; - } else { - uyi &= u32::MAX >> 9; - uyi |= 1 << 23; - } - - /* x mod y */ - while ex > ey { - i = uxi.wrapping_sub(uyi); - if i >> 31 == 0 { - if i == 0 { - return 0.0 * x; - } - uxi = i; - } - uxi <<= 1; - - ex -= 1; - } - - i = uxi.wrapping_sub(uyi); - if i >> 31 == 0 { - if i == 0 { - return 0.0 * x; - } - uxi = i; - } - - while uxi >> 23 == 0 { - uxi <<= 1; - ex -= 1; - } - - /* scale result up */ - if ex > 0 { - uxi -= 1 << 23; - uxi |= (ex as u32) << 23; - } else { - uxi >>= -ex + 1; - } - uxi |= sx; - - f32::from_bits(uxi) + super::generic::fmod(x, y) } diff --git a/libm/src/math/generic/fmod.rs b/libm/src/math/generic/fmod.rs new file mode 100644 index 000000000..93da6c51e --- /dev/null +++ b/libm/src/math/generic/fmod.rs @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/fmod.c. Ported to generic Rust algorithm in 2025, TG. 
*/ + +use super::super::{CastFrom, Float, Int, MinInt}; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmod<F: Float>(x: F, y: F) -> F { + let zero = F::Int::ZERO; + let one = F::Int::ONE; + let mut ix = x.to_bits(); + let mut iy = y.to_bits(); + let mut ex = x.exp().signed(); + let mut ey = y.exp().signed(); + let sx = ix & F::SIGN_MASK; + + if iy << 1 == zero || y.is_nan() || ex == F::EXP_MAX as i32 { + return (x * y) / (x * y); + } + + if ix << 1 <= iy << 1 { + if ix << 1 == iy << 1 { + return F::ZERO * x; + } + return x; + } + + /* normalize x and y */ + if ex == 0 { + let i = ix << F::EXP_BITS; + ex -= i.leading_zeros() as i32; + ix <<= -ex + 1; + } else { + ix &= F::Int::MAX >> F::EXP_BITS; + ix |= one << F::SIG_BITS; + } + + if ey == 0 { + let i = iy << F::EXP_BITS; + ey -= i.leading_zeros() as i32; + iy <<= -ey + 1; + } else { + iy &= F::Int::MAX >> F::EXP_BITS; + iy |= one << F::SIG_BITS; + } + + /* x mod y */ + while ex > ey { + let i = ix.wrapping_sub(iy); + if i >> (F::BITS - 1) == zero { + if i == zero { + return F::ZERO * x; + } + ix = i; + } + + ix <<= 1; + ex -= 1; + } + + let i = ix.wrapping_sub(iy); + if i >> (F::BITS - 1) == zero { + if i == zero { + return F::ZERO * x; + } + + ix = i; + } + + let shift = ix.leading_zeros().saturating_sub(F::EXP_BITS); + ix <<= shift; + ex -= shift as i32; + + /* scale result */ + if ex > 0 { + ix -= one << F::SIG_BITS; + ix |= F::Int::cast_from(ex) << F::SIG_BITS; + } else { + ix >>= -ex + 1; + } + + ix |= sx; + + F::from_bits(ix) +} diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index 819781a21..68686b0b2 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -5,6 +5,7 @@ mod fdim; mod floor; mod fmax; mod fmin; +mod fmod; mod rint; mod round; mod scalbn; @@ -18,6 +19,7 @@ pub use fdim::fdim; pub use floor::floor; pub use fmax::fmax; pub use fmin::fmin; +pub use fmod::fmod; pub use rint::rint; pub use round::round; pub use scalbn::scalbn; diff
--git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index cf19762e8..b403c658c 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -45,7 +45,9 @@ pub trait Int: + ops::BitOrAssign + ops::BitXorAssign + ops::ShlAssign + + ops::ShlAssign + ops::ShrAssign + + ops::ShrAssign + ops::Add + ops::Sub + ops::Mul From 2634ceb9d8252b182f49abaa6b6e935c42a329ea Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 24 Jan 2025 05:09:08 +0000 Subject: [PATCH 1124/1459] Add `fmodf16` using the generic implementation --- libm/crates/libm-macros/src/shared.rs | 2 +- libm/crates/libm-test/benches/icount.rs | 1 + libm/crates/libm-test/benches/random.rs | 1 + libm/crates/libm-test/src/mpfloat.rs | 17 +++++++++++++++++ .../libm-test/tests/compare_built_musl.rs | 1 + libm/crates/util/src/main.rs | 1 + libm/etc/function-definitions.json | 7 +++++++ libm/etc/function-list.txt | 1 + libm/src/math/fmodf16.rs | 5 +++++ libm/src/math/mod.rs | 2 ++ 10 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 libm/src/math/fmodf16.rs diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index fbe0702a6..69fe45e03 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ b/libm/crates/libm-macros/src/shared.rs @@ -47,7 +47,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F16, Signature { args: &[Ty::F16, Ty::F16], returns: &[Ty::F16] }, None, - &["copysignf16", "fdimf16", "fmaxf16", "fminf16"], + &["copysignf16", "fdimf16", "fmaxf16", "fminf16", "fmodf16"], ), ( // `(f32, f32) -> f32` diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs index 84f953262..97e78d8f1 100644 --- a/libm/crates/libm-test/benches/icount.rs +++ b/libm/crates/libm-test/benches/icount.rs @@ -111,6 +111,7 @@ main!( icount_bench_fmin_group, icount_bench_fminf_group, icount_bench_fmod_group, + icount_bench_fmodf16_group, 
icount_bench_fmodf_group, icount_bench_frexp_group, icount_bench_frexpf_group, diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index aac8379fd..3e816e81a 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -131,6 +131,7 @@ libm_macros::for_each_function! { | fmaxf16 | fminf128 | fminf16 + | fmodf16 | rintf128 | rintf16 | roundf128 diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index da674c162..56234b14a 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -152,6 +152,7 @@ libm_macros::for_each_function! { floorf16, fmod, fmodf, + fmodf16, frexp, frexpf, ilogb, @@ -525,6 +526,22 @@ impl MpOp for crate::op::lgammaf_r::Routine { } } +// No fmodf128 yet +impl MpOp for crate::op::fmodf16::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = this.0.rem_assign_round(&this.1, Nearest); + prep_retval::(&mut this.0, ord) + } +} + /* stub implementations so we don't need to special case them */ impl MpOp for crate::op::nextafter::Routine { diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index ca070e8f6..46474c046 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -93,6 +93,7 @@ libm_macros::for_each_function! 
{ fmaxf16, fminf128, fminf16, + fmodf16, rintf128, rintf16, roundf128, diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index eb8e37589..999b03af9 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -100,6 +100,7 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { | fmaxf16 | fminf128 | fminf16 + | fmodf16 | rintf128 | rintf16 | roundf128 diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 866e9a439..966060f77 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -449,6 +449,13 @@ ], "type": "f32" }, + "fmodf16": { + "sources": [ + "src/math/fmodf16.rs", + "src/math/generic/fmod.rs" + ], + "type": "f16" + }, "frexp": { "sources": [ "src/libm_helper.rs", diff --git a/libm/etc/function-list.txt b/libm/etc/function-list.txt index 25b92e58b..ff4de0cb5 100644 --- a/libm/etc/function-list.txt +++ b/libm/etc/function-list.txt @@ -63,6 +63,7 @@ fminf128 fminf16 fmod fmodf +fmodf16 frexp frexpf hypot diff --git a/libm/src/math/fmodf16.rs b/libm/src/math/fmodf16.rs new file mode 100644 index 000000000..11972a7de --- /dev/null +++ b/libm/src/math/fmodf16.rs @@ -0,0 +1,5 @@ +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf16(x: f16, y: f16) -> f16 { + super::generic::fmod(x, y) +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index cb83b2587..aab551bed 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -348,6 +348,7 @@ cfg_if! { mod floorf16; mod fmaxf16; mod fminf16; + mod fmodf16; mod rintf16; mod roundf16; mod sqrtf16; @@ -360,6 +361,7 @@ cfg_if! 
{ pub use self::floorf16::floorf16; pub use self::fmaxf16::fmaxf16; pub use self::fminf16::fminf16; + pub use self::fmodf16::fmodf16; pub use self::rintf16::rintf16; pub use self::roundf16::roundf16; pub use self::sqrtf16::sqrtf16; From bb10fdb17683c6b52a0a0cb897050dc5367cade7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 24 Jan 2025 06:24:45 +0000 Subject: [PATCH 1125/1459] Increase or set CI timeouts With the new routines, some of our tests are running close to their timeouts. Increase the timeout for test jobs, and set a short timeout for all other jobs that did not have one. --- libm/.github/workflows/main.yaml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index 89c5facef..599552711 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -15,7 +15,7 @@ env: jobs: test: name: Build and test - timeout-minutes: 40 + timeout-minutes: 60 strategy: fail-fast: false matrix: @@ -123,6 +123,7 @@ jobs: clippy: name: Clippy runs-on: ubuntu-24.04 + timeout-minutes: 10 steps: - uses: actions/checkout@master - name: Install Rust @@ -138,6 +139,7 @@ jobs: builtins: name: Check use with compiler-builtins runs-on: ubuntu-24.04 + timeout-minutes: 10 steps: - uses: actions/checkout@master - name: Install Rust @@ -194,6 +196,7 @@ jobs: msrv: name: Check MSRV runs-on: ubuntu-24.04 + timeout-minutes: 10 env: RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings` steps: @@ -210,6 +213,7 @@ jobs: rustfmt: name: Rustfmt runs-on: ubuntu-24.04 + timeout-minutes: 10 steps: - uses: actions/checkout@master - name: Install Rust @@ -223,6 +227,7 @@ jobs: calculate_extensive_matrix: name: Calculate job matrix runs-on: ubuntu-24.04 + timeout-minutes: 10 outputs: matrix: ${{ steps.script.outputs.matrix }} steps: @@ -242,7 +247,7 @@ jobs: - clippy - calculate_extensive_matrix runs-on: ubuntu-24.04 - timeout-minutes: 80 + timeout-minutes: 180 
strategy: matrix: # Use the output from `calculate_extensive_matrix` to calculate the matrix @@ -286,6 +291,7 @@ jobs: - rustfmt - extensive runs-on: ubuntu-24.04 + timeout-minutes: 10 # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency # failed" as success. So we have to do some contortions to ensure the job fails if any of its # dependencies fails. From 950f58b7eb83c149b87d723067a1681e11db6f81 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 24 Jan 2025 07:59:09 +0000 Subject: [PATCH 1126/1459] Add way to override the number of iterations for specific tests Certain functions (`fmodf128`) are significantly slower than others, to the point that running the default number of tests adds tens of minutes to PR CI and extensive test time increases to ~1day. It does not make sense to do this by default; so, introduce `EXTREMELY_SLOW_TESTS` to test configuration that allows setting specific tests that need to have a reduced iteration count. --- libm/crates/libm-test/src/lib.rs | 4 ++-- libm/crates/libm-test/src/run_cfg.rs | 34 ++++++++++++++++++++++------ 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index b90423c1b..78b011b1f 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -28,7 +28,7 @@ pub use op::{ Ty, }; pub use precision::{MaybeOverride, SpecialCase, default_ulp}; -use run_cfg::EXTENSIVE_MAX_ITERATIONS; +use run_cfg::extensive_max_iterations; pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, skip_extensive_test}; pub use test_traits::{CheckOutput, Hex, TupleCall}; @@ -89,7 +89,7 @@ pub fn test_log(s: &str) { writeln!(f, "cargo features: {}", env!("CFG_CARGO_FEATURES")).unwrap(); writeln!(f, "opt level: {}", env!("CFG_OPT_LEVEL")).unwrap(); writeln!(f, "target features: {}", env!("CFG_TARGET_FEATURES")).unwrap(); - writeln!(f, "extensive iterations {}", 
*EXTENSIVE_MAX_ITERATIONS).unwrap(); + writeln!(f, "extensive iterations {}", extensive_max_iterations()).unwrap(); Some(f) }); diff --git a/libm/crates/libm-test/src/run_cfg.rs b/libm/crates/libm-test/src/run_cfg.rs index 3e91101f6..c76b6699f 100644 --- a/libm/crates/libm-test/src/run_cfg.rs +++ b/libm/crates/libm-test/src/run_cfg.rs @@ -13,18 +13,27 @@ pub const EXTENSIVE_ENV: &str = "LIBM_EXTENSIVE_TESTS"; /// Specify the number of iterations via this environment variable, rather than using the default. pub const EXTENSIVE_ITER_ENV: &str = "LIBM_EXTENSIVE_ITERATIONS"; +/// The override value, if set by the above environment. +static EXTENSIVE_ITER_OVERRIDE: LazyLock<Option<u64>> = LazyLock::new(|| { + env::var(EXTENSIVE_ITER_ENV).map(|v| v.parse().expect("failed to parse iteration count")).ok() +}); + +/// Specific tests that need to have a reduced amount of iterations to complete in a reasonable +/// amount of time. +/// +/// Contains the identifier+generator combo to match on, plus the factor to reduce by. +const EXTEMELY_SLOW_TESTS: &[(Identifier, GeneratorKind, u64)] = &[]; + /// Maximum number of iterations to run for a single routine. /// /// The default value of one greater than `u32::MAX` allows testing single-argument `f32` routines /// and single- or double-argument `f16` routines exhaustively. `f64` and `f128` can't feasibly /// be tested exhaustively; however, [`EXTENSIVE_ITER_ENV`] can be set to run tests for multiple /// hours. -pub static EXTENSIVE_MAX_ITERATIONS: LazyLock<u64> = LazyLock::new(|| { - let default = 1 << 32; - env::var(EXTENSIVE_ITER_ENV) - .map(|v| v.parse().expect("failed to parse iteration count")) - .unwrap_or(default) -}); +pub fn extensive_max_iterations() -> u64 { + let default = 1 << 32; // default value + EXTENSIVE_ITER_OVERRIDE.unwrap_or(default) +} /// Context passed to [`CheckOutput`].
#[derive(Clone, Debug, PartialEq, Eq)] @@ -206,12 +215,23 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { let mut total_iterations = match ctx.gen_kind { GeneratorKind::QuickSpaced => domain_iter_count, GeneratorKind::Random => random_iter_count, - GeneratorKind::Extensive => *EXTENSIVE_MAX_ITERATIONS, + GeneratorKind::Extensive => extensive_max_iterations(), GeneratorKind::EdgeCases => { unimplemented!("edge case tests shoudn't need `iteration_count`") } }; + // Some tests are significantly slower than others and need to be further reduced. + if let Some((_id, _gen, scale)) = EXTEMELY_SLOW_TESTS + .iter() + .find(|(id, gen, _scale)| *id == ctx.fn_ident && *gen == ctx.gen_kind) + { + // However, do not override if the extensive iteration count has been manually set. + if !(ctx.gen_kind == GeneratorKind::Extensive && EXTENSIVE_ITER_OVERRIDE.is_some()) { + total_iterations /= scale; + } + } + // FMA has a huge domain but is reasonably fast to run, so increase iterations. if ctx.base_name == BaseName::Fma { total_iterations *= 4; From 7472d366a3f6c6a10d4baac5649467085f647fcc Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 24 Jan 2025 05:58:08 +0000 Subject: [PATCH 1127/1459] Add `fmodf128` This function is significantly slower than all others so includes an override in `EXTREMELY_SLOW_TESTS`. Without it, PR CI takes ~1hour and the extensive tests in CI take ~1day. 
--- libm/crates/libm-macros/src/shared.rs | 2 +- libm/crates/libm-test/benches/icount.rs | 1 + libm/crates/libm-test/benches/random.rs | 1 + libm/crates/libm-test/src/mpfloat.rs | 47 +++++++------------ libm/crates/libm-test/src/run_cfg.rs | 5 +- .../libm-test/tests/compare_built_musl.rs | 1 + libm/crates/util/src/main.rs | 1 + libm/etc/function-definitions.json | 7 +++ libm/etc/function-list.txt | 1 + libm/src/math/fmodf128.rs | 5 ++ libm/src/math/mod.rs | 2 + 11 files changed, 40 insertions(+), 33 deletions(-) create mode 100644 libm/src/math/fmodf128.rs diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index 69fe45e03..b1f4f46cc 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ b/libm/crates/libm-macros/src/shared.rs @@ -90,7 +90,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F128, Signature { args: &[Ty::F128, Ty::F128], returns: &[Ty::F128] }, None, - &["copysignf128", "fdimf128", "fmaxf128", "fminf128"], + &["copysignf128", "fdimf128", "fmaxf128", "fminf128", "fmodf128"], ), ( // `(f32, f32, f32) -> f32` diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs index 97e78d8f1..46a659524 100644 --- a/libm/crates/libm-test/benches/icount.rs +++ b/libm/crates/libm-test/benches/icount.rs @@ -111,6 +111,7 @@ main!( icount_bench_fmin_group, icount_bench_fminf_group, icount_bench_fmod_group, + icount_bench_fmodf128_group, icount_bench_fmodf16_group, icount_bench_fmodf_group, icount_bench_frexp_group, diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 3e816e81a..ca9e86c10 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -131,6 +131,7 @@ libm_macros::for_each_function! 
{ | fmaxf16 | fminf128 | fminf16 + | fmodf128 | fmodf16 | rintf128 | rintf16 diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 56234b14a..98b80505f 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -152,6 +152,7 @@ libm_macros::for_each_function! { floorf16, fmod, fmodf, + fmodf128, fmodf16, frexp, frexpf, @@ -301,21 +302,6 @@ macro_rules! impl_op_for_ty { } } - impl MpOp for crate::op::[]::Routine { - type MpTy = (MpFloat, MpFloat); - - fn new_mp() -> Self::MpTy { - (new_mpfloat::(), new_mpfloat::()) - } - - fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { - this.0.assign(input.0); - this.1.assign(input.1); - let ord = this.0.rem_assign_round(&this.1, Nearest); - prep_retval::(&mut this.0, ord) - } - } - impl MpOp for crate::op::[]::Routine { type MpTy = MpFloat; @@ -481,6 +467,21 @@ macro_rules! impl_op_for_ty_all { prep_retval::(&mut this.0, Ordering::Equal) } } + + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = this.0.rem_assign_round(&this.1, Nearest); + prep_retval::(&mut this.0, ord) + } + } } }; } @@ -526,22 +527,6 @@ impl MpOp for crate::op::lgammaf_r::Routine { } } -// No fmodf128 yet -impl MpOp for crate::op::fmodf16::Routine { - type MpTy = (MpFloat, MpFloat); - - fn new_mp() -> Self::MpTy { - (new_mpfloat::(), new_mpfloat::()) - } - - fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { - this.0.assign(input.0); - this.1.assign(input.1); - let ord = this.0.rem_assign_round(&this.1, Nearest); - prep_retval::(&mut this.0, ord) - } -} - /* stub implementations so we don't need to special case them */ impl MpOp for crate::op::nextafter::Routine { diff --git 
a/libm/crates/libm-test/src/run_cfg.rs b/libm/crates/libm-test/src/run_cfg.rs index c76b6699f..783142e37 100644 --- a/libm/crates/libm-test/src/run_cfg.rs +++ b/libm/crates/libm-test/src/run_cfg.rs @@ -22,7 +22,10 @@ static EXTENSIVE_ITER_OVERRIDE: LazyLock> = LazyLock::new(|| { /// amount of time. /// /// Contains the itentifier+generator combo to match on, plus the factor to reduce by. -const EXTEMELY_SLOW_TESTS: &[(Identifier, GeneratorKind, u64)] = &[]; +const EXTEMELY_SLOW_TESTS: &[(Identifier, GeneratorKind, u64)] = &[ + (Identifier::Fmodf128, GeneratorKind::QuickSpaced, 40), + (Identifier::Fmodf128, GeneratorKind::Extensive, 40), +]; /// Maximum number of iterations to run for a single routine. /// diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 46474c046..5466edf4f 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -93,6 +93,7 @@ libm_macros::for_each_function! 
{ fmaxf16, fminf128, fminf16, + fmodf128, fmodf16, rintf128, rintf16, diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index 999b03af9..f4ee8fd2e 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -100,6 +100,7 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { | fmaxf16 | fminf128 | fminf16 + | fmodf128 | fmodf16 | rintf128 | rintf16 diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 966060f77..574ffea2e 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -449,6 +449,13 @@ ], "type": "f32" }, + "fmodf128": { + "sources": [ + "src/math/fmodf128.rs", + "src/math/generic/fmod.rs" + ], + "type": "f128" + }, "fmodf16": { "sources": [ "src/math/fmodf16.rs", diff --git a/libm/etc/function-list.txt b/libm/etc/function-list.txt index ff4de0cb5..d82838b32 100644 --- a/libm/etc/function-list.txt +++ b/libm/etc/function-list.txt @@ -63,6 +63,7 @@ fminf128 fminf16 fmod fmodf +fmodf128 fmodf16 frexp frexpf diff --git a/libm/src/math/fmodf128.rs b/libm/src/math/fmodf128.rs new file mode 100644 index 000000000..ff0e0493e --- /dev/null +++ b/libm/src/math/fmodf128.rs @@ -0,0 +1,5 @@ +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf128(x: f128, y: f128) -> f128 { + super::generic::fmod(x, y) +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index aab551bed..969c1bfd9 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -378,6 +378,7 @@ cfg_if! { mod floorf128; mod fmaxf128; mod fminf128; + mod fmodf128; mod rintf128; mod roundf128; mod sqrtf128; @@ -390,6 +391,7 @@ cfg_if! 
{ pub use self::floorf128::floorf128; pub use self::fmaxf128::fmaxf128; pub use self::fminf128::fminf128; + pub use self::fmodf128::fmodf128; pub use self::rintf128::rintf128; pub use self::roundf128::roundf128; pub use self::sqrtf128::sqrtf128; From 3c5b34d04df04ceb94b3397600ba3028d2d8ab29 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 24 Jan 2025 09:11:07 +0000 Subject: [PATCH 1128/1459] Enable missing icount benchmarks A few new functions were added but this list did not get updated. Do so here. --- libm/crates/libm-test/benches/icount.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs index 46a659524..d5026f461 100644 --- a/libm/crates/libm-test/benches/icount.rs +++ b/libm/crates/libm-test/benches/icount.rs @@ -107,8 +107,12 @@ main!( icount_bench_fma_group, icount_bench_fmaf_group, icount_bench_fmax_group, + icount_bench_fmaxf128_group, + icount_bench_fmaxf16_group, icount_bench_fmaxf_group, icount_bench_fmin_group, + icount_bench_fminf128_group, + icount_bench_fminf16_group, icount_bench_fminf_group, icount_bench_fmod_group, icount_bench_fmodf128_group, @@ -155,6 +159,8 @@ main!( icount_bench_rintf16_group, icount_bench_rintf_group, icount_bench_round_group, + icount_bench_roundf128_group, + icount_bench_roundf16_group, icount_bench_roundf_group, icount_bench_scalbn_group, icount_bench_scalbnf_group, From cd0a9321cb4dda4fa6bf8ad5904e6f7e1fd14520 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 25 Jan 2025 00:50:02 +0000 Subject: [PATCH 1129/1459] Upgrade all dependencies to the latest version In particular, this includes updates to Rug that we can make use of [1], [2], [3], [4]. 
[1]: https://gitlab.com/tspiteri/rug/-/issues/78 [2]: https://gitlab.com/tspiteri/rug/-/issues/80 [3]: https://gitlab.com/tspiteri/rug/-/issues/76 [4]: https://gitlab.com/tspiteri/rug/-/issues/73 --- libm/Cargo.toml | 2 +- libm/crates/libm-macros/Cargo.toml | 6 +++--- libm/crates/libm-test/Cargo.toml | 4 ++-- libm/crates/musl-math-sys/Cargo.toml | 2 +- libm/crates/util/Cargo.toml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 18d89997d..7b6f9e1ce 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -59,7 +59,7 @@ exclude = [ ] [dev-dependencies] -no-panic = "0.1.30" +no-panic = "0.1.33" [profile.release] # Options for no-panic to correctly detect the lack of panics diff --git a/libm/crates/libm-macros/Cargo.toml b/libm/crates/libm-macros/Cargo.toml index 9194232b2..f0de0e176 100644 --- a/libm/crates/libm-macros/Cargo.toml +++ b/libm/crates/libm-macros/Cargo.toml @@ -9,9 +9,9 @@ proc-macro = true [dependencies] heck = "0.5.0" -proc-macro2 = "1.0.88" -quote = "1.0.37" -syn = { version = "2.0.79", features = ["full", "extra-traits", "visit-mut"] } +proc-macro2 = "1.0.93" +quote = "1.0.38" +syn = { version = "2.0.96", features = ["full", "extra-traits", "visit-mut"] } [lints.rust] # Values used during testing diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 3a1ba8796..137b81464 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -27,7 +27,7 @@ icount = ["dep:iai-callgrind"] short-benchmarks = [] [dependencies] -anyhow = "1.0.90" +anyhow = "1.0.95" az = { version = "1.2.1", optional = true } gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false, features = ["mpfr"] } iai-callgrind = { version = "0.14.0", optional = true } @@ -39,7 +39,7 @@ paste = "1.0.15" rand = "0.8.5" rand_chacha = "0.3.1" rayon = "1.10.0" -rug = { version = "1.26.1", optional = true, default-features = false, features = ["float", "integer", 
"std"] } +rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "integer", "std"] } [target.'cfg(target_family = "wasm")'.dependencies] # Enable randomness on WASM diff --git a/libm/crates/musl-math-sys/Cargo.toml b/libm/crates/musl-math-sys/Cargo.toml index 7f6272d79..cde78fd3c 100644 --- a/libm/crates/musl-math-sys/Cargo.toml +++ b/libm/crates/musl-math-sys/Cargo.toml @@ -10,4 +10,4 @@ publish = false libm = { path = "../../" } [build-dependencies] -cc = "1.1.24" +cc = "1.2.10" diff --git a/libm/crates/util/Cargo.toml b/libm/crates/util/Cargo.toml index acf5db704..51f44dddf 100644 --- a/libm/crates/util/Cargo.toml +++ b/libm/crates/util/Cargo.toml @@ -16,4 +16,4 @@ libm = { path = "../..", default-features = false } libm-macros = { path = "../libm-macros" } libm-test = { path = "../libm-test", default-features = false } musl-math-sys = { path = "../musl-math-sys", optional = true } -rug = { version = "1.26.1", optional = true, default-features = false, features = ["float", "std"] } +rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "std"] } From 9e344d0f6917a4ad6160fb4bb8ab532c541839a9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 25 Jan 2025 00:55:03 +0000 Subject: [PATCH 1130/1459] Use `az` exported from Rug Since Rug 1.27.0, `az` is reexported. This means we no longer need to track it as a separate dependency. 
--- libm/crates/libm-test/Cargo.toml | 3 +-- libm/crates/libm-test/src/mpfloat.rs | 2 +- libm/crates/util/Cargo.toml | 3 +-- libm/crates/util/src/main.rs | 4 ++-- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 137b81464..31cbf6e68 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -12,7 +12,7 @@ unstable-float = ["libm/unstable-float", "rug?/nightly-float"] # Generate tests which are random inputs and the outputs are calculated with # musl libc. -build-mpfr = ["dep:az", "dep:rug", "dep:gmp-mpfr-sys"] +build-mpfr = ["dep:rug", "dep:gmp-mpfr-sys"] # Build our own musl for testing and benchmarks build-musl = ["dep:musl-math-sys"] @@ -28,7 +28,6 @@ short-benchmarks = [] [dependencies] anyhow = "1.0.95" -az = { version = "1.2.1", optional = true } gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false, features = ["mpfr"] } iai-callgrind = { version = "0.14.0", optional = true } indicatif = { version = "0.17.9", default-features = false } diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 98b80505f..e2be6584d 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -6,10 +6,10 @@ use std::cmp::Ordering; use std::ffi::{c_int, c_long}; -use az::Az; use gmp_mpfr_sys::mpfr::rnd_t; use rug::Assign; pub use rug::Float as MpFloat; +use rug::az::{self, Az}; use rug::float::Round; use rug::float::Round::Nearest; use rug::ops::{PowAssignRound, RemAssignRound}; diff --git a/libm/crates/util/Cargo.toml b/libm/crates/util/Cargo.toml index 51f44dddf..8005459db 100644 --- a/libm/crates/util/Cargo.toml +++ b/libm/crates/util/Cargo.toml @@ -7,11 +7,10 @@ publish = false [features] default = ["build-musl", "build-mpfr", "unstable-float"] build-musl = ["libm-test/build-musl", "dep:musl-math-sys"] -build-mpfr = ["libm-test/build-mpfr", "dep:az", "dep:rug"] 
+build-mpfr = ["libm-test/build-mpfr", "dep:rug"] unstable-float = ["libm/unstable-float", "libm-test/unstable-float", "rug?/nightly-float"] [dependencies] -az = { version = "1.2.1", optional = true } libm = { path = "../..", default-features = false } libm-macros = { path = "../libm-macros" } libm-test = { path = "../libm-test", default-features = false } diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index f4ee8fd2e..6ea1be3d9 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -8,12 +8,12 @@ use std::env; use std::num::ParseIntError; use std::str::FromStr; -#[cfg(feature = "build-mpfr")] -use az::Az; use libm::support::{hf32, hf64}; #[cfg(feature = "build-mpfr")] use libm_test::mpfloat::MpOp; use libm_test::{MathOp, TupleCall}; +#[cfg(feature = "build-mpfr")] +use rug::az::{self, Az}; const USAGE: &str = "\ usage: From 5fb34f9a7a09368c0fb4215affa6eaf40ed199d5 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 25 Jan 2025 01:00:01 +0000 Subject: [PATCH 1131/1459] Use `frexp` from Rug Rug 1.27.0 exposes `frexp`. Make use of it for our tests. --- libm/crates/libm-test/src/mpfloat.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index e2be6584d..6896425d1 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -310,13 +310,8 @@ macro_rules! impl_op_for_ty { } fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { - // Implementation taken from `rug::Float::to_f32_exp`. 
this.assign(input.0); - let exp = this.get_exp().unwrap_or(0); - if exp != 0 { - *this >>= exp; - } - + let exp = this.frexp_mut(); (prep_retval::(this, Ordering::Equal), exp) } } From f37f1aac9daa71f2687d6b721527c61b72e0f218 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 25 Jan 2025 01:01:24 +0000 Subject: [PATCH 1132/1459] Use `remquo` from Rug Rug 1.27.0 exposes `remquo`; make use of it for our tests. Removing our workaround also allows removing the direct dependency on `gmp-mpfr-sys` --- libm/crates/libm-test/Cargo.toml | 3 +- libm/crates/libm-test/src/mpfloat.rs | 38 ++------------------------ libm/crates/libm-test/src/precision.rs | 31 --------------------- 3 files changed, 5 insertions(+), 67 deletions(-) diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 31cbf6e68..dcbddb667 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -28,7 +28,8 @@ short-benchmarks = [] [dependencies] anyhow = "1.0.95" -gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false, features = ["mpfr"] } +# This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`. +gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false } iai-callgrind = { version = "0.14.0", optional = true } indicatif = { version = "0.17.9", default-features = false } libm = { path = "../..", features = ["unstable-public-internals"] } diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 6896425d1..3d84740cc 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -4,13 +4,10 @@ //! a struct named `Operation` that implements [`MpOp`]. 
use std::cmp::Ordering; -use std::ffi::{c_int, c_long}; -use gmp_mpfr_sys::mpfr::rnd_t; use rug::Assign; pub use rug::Float as MpFloat; use rug::az::{self, Az}; -use rug::float::Round; use rug::float::Round::Nearest; use rug::ops::{PowAssignRound, RemAssignRound}; @@ -401,28 +398,20 @@ macro_rules! impl_op_for_ty { } impl MpOp for crate::op::[]::Routine { - type MpTy = (MpFloat, MpFloat, MpFloat); + type MpTy = (MpFloat, MpFloat); fn new_mp() -> Self::MpTy { ( new_mpfloat::(), new_mpfloat::(), - new_mpfloat::() ) } fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { this.0.assign(input.0); this.1.assign(input.1); - let (ord, ql) = mpfr_remquo(&mut this.2, &this.0, &this.1, Nearest); - - // `remquo` integer results are sign-magnitude representation. Transfer the - // sign bit from the long result to the int result. - let clear = !(1 << (c_int::BITS - 1)); - let sign = ((ql >> (c_long::BITS - 1)) as i32) << (c_int::BITS - 1); - let q = (ql as i32) & clear | sign; - - (prep_retval::(&mut this.2, ord), q) + let (ord, q) = this.0.remainder_quo31_round(&this.1, Nearest); + (prep_retval::(&mut this.0, ord), q) } } @@ -547,24 +536,3 @@ impl MpOp for crate::op::nextafterf::Routine { unimplemented!("nextafter does not yet have a MPFR operation"); } } - -/// `rug` does not provide `remquo` so this exposes `mpfr_remquo`. See rug#76. -fn mpfr_remquo(r: &mut MpFloat, x: &MpFloat, y: &MpFloat, round: Round) -> (Ordering, c_long) { - let r = r.as_raw_mut(); - let x = x.as_raw(); - let y = y.as_raw(); - let mut q: c_long = 0; - - let round = match round { - Round::Nearest => rnd_t::RNDN, - Round::Zero => rnd_t::RNDZ, - Round::Up => rnd_t::RNDU, - Round::Down => rnd_t::RNDD, - Round::AwayZero => rnd_t::RNDA, - _ => unreachable!(), - }; - - // SAFETY: mutable and const pointers are valid and do not alias, by Rust's rules. 
- let ord = unsafe { gmp_mpfr_sys::mpfr::remquo(r, &mut q, x, y, round) }; - (ord.cmp(&0), q) -} diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index bed615882..ffb322e38 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -405,15 +405,6 @@ impl MaybeOverride<(f32, f32)> for SpecialCase { ) -> CheckAction { binop_common(input, actual, expected, ctx) } - - fn check_int( - _input: (f32, f32), - actual: I, - expected: I, - ctx: &CheckCtx, - ) -> CheckAction { - remquo_common(actual, expected, ctx) - } } impl MaybeOverride<(f64, f64)> for SpecialCase { @@ -425,15 +416,6 @@ impl MaybeOverride<(f64, f64)> for SpecialCase { ) -> CheckAction { binop_common(input, actual, expected, ctx) } - - fn check_int( - _input: (f64, f64), - actual: I, - expected: I, - ctx: &CheckCtx, - ) -> CheckAction { - remquo_common(actual, expected, ctx) - } } #[cfg(f128_enabled)] @@ -496,19 +478,6 @@ fn binop_common( DEFAULT } -fn remquo_common(actual: I, expected: I, ctx: &CheckCtx) -> CheckAction { - // FIXME: Our MPFR implementation disagrees with musl and may need to be updated. - if ctx.basis == CheckBasis::Mpfr - && ctx.base_name == BaseName::Remquo - && expected == I::MIN - && actual == I::ZERO - { - return XFAIL("remquo integer mismatch"); - } - - DEFAULT -} - impl MaybeOverride<(i32, f32)> for SpecialCase { fn check_float( input: (i32, f32), From d5d3fea86b087d56225be6d180ddb84319826515 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 25 Jan 2025 04:18:44 +0000 Subject: [PATCH 1133/1459] Remove remnants of the `checked` feature The Cargo feature `checked` was added in 410b0633a6b9 ("Overhaul tests") and later removed in e4ac1399062c ("swap stable to be unstable, checked is now debug_assertions"). However, there are a few remaining uses of `feature = "checked"` that did not get removed. Clean these up here. 
--- libm/build.rs | 19 +++++++------------ .../compiler-builtins-smoke-test/Cargo.toml | 1 - libm/src/math/rem_pio2_large.rs | 5 +++-- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/libm/build.rs b/libm/build.rs index ca4a639a1..caf5a108a 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -8,18 +8,13 @@ fn main() { println!("cargo:rerun-if-changed=build.rs"); println!("cargo:rustc-check-cfg=cfg(assert_no_panic)"); - println!("cargo:rustc-check-cfg=cfg(feature, values(\"checked\"))"); - - #[allow(unexpected_cfgs)] - if !cfg!(feature = "checked") { - let lvl = env::var("OPT_LEVEL").unwrap(); - if lvl != "0" && !cfg!(debug_assertions) { - println!("cargo:rustc-cfg=assert_no_panic"); - } else if env::var("ENSURE_NO_PANIC").is_ok() { - // Give us a defensive way of ensureing that no-panic is checked when we - // expect it to be. - panic!("`assert_no_panic `was not enabled"); - } + let lvl = env::var("OPT_LEVEL").unwrap(); + if lvl != "0" && !cfg!(debug_assertions) { + println!("cargo:rustc-cfg=assert_no_panic"); + } else if env::var("ENSURE_NO_PANIC").is_ok() { + // Give us a defensive way of ensureing that no-panic is checked when we + // expect it to be. 
+ panic!("`assert_no_panic `was not enabled"); } configure::emit_libm_config(&cfg); diff --git a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml index d578b0dcd..24b33645e 100644 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -22,7 +22,6 @@ unexpected_cfgs = { level = "warn", check-cfg = [ "cfg(arch_enabled)", "cfg(assert_no_panic)", "cfg(intrinsics_enabled)", - 'cfg(feature, values("checked"))', 'cfg(feature, values("force-soft-floats"))', 'cfg(feature, values("unstable"))', 'cfg(feature, values("unstable-intrinsics"))', diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index ec8397f4b..6d679bbe9 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -226,8 +226,9 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24 let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24) - #[cfg(all(target_pointer_width = "64", feature = "checked"))] - assert!(e0 <= 16360); + if cfg!(target_pointer_width = "64") { + debug_assert!(e0 <= 16360); + } let nx = x.len(); From a941aa904a246ae0c22aaa3ea2557e923a15f904 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 25 Jan 2025 05:22:11 +0000 Subject: [PATCH 1134/1459] Rework the available Cargo profiles Currently the default release profile enables LTO and single CGU builds, which is very slow to build. Most tests are better run with optimizations enabled since it allows testing a much larger number of inputs, so it is inconvenient that building can sometimes take significantly longer than the tests. Remedy this by doing the following: * Move the existing `release` profile to `release-opt`. * With the above, the default `release` profile is untouched (16 CGUs and thin local LTO). 
* `release-checked` inherits `release`, so no LTO or single CGU. This means that the simple `cargo test --release` becomes much faster for local development. We are able to enable the other profiles as needed in CI. Tests should ideally still be run with `--profile release-checked` to ensure there are no debug assertions or unexpected wrapping math hit. `no-panic` still needs a single CGU, so must be run with `--profile release-opt`. Since it is not possible to detect CGU or profile configuration from within build scripts, the `ENSURE_NO_PANIC` environment variable must now always be set. --- libm/Cargo.toml | 14 ++++++++------ libm/build.rs | 8 ++------ libm/ci/run.sh | 12 +++++++++++- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 7b6f9e1ce..08342a929 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -61,18 +61,20 @@ exclude = [ [dev-dependencies] no-panic = "0.1.33" -[profile.release] -# Options for no-panic to correctly detect the lack of panics -codegen-units = 1 -lto = "fat" +# The default release profile is unchanged. # Release mode with debug assertions [profile.release-checked] -codegen-units = 1 +inherits = "release" debug-assertions = true +overflow-checks = true + +# Release with maximum optimizations, which is very slow to build. This is also +# what is needed to check `no-panic`. +[profile.release-opt] inherits = "release" +codegen-units = 1 lto = "fat" -overflow-checks = true [profile.bench] # Required for iai-callgrind diff --git a/libm/build.rs b/libm/build.rs index caf5a108a..7042b54d7 100644 --- a/libm/build.rs +++ b/libm/build.rs @@ -8,13 +8,9 @@ fn main() { println!("cargo:rerun-if-changed=build.rs"); println!("cargo:rustc-check-cfg=cfg(assert_no_panic)"); - let lvl = env::var("OPT_LEVEL").unwrap(); - if lvl != "0" && !cfg!(debug_assertions) { + // If set, enable `no-panic`. Requires LTO (`release-opt` profile).
+ if env::var("ENSURE_NO_PANIC").is_ok() { println!("cargo:rustc-cfg=assert_no_panic"); - } else if env::var("ENSURE_NO_PANIC").is_ok() { - // Give us a defensive way of ensureing that no-panic is checked when we - // expect it to be. - panic!("`assert_no_panic `was not enabled"); } configure::emit_libm_config(&cfg); diff --git a/libm/ci/run.sh b/libm/ci/run.sh index 296986d97..a946d325e 100755 --- a/libm/ci/run.sh +++ b/libm/ci/run.sh @@ -117,4 +117,14 @@ $cmd "$profile" release-checked --features unstable-intrinsics $cmd "$profile" release-checked --features unstable-intrinsics --benches # Ensure that the routines do not panic. -ENSURE_NO_PANIC=1 cargo build -p libm --target "$target" --no-default-features --release +# +# `--tests` must be passed because no-panic is only enabled as a dev +# dependency. The `release-opt` profile must be used to enable LTO and a +# single CGU. +ENSURE_NO_PANIC=1 cargo build \ + -p libm \ + --target "$target" \ + --no-default-features \ + --features unstable-float \ + --tests \ + --profile release-opt From dacd8055e737e76a7d1618bcf7852202efe290ca Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 27 Jan 2025 11:37:01 +0000 Subject: [PATCH 1135/1459] Ignore specific `atan2` and `sin` tests on i586 There seems to be a case of unsoundness with the `i586` version of `atan2`. For the following test: assert_eq!(atan2(2.0, -1.0), atan(2.0 / -1.0) + PI);atan2(2.0, -1.0) The output is optimization-dependent. The new `release-checked` profile produces the following failure: thread 'math::atan2::sanity_check' panicked at src/math/atan2.rs:123:5: assertion `left == right` failed left: 2.0344439357957027 right: 2.0344439357957027 Similarly, `sin::test_near_pi` fails with the following: thread 'math::sin::test_near_pi' panicked at src/math/sin.rs:91:5: assertion `left == right` failed left: 6.273720864039203e-7 right: 6.273720864039205e-7 Mark the tests ignored on `i586` for now. 
--- libm/src/math/atan2.rs | 22 ++++++++++++++-------- libm/src/math/sin.rs | 19 +++++++++++-------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/libm/src/math/atan2.rs b/libm/src/math/atan2.rs index b9bf0da93..c668731cf 100644 --- a/libm/src/math/atan2.rs +++ b/libm/src/math/atan2.rs @@ -114,12 +114,18 @@ pub fn atan2(y: f64, x: f64) -> f64 { } } -#[test] -fn sanity_check() { - assert_eq!(atan2(0.0, 1.0), 0.0); - assert_eq!(atan2(0.0, -1.0), PI); - assert_eq!(atan2(-0.0, -1.0), -PI); - assert_eq!(atan2(3.0, 2.0), atan(3.0 / 2.0)); - assert_eq!(atan2(2.0, -1.0), atan(2.0 / -1.0) + PI); - assert_eq!(atan2(-2.0, -1.0), atan(-2.0 / -1.0) - PI); +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[cfg_attr(x86_no_sse, ignore = "FIXME(i586): possible incorrect rounding")] + fn sanity_check() { + assert_eq!(atan2(0.0, 1.0), 0.0); + assert_eq!(atan2(0.0, -1.0), PI); + assert_eq!(atan2(-0.0, -1.0), -PI); + assert_eq!(atan2(3.0, 2.0), atan(3.0 / 2.0)); + assert_eq!(atan2(2.0, -1.0), atan(2.0 / -1.0) + PI); + assert_eq!(atan2(-2.0, -1.0), atan(-2.0 / -1.0) - PI); + } } diff --git a/libm/src/math/sin.rs b/libm/src/math/sin.rs index e04e0d6a0..229fa4bef 100644 --- a/libm/src/math/sin.rs +++ b/libm/src/math/sin.rs @@ -81,12 +81,15 @@ pub fn sin(x: f64) -> f64 { } } -#[test] -fn test_near_pi() { - let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707 - let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7 - let result = sin(x); - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let result = force_eval!(result); - assert_eq!(result, sx); +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[cfg_attr(x86_no_sse, ignore = "FIXME(i586): possible incorrect rounding")] + fn test_near_pi() { + let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707 + let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7 + assert_eq!(sin(x), sx); + } } From 44ac9401a74de35be10b91078c541447338be553 Mon Sep 17 
00:00:00 2001 From: Trevor Gross Date: Mon, 27 Jan 2025 12:56:15 +0000 Subject: [PATCH 1136/1459] Remove use of the `start` feature `#![feature(start)]` was removed in [1], but we make use of it in the intrinsics example. Replace use of this feature with `#[no_mangle]` applied to `#[main]`. We don't actually run this example so it is not a problem if this is not entirely accurate. Currently the example does not run to completion, instead invoking `rust_begin_unwind`. [1]: https://github.com/rust-lang/rust/pull/134299 --- examples/intrinsics.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index e13c0fb1f..59a70e207 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -5,14 +5,13 @@ #![allow(unused_features)] #![allow(internal_features)] -#![cfg_attr(thumb, no_main)] #![deny(dead_code)] #![feature(allocator_api)] #![feature(f128)] #![feature(f16)] #![feature(lang_items)] -#![feature(start)] #![no_std] +#![no_main] extern crate panic_handler; @@ -630,11 +629,10 @@ fn run() { extern "C" { fn rust_begin_unwind(x: usize); } - // if bb(false) { + unsafe { rust_begin_unwind(0); } - // } } fn something_with_a_dtor(f: &dyn Fn()) { @@ -649,15 +647,15 @@ fn something_with_a_dtor(f: &dyn Fn()) { f(); } +#[no_mangle] #[cfg(not(thumb))] -#[start] -fn main(_: isize, _: *const *const u8) -> isize { +fn main(_argc: core::ffi::c_int, _argv: *const *const u8) -> core::ffi::c_int { run(); 0 } -#[cfg(thumb)] #[no_mangle] +#[cfg(thumb)] pub fn _start() -> ! 
{ run(); loop {} From 195cc974aa20a6ca93e7142d6b44f8bb2916464d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 24 Jan 2025 08:45:56 +0000 Subject: [PATCH 1137/1459] Add a version to some FIXMEs that will be resolved in LLVM 20 --- configure.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.rs b/configure.rs index 87bc7a0ee..fa3e302ea 100644 --- a/configure.rs +++ b/configure.rs @@ -74,7 +74,7 @@ pub fn configure_f16_f128(target: &Target) { // Selection failure "s390x" => false, // Infinite recursion - // FIXME(llvm): loongarch fixed by + // FIXME(llvm20): loongarch fixed by "csky" => false, "hexagon" => false, "loongarch64" => false, @@ -91,7 +91,7 @@ pub fn configure_f16_f128(target: &Target) { "amdgpu" => false, // Unsupported "arm64ec" => false, - // Selection failure + // FIXME(llvm20): fixed by "mips64" | "mips64r6" => false, // Selection failure "nvptx64" => false, From 20a0340468802ffa106200b9c9b8d2e1ddde5e06 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 27 Jan 2025 23:25:00 +0000 Subject: [PATCH 1138/1459] Switch musl from a script download to a submodule Rather than keeping a script that downloads the tarball, we can just add musl as a submodule and let git handle the synchronization. Do so here. 
--- libm/.github/workflows/main.yaml | 18 ++++++++---------- libm/.gitignore | 1 - libm/.gitmodules | 4 ++++ libm/CONTRIBUTING.md | 4 ++++ libm/ci/download-musl.sh | 24 ------------------------ libm/crates/musl-math-sys/build.rs | 14 +++----------- libm/crates/musl-math-sys/musl | 1 + 7 files changed, 20 insertions(+), 46 deletions(-) create mode 100644 libm/.gitmodules delete mode 100755 libm/ci/download-musl.sh create mode 160000 libm/crates/musl-math-sys/musl diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index 599552711..e03d7ecd3 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -79,6 +79,8 @@ jobs: - name: Print runner information run: uname -a - uses: actions/checkout@v4 + with: + submodules: true - name: Install Rust (rustup) shell: bash run: | @@ -94,10 +96,6 @@ jobs: with: key: ${{ matrix.target }} - - name: Download musl source - run: ./ci/download-musl.sh - shell: bash - - name: Verify API list if: matrix.os == 'ubuntu-24.04' run: python3 etc/update-api-list.py --check @@ -126,14 +124,14 @@ jobs: timeout-minutes: 10 steps: - uses: actions/checkout@master + with: + submodules: true - name: Install Rust run: | rustup update nightly --no-self-update rustup default nightly rustup component add clippy - uses: Swatinem/rust-cache@v2 - - name: Download musl source - run: ./ci/download-musl.sh - run: cargo clippy --all --all-features --all-targets builtins: @@ -153,6 +151,8 @@ jobs: timeout-minutes: 20 steps: - uses: actions/checkout@master + with: + submodules: true - uses: taiki-e/install-action@cargo-binstall - name: Set up dependencies @@ -166,8 +166,6 @@ jobs: sudo apt-get install valgrind - uses: Swatinem/rust-cache@v2 - - name: Download musl source - run: ./ci/download-musl.sh - name: Run icount benchmarks env: @@ -259,13 +257,13 @@ jobs: CHANGED: ${{ matrix.changed }} steps: - uses: actions/checkout@v4 + with: + submodules: true - name: Install Rust run: | rustup update nightly 
--no-self-update rustup default nightly - uses: Swatinem/rust-cache@v2 - - name: Download musl source - run: ./ci/download-musl.sh - name: Run extensive tests run: | echo "Changed: '$CHANGED'" diff --git a/libm/.gitignore b/libm/.gitignore index a447c34cd..d5caba1a0 100644 --- a/libm/.gitignore +++ b/libm/.gitignore @@ -4,7 +4,6 @@ /math/src target Cargo.lock -musl/ **.tar.gz # Benchmark cache diff --git a/libm/.gitmodules b/libm/.gitmodules new file mode 100644 index 000000000..35b269ead --- /dev/null +++ b/libm/.gitmodules @@ -0,0 +1,4 @@ +[submodule "musl"] + path = crates/musl-math-sys/musl + url = https://git.musl-libc.org/git/musl + shallow = true diff --git a/libm/CONTRIBUTING.md b/libm/CONTRIBUTING.md index ba7f78ca0..dc4006035 100644 --- a/libm/CONTRIBUTING.md +++ b/libm/CONTRIBUTING.md @@ -62,6 +62,10 @@ Check [PR #65] for an example. Normal tests can be executed with: ```sh +# Tests against musl require that the submodule is up to date. +git submodule init +git submodule update + # `--release` ables more test cases cargo test --release ``` diff --git a/libm/ci/download-musl.sh b/libm/ci/download-musl.sh deleted file mode 100755 index 8a8c58550..000000000 --- a/libm/ci/download-musl.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/sh -# Download the expected version of musl to a directory `musl` - -set -eux - -fname=musl-1.2.5.tar.gz -sha=a9a118bbe84d8764da0ea0d28b3ab3fae8477fc7e4085d90102b8596fc7c75e4 - -mkdir musl -curl -L "https://musl.libc.org/releases/$fname" -O --retry 5 - -case "$(uname -s)" in - MINGW*) - # Need to extract the second line because certutil does human output - fsha=$(certutil -hashfile "$fname" SHA256 | sed -n '2p') - [ "$sha" = "$fsha" ] || exit 1 - ;; - *) - echo "$sha $fname" | shasum -a 256 --check || exit 1 - ;; -esac - -tar -xzf "$fname" -C musl --strip-components 1 -rm "$fname" diff --git a/libm/crates/musl-math-sys/build.rs b/libm/crates/musl-math-sys/build.rs index d75748159..f06d84ee2 100644 --- 
a/libm/crates/musl-math-sys/build.rs +++ b/libm/crates/musl-math-sys/build.rs @@ -79,17 +79,12 @@ impl Config { let target_features = env::var("CARGO_CFG_TARGET_FEATURE") .map(|feats| feats.split(',').map(ToOwned::to_owned).collect()) .unwrap_or_default(); - - // Default to the `{workspace_root}/musl` if not specified - let musl_dir = env::var("MUSL_SOURCE_DIR") - .map(PathBuf::from) - .unwrap_or_else(|_| manifest_dir.parent().unwrap().parent().unwrap().join("musl")); + let musl_dir = manifest_dir.join("musl"); let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); let musl_arch = if target_arch == "x86" { "i386".to_owned() } else { target_arch.clone() }; println!("cargo::rerun-if-changed={}/c_patches", manifest_dir.display()); - println!("cargo::rerun-if-env-changed=MUSL_SOURCE_DIR"); println!("cargo::rerun-if-changed={}", musl_dir.display()); Self { @@ -111,13 +106,10 @@ impl Config { /// Build musl math symbols to a static library fn build_musl_math(cfg: &Config) { let musl_dir = &cfg.musl_dir; - assert!( - musl_dir.exists(), - "musl source is missing. it can be downloaded with ./ci/download-musl.sh" - ); - let math = musl_dir.join("src/math"); let arch_dir = musl_dir.join("arch").join(&cfg.musl_arch); + assert!(math.exists(), "musl source not found. Is the submodule up to date?"); + let source_map = find_math_source(&math, cfg); let out_path = cfg.out_dir.join(format!("lib{LIB_NAME}.a")); diff --git a/libm/crates/musl-math-sys/musl b/libm/crates/musl-math-sys/musl new file mode 160000 index 000000000..0784374d5 --- /dev/null +++ b/libm/crates/musl-math-sys/musl @@ -0,0 +1 @@ +Subproject commit 0784374d561435f7c787a555aeab8ede699ed298 From 1dacdabdb6186f97144c50f8952575576deb3730 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 28 Jan 2025 10:31:33 +0000 Subject: [PATCH 1139/1459] Temporarily pin `indicatif` to 0.17.9 0.17.10 introduced a change that removes `Sync` from `ProgressStyle`, which makes it more difficult to share in a callback. 
Pin the dependency for now until we see if `indicatif` will change this back or if we need to find a workaround. --- libm/crates/libm-test/Cargo.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index dcbddb667..63e75260e 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -31,7 +31,8 @@ anyhow = "1.0.95" # This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`. gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false } iai-callgrind = { version = "0.14.0", optional = true } -indicatif = { version = "0.17.9", default-features = false } +# 0.17.10 made `ProgressStyle` non-`Sync` +indicatif = { version = "=0.17.9", default-features = false } libm = { path = "../..", features = ["unstable-public-internals"] } libm-macros = { path = "../libm-macros" } musl-math-sys = { path = "../musl-math-sys", optional = true } From a233db5491360c01af0698784940ec4508738f33 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 28 Jan 2025 15:40:23 +0100 Subject: [PATCH 1140/1459] Don't build out of line atomics support code for uefi https://github.com/llvm/llvm-project/pull/116706 added Windows support to cpu_model. Compiling for UEFI also goes through that code path, because we treat it as a windows target. However, including windows.h is not actually going to work (and the used API would not be available in an UEFI environment). Disable building of cpu_model on UEFI to fix this. --- build.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/build.rs b/build.rs index f512fc2e6..39cee311f 100644 --- a/build.rs +++ b/build.rs @@ -644,9 +644,10 @@ mod c { // Include out-of-line atomics for aarch64, which are all generated by supplying different // sets of flags to the same source file. - // Note: Out-of-line aarch64 atomics are not supported by the msvc toolchain (#430). 
+ // Note: Out-of-line aarch64 atomics are not supported by the msvc toolchain (#430) and + // on uefi. let src_dir = root.join("lib/builtins"); - if target.arch == "aarch64" && target.env != "msvc" { + if target.arch == "aarch64" && target.env != "msvc" && target.os != "uefi" { // See below for why we're building these as separate libraries. build_aarch64_out_of_line_atomics_libraries(&src_dir, cfg); From ad1f6df4a656778578060980fefb6d66f7d68e7e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 28 Jan 2025 13:54:26 -0600 Subject: [PATCH 1141/1459] Revert "Temporarily pin `indicatif` to 0.17.9" This reverts commit 1dacdabdb6186f97144c50f8952575576deb3730. --- libm/crates/libm-test/Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 63e75260e..dcbddb667 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -31,8 +31,7 @@ anyhow = "1.0.95" # This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`. gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false } iai-callgrind = { version = "0.14.0", optional = true } -# 0.17.10 made `ProgressStyle` non-`Sync` -indicatif = { version = "=0.17.9", default-features = false } +indicatif = { version = "0.17.9", default-features = false } libm = { path = "../..", features = ["unstable-public-internals"] } libm-macros = { path = "../libm-macros" } musl-math-sys = { path = "../musl-math-sys", optional = true } From 7570081e5b1ebbbe3447078fcc706fb11f59d6a6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 28 Jan 2025 09:50:21 +0000 Subject: [PATCH 1142/1459] Support parsing NaN and infinities from the `hf*` functions This isn't very useful for constants since the trait constants are available, but does enable roundtripping via hex float syntax. 
--- libm/crates/libm-test/src/f8_impl.rs | 6 +++ libm/crates/libm-test/src/lib.rs | 2 +- libm/src/math/support/hex_float.rs | 63 +++++++++++++++++++++++----- libm/src/math/support/mod.rs | 2 +- 4 files changed, 61 insertions(+), 12 deletions(-) diff --git a/libm/crates/libm-test/src/f8_impl.rs b/libm/crates/libm-test/src/f8_impl.rs index 96b783924..5dce9be18 100644 --- a/libm/crates/libm-test/src/f8_impl.rs +++ b/libm/crates/libm-test/src/f8_impl.rs @@ -3,6 +3,8 @@ use std::cmp::{self, Ordering}; use std::{fmt, ops}; +use libm::support::hex_float::parse_any; + use crate::Float; /// Sometimes verifying float logic is easiest when all values can quickly be checked exhaustively @@ -490,3 +492,7 @@ impl fmt::LowerHex for f8 { self.0.fmt(f) } } + +pub const fn hf8(s: &str) -> f8 { + f8(parse_any(s, 8, 3) as u8) +} diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 78b011b1f..d2fef2325 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -20,7 +20,7 @@ use std::path::PathBuf; use std::sync::LazyLock; use std::time::SystemTime; -pub use f8_impl::f8; +pub use f8_impl::{f8, hf8}; pub use libm::support::{Float, Int, IntTy, MinInt}; pub use num::{FloatExt, linear_ints, logspace}; pub use op::{ diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs index 949f21a57..6eb1bd67a 100644 --- a/libm/src/math/support/hex_float.rs +++ b/libm/src/math/support/hex_float.rs @@ -26,17 +26,25 @@ pub const fn hf128(s: &str) -> f128 { f128::from_bits(parse_any(s, 128, 112)) } -const fn parse_any(s: &str, bits: u32, sig_bits: u32) -> u128 { +/// Parse any float from hex to its bitwise representation. +/// +/// `nan_repr` is passed rather than constructed so the platform-specific NaN is returned. 
+pub const fn parse_any(s: &str, bits: u32, sig_bits: u32) -> u128 { let exp_bits: u32 = bits - sig_bits - 1; let max_msb: i32 = (1 << (exp_bits - 1)) - 1; // The exponent of one ULP in the subnormals let min_lsb: i32 = 1 - max_msb - sig_bits as i32; - let (neg, mut sig, exp) = parse_hex(s.as_bytes()); + let exp_mask = ((1 << exp_bits) - 1) << sig_bits; - if sig == 0 { - return (neg as u128) << (bits - 1); - } + let (neg, mut sig, exp) = match parse_hex(s.as_bytes()) { + Parsed::Finite { neg, sig: 0, .. } => return (neg as u128) << (bits - 1), + Parsed::Finite { neg, sig, exp } => (neg, sig, exp), + Parsed::Infinite { neg } => return ((neg as u128) << (bits - 1)) | exp_mask, + Parsed::Nan { neg } => { + return ((neg as u128) << (bits - 1)) | exp_mask | 1 << (sig_bits - 1); + } + }; // exponents of the least and most significant bits in the value let lsb = sig.trailing_zeros() as i32; @@ -76,11 +84,24 @@ const fn parse_any(s: &str, bits: u32, sig_bits: u32) -> u128 { sig | ((neg as u128) << (bits - 1)) } +/// A parsed floating point number. +enum Parsed { + /// Absolute value sig * 2^e + Finite { + neg: bool, + sig: u128, + exp: i32, + }, + Infinite { + neg: bool, + }, + Nan { + neg: bool, + }, +} + /// Parse a hexadecimal float x -/// returns (s,n,e): -/// s == x.is_sign_negative() -/// n * 2^e == x.abs() -const fn parse_hex(mut b: &[u8]) -> (bool, u128, i32) { +const fn parse_hex(mut b: &[u8]) -> Parsed { let mut neg = false; let mut sig: u128 = 0; let mut exp: i32 = 0; @@ -90,6 +111,12 @@ const fn parse_hex(mut b: &[u8]) -> (bool, u128, i32) { neg = c == b'-'; } + match *b { + [b'i' | b'I', b'n' | b'N', b'f' | b'F'] => return Parsed::Infinite { neg }, + [b'n' | b'N', b'a' | b'A', b'n' | b'N'] => return Parsed::Nan { neg }, + _ => (), + } + if let &[b'0', b'x' | b'X', ref rest @ ..] 
= b { b = rest; } else { @@ -152,7 +179,7 @@ const fn parse_hex(mut b: &[u8]) -> (bool, u128, i32) { exp += pexp; } - (neg, sig, exp) + Parsed::Finite { neg, sig, exp } } const fn dec_digit(c: u8) -> u8 { @@ -272,6 +299,10 @@ mod tests { ("-0x1.998p-4", (-0.1f16).to_bits()), ("0x0.123p-12", 0x0123), ("0x1p-24", 0x0001), + ("nan", f16::NAN.to_bits()), + ("-nan", (-f16::NAN).to_bits()), + ("inf", f16::INFINITY.to_bits()), + ("-inf", f16::NEG_INFINITY.to_bits()), ]; for (s, exp) in checks { println!("parsing {s}"); @@ -322,6 +353,10 @@ mod tests { ("0x1.111114p-127", 0x00444445), ("0x1.23456p-130", 0x00091a2b), ("0x1p-149", 0x00000001), + ("nan", f32::NAN.to_bits()), + ("-nan", (-f32::NAN).to_bits()), + ("inf", f32::INFINITY.to_bits()), + ("-inf", f32::NEG_INFINITY.to_bits()), ]; for (s, exp) in checks { println!("parsing {s}"); @@ -360,6 +395,10 @@ mod tests { ("0x0.8000000000001p-1022", 0x0008000000000001), ("0x0.123456789abcdp-1022", 0x000123456789abcd), ("0x0.0000000000002p-1022", 0x0000000000000002), + ("nan", f64::NAN.to_bits()), + ("-nan", (-f64::NAN).to_bits()), + ("inf", f64::INFINITY.to_bits()), + ("-inf", f64::NEG_INFINITY.to_bits()), ]; for (s, exp) in checks { println!("parsing {s}"); @@ -401,6 +440,10 @@ mod tests { ("-0x1.999999999999999999999999999ap-4", (-0.1f128).to_bits()), ("0x0.abcdef0123456789abcdef012345p-16382", 0x0000abcdef0123456789abcdef012345), ("0x1p-16494", 0x00000000000000000000000000000001), + ("nan", f128::NAN.to_bits()), + ("-nan", (-f128::NAN).to_bits()), + ("inf", f128::INFINITY.to_bits()), + ("-inf", f128::NEG_INFINITY.to_bits()), ]; for (s, exp) in checks { println!("parsing {s}"); diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index da9e2c9ed..b82a2ea05 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -2,7 +2,7 @@ pub mod macros; mod big; mod float_traits; -mod hex_float; +pub mod hex_float; mod int_traits; #[allow(unused_imports)] From 
0524237d35b46839594efa496abae08bfd030c99 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 28 Jan 2025 09:51:43 +0000 Subject: [PATCH 1143/1459] Introduce a wrapper type for IEEE hex float formatting --- libm/src/math/support/hex_float.rs | 183 ++++++++++++++++++++++++++++- libm/src/math/support/mod.rs | 2 +- 2 files changed, 181 insertions(+), 4 deletions(-) diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs index 6eb1bd67a..da41622f2 100644 --- a/libm/src/math/support/hex_float.rs +++ b/libm/src/math/support/hex_float.rs @@ -2,7 +2,9 @@ #![allow(dead_code)] // FIXME: remove once this gets used -use super::{f32_from_bits, f64_from_bits}; +use core::fmt; + +use super::{Float, f32_from_bits, f64_from_bits}; /// Construct a 16-bit float from hex float representation (C-style) #[cfg(f16_enabled)] @@ -42,7 +44,7 @@ pub const fn parse_any(s: &str, bits: u32, sig_bits: u32) -> u128 { Parsed::Finite { neg, sig, exp } => (neg, sig, exp), Parsed::Infinite { neg } => return ((neg as u128) << (bits - 1)) | exp_mask, Parsed::Nan { neg } => { - return ((neg as u128) << (bits - 1)) | exp_mask | 1 << (sig_bits - 1); + return ((neg as u128) << (bits - 1)) | exp_mask | (1 << (sig_bits - 1)); } }; @@ -206,8 +208,107 @@ const fn u128_ilog2(v: u128) -> u32 { u128::BITS - 1 - v.leading_zeros() } +/// Format a floating point number as its IEEE hex (`%a`) representation. +pub struct Hexf(pub F); + +// Adapted from https://github.com/ericseppanen/hexfloat2/blob/a5c27932f0ff/src/format.rs +fn fmt_any_hex(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if x.is_sign_negative() { + write!(f, "-")?; + } + + if x.is_nan() { + return write!(f, "NaN"); + } else if x.is_infinite() { + return write!(f, "inf"); + } else if *x == F::ZERO { + return write!(f, "0x0p+0"); + } + + let mut exponent = x.exp_unbiased(); + let sig = x.to_bits() & F::SIG_MASK; + + let bias = F::EXP_BIAS as i32; + // The mantissa MSB needs to be shifted up to the nearest nibble. 
+ let mshift = (4 - (F::SIG_BITS % 4)) % 4; + let sig = sig << mshift; + // The width is rounded up to the nearest char (4 bits) + let mwidth = (F::SIG_BITS as usize + 3) / 4; + let leading = if exponent == -bias { + // subnormal number means we shift our output by 1 bit. + exponent += 1; + "0." + } else { + "1." + }; + + write!(f, "0x{leading}{sig:0mwidth$x}p{exponent:+}") +} + +#[cfg(f16_enabled)] +impl fmt::LowerHex for Hexf { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt_any_hex(&self.0, f) + } +} + +impl fmt::LowerHex for Hexf { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt_any_hex(&self.0, f) + } +} + +impl fmt::LowerHex for Hexf { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt_any_hex(&self.0, f) + } +} + +#[cfg(f128_enabled)] +impl fmt::LowerHex for Hexf { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt_any_hex(&self.0, f) + } +} + +impl fmt::LowerHex for Hexf { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::LowerHex::fmt(&self.0, f) + } +} + +impl fmt::LowerHex for Hexf<(T1, T2)> +where + T1: Copy, + T2: Copy, + Hexf: fmt::LowerHex, + Hexf: fmt::LowerHex, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) + } +} + +impl fmt::Debug for Hexf +where + Hexf: fmt::LowerHex, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::LowerHex::fmt(self, f) + } +} + +impl fmt::Display for Hexf +where + Hexf: fmt::LowerHex, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::LowerHex::fmt(self, f) + } +} + #[cfg(test)] -mod tests { +mod parse_tests { extern crate std; use std::{format, println}; @@ -666,3 +767,79 @@ mod tests_panicking { #[cfg(f128_enabled)] f128_tests!(); } + +#[cfg(test)] +mod print_tests { + extern crate std; + use std::string::ToString; + + use super::*; + + #[test] + #[cfg(f16_enabled)] + fn test_f16() { + use std::format; + // Exhaustively 
check that `f16` roundtrips. + for x in 0..=u16::MAX { + let f = f16::from_bits(x); + let s = format!("{}", Hexf(f)); + let from_s = hf16(&s); + + if f.is_nan() && from_s.is_nan() { + continue; + } + + assert_eq!( + f.to_bits(), + from_s.to_bits(), + "{f:?} formatted as {s} but parsed as {from_s:?}" + ); + } + } + + #[test] + fn spot_checks() { + assert_eq!(Hexf(f32::MAX).to_string(), "0x1.fffffep+127"); + assert_eq!(Hexf(f64::MAX).to_string(), "0x1.fffffffffffffp+1023"); + + assert_eq!(Hexf(f32::MIN).to_string(), "-0x1.fffffep+127"); + assert_eq!(Hexf(f64::MIN).to_string(), "-0x1.fffffffffffffp+1023"); + + assert_eq!(Hexf(f32::ZERO).to_string(), "0x0p+0"); + assert_eq!(Hexf(f64::ZERO).to_string(), "0x0p+0"); + + assert_eq!(Hexf(f32::NEG_ZERO).to_string(), "-0x0p+0"); + assert_eq!(Hexf(f64::NEG_ZERO).to_string(), "-0x0p+0"); + + assert_eq!(Hexf(f32::NAN).to_string(), "NaN"); + assert_eq!(Hexf(f64::NAN).to_string(), "NaN"); + + assert_eq!(Hexf(f32::INFINITY).to_string(), "inf"); + assert_eq!(Hexf(f64::INFINITY).to_string(), "inf"); + + assert_eq!(Hexf(f32::NEG_INFINITY).to_string(), "-inf"); + assert_eq!(Hexf(f64::NEG_INFINITY).to_string(), "-inf"); + + #[cfg(f16_enabled)] + { + assert_eq!(Hexf(f16::MAX).to_string(), "0x1.ffcp+15"); + assert_eq!(Hexf(f16::MIN).to_string(), "-0x1.ffcp+15"); + assert_eq!(Hexf(f16::ZERO).to_string(), "0x0p+0"); + assert_eq!(Hexf(f16::NEG_ZERO).to_string(), "-0x0p+0"); + assert_eq!(Hexf(f16::NAN).to_string(), "NaN"); + assert_eq!(Hexf(f16::INFINITY).to_string(), "inf"); + assert_eq!(Hexf(f16::NEG_INFINITY).to_string(), "-inf"); + } + + #[cfg(f128_enabled)] + { + assert_eq!(Hexf(f128::MAX).to_string(), "0x1.ffffffffffffffffffffffffffffp+16383"); + assert_eq!(Hexf(f128::MIN).to_string(), "-0x1.ffffffffffffffffffffffffffffp+16383"); + assert_eq!(Hexf(f128::ZERO).to_string(), "0x0p+0"); + assert_eq!(Hexf(f128::NEG_ZERO).to_string(), "-0x0p+0"); + assert_eq!(Hexf(f128::NAN).to_string(), "NaN"); + assert_eq!(Hexf(f128::INFINITY).to_string(), 
"inf"); + assert_eq!(Hexf(f128::NEG_INFINITY).to_string(), "-inf"); + } + } +} diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index b82a2ea05..d471c5b70 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -13,7 +13,7 @@ pub use hex_float::hf16; #[cfg(f128_enabled)] pub use hex_float::hf128; #[allow(unused_imports)] -pub use hex_float::{hf32, hf64}; +pub use hex_float::{Hexf, hf32, hf64}; pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; /// Hint to the compiler that the current path is cold. From 34259f752a63d6287e16d03ddde53d06854a6633 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 28 Jan 2025 10:09:27 +0000 Subject: [PATCH 1144/1459] Util: also print the hex float format for outputs --- libm/crates/util/src/main.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index 6ea1be3d9..357df6b4f 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -8,7 +8,7 @@ use std::env; use std::num::ParseIntError; use std::str::FromStr; -use libm::support::{hf32, hf64}; +use libm::support::{Hexf, hf32, hf64}; #[cfg(feature = "build-mpfr")] use libm_test::mpfloat::MpOp; use libm_test::{MathOp, TupleCall}; @@ -73,7 +73,7 @@ macro_rules! 
handle_call { } _ => panic!("unrecognized or disabled basis '{}'", $basis), }; - println!("{output:?}"); + println!("{output:?} {:x}", Hexf(output)); return; } }; @@ -303,6 +303,10 @@ impl FromStrRadix for i32 { #[cfg(f16_enabled)] impl FromStrRadix for f16 { fn from_str_radix(s: &str, radix: u32) -> Result { + if radix == 16 && s.contains("p") { + return Ok(libm::support::hf16(s)); + } + let s = strip_radix_prefix(s, radix); u16::from_str_radix(s, radix).map(Self::from_bits) } @@ -334,6 +338,9 @@ impl FromStrRadix for f64 { #[cfg(f128_enabled)] impl FromStrRadix for f128 { fn from_str_radix(s: &str, radix: u32) -> Result { + if radix == 16 && s.contains("p") { + return Ok(libm::support::hf128(s)); + } let s = strip_radix_prefix(s, radix); u128::from_str_radix(s, radix).map(Self::from_bits) } From 49e0ed7de8313ca9b494f1e4d59f1c515db3381d Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Thu, 30 Jan 2025 13:06:41 -0800 Subject: [PATCH 1145/1459] Specify license as just MIT Simplify the SPDX string to the user-facing version to make it easier for users and tooling to understand. Contributions must still be `MIT OR Apache-2.0`. 
[ add commit body with context - Trevor ] --- libm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 08342a929..f24f4423c 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -4,7 +4,7 @@ categories = ["no-std"] description = "libm in pure Rust" documentation = "https://docs.rs/libm" keywords = ["libm", "math"] -license = "MIT AND (MIT OR Apache-2.0)" +license = "MIT" name = "libm" readme = "README.md" repository = "https://github.com/rust-lang/libm" From 41d22161858669f0df9cead05154b2bb728db8f3 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 31 Jan 2025 12:31:35 +0000 Subject: [PATCH 1146/1459] Rename `EXP_MAX` to `EXP_SAT` `EXP_MAX` sounds like it would be the maximum value representable by that float type's exponent, rather than the maximum unsigned value of its bits. Clarify this by renaming to `EXP_SAT`, the "saturated" exponent representation. --- libm/src/math/generic/fmod.rs | 2 +- libm/src/math/generic/sqrt.rs | 4 ++-- libm/src/math/support/float_traits.rs | 21 ++++++++++++--------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/libm/src/math/generic/fmod.rs b/libm/src/math/generic/fmod.rs index 93da6c51e..ca1cda383 100644 --- a/libm/src/math/generic/fmod.rs +++ b/libm/src/math/generic/fmod.rs @@ -13,7 +13,7 @@ pub fn fmod(x: F, y: F) -> F { let mut ey = y.exp().signed(); let sx = ix & F::SIGN_MASK; - if iy << 1 == zero || y.is_nan() || ex == F::EXP_MAX as i32 { + if iy << 1 == zero || y.is_nan() || ex == F::EXP_SAT as i32 { return (x * y) / (x * y); } diff --git a/libm/src/math/generic/sqrt.rs b/libm/src/math/generic/sqrt.rs index c20c0f205..90d6c01e9 100644 --- a/libm/src/math/generic/sqrt.rs +++ b/libm/src/math/generic/sqrt.rs @@ -68,7 +68,7 @@ where (Exp::NoShift(()), special_case) } else { let top = u32::cast_from(ix >> F::SIG_BITS); - let special_case = top.wrapping_sub(1) >= F::EXP_MAX - 1; + let special_case = top.wrapping_sub(1) >= F::EXP_SAT - 1; 
(Exp::Shifted(top), special_case) }; @@ -119,7 +119,7 @@ where if even { m_u2 >>= 1; } - e = (e.wrapping_add(F::EXP_MAX >> 1)) >> 1; + e = (e.wrapping_add(F::EXP_SAT >> 1)) >> 1; (m_u2, Exp::Shifted(e)) } Exp::NoShift(()) => { diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 57e4aebec..1fe2cb424 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -48,11 +48,14 @@ pub trait Float: /// The bitwidth of the exponent const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1; - /// The saturated value of the exponent (infinite representation), in the rightmost postiion. - const EXP_MAX: u32 = (1 << Self::EXP_BITS) - 1; + /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite + /// representation. + /// + /// This shifted fully right, use `EXP_MASK` for the shifted value. + const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1; /// The exponent bias value - const EXP_BIAS: u32 = Self::EXP_MAX >> 1; + const EXP_BIAS: u32 = Self::EXP_SAT >> 1; /// A mask for the sign bit const SIGN_MASK: Self::Int; @@ -109,7 +112,7 @@ pub trait Float: /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero. fn exp(self) -> u32 { - u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_MAX + u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_SAT } /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero. 
@@ -135,7 +138,7 @@ pub trait Float: let sign = if negative { Self::Int::ONE } else { Self::Int::ZERO }; Self::from_bits( (sign << (Self::BITS - 1)) - | (Self::Int::cast_from(exponent & Self::EXP_MAX) << Self::SIG_BITS) + | (Self::Int::cast_from(exponent & Self::EXP_SAT) << Self::SIG_BITS) | (significand & Self::SIG_MASK), ) } @@ -267,7 +270,7 @@ mod tests { #[cfg(f16_enabled)] fn check_f16() { // Constants - assert_eq!(f16::EXP_MAX, 0b11111); + assert_eq!(f16::EXP_SAT, 0b11111); assert_eq!(f16::EXP_BIAS, 15); // `exp_unbiased` @@ -289,7 +292,7 @@ mod tests { #[test] fn check_f32() { // Constants - assert_eq!(f32::EXP_MAX, 0b11111111); + assert_eq!(f32::EXP_SAT, 0b11111111); assert_eq!(f32::EXP_BIAS, 127); // `exp_unbiased` @@ -312,7 +315,7 @@ mod tests { #[test] fn check_f64() { // Constants - assert_eq!(f64::EXP_MAX, 0b11111111111); + assert_eq!(f64::EXP_SAT, 0b11111111111); assert_eq!(f64::EXP_BIAS, 1023); // `exp_unbiased` @@ -336,7 +339,7 @@ mod tests { #[cfg(f128_enabled)] fn check_f128() { // Constants - assert_eq!(f128::EXP_MAX, 0b111111111111111); + assert_eq!(f128::EXP_SAT, 0b111111111111111); assert_eq!(f128::EXP_BIAS, 16383); // `exp_unbiased` From 6f96bccc5d4aa3ba4c4cebdf23a3ccc3bc7fe77c Mon Sep 17 00:00:00 2001 From: Pavel Grigorenko Date: Sat, 1 Feb 2025 02:14:22 +0300 Subject: [PATCH 1147/1459] Indentation fix to please clippy --- src/float/conv.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/float/conv.rs b/src/float/conv.rs index 83a181c37..4f52ac712 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -344,8 +344,7 @@ where /// /// Parameters: /// - `fbits`: `abg(f)` bitcasted to an integer. -/// - `map_inbounds`: apply this transformation to integers that are within range (add the sign -/// back). +/// - `map_inbounds`: apply this transformation to integers that are within range (add the sign back). /// - `out_of_bounds`: return value when out of range for `I`. 
fn float_to_int_inner( fbits: F::Int, From f6eef07f537e40ac77df993573f3fbce14e52146 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 4 Feb 2025 10:36:45 +0000 Subject: [PATCH 1148/1459] Revert "Eliminate the use of `public_test_dep!`" [1] has not gone forward, so this needs to be reverted again in order to unblock a compiler-builtins upgrade that is necessary for the LLVM 20 upgrade. This reverts commit a2494f14e99ae90c964f12bf0c059d63ccc07c2a. [1]: https://github.com/rust-lang/rust/pull/135501 --- src/float/mod.rs | 195 ++++++++++- src/float/traits.rs | 189 ----------- src/int/leading_zeros.rs | 227 +++++++------ src/int/mod.rs | 424 +++++++++++++++++++++++- src/int/specialized_div_rem/delegate.rs | 4 +- src/int/trailing_zeros.rs | 69 ++-- src/int/traits.rs | 411 ----------------------- src/macros.rs | 16 + 8 files changed, 772 insertions(+), 763 deletions(-) delete mode 100644 src/float/traits.rs delete mode 100644 src/int/traits.rs diff --git a/src/float/mod.rs b/src/float/mod.rs index 41b308626..6ee55950e 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -1,3 +1,7 @@ +use core::ops; + +use crate::int::{DInt, Int, MinInt}; + pub mod add; pub mod cmp; pub mod conv; @@ -6,11 +10,192 @@ pub mod extend; pub mod mul; pub mod pow; pub mod sub; -pub(crate) mod traits; pub mod trunc; -#[cfg(not(feature = "public-test-deps"))] -pub(crate) use traits::{Float, HalfRep}; +/// Wrapper to extract the integer type half of the float's size +pub(crate) type HalfRep = <::Int as DInt>::H; + +public_test_dep! { +/// Trait for some basic operations on floats +#[allow(dead_code)] +pub(crate) trait Float: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::MulAssign + + ops::Add + + ops::Sub + + ops::Div + + ops::Rem +{ + /// A uint of the same width as the float + type Int: Int; + + /// A int of the same width as the float + type SignedInt: Int + MinInt; + + /// An int capable of containing the exponent bits plus a sign bit. 
This is signed. + type ExpInt: Int; + + const ZERO: Self; + const ONE: Self; + + /// The bitwidth of the float type. + const BITS: u32; + + /// The bitwidth of the significand. + const SIG_BITS: u32; + + /// The bitwidth of the exponent. + const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1; + + /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite + /// representation. + /// + /// This is in the rightmost position, use `EXP_MASK` for the shifted value. + const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1; + + /// The exponent bias value. + const EXP_BIAS: u32 = Self::EXP_SAT >> 1; + + /// A mask for the sign bit. + const SIGN_MASK: Self::Int; + + /// A mask for the significand. + const SIG_MASK: Self::Int; + + /// The implicit bit of the float format. + const IMPLICIT_BIT: Self::Int; + + /// A mask for the exponent. + const EXP_MASK: Self::Int; + + /// Returns `self` transmuted to `Self::Int` + fn to_bits(self) -> Self::Int; + + /// Returns `self` transmuted to `Self::SignedInt` + fn to_bits_signed(self) -> Self::SignedInt; + + /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be + /// represented in multiple different ways. This method returns `true` if two NaNs are + /// compared. + fn eq_repr(self, rhs: Self) -> bool; + + /// Returns true if the sign is negative + fn is_sign_negative(self) -> bool; + + /// Returns the exponent, not adjusting for bias. + fn exp(self) -> Self::ExpInt; + + /// Returns the significand with no implicit bit (or the "fractional" part) + fn frac(self) -> Self::Int; + + /// Returns the significand with implicit bit + fn imp_frac(self) -> Self::Int; + + /// Returns a `Self::Int` transmuted back to `Self` + fn from_bits(a: Self::Int) -> Self; + + /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. 
+ fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self; + + fn abs(self) -> Self { + let abs_mask = !Self::SIGN_MASK ; + Self::from_bits(self.to_bits() & abs_mask) + } + + /// Returns (normalized exponent, normalized significand) + fn normalize(significand: Self::Int) -> (i32, Self::Int); + + /// Returns if `self` is subnormal + fn is_subnormal(self) -> bool; +} +} + +macro_rules! float_impl { + ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { + impl Float for $ty { + type Int = $ity; + type SignedInt = $sity; + type ExpInt = $expty; + + const ZERO: Self = 0.0; + const ONE: Self = 1.0; + + const BITS: u32 = $bits; + const SIG_BITS: u32 = $significand_bits; + + const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); + const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1; + const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS; + const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK); + + fn to_bits(self) -> Self::Int { + self.to_bits() + } + fn to_bits_signed(self) -> Self::SignedInt { + self.to_bits() as Self::SignedInt + } + fn eq_repr(self, rhs: Self) -> bool { + #[cfg(feature = "mangled-names")] + fn is_nan(x: $ty) -> bool { + // When using mangled-names, the "real" compiler-builtins might not have the + // necessary builtin (__unordtf2) to test whether `f128` is NaN. 
+ // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin + // x is NaN if all the bits of the exponent are set and the significand is non-0 + x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0 + } + #[cfg(not(feature = "mangled-names"))] + fn is_nan(x: $ty) -> bool { + x.is_nan() + } + if is_nan(self) && is_nan(rhs) { + true + } else { + self.to_bits() == rhs.to_bits() + } + } + fn is_sign_negative(self) -> bool { + self.is_sign_negative() + } + fn exp(self) -> Self::ExpInt { + ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt + } + fn frac(self) -> Self::Int { + self.to_bits() & Self::SIG_MASK + } + fn imp_frac(self) -> Self::Int { + self.frac() | Self::IMPLICIT_BIT + } + fn from_bits(a: Self::Int) -> Self { + Self::from_bits(a) + } + fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self { + Self::from_bits( + ((negative as Self::Int) << (Self::BITS - 1)) + | ((exponent << Self::SIG_BITS) & Self::EXP_MASK) + | (significand & Self::SIG_MASK), + ) + } + fn normalize(significand: Self::Int) -> (i32, Self::Int) { + let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); + ( + 1i32.wrapping_sub(shift as i32), + significand << shift as Self::Int, + ) + } + fn is_subnormal(self) -> bool { + (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO + } + } + }; +} -#[cfg(feature = "public-test-deps")] -pub use traits::{Float, HalfRep}; +#[cfg(f16_enabled)] +float_impl!(f16, u16, i16, i8, 16, 10); +float_impl!(f32, u32, i32, i16, 32, 23); +float_impl!(f64, u64, i64, i16, 64, 52); +#[cfg(f128_enabled)] +float_impl!(f128, u128, i128, i16, 128, 112); diff --git a/src/float/traits.rs b/src/float/traits.rs deleted file mode 100644 index 8ccaa7bcb..000000000 --- a/src/float/traits.rs +++ /dev/null @@ -1,189 +0,0 @@ -use core::ops; - -use crate::int::{DInt, Int, MinInt}; - -/// Wrapper to extract the integer type half of the float's size -pub type HalfRep = <::Int as 
DInt>::H; - -/// Trait for some basic operations on floats -#[allow(dead_code)] -pub trait Float: - Copy - + core::fmt::Debug - + PartialEq - + PartialOrd - + ops::AddAssign - + ops::MulAssign - + ops::Add - + ops::Sub - + ops::Div - + ops::Rem -{ - /// A uint of the same width as the float - type Int: Int; - - /// A int of the same width as the float - type SignedInt: Int + MinInt; - - /// An int capable of containing the exponent bits plus a sign bit. This is signed. - type ExpInt: Int; - - const ZERO: Self; - const ONE: Self; - - /// The bitwidth of the float type. - const BITS: u32; - - /// The bitwidth of the significand. - const SIG_BITS: u32; - - /// The bitwidth of the exponent. - const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1; - - /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite - /// representation. - /// - /// This is in the rightmost position, use `EXP_MASK` for the shifted value. - const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1; - - /// The exponent bias value. - const EXP_BIAS: u32 = Self::EXP_SAT >> 1; - - /// A mask for the sign bit. - const SIGN_MASK: Self::Int; - - /// A mask for the significand. - const SIG_MASK: Self::Int; - - /// The implicit bit of the float format. - const IMPLICIT_BIT: Self::Int; - - /// A mask for the exponent. - const EXP_MASK: Self::Int; - - /// Returns `self` transmuted to `Self::Int` - fn to_bits(self) -> Self::Int; - - /// Returns `self` transmuted to `Self::SignedInt` - fn to_bits_signed(self) -> Self::SignedInt; - - /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be - /// represented in multiple different ways. This method returns `true` if two NaNs are - /// compared. - fn eq_repr(self, rhs: Self) -> bool; - - /// Returns true if the sign is negative - fn is_sign_negative(self) -> bool; - - /// Returns the exponent, not adjusting for bias. 
- fn exp(self) -> Self::ExpInt; - - /// Returns the significand with no implicit bit (or the "fractional" part) - fn frac(self) -> Self::Int; - - /// Returns the significand with implicit bit - fn imp_frac(self) -> Self::Int; - - /// Returns a `Self::Int` transmuted back to `Self` - fn from_bits(a: Self::Int) -> Self; - - /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. - fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self; - - fn abs(self) -> Self { - let abs_mask = !Self::SIGN_MASK; - Self::from_bits(self.to_bits() & abs_mask) - } - - /// Returns (normalized exponent, normalized significand) - fn normalize(significand: Self::Int) -> (i32, Self::Int); - - /// Returns if `self` is subnormal - fn is_subnormal(self) -> bool; -} - -macro_rules! float_impl { - ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { - impl Float for $ty { - type Int = $ity; - type SignedInt = $sity; - type ExpInt = $expty; - - const ZERO: Self = 0.0; - const ONE: Self = 1.0; - - const BITS: u32 = $bits; - const SIG_BITS: u32 = $significand_bits; - - const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); - const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1; - const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS; - const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK); - - fn to_bits(self) -> Self::Int { - self.to_bits() - } - fn to_bits_signed(self) -> Self::SignedInt { - self.to_bits() as Self::SignedInt - } - fn eq_repr(self, rhs: Self) -> bool { - #[cfg(feature = "mangled-names")] - fn is_nan(x: $ty) -> bool { - // When using mangled-names, the "real" compiler-builtins might not have the - // necessary builtin (__unordtf2) to test whether `f128` is NaN. 
- // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin - // x is NaN if all the bits of the exponent are set and the significand is non-0 - x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0 - } - #[cfg(not(feature = "mangled-names"))] - fn is_nan(x: $ty) -> bool { - x.is_nan() - } - if is_nan(self) && is_nan(rhs) { - true - } else { - self.to_bits() == rhs.to_bits() - } - } - fn is_sign_negative(self) -> bool { - self.is_sign_negative() - } - fn exp(self) -> Self::ExpInt { - ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt - } - fn frac(self) -> Self::Int { - self.to_bits() & Self::SIG_MASK - } - fn imp_frac(self) -> Self::Int { - self.frac() | Self::IMPLICIT_BIT - } - fn from_bits(a: Self::Int) -> Self { - Self::from_bits(a) - } - fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self { - Self::from_bits( - ((negative as Self::Int) << (Self::BITS - 1)) - | ((exponent << Self::SIG_BITS) & Self::EXP_MASK) - | (significand & Self::SIG_MASK), - ) - } - fn normalize(significand: Self::Int) -> (i32, Self::Int) { - let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); - ( - 1i32.wrapping_sub(shift as i32), - significand << shift as Self::Int, - ) - } - fn is_subnormal(self) -> bool { - (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO - } - } - }; -} - -#[cfg(f16_enabled)] -float_impl!(f16, u16, i16, i8, 16, 10); -float_impl!(f32, u32, i32, i16, 32, 23); -float_impl!(f64, u64, i64, i16, 64, 52); -#[cfg(f128_enabled)] -float_impl!(f128, u128, i128, i16, 128, 112); diff --git a/src/int/leading_zeros.rs b/src/int/leading_zeros.rs index ba735aa74..1fee9fcf5 100644 --- a/src/int/leading_zeros.rs +++ b/src/int/leading_zeros.rs @@ -3,138 +3,135 @@ // adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`. // Compilers will insert the check for zero in cases where it is needed. 
-#[cfg(feature = "public-test-deps")] -pub use implementation::{leading_zeros_default, leading_zeros_riscv}; -#[cfg(not(feature = "public-test-deps"))] -pub(crate) use implementation::{leading_zeros_default, leading_zeros_riscv}; +use crate::int::{CastInto, Int}; -mod implementation { - use crate::int::{CastInto, Int}; +public_test_dep! { +/// Returns the number of leading binary zeros in `x`. +#[allow(dead_code)] +pub(crate) fn leading_zeros_default>(x: T) -> usize { + // The basic idea is to test if the higher bits of `x` are zero and bisect the number + // of leading zeros. It is possible for all branches of the bisection to use the same + // code path by conditionally shifting the higher parts down to let the next bisection + // step work on the higher or lower parts of `x`. Instead of starting with `z == 0` + // and adding to the number of zeros, it is slightly faster to start with + // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros, + // because it simplifies the final bisection step. + let mut x = x; + // the number of potential leading zeros + let mut z = T::BITS as usize; + // a temporary + let mut t: T; - /// Returns the number of leading binary zeros in `x`. - #[allow(dead_code)] - pub fn leading_zeros_default>(x: T) -> usize { - // The basic idea is to test if the higher bits of `x` are zero and bisect the number - // of leading zeros. It is possible for all branches of the bisection to use the same - // code path by conditionally shifting the higher parts down to let the next bisection - // step work on the higher or lower parts of `x`. Instead of starting with `z == 0` - // and adding to the number of zeros, it is slightly faster to start with - // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros, - // because it simplifies the final bisection step. 
- let mut x = x; - // the number of potential leading zeros - let mut z = T::BITS as usize; - // a temporary - let mut t: T; - - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { - t = x >> 32; - if t != T::ZERO { - z -= 32; - x = t; - } - } - if T::BITS >= 32 { - t = x >> 16; - if t != T::ZERO { - z -= 16; - x = t; - } - } - const { assert!(T::BITS >= 16) }; - t = x >> 8; + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + t = x >> 32; if t != T::ZERO { - z -= 8; + z -= 32; x = t; } - t = x >> 4; - if t != T::ZERO { - z -= 4; - x = t; - } - t = x >> 2; + } + if T::BITS >= 32 { + t = x >> 16; if t != T::ZERO { - z -= 2; + z -= 16; x = t; } - // the last two bisections are combined into one conditional - t = x >> 1; - if t != T::ZERO { - z - 2 - } else { - z - x.cast() - } - - // We could potentially save a few cycles by using the LUT trick from - // "https://embeddedgurus.com/state-space/2014/09/ - // fast-deterministic-and-portable-counting-leading-zeros/". - // However, 256 bytes for a LUT is too large for embedded use cases. We could remove - // the last 3 bisections and use this 16 byte LUT for the rest of the work: - //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]; - //z -= LUT[x] as usize; - //z - // However, it ends up generating about the same number of instructions. When benchmarked - // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO - // execution effects. Changing to using a LUT and branching is risky for smaller cores. 
+ } + const { assert!(T::BITS >= 16) }; + t = x >> 8; + if t != T::ZERO { + z -= 8; + x = t; + } + t = x >> 4; + if t != T::ZERO { + z -= 4; + x = t; + } + t = x >> 2; + if t != T::ZERO { + z -= 2; + x = t; + } + // the last two bisections are combined into one conditional + t = x >> 1; + if t != T::ZERO { + z - 2 + } else { + z - x.cast() } - // The above method does not compile well on RISC-V (because of the lack of predicated - // instructions), producing code with many branches or using an excessively long - // branchless solution. This method takes advantage of the set-if-less-than instruction on - // RISC-V that allows `(x >= power-of-two) as usize` to be branchless. + // We could potentially save a few cycles by using the LUT trick from + // "https://embeddedgurus.com/state-space/2014/09/ + // fast-deterministic-and-portable-counting-leading-zeros/". + // However, 256 bytes for a LUT is too large for embedded use cases. We could remove + // the last 3 bisections and use this 16 byte LUT for the rest of the work: + //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]; + //z -= LUT[x] as usize; + //z + // However, it ends up generating about the same number of instructions. When benchmarked + // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO + // execution effects. Changing to using a LUT and branching is risky for smaller cores. +} +} - /// Returns the number of leading binary zeros in `x`. - #[allow(dead_code)] - pub fn leading_zeros_riscv>(x: T) -> usize { - let mut x = x; - // the number of potential leading zeros - let mut z = T::BITS; - // a temporary - let mut t: u32; +// The above method does not compile well on RISC-V (because of the lack of predicated +// instructions), producing code with many branches or using an excessively long +// branchless solution. This method takes advantage of the set-if-less-than instruction on +// RISC-V that allows `(x >= power-of-two) as usize` to be branchless. 
- // RISC-V does not have a set-if-greater-than-or-equal instruction and - // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is - // still the most optimal method. A conditional set can only be turned into a single - // immediate instruction if `x` is compared with an immediate `imm` (that can fit into - // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the - // right). If we try to save an instruction by using `x < imm` for each bisection, we - // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, - // but the immediate will never fit into 12 bits and never save an instruction. - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { - // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise - // `t` is set to 0. - t = ((x >= (T::ONE << 32)) as u32) << 5; - // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the - // next step to process. - x >>= t; - // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential - // leading zeros - z -= t; - } - if T::BITS >= 32 { - t = ((x >= (T::ONE << 16)) as u32) << 4; - x >>= t; - z -= t; - } - const { assert!(T::BITS >= 16) }; - t = ((x >= (T::ONE << 8)) as u32) << 3; - x >>= t; - z -= t; - t = ((x >= (T::ONE << 4)) as u32) << 2; - x >>= t; - z -= t; - t = ((x >= (T::ONE << 2)) as u32) << 1; +public_test_dep! { +/// Returns the number of leading binary zeros in `x`. +#[allow(dead_code)] +pub(crate) fn leading_zeros_riscv>(x: T) -> usize { + let mut x = x; + // the number of potential leading zeros + let mut z = T::BITS; + // a temporary + let mut t: u32; + + // RISC-V does not have a set-if-greater-than-or-equal instruction and + // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is + // still the most optimal method. 
A conditional set can only be turned into a single + // immediate instruction if `x` is compared with an immediate `imm` (that can fit into + // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the + // right). If we try to save an instruction by using `x < imm` for each bisection, we + // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, + // but the immediate will never fit into 12 bits and never save an instruction. + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise + // `t` is set to 0. + t = ((x >= (T::ONE << 32)) as u32) << 5; + // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the + // next step to process. x >>= t; + // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential + // leading zeros z -= t; - t = (x >= (T::ONE << 1)) as u32; + } + if T::BITS >= 32 { + t = ((x >= (T::ONE << 16)) as u32) << 4; x >>= t; z -= t; - // All bits except the LSB are guaranteed to be zero for this final bisection step. - // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. - z as usize - x.cast() } + const { assert!(T::BITS >= 16) }; + t = ((x >= (T::ONE << 8)) as u32) << 3; + x >>= t; + z -= t; + t = ((x >= (T::ONE << 4)) as u32) << 2; + x >>= t; + z -= t; + t = ((x >= (T::ONE << 2)) as u32) << 1; + x >>= t; + z -= t; + t = (x >= (T::ONE << 1)) as u32; + x >>= t; + z -= t; + // All bits except the LSB are guaranteed to be zero for this final bisection step. + // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. + z as usize - x.cast() +} } intrinsics! 
{ diff --git a/src/int/mod.rs b/src/int/mod.rs index 1f1be711b..c0d5a6715 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -1,3 +1,5 @@ +use core::ops; + mod specialized_div_rem; pub mod addsub; @@ -8,13 +10,425 @@ pub mod mul; pub mod sdiv; pub mod shift; pub mod trailing_zeros; -mod traits; pub mod udiv; pub use big::{i256, u256}; -#[cfg(not(feature = "public-test-deps"))] -pub(crate) use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; +public_test_dep! { +/// Minimal integer implementations needed on all integer types, including wide integers. +#[allow(dead_code)] +pub(crate) trait MinInt: Copy + + core::fmt::Debug + + ops::BitOr + + ops::Not + + ops::Shl +{ + + /// Type with the same width but other signedness + type OtherSign: MinInt; + /// Unsigned version of Self + type UnsignedInt: MinInt; + + /// If `Self` is a signed integer + const SIGNED: bool; + + /// The bitwidth of the int type + const BITS: u32; + + const ZERO: Self; + const ONE: Self; + const MIN: Self; + const MAX: Self; +} +} + +public_test_dep! { +/// Trait for some basic operations on integers +#[allow(dead_code)] +pub(crate) trait Int: MinInt + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::SubAssign + + ops::BitAndAssign + + ops::BitOrAssign + + ops::BitXorAssign + + ops::ShlAssign + + ops::ShrAssign + + ops::Add + + ops::Sub + + ops::Mul + + ops::Div + + ops::Shr + + ops::BitXor + + ops::BitAnd +{ + /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing + /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, + /// 112,119,120,125,126,127]. + const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(::BITS); + + /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. 
+ const FUZZ_NUM: usize = { + let log2 = (::BITS - 1).count_ones() as usize; + if log2 == 3 { + // case for u8 + 6 + } else { + // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate + // boundaries. + 8 + (4 * (log2 - 4)) + } + }; + + fn unsigned(self) -> Self::UnsignedInt; + fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; + fn unsigned_abs(self) -> Self::UnsignedInt; + + fn from_bool(b: bool) -> Self; + + /// Prevents the need for excessive conversions between signed and unsigned + fn logical_shr(self, other: u32) -> Self; + + /// Absolute difference between two integers. + fn abs_diff(self, other: Self) -> Self::UnsignedInt; + + // copied from primitive integers, but put in a trait + fn is_zero(self) -> bool; + fn wrapping_neg(self) -> Self; + fn wrapping_add(self, other: Self) -> Self; + fn wrapping_mul(self, other: Self) -> Self; + fn wrapping_sub(self, other: Self) -> Self; + fn wrapping_shl(self, other: u32) -> Self; + fn wrapping_shr(self, other: u32) -> Self; + fn rotate_left(self, other: u32) -> Self; + fn overflowing_add(self, other: Self) -> (Self, bool); + fn leading_zeros(self) -> u32; + fn ilog2(self) -> u32; +} +} + +pub(crate) const fn make_fuzz_lengths(bits: u32) -> [u8; 20] { + let mut v = [0u8; 20]; + v[0] = 0; + v[1] = 1; + v[2] = 2; // important for parity and the iX::MIN case when reversed + let mut i = 3; + + // No need for any more until the byte boundary, because there should be no algorithms + // that are sensitive to anything not next to byte boundaries after 2. We also scale + // in powers of two, which is important to prevent u128 corner tests from getting too + // big. 
+ let mut l = 8; + loop { + if l >= ((bits / 2) as u8) { + break; + } + // get both sides of the byte boundary + v[i] = l - 1; + i += 1; + v[i] = l; + i += 1; + l *= 2; + } + + if bits != 8 { + // add the lower side of the middle boundary + v[i] = ((bits / 2) - 1) as u8; + i += 1; + } + + // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS + // boundary because of algorithms that split the high part up. We reverse the scaling + // as we go to Self::BITS. + let mid = i; + let mut j = 1; + loop { + v[i] = (bits as u8) - (v[mid - j]) - 1; + if j == mid { + break; + } + i += 1; + j += 1; + } + v +} + +macro_rules! int_impl_common { + ($ty:ty) => { + fn from_bool(b: bool) -> Self { + b as $ty + } + + fn logical_shr(self, other: u32) -> Self { + Self::from_unsigned(self.unsigned().wrapping_shr(other)) + } + + fn is_zero(self) -> bool { + self == Self::ZERO + } + + fn wrapping_neg(self) -> Self { + ::wrapping_neg(self) + } + + fn wrapping_add(self, other: Self) -> Self { + ::wrapping_add(self, other) + } + + fn wrapping_mul(self, other: Self) -> Self { + ::wrapping_mul(self, other) + } + fn wrapping_sub(self, other: Self) -> Self { + ::wrapping_sub(self, other) + } + + fn wrapping_shl(self, other: u32) -> Self { + ::wrapping_shl(self, other) + } + + fn wrapping_shr(self, other: u32) -> Self { + ::wrapping_shr(self, other) + } + + fn rotate_left(self, other: u32) -> Self { + ::rotate_left(self, other) + } + + fn overflowing_add(self, other: Self) -> (Self, bool) { + ::overflowing_add(self, other) + } + + fn leading_zeros(self) -> u32 { + ::leading_zeros(self) + } + + fn ilog2(self) -> u32 { + ::ilog2(self) + } + }; +} + +macro_rules! 
int_impl { + ($ity:ty, $uty:ty) => { + impl MinInt for $uty { + type OtherSign = $ity; + type UnsignedInt = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $uty { + fn unsigned(self) -> $uty { + self + } + + // It makes writing macros easier if this is implemented for both signed and unsigned + #[allow(clippy::wrong_self_convention)] + fn from_unsigned(me: $uty) -> Self { + me + } + + fn unsigned_abs(self) -> Self { + self + } + + fn abs_diff(self, other: Self) -> Self { + self.abs_diff(other) + } + + int_impl_common!($uty); + } + + impl MinInt for $ity { + type OtherSign = $uty; + type UnsignedInt = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $ity { + fn unsigned(self) -> $uty { + self as $uty + } + + fn from_unsigned(me: $uty) -> Self { + me as $ity + } + + fn unsigned_abs(self) -> Self::UnsignedInt { + self.unsigned_abs() + } + + fn abs_diff(self, other: Self) -> $uty { + self.abs_diff(other) + } + + int_impl_common!($ity); + } + }; +} + +int_impl!(isize, usize); +int_impl!(i8, u8); +int_impl!(i16, u16); +int_impl!(i32, u32); +int_impl!(i64, u64); +int_impl!(i128, u128); + +public_test_dep! { +/// Trait for integers twice the bit width of another integer. This is implemented for all +/// primitives except for `u8`, because there is not a smaller primitive. 
+pub(crate) trait DInt: MinInt { + /// Integer that is half the bit width of the integer this trait is implemented for + type H: HInt; + + /// Returns the low half of `self` + fn lo(self) -> Self::H; + /// Returns the high half of `self` + fn hi(self) -> Self::H; + /// Returns the low and high halves of `self` as a tuple + fn lo_hi(self) -> (Self::H, Self::H) { + (self.lo(), self.hi()) + } + /// Constructs an integer using lower and higher half parts + fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { + lo.zero_widen() | hi.widen_hi() + } +} +} + +public_test_dep! { +/// Trait for integers half the bit width of another integer. This is implemented for all +/// primitives except for `u128`, because it there is not a larger primitive. +pub(crate) trait HInt: Int { + /// Integer that is double the bit width of the integer this trait is implemented for + type D: DInt + MinInt; + + // NB: some of the below methods could have default implementations (e.g. `widen_hi`), but for + // unknown reasons this can cause infinite recursion when optimizations are disabled. See + // for context. + + /// Widens (using default extension) the integer to have double bit width + fn widen(self) -> Self::D; + /// Widens (zero extension only) the integer to have double bit width. This is needed to get + /// around problems with associated type bounds (such as `Int`) being unstable + fn zero_widen(self) -> Self::D; + /// Widens the integer to have double bit width and shifts the integer into the higher bits + fn widen_hi(self) -> Self::D; + /// Widening multiplication with zero widening. This cannot overflow. + fn zero_widen_mul(self, rhs: Self) -> Self::D; + /// Widening multiplication. This cannot overflow. + fn widen_mul(self, rhs: Self) -> Self::D; +} +} + +macro_rules! 
impl_d_int { + ($($X:ident $D:ident),*) => { + $( + impl DInt for $D { + type H = $X; + + fn lo(self) -> Self::H { + self as $X + } + fn hi(self) -> Self::H { + (self >> <$X as MinInt>::BITS) as $X + } + } + )* + }; +} + +macro_rules! impl_h_int { + ($($H:ident $uH:ident $X:ident),*) => { + $( + impl HInt for $H { + type D = $X; + + fn widen(self) -> Self::D { + self as $X + } + fn zero_widen(self) -> Self::D { + (self as $uH) as $X + } + fn zero_widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen().wrapping_mul(rhs.zero_widen()) + } + fn widen_mul(self, rhs: Self) -> Self::D { + self.widen().wrapping_mul(rhs.widen()) + } + fn widen_hi(self) -> Self::D { + (self as $X) << ::BITS + } + } + )* + }; +} + +impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); +impl_h_int!( + u8 u8 u16, + u16 u16 u32, + u32 u32 u64, + u64 u64 u128, + i8 u8 i16, + i16 u16 i32, + i32 u32 i64, + i64 u64 i128 +); + +public_test_dep! { +/// Trait to express (possibly lossy) casting of integers +pub(crate) trait CastInto: Copy { + fn cast(self) -> T; +} + +pub(crate) trait CastFrom:Copy { + fn cast_from(value: T) -> Self; +} +} + +impl + Copy> CastFrom for T { + fn cast_from(value: U) -> Self { + value.cast() + } +} + +macro_rules! 
cast_into { + ($ty:ty) => { + cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); + }; + ($ty:ty; $($into:ty),*) => {$( + impl CastInto<$into> for $ty { + fn cast(self) -> $into { + self as $into + } + } + )*}; +} -#[cfg(feature = "public-test-deps")] -pub use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; +cast_into!(usize); +cast_into!(isize); +cast_into!(u8); +cast_into!(i8); +cast_into!(u16); +cast_into!(i16); +cast_into!(u32); +cast_into!(i32); +cast_into!(u64); +cast_into!(i64); +cast_into!(u128); +cast_into!(i128); diff --git a/src/int/specialized_div_rem/delegate.rs b/src/int/specialized_div_rem/delegate.rs index f5c6e5023..330c6e4f8 100644 --- a/src/int/specialized_div_rem/delegate.rs +++ b/src/int/specialized_div_rem/delegate.rs @@ -185,6 +185,7 @@ macro_rules! impl_delegate { }; } +public_test_dep! { /// Returns `n / d` and sets `*rem = n % d`. /// /// This specialization exists because: @@ -194,7 +195,7 @@ macro_rules! impl_delegate { /// delegate algorithm strategy the only reasonably fast way to perform `u128` division. // used on SPARC #[allow(dead_code)] -pub fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { +pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { use super::*; let duo_lo = duo as u64; let duo_hi = (duo >> 64) as u64; @@ -315,3 +316,4 @@ pub fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { } } } +} diff --git a/src/int/trailing_zeros.rs b/src/int/trailing_zeros.rs index dbc0cce9f..cea366b07 100644 --- a/src/int/trailing_zeros.rs +++ b/src/int/trailing_zeros.rs @@ -1,49 +1,44 @@ -#[cfg(feature = "public-test-deps")] -pub use implementation::trailing_zeros; -#[cfg(not(feature = "public-test-deps"))] -pub(crate) use implementation::trailing_zeros; +use crate::int::{CastInto, Int}; -mod implementation { - use crate::int::{CastInto, Int}; +public_test_dep! { +/// Returns number of trailing binary zeros in `x`. 
+#[allow(dead_code)] +pub(crate) fn trailing_zeros + CastInto + CastInto>(x: T) -> usize { + let mut x = x; + let mut r: u32 = 0; + let mut t: u32; - /// Returns number of trailing binary zeros in `x`. - #[allow(dead_code)] - pub fn trailing_zeros + CastInto + CastInto>(x: T) -> usize { - let mut x = x; - let mut r: u32 = 0; - let mut t: u32; - - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { - r += ((CastInto::::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0 - x >>= r; // remove 32 zero bits - } - - if T::BITS >= 32 { - t = ((CastInto::::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0 - r += t; - x >>= t; // x = [0 - 0xFFFF] + higher garbage bits - } + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + r += ((CastInto::::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0 + x >>= r; // remove 32 zero bits + } - const { assert!(T::BITS >= 16) }; - t = ((CastInto::::cast(x) == 0) as u32) << 3; - x >>= t; // x = [0 - 0xFF] + higher garbage bits + if T::BITS >= 32 { + t = ((CastInto::::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0 r += t; + x >>= t; // x = [0 - 0xFFFF] + higher garbage bits + } - let mut x: u8 = x.cast(); + const { assert!(T::BITS >= 16) }; + t = ((CastInto::::cast(x) == 0) as u32) << 3; + x >>= t; // x = [0 - 0xFF] + higher garbage bits + r += t; - t = (((x & 0x0F) == 0) as u32) << 2; - x >>= t; // x = [0 - 0xF] + higher garbage bits - r += t; + let mut x: u8 = x.cast(); - t = (((x & 0x3) == 0) as u32) << 1; - x >>= t; // x = [0 - 0x3] + higher garbage bits - r += t; + t = (((x & 0x0F) == 0) as u32) << 2; + x >>= t; // x = [0 - 0xF] + higher garbage bits + r += t; - x &= 3; + t = (((x & 0x3) == 0) as u32) << 1; + x >>= t; // x = [0 - 0x3] + higher garbage bits + r += t; - r as usize + ((2 - (x >> 1) as usize) & (((x & 1) == 0) as usize).wrapping_neg()) - } + x &= 3; + + r as usize + ((2 - (x >> 1) as usize) & (((x & 1) == 0) as 
usize).wrapping_neg()) +} } intrinsics! { diff --git a/src/int/traits.rs b/src/int/traits.rs deleted file mode 100644 index 9b079e2aa..000000000 --- a/src/int/traits.rs +++ /dev/null @@ -1,411 +0,0 @@ -use core::ops; - -/// Minimal integer implementations needed on all integer types, including wide integers. -#[allow(dead_code)] -pub trait MinInt: - Copy - + core::fmt::Debug - + ops::BitOr - + ops::Not - + ops::Shl -{ - /// Type with the same width but other signedness - type OtherSign: MinInt; - /// Unsigned version of Self - type UnsignedInt: MinInt; - - /// If `Self` is a signed integer - const SIGNED: bool; - - /// The bitwidth of the int type - const BITS: u32; - - const ZERO: Self; - const ONE: Self; - const MIN: Self; - const MAX: Self; -} - -/// Trait for some basic operations on integers -#[allow(dead_code)] -pub trait Int: - MinInt - + PartialEq - + PartialOrd - + ops::AddAssign - + ops::SubAssign - + ops::BitAndAssign - + ops::BitOrAssign - + ops::BitXorAssign - + ops::ShlAssign - + ops::ShrAssign - + ops::Add - + ops::Sub - + ops::Mul - + ops::Div - + ops::Shr - + ops::BitXor - + ops::BitAnd -{ - /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing - /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, - /// 112,119,120,125,126,127]. - const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(::BITS); - - /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. - const FUZZ_NUM: usize = { - let log2 = (::BITS - 1).count_ones() as usize; - if log2 == 3 { - // case for u8 - 6 - } else { - // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate - // boundaries. 
- 8 + (4 * (log2 - 4)) - } - }; - - fn unsigned(self) -> Self::UnsignedInt; - fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; - fn unsigned_abs(self) -> Self::UnsignedInt; - - fn from_bool(b: bool) -> Self; - - /// Prevents the need for excessive conversions between signed and unsigned - fn logical_shr(self, other: u32) -> Self; - - /// Absolute difference between two integers. - fn abs_diff(self, other: Self) -> Self::UnsignedInt; - - // copied from primitive integers, but put in a trait - fn is_zero(self) -> bool; - fn wrapping_neg(self) -> Self; - fn wrapping_add(self, other: Self) -> Self; - fn wrapping_mul(self, other: Self) -> Self; - fn wrapping_sub(self, other: Self) -> Self; - fn wrapping_shl(self, other: u32) -> Self; - fn wrapping_shr(self, other: u32) -> Self; - fn rotate_left(self, other: u32) -> Self; - fn overflowing_add(self, other: Self) -> (Self, bool); - fn leading_zeros(self) -> u32; - fn ilog2(self) -> u32; -} - -pub(crate) const fn make_fuzz_lengths(bits: u32) -> [u8; 20] { - let mut v = [0u8; 20]; - v[0] = 0; - v[1] = 1; - v[2] = 2; // important for parity and the iX::MIN case when reversed - let mut i = 3; - - // No need for any more until the byte boundary, because there should be no algorithms - // that are sensitive to anything not next to byte boundaries after 2. We also scale - // in powers of two, which is important to prevent u128 corner tests from getting too - // big. - let mut l = 8; - loop { - if l >= ((bits / 2) as u8) { - break; - } - // get both sides of the byte boundary - v[i] = l - 1; - i += 1; - v[i] = l; - i += 1; - l *= 2; - } - - if bits != 8 { - // add the lower side of the middle boundary - v[i] = ((bits / 2) - 1) as u8; - i += 1; - } - - // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS - // boundary because of algorithms that split the high part up. We reverse the scaling - // as we go to Self::BITS. 
- let mid = i; - let mut j = 1; - loop { - v[i] = (bits as u8) - (v[mid - j]) - 1; - if j == mid { - break; - } - i += 1; - j += 1; - } - v -} - -macro_rules! int_impl_common { - ($ty:ty) => { - fn from_bool(b: bool) -> Self { - b as $ty - } - - fn logical_shr(self, other: u32) -> Self { - Self::from_unsigned(self.unsigned().wrapping_shr(other)) - } - - fn is_zero(self) -> bool { - self == Self::ZERO - } - - fn wrapping_neg(self) -> Self { - ::wrapping_neg(self) - } - - fn wrapping_add(self, other: Self) -> Self { - ::wrapping_add(self, other) - } - - fn wrapping_mul(self, other: Self) -> Self { - ::wrapping_mul(self, other) - } - fn wrapping_sub(self, other: Self) -> Self { - ::wrapping_sub(self, other) - } - - fn wrapping_shl(self, other: u32) -> Self { - ::wrapping_shl(self, other) - } - - fn wrapping_shr(self, other: u32) -> Self { - ::wrapping_shr(self, other) - } - - fn rotate_left(self, other: u32) -> Self { - ::rotate_left(self, other) - } - - fn overflowing_add(self, other: Self) -> (Self, bool) { - ::overflowing_add(self, other) - } - - fn leading_zeros(self) -> u32 { - ::leading_zeros(self) - } - - fn ilog2(self) -> u32 { - ::ilog2(self) - } - }; -} - -macro_rules! 
int_impl { - ($ity:ty, $uty:ty) => { - impl MinInt for $uty { - type OtherSign = $ity; - type UnsignedInt = $uty; - - const BITS: u32 = ::ZERO.count_zeros(); - const SIGNED: bool = Self::MIN != Self::ZERO; - - const ZERO: Self = 0; - const ONE: Self = 1; - const MIN: Self = ::MIN; - const MAX: Self = ::MAX; - } - - impl Int for $uty { - fn unsigned(self) -> $uty { - self - } - - // It makes writing macros easier if this is implemented for both signed and unsigned - #[allow(clippy::wrong_self_convention)] - fn from_unsigned(me: $uty) -> Self { - me - } - - fn unsigned_abs(self) -> Self { - self - } - - fn abs_diff(self, other: Self) -> Self { - self.abs_diff(other) - } - - int_impl_common!($uty); - } - - impl MinInt for $ity { - type OtherSign = $uty; - type UnsignedInt = $uty; - - const BITS: u32 = ::ZERO.count_zeros(); - const SIGNED: bool = Self::MIN != Self::ZERO; - - const ZERO: Self = 0; - const ONE: Self = 1; - const MIN: Self = ::MIN; - const MAX: Self = ::MAX; - } - - impl Int for $ity { - fn unsigned(self) -> $uty { - self as $uty - } - - fn from_unsigned(me: $uty) -> Self { - me as $ity - } - - fn unsigned_abs(self) -> Self::UnsignedInt { - self.unsigned_abs() - } - - fn abs_diff(self, other: Self) -> $uty { - self.abs_diff(other) - } - - int_impl_common!($ity); - } - }; -} - -int_impl!(isize, usize); -int_impl!(i8, u8); -int_impl!(i16, u16); -int_impl!(i32, u32); -int_impl!(i64, u64); -int_impl!(i128, u128); - -/// Trait for integers twice the bit width of another integer. This is implemented for all -/// primitives except for `u8`, because there is not a smaller primitive. 
-pub trait DInt: MinInt { - /// Integer that is half the bit width of the integer this trait is implemented for - type H: HInt; - - /// Returns the low half of `self` - fn lo(self) -> Self::H; - /// Returns the high half of `self` - fn hi(self) -> Self::H; - /// Returns the low and high halves of `self` as a tuple - fn lo_hi(self) -> (Self::H, Self::H) { - (self.lo(), self.hi()) - } - /// Constructs an integer using lower and higher half parts - fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { - lo.zero_widen() | hi.widen_hi() - } -} - -/// Trait for integers half the bit width of another integer. This is implemented for all -/// primitives except for `u128`, because it there is not a larger primitive. -pub trait HInt: Int { - /// Integer that is double the bit width of the integer this trait is implemented for - type D: DInt + MinInt; - - // NB: some of the below methods could have default implementations (e.g. `widen_hi`), but for - // unknown reasons this can cause infinite recursion when optimizations are disabled. See - // for context. - - /// Widens (using default extension) the integer to have double bit width - fn widen(self) -> Self::D; - /// Widens (zero extension only) the integer to have double bit width. This is needed to get - /// around problems with associated type bounds (such as `Int`) being unstable - fn zero_widen(self) -> Self::D; - /// Widens the integer to have double bit width and shifts the integer into the higher bits - fn widen_hi(self) -> Self::D; - /// Widening multiplication with zero widening. This cannot overflow. - fn zero_widen_mul(self, rhs: Self) -> Self::D; - /// Widening multiplication. This cannot overflow. - fn widen_mul(self, rhs: Self) -> Self::D; -} - -macro_rules! impl_d_int { - ($($X:ident $D:ident),*) => { - $( - impl DInt for $D { - type H = $X; - - fn lo(self) -> Self::H { - self as $X - } - fn hi(self) -> Self::H { - (self >> <$X as MinInt>::BITS) as $X - } - } - )* - }; -} - -macro_rules! 
impl_h_int { - ($($H:ident $uH:ident $X:ident),*) => { - $( - impl HInt for $H { - type D = $X; - - fn widen(self) -> Self::D { - self as $X - } - fn zero_widen(self) -> Self::D { - (self as $uH) as $X - } - fn zero_widen_mul(self, rhs: Self) -> Self::D { - self.zero_widen().wrapping_mul(rhs.zero_widen()) - } - fn widen_mul(self, rhs: Self) -> Self::D { - self.widen().wrapping_mul(rhs.widen()) - } - fn widen_hi(self) -> Self::D { - (self as $X) << ::BITS - } - } - )* - }; -} - -impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); -impl_h_int!( - u8 u8 u16, - u16 u16 u32, - u32 u32 u64, - u64 u64 u128, - i8 u8 i16, - i16 u16 i32, - i32 u32 i64, - i64 u64 i128 -); - -/// Trait to express (possibly lossy) casting of integers -pub trait CastInto: Copy { - fn cast(self) -> T; -} - -pub trait CastFrom: Copy { - fn cast_from(value: T) -> Self; -} - -impl + Copy> CastFrom for T { - fn cast_from(value: U) -> Self { - value.cast() - } -} - -macro_rules! cast_into { - ($ty:ty) => { - cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); - }; - ($ty:ty; $($into:ty),*) => {$( - impl CastInto<$into> for $ty { - fn cast(self) -> $into { - self as $into - } - } - )*}; -} - -cast_into!(usize); -cast_into!(isize); -cast_into!(u8); -cast_into!(i8); -cast_into!(u16); -cast_into!(i16); -cast_into!(u32); -cast_into!(i32); -cast_into!(u64); -cast_into!(i64); -cast_into!(u128); -cast_into!(i128); diff --git a/src/macros.rs b/src/macros.rs index 91606d42b..f51e49e98 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -1,5 +1,21 @@ //! Macros shared throughout the compiler-builtins implementation +/// Changes the visibility to `pub` if feature "public-test-deps" is set +#[cfg(not(feature = "public-test-deps"))] +macro_rules! 
public_test_dep { + ($(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*) => { + $(#[$($meta)*])* pub(crate) $ident $($tokens)* + }; +} + +/// Changes the visibility to `pub` if feature "public-test-deps" is set +#[cfg(feature = "public-test-deps")] +macro_rules! public_test_dep { + {$(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*} => { + $(#[$($meta)*])* pub $ident $($tokens)* + }; +} + /// The "main macro" used for defining intrinsics. /// /// The compiler-builtins library is super platform-specific with tons of crazy From b5b0fac15f69f4eb57b92537b471e8f046444352 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 4 Feb 2025 10:46:49 +0000 Subject: [PATCH 1149/1459] chore: release v0.1.145 --- CHANGELOG.md | 10 ++++++++++ Cargo.toml | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a1f95d178..ddebb7823 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.145](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.144...compiler_builtins-v0.1.145) - 2025-02-04 + +### Other + +- Revert "Eliminate the use of `public_test_dep!`" +- Indentation fix to please clippy +- Don't build out of line atomics support code for uefi +- Add a version to some FIXMEs that will be resolved in LLVM 20 +- Remove use of the `start` feature + ## [0.1.144](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.143...compiler_builtins-v0.1.144) - 2025-01-15 ### Other diff --git a/Cargo.toml b/Cargo.toml index 0b0b7c365..fcbc60238 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.144" +version = "0.1.145" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" 
repository = "https://github.com/rust-lang/compiler-builtins" From 0e026f3084185793bf524dacea50625dc6f1fffb Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 30 Jan 2025 12:52:35 +0000 Subject: [PATCH 1150/1459] Fix hex float trait recursion problem --- libm/src/math/support/hex_float.rs | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs index da41622f2..ebc4f7c64 100644 --- a/libm/src/math/support/hex_float.rs +++ b/libm/src/math/support/hex_float.rs @@ -245,29 +245,21 @@ fn fmt_any_hex(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "0x{leading}{sig:0mwidth$x}p{exponent:+}") } -#[cfg(f16_enabled)] -impl fmt::LowerHex for Hexf { +impl fmt::LowerHex for Hexf { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt_any_hex(&self.0, f) } } -impl fmt::LowerHex for Hexf { +impl fmt::LowerHex for Hexf<(F, F)> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt_any_hex(&self.0, f) - } -} - -impl fmt::LowerHex for Hexf { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt_any_hex(&self.0, f) + write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) } } -#[cfg(f128_enabled)] -impl fmt::LowerHex for Hexf { +impl fmt::LowerHex for Hexf<(F, i32)> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt_any_hex(&self.0, f) + write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) } } @@ -277,18 +269,6 @@ impl fmt::LowerHex for Hexf { } } -impl fmt::LowerHex for Hexf<(T1, T2)> -where - T1: Copy, - T2: Copy, - Hexf: fmt::LowerHex, - Hexf: fmt::LowerHex, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) - } -} - impl fmt::Debug for Hexf where Hexf: fmt::LowerHex, From e12dae19d8d3622776a528efc97fa6b7d26bd68e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 3 Jan 2025 04:34:21 +0000 Subject: [PATCH 1151/1459] Add 
`scalbnf16`, `scalbnf128`, `ldexpf16`, and `ldexpf128` Use the generic `scalbn` to provide `f16` and `f128` versions, which also work for `ldexp`. This involves a new algorithm for `f16` because the default does not converge fast enough with a limited number of rounds. --- libm/crates/libm-macros/src/shared.rs | 14 +++ libm/crates/libm-test/benches/icount.rs | 4 + libm/crates/libm-test/benches/random.rs | 4 + libm/crates/libm-test/src/mpfloat.rs | 61 +++++++------ libm/crates/libm-test/src/precision.rs | 4 + .../libm-test/tests/compare_built_musl.rs | 4 + libm/crates/util/src/main.rs | 4 + libm/etc/function-definitions.json | 26 ++++++ libm/etc/function-list.txt | 4 + libm/src/math/generic/scalbn.rs | 85 ++++++++++++++++--- libm/src/math/ldexpf128.rs | 4 + libm/src/math/ldexpf16.rs | 4 + libm/src/math/mod.rs | 8 ++ libm/src/math/scalbnf128.rs | 4 + libm/src/math/scalbnf16.rs | 4 + 15 files changed, 195 insertions(+), 39 deletions(-) create mode 100644 libm/src/math/ldexpf128.rs create mode 100644 libm/src/math/ldexpf16.rs create mode 100644 libm/src/math/scalbnf128.rs create mode 100644 libm/src/math/scalbnf16.rs diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index b1f4f46cc..4fd0834f6 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ b/libm/crates/libm-macros/src/shared.rs @@ -134,6 +134,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] None, &["jn", "yn"], ), + ( + // `(f16, i32) -> f16` + FloatTy::F16, + Signature { args: &[Ty::F16, Ty::I32], returns: &[Ty::F16] }, + None, + &["scalbnf16", "ldexpf16"], + ), ( // `(f32, i32) -> f32` FloatTy::F32, @@ -148,6 +155,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] None, &["scalbn", "ldexp"], ), + ( + // `(f128, i32) -> f128` + FloatTy::F128, + Signature { args: &[Ty::F128, Ty::I32], returns: &[Ty::F128] }, + None, + &["scalbnf128", "ldexpf128"], + ), ( // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)` FloatTy::F32, 
diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs index d5026f461..13de799c7 100644 --- a/libm/crates/libm-test/benches/icount.rs +++ b/libm/crates/libm-test/benches/icount.rs @@ -131,6 +131,8 @@ main!( icount_bench_jn_group, icount_bench_jnf_group, icount_bench_ldexp_group, + icount_bench_ldexpf128_group, + icount_bench_ldexpf16_group, icount_bench_ldexpf_group, icount_bench_lgamma_group, icount_bench_lgamma_r_group, @@ -163,6 +165,8 @@ main!( icount_bench_roundf16_group, icount_bench_roundf_group, icount_bench_scalbn_group, + icount_bench_scalbnf128_group, + icount_bench_scalbnf16_group, icount_bench_scalbnf_group, icount_bench_sin_group, icount_bench_sinf_group, diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index ca9e86c10..56d288c33 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -133,10 +133,14 @@ libm_macros::for_each_function! { | fminf16 | fmodf128 | fmodf16 + | ldexpf128 + | ldexpf16 | rintf128 | rintf16 | roundf128 | roundf16 + | scalbnf128 + | scalbnf16 | sqrtf128 | sqrtf16 | truncf128 diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 3d84740cc..e3211b913 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -159,6 +159,8 @@ libm_macros::for_each_function! { jnf, ldexp, ldexpf, + ldexpf128, + ldexpf16, lgamma_r, lgammaf_r, modf, @@ -178,6 +180,8 @@ libm_macros::for_each_function! { roundf16, scalbn, scalbnf, + scalbnf128, + scalbnf16, sincos,sincosf, trunc, truncf, @@ -351,34 +355,6 @@ macro_rules! impl_op_for_ty { } } - // `ldexp` and `scalbn` are the same for binary floating point, so just forward all - // methods. 
- impl MpOp for crate::op::[]::Routine { - type MpTy = ]::Routine as MpOp>::MpTy; - - fn new_mp() -> Self::MpTy { - ]::Routine as MpOp>::new_mp() - } - - fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { - ]::Routine as MpOp>::run(this, input) - } - } - - impl MpOp for crate::op::[]::Routine { - type MpTy = MpFloat; - - fn new_mp() -> Self::MpTy { - new_mpfloat::() - } - - fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { - this.assign(input.0); - *this <<= input.1; - prep_retval::(this, Ordering::Equal) - } - } - impl MpOp for crate::op::[]::Routine { type MpTy = (MpFloat, MpFloat); @@ -464,6 +440,35 @@ macro_rules! impl_op_for_ty_all { this.1.assign(input.1); let ord = this.0.rem_assign_round(&this.1, Nearest); prep_retval::(&mut this.0, ord) + + } + } + + // `ldexp` and `scalbn` are the same for binary floating point, so just forward all + // methods. + impl MpOp for crate::op::[]::Routine { + type MpTy = ]::Routine as MpOp>::MpTy; + + fn new_mp() -> Self::MpTy { + ]::Routine as MpOp>::new_mp() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + ]::Routine as MpOp>::run(this, input) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + *this <<= input.1; + prep_retval::(this, Ordering::Equal) } } } diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index ffb322e38..051960b7a 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -551,8 +551,12 @@ fn int_float_common( DEFAULT } +#[cfg(f16_enabled)] +impl MaybeOverride<(f16, i32)> for SpecialCase {} impl MaybeOverride<(f32, i32)> for SpecialCase {} impl MaybeOverride<(f64, i32)> for SpecialCase {} +#[cfg(f128_enabled)] +impl MaybeOverride<(f128, i32)> for SpecialCase {} impl 
MaybeOverride<(f32, f32, f32)> for SpecialCase { fn check_float( diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 5466edf4f..191c7e69d 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -95,10 +95,14 @@ libm_macros::for_each_function! { fminf16, fmodf128, fmodf16, + ldexpf128, + ldexpf16, rintf128, rintf16, roundf128, roundf16, + scalbnf128, + scalbnf16, sqrtf128, sqrtf16, truncf128, diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index 357df6b4f..e5d6f374a 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -102,10 +102,14 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { | fminf16 | fmodf128 | fmodf16 + | ldexpf128 + | ldexpf16 | rintf128 | rintf16 | roundf128 | roundf16 + | scalbnf128 + | scalbnf16 | sqrtf128 | sqrtf16 | truncf128 diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 574ffea2e..e38dfd236 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -554,6 +554,18 @@ ], "type": "f32" }, + "ldexpf128": { + "sources": [ + "src/math/ldexpf128.rs" + ], + "type": "f128" + }, + "ldexpf16": { + "sources": [ + "src/math/ldexpf16.rs" + ], + "type": "f16" + }, "lgamma": { "sources": [ "src/libm_helper.rs", @@ -774,6 +786,20 @@ ], "type": "f32" }, + "scalbnf128": { + "sources": [ + "src/math/generic/scalbn.rs", + "src/math/scalbnf128.rs" + ], + "type": "f128" + }, + "scalbnf16": { + "sources": [ + "src/math/generic/scalbn.rs", + "src/math/scalbnf16.rs" + ], + "type": "f16" + }, "sin": { "sources": [ "src/libm_helper.rs", diff --git a/libm/etc/function-list.txt b/libm/etc/function-list.txt index d82838b32..c92eaf9e2 100644 --- a/libm/etc/function-list.txt +++ b/libm/etc/function-list.txt @@ -79,6 +79,8 @@ jn jnf ldexp ldexpf +ldexpf128 +ldexpf16 lgamma lgamma_r lgammaf @@ -111,6 +113,8 @@ 
roundf128 roundf16 scalbn scalbnf +scalbnf128 +scalbnf16 sin sincos sincosf diff --git a/libm/src/math/generic/scalbn.rs b/libm/src/math/generic/scalbn.rs index f036c15cc..f15cb75d6 100644 --- a/libm/src/math/generic/scalbn.rs +++ b/libm/src/math/generic/scalbn.rs @@ -31,16 +31,27 @@ where let exp_max: i32 = F::EXP_BIAS as i32; let exp_min = -(exp_max - 1); - // 2 ^ Emax, where Emax is the maximum biased exponent value (1023 for f64) + // 2 ^ Emax, maximum positive with null significand (0x1p1023 for f64) let f_exp_max = F::from_parts(false, F::EXP_BIAS << 1, zero); - // 2 ^ Emin, where Emin is the minimum biased exponent value (-1022 for f64) + // 2 ^ Emin, minimum positive normal with null significand (0x1p-1022 for f64) let f_exp_min = F::from_parts(false, 1, zero); - // 2 ^ sig_total_bits, representation of what can be accounted for with subnormals - let f_exp_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero); + // 2 ^ sig_total_bits, moltiplier to normalize subnormals (0x1p53 for f64) + let f_pow_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero); + + /* + * The goal is to multiply `x` by a scale factor that applies `n`. However, there are cases + * where `2^n` is not representable by `F` but the result should be, e.g. `x = 2^Emin` with + * `n = -EMin + 2` (one out of range of 2^Emax). To get around this, reduce the magnitude of + * the final scale operation by prescaling by the max/min power representable by `F`. + */ if n > exp_max { + // Worse case positive `n`: `x` is the minimum subnormal value, the result is `F::MAX`. + // This can be reached by three scaling multiplications (two here and one final). 
+ debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= exp_max * 3); + x *= f_exp_max; n -= exp_max; if n > exp_max { @@ -51,21 +62,61 @@ where } } } else if n < exp_min { - let mul = f_exp_min * f_exp_subnorm; - let add = (exp_max - 1) - sig_total_bits as i32; + // When scaling toward 0, the prescaling is limited to a value that does not allow `x` to + // go subnormal. This avoids double rounding. + if F::BITS > 16 { + // `mul` s.t. `!(x * mul).is_subnormal() ∀ x` + let mul = f_exp_min * f_pow_subnorm; + let add = -exp_min - sig_total_bits as i32; + + // Worse case negative `n`: `x` is the maximum positive value, the result is `F::MIN`. + // This must be reachable by three scaling multiplications (two here and one final). + debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= add * 2 + -exp_min); - x *= mul; - n += add; - if n < exp_min { x *= mul; n += add; + if n < exp_min { - n = exp_min; + x *= mul; + n += add; + + if n < exp_min { + n = exp_min; + } + } + } else { + // `f16` is unique compared to other float types in that the difference between the + // minimum exponent and the significand bits (`add = -exp_min - sig_total_bits`) is + // small, only three. The above method depend on decrementing `n` by `add` two times; + // for other float types this works out because `add` is a substantial fraction of + // the exponent range. For `f16`, however, 3 is relatively small compared to the + // exponent range (which is 39), so that requires ~10 prescale rounds rather than two. + // + // Work aroudn this by using a different algorithm that calculates the prescale + // dynamically based on the maximum possible value. This adds more operations per round + // since it needs to construct the scale, but works better in the general case. 
+ let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32); + let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero); + + x *= mul; + n += add; + + if n < exp_min { + let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32); + let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero); + + x *= mul; + n += add; + + if n < exp_min { + n = exp_min; + } } } } - x * F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero) + let scale = F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero); + x * scale } #[cfg(test)] @@ -111,6 +162,12 @@ mod tests { assert!(scalbn(-F::NAN, -10).is_nan()); } + #[test] + #[cfg(f16_enabled)] + fn spec_test_f16() { + spec_test::(); + } + #[test] fn spec_test_f32() { spec_test::(); @@ -120,4 +177,10 @@ mod tests { fn spec_test_f64() { spec_test::(); } + + #[test] + #[cfg(f128_enabled)] + fn spec_test_f128() { + spec_test::(); + } } diff --git a/libm/src/math/ldexpf128.rs b/libm/src/math/ldexpf128.rs new file mode 100644 index 000000000..b35277d15 --- /dev/null +++ b/libm/src/math/ldexpf128.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf128(x: f128, n: i32) -> f128 { + super::scalbnf128(x, n) +} diff --git a/libm/src/math/ldexpf16.rs b/libm/src/math/ldexpf16.rs new file mode 100644 index 000000000..8de6cffd6 --- /dev/null +++ b/libm/src/math/ldexpf16.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf16(x: f16, n: i32) -> f16 { + super::scalbnf16(x, n) +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 969c1bfd9..9b07dc8a7 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -349,8 +349,10 @@ cfg_if! { mod fmaxf16; mod fminf16; mod fmodf16; + mod ldexpf16; mod rintf16; mod roundf16; + mod scalbnf16; mod sqrtf16; mod truncf16; @@ -362,8 +364,10 @@ cfg_if! 
{ pub use self::fmaxf16::fmaxf16; pub use self::fminf16::fminf16; pub use self::fmodf16::fmodf16; + pub use self::ldexpf16::ldexpf16; pub use self::rintf16::rintf16; pub use self::roundf16::roundf16; + pub use self::scalbnf16::scalbnf16; pub use self::sqrtf16::sqrtf16; pub use self::truncf16::truncf16; } @@ -379,8 +383,10 @@ cfg_if! { mod fmaxf128; mod fminf128; mod fmodf128; + mod ldexpf128; mod rintf128; mod roundf128; + mod scalbnf128; mod sqrtf128; mod truncf128; @@ -392,8 +398,10 @@ cfg_if! { pub use self::fmaxf128::fmaxf128; pub use self::fminf128::fminf128; pub use self::fmodf128::fmodf128; + pub use self::ldexpf128::ldexpf128; pub use self::rintf128::rintf128; pub use self::roundf128::roundf128; + pub use self::scalbnf128::scalbnf128; pub use self::sqrtf128::sqrtf128; pub use self::truncf128::truncf128; } diff --git a/libm/src/math/scalbnf128.rs b/libm/src/math/scalbnf128.rs new file mode 100644 index 000000000..c1d2b4855 --- /dev/null +++ b/libm/src/math/scalbnf128.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf128(x: f128, n: i32) -> f128 { + super::generic::scalbn(x, n) +} diff --git a/libm/src/math/scalbnf16.rs b/libm/src/math/scalbnf16.rs new file mode 100644 index 000000000..2209e1a17 --- /dev/null +++ b/libm/src/math/scalbnf16.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf16(x: f16, n: i32) -> f16 { + super::generic::scalbn(x, n) +} From 8b80240fd42fc9dbdd6e36e519be0ec8b0f25625 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 5 Feb 2025 15:00:04 +0000 Subject: [PATCH 1152/1459] Add a check in the `shared.rs` that the function list is sorted --- libm/crates/libm-macros/src/shared.rs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index 4fd0834f6..da16cd8e2 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ 
b/libm/crates/libm-macros/src/shared.rs @@ -18,7 +18,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] None, &[ "acosf", "acoshf", "asinf", "asinhf", "atanf", "atanhf", "cbrtf", "ceilf", "cosf", - "coshf", "erff", "erfcf", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf", + "coshf", "erfcf", "erff", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf", "j0f", "j1f", "lgammaf", "log10f", "log1pf", "log2f", "logf", "rintf", "roundf", "sinf", "sinhf", "sqrtf", "tanf", "tanhf", "tgammaf", "truncf", "y0f", "y1f", ], @@ -30,8 +30,8 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] None, &[ "acos", "acosh", "asin", "asinh", "atan", "atanh", "cbrt", "ceil", "cos", "cosh", - "erf", "erfc", "exp10", "exp2", "exp", "expm1", "fabs", "floor", "j0", "j1", "lgamma", - "log10", "log1p", "log2", "log", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh", + "erf", "erfc", "exp", "exp10", "exp2", "expm1", "fabs", "floor", "j0", "j1", "lgamma", + "log", "log10", "log1p", "log2", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh", "tgamma", "trunc", "y0", "y1", ], ), @@ -139,28 +139,28 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F16, Signature { args: &[Ty::F16, Ty::I32], returns: &[Ty::F16] }, None, - &["scalbnf16", "ldexpf16"], + &["ldexpf16", "scalbnf16"], ), ( // `(f32, i32) -> f32` FloatTy::F32, Signature { args: &[Ty::F32, Ty::I32], returns: &[Ty::F32] }, None, - &["scalbnf", "ldexpf"], + &["ldexpf", "scalbnf"], ), ( // `(f64, i64) -> f64` FloatTy::F64, Signature { args: &[Ty::F64, Ty::I32], returns: &[Ty::F64] }, None, - &["scalbn", "ldexp"], + &["ldexp", "scalbn"], ), ( // `(f128, i32) -> f128` FloatTy::F128, Signature { args: &[Ty::F128, Ty::I32], returns: &[Ty::F128] }, None, - &["scalbnf128", "ldexpf128"], + &["ldexpf128", "scalbnf128"], ), ( // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)` @@ -312,6 +312,12 @@ pub static ALL_OPERATIONS: LazyLock> = LazyLock::new(|| { }; 
ret.push(api); } + + if !names.is_sorted() { + let mut sorted = (*names).to_owned(); + sorted.sort_unstable(); + panic!("names list is not sorted: {names:?}\nExpected: {sorted:?}"); + } } ret.sort_by_key(|item| item.name); From 5804ec6bf2d3460d0eb4213ddb9cc98def4c4ac2 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 5 Feb 2025 15:02:17 +0000 Subject: [PATCH 1153/1459] Do not add `libm_helper.rs` to the sources list This is just a collection of all functions and should not trigger extensive tests when changed. --- libm/etc/function-definitions.json | 57 ------------------------------ libm/etc/update-api-list.py | 9 +++++ 2 files changed, 9 insertions(+), 57 deletions(-) diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index e38dfd236..a1d3adf59 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -2,7 +2,6 @@ "__comment": "Autogenerated by update-api-list.py. List of files that define a function with a given name. 
This file is checked in to make it obvious if refactoring breaks things", "acos": { "sources": [ - "src/libm_helper.rs", "src/math/acos.rs" ], "type": "f64" @@ -15,7 +14,6 @@ }, "acosh": { "sources": [ - "src/libm_helper.rs", "src/math/acosh.rs" ], "type": "f64" @@ -28,7 +26,6 @@ }, "asin": { "sources": [ - "src/libm_helper.rs", "src/math/asin.rs" ], "type": "f64" @@ -41,7 +38,6 @@ }, "asinh": { "sources": [ - "src/libm_helper.rs", "src/math/asinh.rs" ], "type": "f64" @@ -54,14 +50,12 @@ }, "atan": { "sources": [ - "src/libm_helper.rs", "src/math/atan.rs" ], "type": "f64" }, "atan2": { "sources": [ - "src/libm_helper.rs", "src/math/atan2.rs" ], "type": "f64" @@ -80,7 +74,6 @@ }, "atanh": { "sources": [ - "src/libm_helper.rs", "src/math/atanh.rs" ], "type": "f64" @@ -93,7 +86,6 @@ }, "cbrt": { "sources": [ - "src/libm_helper.rs", "src/math/cbrt.rs" ], "type": "f64" @@ -106,7 +98,6 @@ }, "ceil": { "sources": [ - "src/libm_helper.rs", "src/math/arch/i586.rs", "src/math/arch/wasm32.rs", "src/math/ceil.rs", @@ -138,7 +129,6 @@ }, "copysign": { "sources": [ - "src/libm_helper.rs", "src/math/copysign.rs", "src/math/generic/copysign.rs", "src/math/support/float_traits.rs" @@ -168,7 +158,6 @@ }, "cos": { "sources": [ - "src/libm_helper.rs", "src/math/cos.rs" ], "type": "f64" @@ -181,7 +170,6 @@ }, "cosh": { "sources": [ - "src/libm_helper.rs", "src/math/cosh.rs" ], "type": "f64" @@ -194,14 +182,12 @@ }, "erf": { "sources": [ - "src/libm_helper.rs", "src/math/erf.rs" ], "type": "f64" }, "erfc": { "sources": [ - "src/libm_helper.rs", "src/math/erf.rs" ], "type": "f64" @@ -220,7 +206,6 @@ }, "exp": { "sources": [ - "src/libm_helper.rs", "src/math/exp.rs", "src/math/support/float_traits.rs" ], @@ -228,7 +213,6 @@ }, "exp10": { "sources": [ - "src/libm_helper.rs", "src/math/exp10.rs" ], "type": "f64" @@ -241,7 +225,6 @@ }, "exp2": { "sources": [ - "src/libm_helper.rs", "src/math/exp2.rs" ], "type": "f64" @@ -260,7 +243,6 @@ }, "expm1": { "sources": [ - "src/libm_helper.rs", 
"src/math/expm1.rs" ], "type": "f64" @@ -273,7 +255,6 @@ }, "fabs": { "sources": [ - "src/libm_helper.rs", "src/math/arch/wasm32.rs", "src/math/fabs.rs", "src/math/generic/fabs.rs" @@ -304,7 +285,6 @@ }, "fdim": { "sources": [ - "src/libm_helper.rs", "src/math/fdim.rs", "src/math/generic/fdim.rs" ], @@ -333,7 +313,6 @@ }, "floor": { "sources": [ - "src/libm_helper.rs", "src/math/arch/i586.rs", "src/math/arch/wasm32.rs", "src/math/floor.rs", @@ -365,7 +344,6 @@ }, "fma": { "sources": [ - "src/libm_helper.rs", "src/math/fma.rs" ], "type": "f64" @@ -378,7 +356,6 @@ }, "fmax": { "sources": [ - "src/libm_helper.rs", "src/math/fmax.rs", "src/math/generic/fmax.rs" ], @@ -407,7 +384,6 @@ }, "fmin": { "sources": [ - "src/libm_helper.rs", "src/math/fmin.rs", "src/math/generic/fmin.rs" ], @@ -436,7 +412,6 @@ }, "fmod": { "sources": [ - "src/libm_helper.rs", "src/math/fmod.rs", "src/math/generic/fmod.rs" ], @@ -465,7 +440,6 @@ }, "frexp": { "sources": [ - "src/libm_helper.rs", "src/math/frexp.rs" ], "type": "f64" @@ -478,7 +452,6 @@ }, "hypot": { "sources": [ - "src/libm_helper.rs", "src/math/hypot.rs" ], "type": "f64" @@ -491,7 +464,6 @@ }, "ilogb": { "sources": [ - "src/libm_helper.rs", "src/math/ilogb.rs" ], "type": "f64" @@ -504,7 +476,6 @@ }, "j0": { "sources": [ - "src/libm_helper.rs", "src/math/j0.rs" ], "type": "f64" @@ -517,7 +488,6 @@ }, "j1": { "sources": [ - "src/libm_helper.rs", "src/math/j1.rs" ], "type": "f64" @@ -530,7 +500,6 @@ }, "jn": { "sources": [ - "src/libm_helper.rs", "src/math/jn.rs" ], "type": "f64" @@ -543,7 +512,6 @@ }, "ldexp": { "sources": [ - "src/libm_helper.rs", "src/math/ldexp.rs" ], "type": "f64" @@ -568,14 +536,12 @@ }, "lgamma": { "sources": [ - "src/libm_helper.rs", "src/math/lgamma.rs" ], "type": "f64" }, "lgamma_r": { "sources": [ - "src/libm_helper.rs", "src/math/lgamma_r.rs" ], "type": "f64" @@ -594,14 +560,12 @@ }, "log": { "sources": [ - "src/libm_helper.rs", "src/math/log.rs" ], "type": "f64" }, "log10": { "sources": [ - 
"src/libm_helper.rs", "src/math/log10.rs" ], "type": "f64" @@ -614,7 +578,6 @@ }, "log1p": { "sources": [ - "src/libm_helper.rs", "src/math/log1p.rs" ], "type": "f64" @@ -627,7 +590,6 @@ }, "log2": { "sources": [ - "src/libm_helper.rs", "src/math/log2.rs" ], "type": "f64" @@ -646,7 +608,6 @@ }, "modf": { "sources": [ - "src/libm_helper.rs", "src/math/modf.rs" ], "type": "f64" @@ -659,7 +620,6 @@ }, "nextafter": { "sources": [ - "src/libm_helper.rs", "src/math/nextafter.rs" ], "type": "f64" @@ -672,7 +632,6 @@ }, "pow": { "sources": [ - "src/libm_helper.rs", "src/math/pow.rs" ], "type": "f64" @@ -685,7 +644,6 @@ }, "remainder": { "sources": [ - "src/libm_helper.rs", "src/math/remainder.rs" ], "type": "f64" @@ -698,7 +656,6 @@ }, "remquo": { "sources": [ - "src/libm_helper.rs", "src/math/remquo.rs" ], "type": "f64" @@ -711,7 +668,6 @@ }, "rint": { "sources": [ - "src/libm_helper.rs", "src/math/arch/aarch64.rs", "src/math/arch/wasm32.rs", "src/math/generic/rint.rs", @@ -744,7 +700,6 @@ }, "round": { "sources": [ - "src/libm_helper.rs", "src/math/generic/round.rs", "src/math/round.rs" ], @@ -773,7 +728,6 @@ }, "scalbn": { "sources": [ - "src/libm_helper.rs", "src/math/generic/scalbn.rs", "src/math/scalbn.rs" ], @@ -802,14 +756,12 @@ }, "sin": { "sources": [ - "src/libm_helper.rs", "src/math/sin.rs" ], "type": "f64" }, "sincos": { "sources": [ - "src/libm_helper.rs", "src/math/sincos.rs" ], "type": "f64" @@ -828,7 +780,6 @@ }, "sinh": { "sources": [ - "src/libm_helper.rs", "src/math/sinh.rs" ], "type": "f64" @@ -841,7 +792,6 @@ }, "sqrt": { "sources": [ - "src/libm_helper.rs", "src/math/arch/i686.rs", "src/math/arch/wasm32.rs", "src/math/generic/sqrt.rs", @@ -874,7 +824,6 @@ }, "tan": { "sources": [ - "src/libm_helper.rs", "src/math/tan.rs" ], "type": "f64" @@ -887,7 +836,6 @@ }, "tanh": { "sources": [ - "src/libm_helper.rs", "src/math/tanh.rs" ], "type": "f64" @@ -900,7 +848,6 @@ }, "tgamma": { "sources": [ - "src/libm_helper.rs", "src/math/tgamma.rs" ], "type": "f64" 
@@ -913,7 +860,6 @@ }, "trunc": { "sources": [ - "src/libm_helper.rs", "src/math/arch/wasm32.rs", "src/math/generic/trunc.rs", "src/math/trunc.rs" @@ -944,7 +890,6 @@ }, "y0": { "sources": [ - "src/libm_helper.rs", "src/math/j0.rs" ], "type": "f64" @@ -957,7 +902,6 @@ }, "y1": { "sources": [ - "src/libm_helper.rs", "src/math/j1.rs" ], "type": "f64" @@ -970,7 +914,6 @@ }, "yn": { "sources": [ - "src/libm_helper.rs", "src/math/jn.rs" ], "type": "f64" diff --git a/libm/etc/update-api-list.py b/libm/etc/update-api-list.py index 67f73e59c..54da13257 100755 --- a/libm/etc/update-api-list.py +++ b/libm/etc/update-api-list.py @@ -1,6 +1,9 @@ #!/usr/bin/env python3 """Create a text file listing all public API. This can be used to ensure that all functions are covered by our macros. + +This file additionally does tidy-esque checks that all functions are listed where +needed, or that lists are sorted. """ import difflib @@ -15,6 +18,9 @@ ETC_DIR = Path(__file__).parent ROOT_DIR = ETC_DIR.parent +# These files do not trigger a retest. +IGNORED_SOURCES = ["src/libm_helper.rs"] + IndexTy: TypeAlias = dict[str, dict[str, Any]] """Type of the `index` item in rustdoc's JSON output""" @@ -120,6 +126,9 @@ def _init_defs(self, index: IndexTy) -> None: for src in (s for s in base_sources if "generic" in s): sources.add(src) + for src in IGNORED_SOURCES: + sources.discard(src) + # Sort the set self.defs = {k: sorted(v) for (k, v) in defs.items()} From a16c16dc193aaf2d03bf421fdf9a2dd14d3e12d9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 5 Feb 2025 15:03:34 +0000 Subject: [PATCH 1154/1459] Add checks via annotation that lists are sorted or exhaustive This crate has a handful of lists that need to list all API and can't easily be verified. Additionally, some longer lists should be kept sorted so they are easier to look through. Resolve both of these by adding a check in `update-api-list.py` that looks for annotations and verifies the contents are as expected. 
Annotations are `verify-apilist-start`, `verify-apilist-end`, `verify-sorted-start`, and `verify-sorted-end`. This includes fixes for anything that did not meet the criteria. --- libm/crates/libm-test/benches/icount.rs | 9 +- libm/crates/libm-test/src/mpfloat.rs | 2 + .../libm-test/tests/compare_built_musl.rs | 2 + libm/etc/update-api-list.py | 143 ++++++++++++++++-- libm/src/libm_helper.rs | 44 +++++- libm/src/math/mod.rs | 8 + 6 files changed, 187 insertions(+), 21 deletions(-) diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs index 13de799c7..53ecb5a37 100644 --- a/libm/crates/libm-test/benches/icount.rs +++ b/libm/crates/libm-test/benches/icount.rs @@ -52,7 +52,10 @@ libm_macros::for_each_function! { } main!( - library_benchmark_groups = icount_bench_acos_group, + library_benchmark_groups = + // verify-apilist-start + // verify-sorted-start + icount_bench_acos_group, icount_bench_acosf_group, icount_bench_acosh_group, icount_bench_acoshf_group, @@ -169,6 +172,8 @@ main!( icount_bench_scalbnf16_group, icount_bench_scalbnf_group, icount_bench_sin_group, + icount_bench_sincos_group, + icount_bench_sincosf_group, icount_bench_sinf_group, icount_bench_sinh_group, icount_bench_sinhf_group, @@ -192,4 +197,6 @@ main!( icount_bench_y1f_group, icount_bench_yn_group, icount_bench_ynf_group, + // verify-sorted-end + // verify-apilist-end ); diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index e3211b913..ab77d541c 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -132,6 +132,7 @@ libm_macros::for_each_function! { emit_types: [RustFn], skip: [ // Most of these need a manual implementation + // verify-sorted-start ceil, ceilf, ceilf128, @@ -188,6 +189,7 @@ libm_macros::for_each_function! 
{ truncf128, truncf16,yn, ynf, + // verify-sorted-end ], fn_extra: match MACRO_FN_NAME { // Remap function names that are different between mpfr and libm diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 191c7e69d..0b0a9f097 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -79,6 +79,7 @@ libm_macros::for_each_function! { ynf, // Not provided by musl + // verify-sorted-start ceilf128, ceilf16, copysignf128, @@ -107,5 +108,6 @@ libm_macros::for_each_function! { sqrtf16, truncf128, truncf16, + // verify-sorted-end ], } diff --git a/libm/etc/update-api-list.py b/libm/etc/update-api-list.py index 54da13257..9cf625554 100755 --- a/libm/etc/update-api-list.py +++ b/libm/etc/update-api-list.py @@ -8,16 +8,21 @@ import difflib import json +import re import subprocess as sp import sys from dataclasses import dataclass -from glob import glob +from glob import glob, iglob from pathlib import Path -from typing import Any, TypeAlias +from typing import Any, Callable, TypeAlias -ETC_DIR = Path(__file__).parent +SELF_PATH = Path(__file__) +ETC_DIR = SELF_PATH.parent ROOT_DIR = ETC_DIR.parent +# Loose approximation of what gets checked in to git, without needing `git ls-files`. +DIRECTORIES = [".github", "ci", "crates", "etc", "src"] + # These files do not trigger a retest. 
IGNORED_SOURCES = ["src/libm_helper.rs"] @@ -25,6 +30,11 @@ """Type of the `index` item in rustdoc's JSON output""" +def eprint(*args, **kwargs): + """Print to stderr.""" + print(*args, file=sys.stderr, **kwargs) + + @dataclass class Crate: """Representation of public interfaces and function defintion locations in @@ -146,7 +156,7 @@ def write_function_list(self, check: bool) -> None: if check: with open(out_file, "r") as f: current = f.read() - diff_and_exit(current, output) + diff_and_exit(current, output, "function list") else: with open(out_file, "w") as f: f.write(output) @@ -171,18 +181,115 @@ def write_function_defs(self, check: bool) -> None: if check: with open(out_file, "r") as f: current = f.read() - diff_and_exit(current, output) + diff_and_exit(current, output, "source list") else: with open(out_file, "w") as f: f.write(output) + def tidy_lists(self) -> None: + """In each file, check annotations indicating blocks of code should be sorted or should + include all public API. + """ + for dirname in DIRECTORIES: + dir = ROOT_DIR.joinpath(dirname) + for fname in iglob("**", root_dir=dir, recursive=True): + fpath = dir.joinpath(fname) + if fpath.is_dir() or fpath == SELF_PATH: + continue + + lines = fpath.read_text().splitlines() + + validate_delimited_block( + fpath, + lines, + "verify-sorted-start", + "verify-sorted-end", + ensure_sorted, + ) + + validate_delimited_block( + fpath, + lines, + "verify-apilist-start", + "verify-apilist-end", + lambda p, n, lines: self.ensure_contains_api(p, n, lines), + ) + + def ensure_contains_api(self, fpath: Path, line_num: int, lines: list[str]): + """Given a list of strings, ensure that each public function we have is named + somewhere. + """ + not_found = [] + for func in self.public_functions: + # The function name may be on its own or somewhere in a snake case string. 
+ pat = re.compile(rf"(\b|_){func}(\b|_)") + found = next((line for line in lines if pat.search(line)), None) + + if found is None: + not_found.append(func) + + if len(not_found) == 0: + return + + relpath = fpath.relative_to(ROOT_DIR) + eprint(f"functions not found at {relpath}:{line_num}: {not_found}") + exit(1) + + +def validate_delimited_block( + fpath: Path, + lines: list[str], + start: str, + end: str, + validate: Callable[[Path, int, list[str]], None], +) -> None: + """Identify blocks of code wrapped within `start` and `end`, collect their contents + to a list of strings, and call `validate` for each of those lists. + """ + relpath = fpath.relative_to(ROOT_DIR) + block_lines = [] + block_start_line: None | int = None + for line_num, line in enumerate(lines): + line_num += 1 + + if start in line: + block_start_line = line_num + continue + + if end in line: + if block_start_line is None: + eprint(f"`{end}` without `{start}` at {relpath}:{line_num}") + exit(1) + + validate(fpath, block_start_line, block_lines) + block_lines = [] + block_start_line = None + continue + + if block_start_line is not None: + block_lines.append(line) + + if block_start_line is not None: + eprint(f"`{start}` without `{end}` at {relpath}:{block_start_line}") + exit(1) + + +def ensure_sorted(fpath: Path, block_start_line: int, lines: list[str]) -> None: + """Ensure that a list of lines is sorted, otherwise print a diff and exit.""" + relpath = fpath.relative_to(ROOT_DIR) + diff_and_exit( + "".join(lines), + "".join(sorted(lines)), + f"sorted block at {relpath}:{block_start_line}", + ) -def diff_and_exit(actual: str, expected: str): + +def diff_and_exit(actual: str, expected: str, name: str): """If the two strings are different, print a diff between them and then exit with an error. 
""" if actual == expected: - print("output matches expected; success") + print(f"{name} output matches expected; success") return a = [f"{line}\n" for line in actual.splitlines()] @@ -190,7 +297,7 @@ def diff_and_exit(actual: str, expected: str): diff = difflib.unified_diff(a, b, "actual", "expected") sys.stdout.writelines(diff) - print("mismatched function list") + print(f"mismatched {name}") exit(1) @@ -223,23 +330,31 @@ def base_name(name: str) -> tuple[str, str]: return (name, "f64") +def ensure_updated_list(check: bool) -> None: + """Runner to update the function list and JSON, or check that it is already up + to date. + """ + crate = Crate() + crate.write_function_list(check) + crate.write_function_defs(check) + + if check: + crate.tidy_lists() + + def main(): """By default overwrite the file. If `--check` is passed, print a diff instead and error if the files are different. """ match sys.argv: case [_]: - check = False + ensure_updated_list(False) case [_, "--check"]: - check = True + ensure_updated_list(True) case _: print("unrecognized arguments") exit(1) - crate = Crate() - crate.write_function_list(check) - crate.write_function_defs(check) - if __name__ == "__main__": main() diff --git a/libm/src/libm_helper.rs b/libm/src/libm_helper.rs index 73bae4567..0768839c7 100644 --- a/libm/src/libm_helper.rs +++ b/libm/src/libm_helper.rs @@ -44,9 +44,11 @@ macro_rules! libm_helper { }; } +// verify-apilist-start libm_helper! { f32, funcs: { + // verify-sorted-start (fn acos(x: f32) -> (f32); => acosf); (fn acosh(x: f32) -> (f32); => acoshf); (fn asin(x: f32) -> (f32); => asinf); @@ -62,8 +64,8 @@ libm_helper! 
{ (fn erf(x: f32) -> (f32); => erff); (fn erfc(x: f32) -> (f32); => erfcf); (fn exp(x: f32) -> (f32); => expf); - (fn exp2(x: f32) -> (f32); => exp2f); (fn exp10(x: f32) -> (f32); => exp10f); + (fn exp2(x: f32) -> (f32); => exp2f); (fn expm1(x: f32) -> (f32); => expm1f); (fn fabs(x: f32) -> (f32); => fabsf); (fn fdim(x: f32, y: f32) -> (f32); => fdimf); @@ -79,12 +81,12 @@ libm_helper! { (fn j1(x: f32) -> (f32); => j1f); (fn jn(n: i32, x: f32) -> (f32); => jnf); (fn ldexp(x: f32, n: i32) -> (f32); => ldexpf); - (fn lgamma_r(x: f32) -> (f32, i32); => lgammaf_r); (fn lgamma(x: f32) -> (f32); => lgammaf); + (fn lgamma_r(x: f32) -> (f32, i32); => lgammaf_r); (fn log(x: f32) -> (f32); => logf); + (fn log10(x: f32) -> (f32); => log10f); (fn log1p(x: f32) -> (f32); => log1pf); (fn log2(x: f32) -> (f32); => log2f); - (fn log10(x: f32) -> (f32); => log10f); (fn modf(x: f32) -> (f32, f32); => modff); (fn nextafter(x: f32, y: f32) -> (f32); => nextafterf); (fn pow(x: f32, y: f32) -> (f32); => powf); @@ -104,12 +106,14 @@ libm_helper! { (fn y0(x: f32) -> (f32); => y0f); (fn y1(x: f32) -> (f32); => y1f); (fn yn(n: i32, x: f32) -> (f32); => ynf); + // verify-sorted-end } } libm_helper! { f64, funcs: { + // verify-sorted-start (fn acos(x: f64) -> (f64); => acos); (fn acosh(x: f64) -> (f64); => acosh); (fn asin(x: f64) -> (f64); => asin); @@ -125,8 +129,8 @@ libm_helper! { (fn erf(x: f64) -> (f64); => erf); (fn erfc(x: f64) -> (f64); => erfc); (fn exp(x: f64) -> (f64); => exp); - (fn exp2(x: f64) -> (f64); => exp2); (fn exp10(x: f64) -> (f64); => exp10); + (fn exp2(x: f64) -> (f64); => exp2); (fn expm1(x: f64) -> (f64); => expm1); (fn fabs(x: f64) -> (f64); => fabs); (fn fdim(x: f64, y: f64) -> (f64); => fdim); @@ -142,12 +146,12 @@ libm_helper! 
{ (fn j1(x: f64) -> (f64); => j1); (fn jn(n: i32, x: f64) -> (f64); => jn); (fn ldexp(x: f64, n: i32) -> (f64); => ldexp); - (fn lgamma_r(x: f64) -> (f64, i32); => lgamma_r); (fn lgamma(x: f64) -> (f64); => lgamma); + (fn lgamma_r(x: f64) -> (f64, i32); => lgamma_r); (fn log(x: f64) -> (f64); => log); + (fn log10(x: f64) -> (f64); => log10); (fn log1p(x: f64) -> (f64); => log1p); (fn log2(x: f64) -> (f64); => log2); - (fn log10(x: f64) -> (f64); => log10); (fn modf(x: f64) -> (f64, f64); => modf); (fn nextafter(x: f64, y: f64) -> (f64); => nextafter); (fn pow(x: f64, y: f64) -> (f64); => pow); @@ -167,6 +171,7 @@ libm_helper! { (fn y0(x: f64) -> (f64); => y0); (fn y1(x: f64) -> (f64); => y1); (fn yn(n: i32, x: f64) -> (f64); => yn); + // verify-sorted-end } } @@ -174,9 +179,22 @@ libm_helper! { libm_helper! { f16, funcs: { + // verify-sorted-start + (fn ceilf(x: f16) -> (f16); => ceilf16); (fn copysign(x: f16, y: f16) -> (f16); => copysignf16); (fn fabs(x: f16) -> (f16); => fabsf16); (fn fdim(x: f16, y: f16) -> (f16); => fdimf16); + (fn floorf(x: f16) -> (f16); => floorf16); + (fn fmaxf(x: f16, y: f16) -> (f16); => fmaxf16); + (fn fminf(x: f16, y: f16) -> (f16); => fminf16); + (fn fmodf(x: f16, y: f16) -> (f16); => fmodf16); + (fn ldexpf16(x: f16, n: i32) -> (f16); => ldexpf16); + (fn rintf(x: f16) -> (f16); => rintf16); + (fn roundf(x: f16) -> (f16); => roundf16); + (fn scalbnf16(x: f16, n: i32) -> (f16); => ldexpf16); + (fn sqrtf(x: f16) -> (f16); => sqrtf16); + (fn truncf(x: f16) -> (f16); => truncf16); + // verify-sorted-end } } @@ -184,8 +202,22 @@ libm_helper! { libm_helper! 
{ f128, funcs: { + // verify-sorted-start + (fn ceil(x: f128) -> (f128); => ceilf128); (fn copysign(x: f128, y: f128) -> (f128); => copysignf128); (fn fabs(x: f128) -> (f128); => fabsf128); (fn fdim(x: f128, y: f128) -> (f128); => fdimf128); + (fn floor(x: f128) -> (f128); => floorf128); + (fn fmax(x: f128, y: f128) -> (f128); => fmaxf128); + (fn fmin(x: f128, y: f128) -> (f128); => fminf128); + (fn fmod(x: f128, y: f128) -> (f128); => fmodf128); + (fn ldexpf128(x: f128, n: i32) -> (f128); => ldexpf128); + (fn rint(x: f128) -> (f128); => rintf128); + (fn round(x: f128) -> (f128); => roundf128); + (fn scalbnf128(x: f128, n: i32) -> (f128); => ldexpf128); + (fn sqrt(x: f128) -> (f128); => sqrtf128); + (fn trunc(x: f128) -> (f128); => truncf128); + // verify-sorted-end } } +// verify-apilist-end diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 9b07dc8a7..f0698ad02 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -341,6 +341,7 @@ pub use self::truncf::truncf; cfg_if! { if #[cfg(f16_enabled)] { + // verify-sorted-start mod ceilf16; mod copysignf16; mod fabsf16; @@ -355,7 +356,9 @@ cfg_if! { mod scalbnf16; mod sqrtf16; mod truncf16; + // verify-sorted-end + // verify-sorted-start pub use self::ceilf16::ceilf16; pub use self::copysignf16::copysignf16; pub use self::fabsf16::fabsf16; @@ -370,11 +373,13 @@ cfg_if! { pub use self::scalbnf16::scalbnf16; pub use self::sqrtf16::sqrtf16; pub use self::truncf16::truncf16; + // verify-sorted-end } } cfg_if! { if #[cfg(f128_enabled)] { + // verify-sorted-start mod ceilf128; mod copysignf128; mod fabsf128; @@ -389,7 +394,9 @@ cfg_if! { mod scalbnf128; mod sqrtf128; mod truncf128; + // verify-sorted-end + // verify-sorted-start pub use self::ceilf128::ceilf128; pub use self::copysignf128::copysignf128; pub use self::fabsf128::fabsf128; @@ -404,6 +411,7 @@ cfg_if! 
{ pub use self::scalbnf128::scalbnf128; pub use self::sqrtf128::sqrtf128; pub use self::truncf128::truncf128; + // verify-sorted-end } } From 765a1730bb8fb0d599e43503e108c9764c79ed73 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 23 Jan 2025 08:28:58 +0000 Subject: [PATCH 1155/1459] Start converting `fma` to a generic function This is the first step toward making `fma` usable for `f128`, and possibly `f32` on platforms where growing to `f64` is not fast. This does not yet work for anything other than `f64`. --- libm/etc/function-definitions.json | 6 +- libm/src/math/fma.rs | 192 +--------------------- libm/src/math/generic/fma.rs | 227 ++++++++++++++++++++++++++ libm/src/math/generic/mod.rs | 2 + libm/src/math/support/float_traits.rs | 4 +- libm/src/math/support/int_traits.rs | 39 +++++ 6 files changed, 278 insertions(+), 192 deletions(-) create mode 100644 libm/src/math/generic/fma.rs diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index a1d3adf59..243862075 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -344,13 +344,15 @@ }, "fma": { "sources": [ - "src/math/fma.rs" + "src/math/fma.rs", + "src/math/generic/fma.rs" ], "type": "f64" }, "fmaf": { "sources": [ - "src/math/fmaf.rs" + "src/math/fmaf.rs", + "src/math/generic/fma.rs" ], "type": "f32" }, diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index 826143d5a..69cc3eb67 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -1,195 +1,9 @@ -use core::{f32, f64}; - -use super::scalbn; - -const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1; - -struct Num { - m: u64, - e: i32, - sign: i32, -} - -fn normalize(x: f64) -> Num { - let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 - - let mut ix: u64 = x.to_bits(); - let mut e: i32 = (ix >> 52) as i32; - let sign: i32 = e & 0x800; - e &= 0x7ff; - if e == 0 { - ix = (x * x1p63).to_bits(); - e = (ix >> 52) as i32 & 0x7ff; - e = if e != 0 { e - 63 } 
else { 0x800 }; - } - ix &= (1 << 52) - 1; - ix |= 1 << 52; - ix <<= 1; - e -= 0x3ff + 52 + 1; - Num { m: ix, e, sign } -} - -#[inline] -fn mul(x: u64, y: u64) -> (u64, u64) { - let t = (x as u128).wrapping_mul(y as u128); - ((t >> 64) as u64, t as u64) -} - -/// Floating multiply add (f64) +/// Fused multiply add (f64) /// -/// Computes `(x*y)+z`, rounded as one ternary operation: -/// Computes the value (as if) to infinite precision and rounds once to the result format, -/// according to the rounding mode characterized by the value of FLT_ROUNDS. +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fma(x: f64, y: f64, z: f64) -> f64 { - let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 - let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63 - - /* normalize so top 10bits and last bit are 0 */ - let nx = normalize(x); - let ny = normalize(y); - let nz = normalize(z); - - if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN { - return x * y + z; - } - if nz.e >= ZEROINFNAN { - if nz.e > ZEROINFNAN { - /* z==0 */ - return x * y + z; - } - return z; - } - - /* mul: r = x*y */ - let zhi: u64; - let zlo: u64; - let (mut rhi, mut rlo) = mul(nx.m, ny.m); - /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */ - - /* align exponents */ - let mut e: i32 = nx.e + ny.e; - let mut d: i32 = nz.e - e; - /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */ - if d > 0 { - if d < 64 { - zlo = nz.m << d; - zhi = nz.m >> (64 - d); - } else { - zlo = 0; - zhi = nz.m; - e = nz.e - 64; - d -= 64; - if d == 0 { - } else if d < 64 { - rlo = (rhi << (64 - d)) | (rlo >> d) | ((rlo << (64 - d)) != 0) as u64; - rhi = rhi >> d; - } else { - rlo = 1; - rhi = 0; - } - } - } else { - zhi = 0; - d = -d; - if d == 0 { - zlo = nz.m; - } else if d < 64 { - zlo = (nz.m >> d) | ((nz.m << (64 - d)) != 0) as u64; - } else 
{ - zlo = 1; - } - } - - /* add */ - let mut sign: i32 = nx.sign ^ ny.sign; - let samesign: bool = (sign ^ nz.sign) == 0; - let mut nonzero: i32 = 1; - if samesign { - /* r += z */ - rlo = rlo.wrapping_add(zlo); - rhi += zhi + (rlo < zlo) as u64; - } else { - /* r -= z */ - let (res, borrow) = rlo.overflowing_sub(zlo); - rlo = res; - rhi = rhi.wrapping_sub(zhi.wrapping_add(borrow as u64)); - if (rhi >> 63) != 0 { - rlo = (rlo as i64).wrapping_neg() as u64; - rhi = (rhi as i64).wrapping_neg() as u64 - (rlo != 0) as u64; - sign = (sign == 0) as i32; - } - nonzero = (rhi != 0) as i32; - } - - /* set rhi to top 63bit of the result (last bit is sticky) */ - if nonzero != 0 { - e += 64; - d = rhi.leading_zeros() as i32 - 1; - /* note: d > 0 */ - rhi = (rhi << d) | (rlo >> (64 - d)) | ((rlo << d) != 0) as u64; - } else if rlo != 0 { - d = rlo.leading_zeros() as i32 - 1; - if d < 0 { - rhi = (rlo >> 1) | (rlo & 1); - } else { - rhi = rlo << d; - } - } else { - /* exact +-0 */ - return x * y + z; - } - e -= d; - - /* convert to double */ - let mut i: i64 = rhi as i64; /* i is in [1<<62,(1<<63)-1] */ - if sign != 0 { - i = -i; - } - let mut r: f64 = i as f64; /* |r| is in [0x1p62,0x1p63] */ - - if e < -1022 - 62 { - /* result is subnormal before rounding */ - if e == -1022 - 63 { - let mut c: f64 = x1p63; - if sign != 0 { - c = -c; - } - if r == c { - /* min normal after rounding, underflow depends - on arch behaviour which can be imitated by - a double to float conversion */ - let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32; - return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64; - } - /* one bit is lost when scaled, add another top bit to - only round once at conversion if it is inexact */ - if (rhi << 53) != 0 { - i = ((rhi >> 1) | (rhi & 1) | (1 << 62)) as i64; - if sign != 0 { - i = -i; - } - r = i as f64; - r = 2. 
* r - c; /* remove top bit */ - - /* raise underflow portably, such that it - cannot be optimized away */ - { - let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r; - r += (tiny * tiny) * (r - r); - } - } - } else { - /* only round once when scaled */ - d = 10; - i = (((rhi >> d) | ((rhi << (64 - d)) != 0) as u64) << d) as i64; - if sign != 0 { - i = -i; - } - r = i as f64; - } - } - scalbn(r, e) + return super::generic::fma(x, y, z); } #[cfg(test)] diff --git a/libm/src/math/generic/fma.rs b/libm/src/math/generic/fma.rs new file mode 100644 index 000000000..3d5459f1a --- /dev/null +++ b/libm/src/math/generic/fma.rs @@ -0,0 +1,227 @@ +use core::{f32, f64}; + +use super::super::support::{DInt, HInt, IntTy}; +use super::super::{CastFrom, CastInto, Float, Int, MinInt}; + +const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1; + +/// Fused multiply-add that works when there is not a larger float size available. Currently this +/// is still specialized only for `f64`. Computes `(x * y) + z`. 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fma(x: F, y: F, z: F) -> F +where + F: Float + FmaHelper, + F: CastFrom, + F: CastFrom, + F::Int: HInt, + u32: CastInto, +{ + let one = IntTy::::ONE; + let zero = IntTy::::ZERO; + let magic = F::from_parts(false, F::BITS - 1 + F::EXP_BIAS, zero); + + /* normalize so top 10bits and last bit are 0 */ + let nx = Norm::from_float(x); + let ny = Norm::from_float(y); + let nz = Norm::from_float(z); + + if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN { + return x * y + z; + } + if nz.e >= ZEROINFNAN { + if nz.e > ZEROINFNAN { + /* z==0 */ + return x * y + z; + } + return z; + } + + /* mul: r = x*y */ + let zhi: F::Int; + let zlo: F::Int; + let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi(); + + /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */ + + /* align exponents */ + let mut e: i32 = nx.e + ny.e; + let mut d: i32 = nz.e - e; + let sbits = F::BITS as i32; + + /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */ + if d > 0 { + if d < sbits { + zlo = nz.m << d; + zhi = nz.m >> (sbits - d); + } else { + zlo = zero; + zhi = nz.m; + e = nz.e - sbits; + d -= sbits; + if d == 0 { + } else if d < sbits { + rlo = (rhi << (sbits - d)) + | (rlo >> d) + | IntTy::::from((rlo << (sbits - d)) != zero); + rhi = rhi >> d; + } else { + rlo = one; + rhi = zero; + } + } + } else { + zhi = zero; + d = -d; + if d == 0 { + zlo = nz.m; + } else if d < sbits { + zlo = (nz.m >> d) | IntTy::::from((nz.m << (sbits - d)) != zero); + } else { + zlo = one; + } + } + + /* add */ + let mut neg = nx.neg ^ ny.neg; + let samesign: bool = !neg ^ nz.neg; + let mut nonzero: i32 = 1; + if samesign { + /* r += z */ + rlo = rlo.wrapping_add(zlo); + rhi += zhi + IntTy::::from(rlo < zlo); + } else { + /* r -= z */ + let (res, borrow) = rlo.overflowing_sub(zlo); + rlo = res; + rhi = rhi.wrapping_sub(zhi.wrapping_add(IntTy::::from(borrow))); + if (rhi >> (F::BITS - 1)) != zero { + rlo = 
rlo.signed().wrapping_neg().unsigned(); + rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::::from(rlo != zero); + neg = !neg; + } + nonzero = (rhi != zero) as i32; + } + + /* set rhi to top 63bit of the result (last bit is sticky) */ + if nonzero != 0 { + e += sbits; + d = rhi.leading_zeros() as i32 - 1; + /* note: d > 0 */ + rhi = (rhi << d) | (rlo >> (sbits - d)) | IntTy::::from((rlo << d) != zero); + } else if rlo != zero { + d = rlo.leading_zeros() as i32 - 1; + if d < 0 { + rhi = (rlo >> 1) | (rlo & one); + } else { + rhi = rlo << d; + } + } else { + /* exact +-0 */ + return x * y + z; + } + e -= d; + + /* convert to double */ + let mut i: F::SignedInt = rhi.signed(); /* i is in [1<<62,(1<<63)-1] */ + if neg { + i = -i; + } + + let mut r: F = F::cast_from_lossy(i); /* |r| is in [0x1p62,0x1p63] */ + + if e < -(F::EXP_BIAS as i32 - 1) - (sbits - 2) { + /* result is subnormal before rounding */ + if e == -(F::EXP_BIAS as i32 - 1) - (sbits - 1) { + let mut c: F = magic; + if neg { + c = -c; + } + if r == c { + /* min normal after rounding, underflow depends + * on arch behaviour which can be imitated by + * a double to float conversion */ + return r.raise_underflow(); + } + /* one bit is lost when scaled, add another top bit to + * only round once at conversion if it is inexact */ + if (rhi << F::SIG_BITS) != zero { + let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << 62); + i = iu.signed(); + if neg { + i = -i; + } + r = F::cast_from_lossy(i); + r = F::cast_from(2i8) * r - c; /* remove top bit */ + + /* raise underflow portably, such that it + * cannot be optimized away */ + r += r.raise_underflow2(); + } + } else { + /* only round once when scaled */ + d = 10; + i = (((rhi >> d) | IntTy::::from(rhi << (F::BITS as i32 - d) != zero)) << d) + .signed(); + if neg { + i = -i; + } + r = F::cast_from(i); + } + } + + super::scalbn(r, e) +} + +/// Representation of `F` that has handled subnormals. +struct Norm { + /// Normalized significand with one guard bit. 
+ m: F::Int, + /// Unbiased exponent, normalized. + e: i32, + neg: bool, +} + +impl Norm { + fn from_float(x: F) -> Self { + let mut ix = x.to_bits(); + let mut e = x.exp() as i32; + let neg = x.is_sign_negative(); + if e == 0 { + // Normalize subnormals by multiplication + let magic = F::from_parts(false, F::BITS - 1 + F::EXP_BIAS, F::Int::ZERO); + let scaled = x * magic; + ix = scaled.to_bits(); + e = scaled.exp() as i32; + e = if e != 0 { e - (F::BITS as i32 - 1) } else { 0x800 }; + } + + e -= F::EXP_BIAS as i32 + 52 + 1; + + ix &= F::SIG_MASK; + ix |= F::IMPLICIT_BIT; + ix <<= 1; // add a guard bit + + Self { m: ix, e, neg } + } +} + +/// Type-specific helpers that are not needed outside of fma. +pub trait FmaHelper { + fn raise_underflow(self) -> Self; + fn raise_underflow2(self) -> Self; +} + +impl FmaHelper for f64 { + fn raise_underflow(self) -> Self { + let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63 + let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * self) as f32; + f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64 + } + + fn raise_underflow2(self) -> Self { + /* raise underflow portably, such that it + * cannot be optimized away */ + let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * self; + (tiny * tiny) * (self - self) + } +} diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index 68686b0b2..e19cc83a9 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -3,6 +3,7 @@ mod copysign; mod fabs; mod fdim; mod floor; +mod fma; mod fmax; mod fmin; mod fmod; @@ -17,6 +18,7 @@ pub use copysign::copysign; pub use fabs::fabs; pub use fdim::fdim; pub use floor::floor; +pub use fma::fma; pub use fmax::fmax; pub use fmin::fmin; pub use fmod::fmod; diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 1fe2cb424..24cf7d4b0 100644 --- a/libm/src/math/support/float_traits.rs +++ 
b/libm/src/math/support/float_traits.rs @@ -23,7 +23,9 @@ pub trait Float: type Int: Int; /// A int of the same width as the float - type SignedInt: Int + MinInt; + type SignedInt: Int + + MinInt + + ops::Neg; const ZERO: Self; const NEG_ZERO: Self; diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index b403c658c..793a0f306 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -52,10 +52,14 @@ pub trait Int: + ops::Sub + ops::Mul + ops::Div + + ops::Shl + + ops::Shl + + ops::Shr + ops::Shr + ops::BitXor + ops::BitAnd + cmp::Ord + + From + CastFrom + CastFrom + CastFrom @@ -92,6 +96,7 @@ pub trait Int: fn wrapping_shr(self, other: u32) -> Self; fn rotate_left(self, other: u32) -> Self; fn overflowing_add(self, other: Self) -> (Self, bool); + fn overflowing_sub(self, other: Self) -> (Self, bool); fn leading_zeros(self) -> u32; fn ilog2(self) -> u32; } @@ -150,6 +155,10 @@ macro_rules! int_impl_common { ::overflowing_add(self, other) } + fn overflowing_sub(self, other: Self) -> (Self, bool) { + ::overflowing_sub(self, other) + } + fn leading_zeros(self) -> u32 { ::leading_zeros(self) } @@ -399,6 +408,30 @@ macro_rules! cast_into { )*}; } +macro_rules! 
cast_into_float { + ($ty:ty) => { + #[cfg(f16_enabled)] + cast_into_float!($ty; f16); + + cast_into_float!($ty; f32, f64); + + #[cfg(f128_enabled)] + cast_into_float!($ty; f128); + }; + ($ty:ty; $($into:ty),*) => {$( + impl CastInto<$into> for $ty { + fn cast(self) -> $into { + debug_assert_eq!(self as $into as $ty, self, "inexact float cast"); + self as $into + } + + fn cast_lossy(self) -> $into { + self as $into + } + } + )*}; +} + cast_into!(usize); cast_into!(isize); cast_into!(u8); @@ -411,3 +444,9 @@ cast_into!(u64); cast_into!(i64); cast_into!(u128); cast_into!(i128); + +cast_into_float!(i8); +cast_into_float!(i16); +cast_into_float!(i32); +cast_into_float!(i64); +cast_into_float!(i128); From cc0c261243c90cb0af2054958339dfa12ad0af2d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 5 Feb 2025 21:18:33 +0000 Subject: [PATCH 1156/1459] Commonize the signature for all instances of `get_test_cases` In order to make these more interchangeable in more places, always return `(impl Iterator, u64)`. This will facilitate using other generators for extensive tests. 
--- libm/crates/libm-test/benches/random.rs | 2 +- .../crates/libm-test/examples/plot_domains.rs | 8 ++++- libm/crates/libm-test/src/gen/edge_cases.rs | 29 +++++++++-------- libm/crates/libm-test/src/gen/random.rs | 31 ++++++++++--------- .../libm-test/tests/compare_built_musl.rs | 4 +-- libm/crates/libm-test/tests/multiprecision.rs | 4 +-- 6 files changed, 45 insertions(+), 33 deletions(-) diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 56d288c33..66486a56a 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -54,7 +54,7 @@ where let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl, GeneratorKind::Random); let benchvec: Vec<_> = - random::get_test_cases::(&ctx).take(BENCH_ITER_ITEMS).collect(); + random::get_test_cases::(&ctx).0.take(BENCH_ITER_ITEMS).collect(); // Perform a sanity check that we are benchmarking the same thing // Don't test against musl if it is not available diff --git a/libm/crates/libm-test/examples/plot_domains.rs b/libm/crates/libm-test/examples/plot_domains.rs index fb7b854df..441889c69 100644 --- a/libm/crates/libm-test/examples/plot_domains.rs +++ b/libm/crates/libm-test/examples/plot_domains.rs @@ -58,7 +58,13 @@ where let mut ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr, GeneratorKind::QuickSpaced); plot_one_generator(out_dir, &ctx, "logspace", config, spaced::get_test_cases::(&ctx).0); ctx.gen_kind = GeneratorKind::EdgeCases; - plot_one_generator(out_dir, &ctx, "edge_cases", config, edge_cases::get_test_cases::(&ctx)); + plot_one_generator( + out_dir, + &ctx, + "edge_cases", + config, + edge_cases::get_test_cases::(&ctx).0, + ); } /// Plot the output of a single generator. 
diff --git a/libm/crates/libm-test/src/gen/edge_cases.rs b/libm/crates/libm-test/src/gen/edge_cases.rs index d4014bdb3..8de954ae3 100644 --- a/libm/crates/libm-test/src/gen/edge_cases.rs +++ b/libm/crates/libm-test/src/gen/edge_cases.rs @@ -9,7 +9,7 @@ use crate::{CheckCtx, FloatExt, MathOp, test_log}; /// Generate a sequence of edge cases, e.g. numbers near zeroes and infiniteis. pub trait EdgeCaseInput { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator + Send; + fn get_cases(ctx: &CheckCtx) -> (impl Iterator + Send, u64); } /// Create a list of values around interesting points (infinities, zeroes, NaNs). @@ -140,10 +140,10 @@ macro_rules! impl_edge_case_input { where Op: MathOp, { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { let (iter0, steps0) = float_edge_cases::(ctx, 0); let iter0 = iter0.map(|v| (v,)); - KnownSize::new(iter0, steps0) + (iter0, steps0) } } @@ -151,13 +151,13 @@ macro_rules! impl_edge_case_input { where Op: MathOp, { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { let (iter0, steps0) = float_edge_cases::(ctx, 0); let (iter1, steps1) = float_edge_cases::(ctx, 1); let iter = iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); let count = steps0.checked_mul(steps1).unwrap(); - KnownSize::new(iter, count) + (iter, count) } } @@ -165,7 +165,7 @@ macro_rules! impl_edge_case_input { where Op: MathOp, { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { let (iter0, steps0) = float_edge_cases::(ctx, 0); let (iter1, steps1) = float_edge_cases::(ctx, 1); let (iter2, steps2) = float_edge_cases::(ctx, 2); @@ -177,7 +177,7 @@ macro_rules! impl_edge_case_input { }); let count = steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap(); - KnownSize::new(iter, count) + (iter, count) } } @@ -185,7 +185,7 @@ macro_rules! 
impl_edge_case_input { where Op: MathOp, { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { let (iter0, steps0) = int_edge_cases(ctx, 0); let (iter1, steps1) = float_edge_cases::(ctx, 1); @@ -193,7 +193,7 @@ macro_rules! impl_edge_case_input { iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); let count = steps0.checked_mul(steps1).unwrap(); - KnownSize::new(iter, count) + (iter, count) } } @@ -201,7 +201,7 @@ macro_rules! impl_edge_case_input { where Op: MathOp, { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { let (iter0, steps0) = float_edge_cases::(ctx, 0); let (iter1, steps1) = int_edge_cases(ctx, 1); @@ -209,7 +209,7 @@ macro_rules! impl_edge_case_input { iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); let count = steps0.checked_mul(steps1).unwrap(); - KnownSize::new(iter, count) + (iter, count) } } }; @@ -224,10 +224,13 @@ impl_edge_case_input!(f128); pub fn get_test_cases( ctx: &CheckCtx, -) -> impl ExactSizeIterator + use<'_, Op> +) -> (impl Iterator + Send + use<'_, Op>, u64) where Op: MathOp, Op::RustArgs: EdgeCaseInput, { - Op::RustArgs::get_cases(ctx) + let (iter, count) = Op::RustArgs::get_cases(ctx); + + // Wrap in `KnownSize` so we get an assertion if the count is wrong. + (KnownSize::new(iter, count), count) } diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs index 56c39981a..5b127f38d 100644 --- a/libm/crates/libm-test/src/gen/random.rs +++ b/libm/crates/libm-test/src/gen/random.rs @@ -26,8 +26,8 @@ pub(crate) static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| { }); /// Generate a sequence of random values of this type.
-pub trait RandomInput { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator; +pub trait RandomInput: Sized { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator + Send, u64); } /// Generate a sequence of deterministically random floats. @@ -51,25 +51,25 @@ fn random_ints(count: u64, range: RangeInclusive) -> impl Iterator { impl RandomInput for ($fty,) { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { let count = iteration_count(ctx, 0); let iter = random_floats(count).map(|f: $fty| (f,)); - KnownSize::new(iter, count) + (iter, count) } } impl RandomInput for ($fty, $fty) { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { let count0 = iteration_count(ctx, 0); let count1 = iteration_count(ctx, 1); let iter = random_floats(count0) .flat_map(move |f1: $fty| random_floats(count1).map(move |f2: $fty| (f1, f2))); - KnownSize::new(iter, count0 * count1) + (iter, count0 * count1) } } impl RandomInput for ($fty, $fty, $fty) { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { let count0 = iteration_count(ctx, 0); let count1 = iteration_count(ctx, 1); let count2 = iteration_count(ctx, 2); @@ -78,30 +78,30 @@ macro_rules! 
impl_random_input { random_floats(count2).map(move |f3: $fty| (f1, f2, f3)) }) }); - KnownSize::new(iter, count0 * count1 * count2) + (iter, count0 * count1 * count2) } } impl RandomInput for (i32, $fty) { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { let count0 = iteration_count(ctx, 0); let count1 = iteration_count(ctx, 1); let range0 = int_range(ctx, 0); let iter = random_ints(count0, range0) .flat_map(move |f1: i32| random_floats(count1).map(move |f2: $fty| (f1, f2))); - KnownSize::new(iter, count0 * count1) + (iter, count0 * count1) } } impl RandomInput for ($fty, i32) { - fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { let count0 = iteration_count(ctx, 0); let count1 = iteration_count(ctx, 1); let range1 = int_range(ctx, 1); let iter = random_floats(count0).flat_map(move |f1: $fty| { random_ints(count1, range1.clone()).map(move |f2: i32| (f1, f2)) }); - KnownSize::new(iter, count0 * count1) + (iter, count0 * count1) } } }; @@ -117,6 +117,9 @@ impl_random_input!(f128); /// Create a test case iterator. pub fn get_test_cases( ctx: &CheckCtx, -) -> impl Iterator + use<'_, RustArgs> { - RustArgs::get_cases(ctx) +) -> (impl Iterator + Send + use<'_, RustArgs>, u64) { + let (iter, count) = RustArgs::get_cases(ctx); + + // Wrap in `KnownSize` so we get an assertion if the count is wrong. + (KnownSize::new(iter, count), count) } diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 0b0a9f097..c8beaffc3 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -39,7 +39,7 @@ macro_rules!
musl_tests { fn [< musl_random_ $fn_name >]() { type Op = libm_test::op::$fn_name::Routine; let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Random); - let cases = random::get_test_cases::<::RustArgs>(&ctx); + let cases = random::get_test_cases::<::RustArgs>(&ctx).0; musl_runner::(&ctx, cases, musl_math_sys::$fn_name); } @@ -48,7 +48,7 @@ macro_rules! musl_tests { fn [< musl_edge_case_ $fn_name >]() { type Op = libm_test::op::$fn_name::Routine; let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::EdgeCases); - let cases = edge_cases::get_test_cases::(&ctx); + let cases = edge_cases::get_test_cases::(&ctx).0; musl_runner::(&ctx, cases, musl_math_sys::$fn_name); } diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 761ca1f85..0d5c5e60c 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -29,7 +29,7 @@ macro_rules! mp_tests { fn [< mp_random_ $fn_name >]() { type Op = libm_test::op::$fn_name::Routine; let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Random); - let cases = random::get_test_cases::<::RustArgs>(&ctx); + let cases = random::get_test_cases::<::RustArgs>(&ctx).0; mp_runner::(&ctx, cases); } @@ -38,7 +38,7 @@ macro_rules! mp_tests { fn [< mp_edge_case_ $fn_name >]() { type Op = libm_test::op::$fn_name::Routine; let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::EdgeCases); - let cases = edge_cases::get_test_cases::(&ctx); + let cases = edge_cases::get_test_cases::(&ctx).0; mp_runner::(&ctx, cases); } From 90e6f8834c9274ff4319a725ce82609ab1efd40b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 6 Feb 2025 00:02:00 +0000 Subject: [PATCH 1157/1459] Print the hex float format upon failure Now that we have a hex float formatter, make use of it for test output. This produces values that are easier to read than the bitwise hex representation. 
Example: thread 'mp_quickspace_fmaf128' panicked at crates/libm-test/tests/multiprecision.rs:17:48: called `Result::unwrap()` on an `Err` value: input: (0xe38d71c71c71c71c71c71c71c71c71c8, 0xe38d71c71c71c71c71c71c71c71c71c8, 0xffff0000000000000000000000000000) as hex: (-0x1.71c71c71c71c71c71c71c71c71c8p+9102, -0x1.71c71c71c71c71c71c71c71c71c8p+9102, -inf) as bits: (0xe38d71c71c71c71c71c71c71c71c71c8, 0xe38d71c71c71c71c71c71c71c71c71c8, 0xffff0000000000000000000000000000) expected: 0xffff0000000000000000000000000000 -inf 0xffff0000000000000000000000000000 actual: 0x7fff8000000000000000000000000000 NaN 0x7fff8000000000000000000000000000 Caused by: real value != NaN --- libm/crates/libm-test/src/test_traits.rs | 46 ++++++++++++++++++++---- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index a5806943e..1bd5bce16 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -9,6 +9,7 @@ use std::fmt; use anyhow::{Context, anyhow, bail, ensure}; +use libm::support::Hexf; use crate::precision::CheckAction; use crate::{CheckCtx, Float, Int, MaybeOverride, SpecialCase, TestResult}; @@ -35,7 +36,10 @@ pub trait CheckOutput: Sized { /// /// This is only used for printing errors so allocating is okay. pub trait Hex: Copy { + /// Hex integer syntax. fn hex(self) -> String; + /// Hex float syntax. 
+ fn hexf(self) -> String; } /* implement `TupleCall` */ @@ -128,6 +132,10 @@ where fn hex(self) -> String { format!("({},)", self.0.hex()) } + + fn hexf(self) -> String { + format!("({},)", self.0.hexf()) + } } impl Hex for (T1, T2) @@ -138,6 +146,10 @@ where fn hex(self) -> String { format!("({}, {})", self.0.hex(), self.1.hex()) } + + fn hexf(self) -> String { + format!("({}, {})", self.0.hexf(), self.1.hexf()) + } } impl Hex for (T1, T2, T3) @@ -149,6 +161,10 @@ where fn hex(self) -> String { format!("({}, {}, {})", self.0.hex(), self.1.hex(), self.2.hex()) } + + fn hexf(self) -> String { + format!("({}, {}, {})", self.0.hexf(), self.1.hexf(), self.2.hexf()) + } } /* trait implementations for ints */ @@ -160,6 +176,10 @@ macro_rules! impl_int { fn hex(self) -> String { format!("{self:#0width$x}", width = ((Self::BITS / 4) + 2) as usize) } + + fn hexf(self) -> String { + String::new() + } } impl $crate::CheckOutput for $ty @@ -234,6 +254,10 @@ macro_rules! impl_float { width = ((Self::BITS / 4) + 2) as usize ) } + + fn hexf(self) -> String { + format!("{}", Hexf(self)) + } } impl $crate::CheckOutput for $ty @@ -324,13 +348,18 @@ where res.with_context(|| { format!( "\ - \n input: {input:?} {ibits}\ - \n expected: {expected:<22?} {expbits}\ - \n actual: {actual:<22?} {actbits}\ + \n input: {input:?}\ + \n as hex: {ihex}\ + \n as bits: {ibits}\ + \n expected: {expected:<22?} {exphex} {expbits}\ + \n actual: {actual:<22?} {acthex} {actbits}\ ", - actbits = actual.hex(), - expbits = expected.hex(), + ihex = input.hexf(), ibits = input.hex(), + exphex = expected.hexf(), + expbits = expected.hex(), + actbits = actual.hex(), + acthex = actual.hexf(), ) }) } @@ -365,12 +394,15 @@ macro_rules! 
impl_tuples { .with_context(|| format!( "full context:\ \n input: {input:?} {ibits}\ + \n as hex: {ihex}\ + \n as bits: {ibits}\ \n expected: {expected:?} {expbits}\ \n actual: {self:?} {actbits}\ ", - actbits = self.hex(), - expbits = expected.hex(), + ihex = input.hexf(), ibits = input.hex(), + expbits = expected.hex(), + actbits = self.hex(), )) } } From cf5dadd339937db9a74dc59de629d6b0b84c740e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 6 Feb 2025 00:34:56 +0000 Subject: [PATCH 1158/1459] fma: Ensure zero has the correct sign Currently, `fma(tiny, -tiny, 0.0)` returns 0.0 while the answer should be -0.0. This is because `-0.0 + 0.0 = +0.0` in the default rounding mode; however, the result should be negative. Musl has the same pattern but that version worked because the C compiler was contracting `x*y + z` to (ironically) `fmadd`. Musl was fixed in 9683bd6241 ("math: fix fma(x,y,0) when x*y rounds to -0"). Add the same fix here, which allows dropping the xfails. --- libm/crates/libm-test/src/precision.rs | 47 ++------------------------ libm/src/math/generic/fma.rs | 2 +- 2 files changed, 3 insertions(+), 46 deletions(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 051960b7a..596f91fe1 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -558,48 +558,5 @@ impl MaybeOverride<(f64, i32)> for SpecialCase {} #[cfg(f128_enabled)] impl MaybeOverride<(f128, i32)> for SpecialCase {} -impl MaybeOverride<(f32, f32, f32)> for SpecialCase { - fn check_float( - input: (f32, f32, f32), - actual: F, - expected: F, - ctx: &CheckCtx, - ) -> CheckAction { - ternop_common(input, actual, expected, ctx) - } -} -impl MaybeOverride<(f64, f64, f64)> for SpecialCase { - fn check_float( - input: (f64, f64, f64), - actual: F, - expected: F, - ctx: &CheckCtx, - ) -> CheckAction { - ternop_common(input, actual, expected, ctx) - } -} - -// F1 and F2 are always the same type, this is 
just to please generics -fn ternop_common( - input: (F1, F1, F1), - actual: F2, - expected: F2, - ctx: &CheckCtx, -) -> CheckAction { - // FIXME(fma): 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result - // of fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the - // exact result". Our implementation returns the wrong sign: - // fma(5e-324, -5e-324, 0.0) = 0.0 (should be -0.0) - if ctx.base_name == BaseName::Fma - && (input.0.is_sign_negative() ^ input.1.is_sign_negative()) - && input.0 != F1::ZERO - && input.1 != F1::ZERO - && input.2.biteq(F1::ZERO) - && expected.biteq(F2::NEG_ZERO) - && actual.biteq(F2::ZERO) - { - return XFAIL("fma sign"); - } - - DEFAULT -} +impl MaybeOverride<(f32, f32, f32)> for SpecialCase {} +impl MaybeOverride<(f64, f64, f64)> for SpecialCase {} diff --git a/libm/src/math/generic/fma.rs b/libm/src/math/generic/fma.rs index 3d5459f1a..b0e2117ea 100644 --- a/libm/src/math/generic/fma.rs +++ b/libm/src/math/generic/fma.rs @@ -31,7 +31,7 @@ where if nz.e >= ZEROINFNAN { if nz.e > ZEROINFNAN { /* z==0 */ - return x * y + z; + return x * y; } return z; } From 3a1969b6b256bff47357be41316922bff347eaa9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 6 Feb 2025 01:59:44 +0000 Subject: [PATCH 1159/1459] Switch `musl` to track `master` A few bugs have been fixed, including the sign of `fma(tiny, -tiny, 0.0)`. Switch to tracking `master` rather than the latest tag so we don't need to xfail these tests. 
--- libm/crates/musl-math-sys/musl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/crates/musl-math-sys/musl b/libm/crates/musl-math-sys/musl index 0784374d5..61399d4bd 160000 --- a/libm/crates/musl-math-sys/musl +++ b/libm/crates/musl-math-sys/musl @@ -1 +1 @@ -Subproject commit 0784374d561435f7c787a555aeab8ede699ed298 +Subproject commit 61399d4bd02ae1ec03068445aa7ffe9174466bfd From 3d78de2367efa913bd760662e63407333ac91294 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 6 Feb 2025 04:03:11 +0000 Subject: [PATCH 1160/1459] Add an integration test that verifies a list of cases We need someplace to collect known failures, previous regressions, edge cases that are difficult to construct from generics, and similar. Introduce this here. --- libm/crates/libm-test/src/gen.rs | 1 + libm/crates/libm-test/src/gen/case_list.rs | 686 ++++++++++++++++++ libm/crates/libm-test/src/run_cfg.rs | 8 +- .../libm-test/tests/compare_built_musl.rs | 11 +- libm/crates/libm-test/tests/multiprecision.rs | 11 +- libm/crates/libm-test/tests/standalone.rs | 38 + 6 files changed, 750 insertions(+), 5 deletions(-) create mode 100644 libm/crates/libm-test/src/gen/case_list.rs create mode 100644 libm/crates/libm-test/tests/standalone.rs diff --git a/libm/crates/libm-test/src/gen.rs b/libm/crates/libm-test/src/gen.rs index e0a7f5766..89ca09a7a 100644 --- a/libm/crates/libm-test/src/gen.rs +++ b/libm/crates/libm-test/src/gen.rs @@ -1,5 +1,6 @@ //! Different generators that can create random or systematic bit patterns. +pub mod case_list; pub mod edge_cases; pub mod random; pub mod spaced; diff --git a/libm/crates/libm-test/src/gen/case_list.rs b/libm/crates/libm-test/src/gen/case_list.rs new file mode 100644 index 000000000..9720f68e9 --- /dev/null +++ b/libm/crates/libm-test/src/gen/case_list.rs @@ -0,0 +1,686 @@ +//! Test cases to verify specific values. +//! +//! Each routine can have a set of inputs and, optionally, outputs. If an output is provided, it +//!
will be used to check against. If only inputs are provided, the case will be checked against +//! a basis. +//! +//! This is useful for adding regression tests or expected failures. + +use crate::{CheckBasis, CheckCtx, GeneratorKind, MathOp, op}; + +pub struct TestCase { + pub input: Op::RustArgs, + pub output: Option, +} + +impl TestCase { + #[expect(dead_code)] + fn append_inputs(v: &mut Vec, l: &[Op::RustArgs]) { + v.extend(l.iter().copied().map(|input| Self { input, output: None })); + } + + fn append_pairs(v: &mut Vec, l: &[(Op::RustArgs, Option)]) + where + Op::RustRet: Copy, + { + v.extend(l.iter().copied().map(|(input, output)| Self { input, output })); + } +} + +fn acos_cases() -> Vec> { + vec![] +} + +fn acosf_cases() -> Vec> { + vec![] +} + +fn acosh_cases() -> Vec> { + vec![] +} + +fn acoshf_cases() -> Vec> { + vec![] +} + +fn asin_cases() -> Vec> { + vec![] +} + +fn asinf_cases() -> Vec> { + vec![] +} + +fn asinh_cases() -> Vec> { + vec![] +} + +fn asinhf_cases() -> Vec> { + vec![] +} + +fn atan_cases() -> Vec> { + vec![] +} + +fn atan2_cases() -> Vec> { + vec![] +} + +fn atan2f_cases() -> Vec> { + vec![] +} + +fn atanf_cases() -> Vec> { + vec![] +} + +fn atanh_cases() -> Vec> { + vec![] +} + +fn atanhf_cases() -> Vec> { + vec![] +} + +fn cbrt_cases() -> Vec> { + vec![] +} + +fn cbrtf_cases() -> Vec> { + vec![] +} + +fn ceil_cases() -> Vec> { + vec![] +} + +fn ceilf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn ceilf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn ceilf16_cases() -> Vec> { + vec![] +} + +fn copysign_cases() -> Vec> { + vec![] +} + +fn copysignf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn copysignf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn copysignf16_cases() -> Vec> { + vec![] +} + +fn cos_cases() -> Vec> { + vec![] +} + +fn cosf_cases() -> Vec> { + vec![] +} + +fn cosh_cases() -> Vec> { + vec![] +} + +fn coshf_cases() -> Vec> { + vec![] +} + +fn erf_cases() -> Vec> { + vec![] +} 
+ +fn erfc_cases() -> Vec> { + vec![] +} + +fn erfcf_cases() -> Vec> { + vec![] +} + +fn erff_cases() -> Vec> { + vec![] +} + +fn exp_cases() -> Vec> { + vec![] +} + +fn exp10_cases() -> Vec> { + vec![] +} + +fn exp10f_cases() -> Vec> { + vec![] +} + +fn exp2_cases() -> Vec> { + vec![] +} + +fn exp2f_cases() -> Vec> { + vec![] +} + +fn expf_cases() -> Vec> { + vec![] +} + +fn expm1_cases() -> Vec> { + vec![] +} + +fn expm1f_cases() -> Vec> { + vec![] +} + +fn fabs_cases() -> Vec> { + vec![] +} + +fn fabsf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fabsf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fabsf16_cases() -> Vec> { + vec![] +} + +fn fdim_cases() -> Vec> { + vec![] +} + +fn fdimf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fdimf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fdimf16_cases() -> Vec> { + vec![] +} + +fn floor_cases() -> Vec> { + vec![] +} + +fn floorf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn floorf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn floorf16_cases() -> Vec> { + vec![] +} + +fn fma_cases() -> Vec> { + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Previously failure with incorrect sign + ((5e-324, -5e-324, 0.0), Some(-0.0)), + ], + ); + v +} + +fn fmaf_cases() -> Vec> { + vec![] +} + +fn fmax_cases() -> Vec> { + vec![] +} + +fn fmaxf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fmaxf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fmaxf16_cases() -> Vec> { + vec![] +} + +fn fmin_cases() -> Vec> { + vec![] +} + +fn fminf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fminf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fminf16_cases() -> Vec> { + vec![] +} + +fn fmod_cases() -> Vec> { + vec![] +} + +fn fmodf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fmodf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fmodf16_cases() -> Vec> { + vec![] +} + +fn 
frexp_cases() -> Vec> { + vec![] +} + +fn frexpf_cases() -> Vec> { + vec![] +} + +fn hypot_cases() -> Vec> { + vec![] +} + +fn hypotf_cases() -> Vec> { + vec![] +} + +fn ilogb_cases() -> Vec> { + vec![] +} + +fn ilogbf_cases() -> Vec> { + vec![] +} + +fn j0_cases() -> Vec> { + vec![] +} + +fn j0f_cases() -> Vec> { + vec![] +} + +fn j1_cases() -> Vec> { + vec![] +} + +fn j1f_cases() -> Vec> { + vec![] +} + +fn jn_cases() -> Vec> { + vec![] +} + +fn jnf_cases() -> Vec> { + vec![] +} + +fn ldexp_cases() -> Vec> { + vec![] +} + +fn ldexpf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn ldexpf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn ldexpf16_cases() -> Vec> { + vec![] +} + +fn lgamma_cases() -> Vec> { + vec![] +} + +fn lgamma_r_cases() -> Vec> { + vec![] +} + +fn lgammaf_cases() -> Vec> { + vec![] +} + +fn lgammaf_r_cases() -> Vec> { + vec![] +} + +fn log_cases() -> Vec> { + vec![] +} + +fn log10_cases() -> Vec> { + vec![] +} + +fn log10f_cases() -> Vec> { + vec![] +} + +fn log1p_cases() -> Vec> { + vec![] +} + +fn log1pf_cases() -> Vec> { + vec![] +} + +fn log2_cases() -> Vec> { + vec![] +} + +fn log2f_cases() -> Vec> { + vec![] +} + +fn logf_cases() -> Vec> { + vec![] +} + +fn modf_cases() -> Vec> { + vec![] +} + +fn modff_cases() -> Vec> { + vec![] +} + +fn nextafter_cases() -> Vec> { + vec![] +} + +fn nextafterf_cases() -> Vec> { + vec![] +} + +fn pow_cases() -> Vec> { + vec![] +} + +fn powf_cases() -> Vec> { + vec![] +} + +fn remainder_cases() -> Vec> { + vec![] +} + +fn remainderf_cases() -> Vec> { + vec![] +} + +fn remquo_cases() -> Vec> { + vec![] +} + +fn remquof_cases() -> Vec> { + vec![] +} + +fn rint_cases() -> Vec> { + vec![] +} + +fn rintf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn rintf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn rintf16_cases() -> Vec> { + vec![] +} + +fn round_cases() -> Vec> { + vec![] +} + +fn roundf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn roundf128_cases() 
-> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn roundf16_cases() -> Vec> { + vec![] +} + +fn scalbn_cases() -> Vec> { + vec![] +} + +fn scalbnf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn scalbnf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn scalbnf16_cases() -> Vec> { + vec![] +} + +fn sin_cases() -> Vec> { + vec![] +} + +fn sincos_cases() -> Vec> { + vec![] +} + +fn sincosf_cases() -> Vec> { + vec![] +} + +fn sinf_cases() -> Vec> { + vec![] +} + +fn sinh_cases() -> Vec> { + vec![] +} + +fn sinhf_cases() -> Vec> { + vec![] +} + +fn sqrt_cases() -> Vec> { + vec![] +} + +fn sqrtf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn sqrtf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn sqrtf16_cases() -> Vec> { + vec![] +} + +fn tan_cases() -> Vec> { + vec![] +} + +fn tanf_cases() -> Vec> { + vec![] +} + +fn tanh_cases() -> Vec> { + vec![] +} + +fn tanhf_cases() -> Vec> { + vec![] +} + +fn tgamma_cases() -> Vec> { + vec![] +} + +fn tgammaf_cases() -> Vec> { + vec![] +} + +fn trunc_cases() -> Vec> { + vec![] +} + +fn truncf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn truncf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn truncf16_cases() -> Vec> { + vec![] +} + +fn y0_cases() -> Vec> { + vec![] +} + +fn y0f_cases() -> Vec> { + vec![] +} + +fn y1_cases() -> Vec> { + vec![] +} + +fn y1f_cases() -> Vec> { + vec![] +} + +fn yn_cases() -> Vec> { + vec![] +} + +fn ynf_cases() -> Vec> { + vec![] +} + +pub trait CaseListInput: MathOp + Sized { + fn get_cases() -> Vec>; +} + +macro_rules! impl_case_list { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + ) => { + paste::paste! { + $(#[$attr])* + impl CaseListInput for crate::op::$fn_name::Routine { + fn get_cases() -> Vec> { + [< $fn_name _cases >]() + } + } + } + }; +} + +libm_macros::for_each_function! { + callback: impl_case_list, +} + +/// This is the test generator for standalone tests, i.e. those with no basis. 
For this, it +/// only extracts tests with a known output. +pub fn get_test_cases_standalone( + ctx: &CheckCtx, +) -> impl Iterator + use<'_, Op> +where + Op: MathOp + CaseListInput, +{ + assert_eq!(ctx.basis, CheckBasis::None); + assert_eq!(ctx.gen_kind, GeneratorKind::List); + Op::get_cases().into_iter().filter_map(|x| x.output.map(|o| (x.input, o))) +} + +/// Opposite of the above; extract only test cases that don't have a known output, to be run +/// against a basis. +pub fn get_test_cases_basis( + ctx: &CheckCtx, +) -> (impl Iterator + use<'_, Op>, u64) +where + Op: MathOp + CaseListInput, +{ + assert_ne!(ctx.basis, CheckBasis::None); + assert_eq!(ctx.gen_kind, GeneratorKind::List); + + let cases = Op::get_cases(); + let count: u64 = cases.iter().filter(|case| case.output.is_none()).count().try_into().unwrap(); + + (cases.into_iter().filter(|x| x.output.is_none()).map(|x| x.input), count) +} diff --git a/libm/crates/libm-test/src/run_cfg.rs b/libm/crates/libm-test/src/run_cfg.rs index 783142e37..5728c3b2e 100644 --- a/libm/crates/libm-test/src/run_cfg.rs +++ b/libm/crates/libm-test/src/run_cfg.rs @@ -102,6 +102,7 @@ pub enum GeneratorKind { Extensive, QuickSpaced, Random, + List, } /// A list of all functions that should get extensive tests. 
@@ -219,8 +220,8 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { GeneratorKind::QuickSpaced => domain_iter_count, GeneratorKind::Random => random_iter_count, GeneratorKind::Extensive => extensive_max_iterations(), - GeneratorKind::EdgeCases => { - unimplemented!("edge case tests shoudn't need `iteration_count`") + GeneratorKind::EdgeCases | GeneratorKind::List => { + unimplemented!("shoudn't need `iteration_count` for {:?}", ctx.gen_kind) } }; @@ -269,7 +270,7 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { GeneratorKind::Random => { format!(" using `{SEED_ENV}={}`", str::from_utf8(SEED.as_slice()).unwrap()) } - GeneratorKind::EdgeCases => unreachable!(), + GeneratorKind::EdgeCases | GeneratorKind::List => unimplemented!(), }; test_log(&format!( @@ -310,6 +311,7 @@ pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive { GeneratorKind::Extensive => extensive_range, GeneratorKind::QuickSpaced | GeneratorKind::Random => non_extensive_range, GeneratorKind::EdgeCases => extensive_range, + GeneratorKind::List => unimplemented!("shoudn't need range for {:?}", ctx.gen_kind), } } diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index c8beaffc3..927cb25af 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -9,7 +9,7 @@ // There are some targets we can't build musl for #![cfg(feature = "build-musl")] -use libm_test::gen::{edge_cases, random, spaced}; +use libm_test::gen::{case_list, edge_cases, random, spaced}; use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall}; const BASIS: CheckBasis = CheckBasis::Musl; @@ -34,6 +34,15 @@ macro_rules! musl_tests { attrs: [$($attr:meta),*], ) => { paste::paste! 
{ + #[test] + $(#[$attr])* + fn [< musl_case_list_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::List); + let cases = case_list::get_test_cases_basis::(&ctx).0; + musl_runner::(&ctx, cases, musl_math_sys::$fn_name); + } + #[test] $(#[$attr])* fn [< musl_random_ $fn_name >]() { diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 0d5c5e60c..fd1f11610 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -2,7 +2,7 @@ #![cfg(feature = "build-mpfr")] -use libm_test::gen::{edge_cases, random, spaced}; +use libm_test::gen::{case_list, edge_cases, random, spaced}; use libm_test::mpfloat::MpOp; use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall}; @@ -24,6 +24,15 @@ macro_rules! mp_tests { attrs: [$($attr:meta),*], ) => { paste::paste! { + #[test] + $(#[$attr])* + fn [< mp_case_list_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::List); + let cases = case_list::get_test_cases_basis::(&ctx).0; + mp_runner::(&ctx, cases); + } + #[test] $(#[$attr])* fn [< mp_random_ $fn_name >]() { diff --git a/libm/crates/libm-test/tests/standalone.rs b/libm/crates/libm-test/tests/standalone.rs new file mode 100644 index 000000000..d6417acac --- /dev/null +++ b/libm/crates/libm-test/tests/standalone.rs @@ -0,0 +1,38 @@ +//! Test cases that have both an input and an output, so do not require a basis. + +use libm_test::gen::case_list; +use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall}; + +const BASIS: CheckBasis = CheckBasis::None; + +fn standalone_runner( + ctx: &CheckCtx, + cases: impl Iterator, +) { + for (input, expected) in cases { + let crate_res = input.call(Op::ROUTINE); + crate_res.validate(expected, input, ctx).unwrap(); + } +} + +macro_rules! 
mp_tests { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + ) => { + paste::paste! { + #[test] + $(#[$attr])* + fn [< standalone_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::List); + let cases = case_list::get_test_cases_standalone::(&ctx); + standalone_runner::(&ctx, cases); + } + } + }; +} + +libm_macros::for_each_function! { + callback: mp_tests, +} From 4b67aba5c503a4764d6f1ec6911677405de80a13 Mon Sep 17 00:00:00 2001 From: Pavel Grigorenko Date: Sat, 1 Feb 2025 02:10:59 +0300 Subject: [PATCH 1161/1459] Expose erf{,c}{,f} from libm --- src/math.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/math.rs b/src/math.rs index da208239e..21670f243 100644 --- a/src/math.rs +++ b/src/math.rs @@ -91,6 +91,11 @@ no_mangle! { fn fmod(x: f64, y: f64) -> f64; // `f32 % f32` fn fmodf(x: f32, y: f32) -> f32; + + fn erf(x: f64) -> f64; + fn erff(x: f32) -> f32; + fn erfc(x: f64) -> f64; + fn erfcf(x: f32) -> f32; } // allow for windows (and other targets) From 3c09866aa4c699ed5f430567f6de97ed56b65126 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 6 Feb 2025 20:55:26 +0000 Subject: [PATCH 1162/1459] chore: release v0.1.146 --- CHANGELOG.md | 6 ++++++ Cargo.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ddebb7823..d6962dc5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.146](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.145...compiler_builtins-v0.1.146) - 2025-02-06 + +### Other + +- Expose erf{,c}{,f} from libm + ## [0.1.145](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.144...compiler_builtins-v0.1.145) - 2025-02-04 ### Other diff --git a/Cargo.toml 
b/Cargo.toml index fcbc60238..c93ca563c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.145" +version = "0.1.146" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From ec036ba2715350ec1ac6c546743f63c92215bd09 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 6 Feb 2025 23:17:27 +0000 Subject: [PATCH 1163/1459] Improve tidy output Print a better diff when lists are unsorted, and always check tidy lists even if `--check` is not passed. --- libm/etc/update-api-list.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libm/etc/update-api-list.py b/libm/etc/update-api-list.py index 9cf625554..c0b6e41d3 100755 --- a/libm/etc/update-api-list.py +++ b/libm/etc/update-api-list.py @@ -278,8 +278,8 @@ def ensure_sorted(fpath: Path, block_start_line: int, lines: list[str]) -> None: """Ensure that a list of lines is sorted, otherwise print a diff and exit.""" relpath = fpath.relative_to(ROOT_DIR) diff_and_exit( - "".join(lines), - "".join(sorted(lines)), + "\n".join(lines), + "\n".join(sorted(lines)), f"sorted block at {relpath}:{block_start_line}", ) @@ -338,8 +338,7 @@ def ensure_updated_list(check: bool) -> None: crate.write_function_list(check) crate.write_function_defs(check) - if check: - crate.tidy_lists() + crate.tidy_lists() def main(): From 82443ffdf9b07dc03025d6b93f55169ccb2419fd Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 6 Feb 2025 23:18:31 +0000 Subject: [PATCH 1164/1459] Make it possible to use `hf32!` and similar macros outside of `libm` Adjust paths such that these macros don't go through the private `math` module. `feature = "private-test-deps"` is still needed. Additionally, ensure that `cargo check` for this crate gets run in CI because `cargo test` does not seem to identify this problem. 
`compiler_builtins` will need to reexport the `support` module. --- libm/.github/workflows/main.yaml | 1 + libm/crates/compiler-builtins-smoke-test/src/lib.rs | 3 +++ libm/src/math/mod.rs | 2 +- libm/src/math/support/float_traits.rs | 2 +- libm/src/math/support/macros.rs | 12 ++++++++---- libm/src/math/support/mod.rs | 2 ++ 6 files changed, 16 insertions(+), 6 deletions(-) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index e03d7ecd3..e1d263dea 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -143,6 +143,7 @@ jobs: - name: Install Rust run: rustup update nightly --no-self-update && rustup default nightly - uses: Swatinem/rust-cache@v2 + - run: cargo check --manifest-path crates/compiler-builtins-smoke-test/Cargo.toml - run: cargo test --manifest-path crates/compiler-builtins-smoke-test/Cargo.toml benchmarks: diff --git a/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/libm/crates/compiler-builtins-smoke-test/src/lib.rs index ccd0642a2..77a4666a1 100644 --- a/libm/crates/compiler-builtins-smoke-test/src/lib.rs +++ b/libm/crates/compiler-builtins-smoke-test/src/lib.rs @@ -17,6 +17,9 @@ pub mod libm; use core::ffi::c_int; +// Required for macro paths. +use libm::support; + /// Mark functions `#[no_mangle]` and with the C ABI. macro_rules! no_mangle { ($( $name:ident( $($tt:tt)+ ) -> $ret:ty; )+) => { diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index f0698ad02..7ad808cf7 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -81,7 +81,7 @@ pub mod support; #[macro_use] #[cfg(not(feature = "unstable-public-internals"))] -mod support; +pub(crate) mod support; cfg_if! 
{ if #[cfg(feature = "unstable-public-internals")] { diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 24cf7d4b0..328b70610 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -107,7 +107,7 @@ pub trait Float: !self.is_sign_negative() } - /// Returns if `self` is subnormal + /// Returns if `self` is subnormal. fn is_subnormal(self) -> bool { (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO } diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs index d8ba04cff..c80e77511 100644 --- a/libm/src/math/support/macros.rs +++ b/libm/src/math/support/macros.rs @@ -90,10 +90,11 @@ macro_rules! select_implementation { /// Construct a 16-bit float from hex float representation (C-style), guaranteed to /// evaluate at compile time. #[cfg(f16_enabled)] +#[cfg_attr(feature = "unstable-public-internals", macro_export)] #[allow(unused_macros)] macro_rules! hf16 { ($s:literal) => {{ - const X: f16 = $crate::math::support::hf16($s); + const X: f16 = $crate::support::hf16($s); X }}; } @@ -101,9 +102,10 @@ macro_rules! hf16 { /// Construct a 32-bit float from hex float representation (C-style), guaranteed to /// evaluate at compile time. #[allow(unused_macros)] +#[cfg_attr(feature = "unstable-public-internals", macro_export)] macro_rules! hf32 { ($s:literal) => {{ - const X: f32 = $crate::math::support::hf32($s); + const X: f32 = $crate::support::hf32($s); X }}; } @@ -111,9 +113,10 @@ macro_rules! hf32 { /// Construct a 64-bit float from hex float representation (C-style), guaranteed to /// evaluate at compile time. #[allow(unused_macros)] +#[cfg_attr(feature = "unstable-public-internals", macro_export)] macro_rules! hf64 { ($s:literal) => {{ - const X: f64 = $crate::math::support::hf64($s); + const X: f64 = $crate::support::hf64($s); X }}; } @@ -122,9 +125,10 @@ macro_rules! hf64 { /// evaluate at compile time. 
#[cfg(f128_enabled)] #[allow(unused_macros)] +#[cfg_attr(feature = "unstable-public-internals", macro_export)] macro_rules! hf128 { ($s:literal) => {{ - const X: f128 = $crate::math::support::hf128($s); + const X: f128 = $crate::support::hf128($s); X }}; } diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index d471c5b70..68f1e49e5 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -9,8 +9,10 @@ mod int_traits; pub use float_traits::{Float, IntTy}; pub(crate) use float_traits::{f32_from_bits, f64_from_bits}; #[cfg(f16_enabled)] +#[allow(unused_imports)] pub use hex_float::hf16; #[cfg(f128_enabled)] +#[allow(unused_imports)] pub use hex_float::hf128; #[allow(unused_imports)] pub use hex_float::{Hexf, hf32, hf64}; From 90d541863b81ba15cf43dc9e7d851c8bf641315f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 5 Feb 2025 23:45:14 +0000 Subject: [PATCH 1165/1459] Add `fmaf128` Resolve all remaining `f64`-specific items in the generic version of `fma`, then expose `fmaf128`. 
--- libm/crates/libm-macros/src/shared.rs | 7 + libm/crates/libm-test/benches/icount.rs | 1 + libm/crates/libm-test/benches/random.rs | 1 + libm/crates/libm-test/src/gen/case_list.rs | 23 +- libm/crates/libm-test/src/mpfloat.rs | 2 +- libm/crates/libm-test/src/precision.rs | 2 + .../libm-test/tests/compare_built_musl.rs | 1 + libm/crates/util/src/main.rs | 1 + libm/etc/function-definitions.json | 7 + libm/etc/function-list.txt | 1 + libm/src/libm_helper.rs | 1 + libm/src/math/fmaf128.rs | 7 + libm/src/math/generic/fma.rs | 248 +++++++++++++----- libm/src/math/mod.rs | 2 + 14 files changed, 237 insertions(+), 67 deletions(-) create mode 100644 libm/src/math/fmaf128.rs diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index da16cd8e2..48d19c50d 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ b/libm/crates/libm-macros/src/shared.rs @@ -106,6 +106,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] None, &["fma"], ), + ( + // `(f128, f128, f128) -> f128` + FloatTy::F128, + Signature { args: &[Ty::F128, Ty::F128, Ty::F128], returns: &[Ty::F128] }, + None, + &["fmaf128"], + ), ( // `(f32) -> i32` FloatTy::F32, diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs index 53ecb5a37..c41cef24e 100644 --- a/libm/crates/libm-test/benches/icount.rs +++ b/libm/crates/libm-test/benches/icount.rs @@ -108,6 +108,7 @@ main!( icount_bench_floorf16_group, icount_bench_floorf_group, icount_bench_fma_group, + icount_bench_fmaf128_group, icount_bench_fmaf_group, icount_bench_fmax_group, icount_bench_fmaxf128_group, diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 66486a56a..6e8a33479 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -127,6 +127,7 @@ libm_macros::for_each_function! 
{ | fdimf16 | floorf128 | floorf16 + | fmaf128 | fmaxf128 | fmaxf16 | fminf128 diff --git a/libm/crates/libm-test/src/gen/case_list.rs b/libm/crates/libm-test/src/gen/case_list.rs index 9720f68e9..302d5c391 100644 --- a/libm/crates/libm-test/src/gen/case_list.rs +++ b/libm/crates/libm-test/src/gen/case_list.rs @@ -6,6 +6,9 @@ //! //! This is useful for adding regression tests or expected failures. +#[cfg(f128_enabled)] +use libm::hf128; + use crate::{CheckBasis, CheckCtx, GeneratorKind, MathOp, op}; pub struct TestCase { @@ -250,7 +253,7 @@ fn fma_cases() -> Vec> { TestCase::append_pairs( &mut v, &[ - // Previously failure with incorrect sign + // Previous failure with incorrect sign ((5e-324, -5e-324, 0.0), Some(-0.0)), ], ); @@ -261,6 +264,24 @@ fn fmaf_cases() -> Vec> { vec![] } +#[cfg(f128_enabled)] +fn fmaf128_cases() -> Vec> { + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[( + // Tricky rounding case that previously failed in extensive tests + ( + hf128!("-0x1.1966cc01966cc01966cc01966f06p-25"), + hf128!("-0x1.669933fe69933fe69933fe6997c9p-16358"), + hf128!("-0x0.000000000000000000000000048ap-16382"), + ), + Some(hf128!("0x0.c5171470a3ff5e0f68d751491b18p-16382")), + )], + ); + v +} + fn fmax_cases() -> Vec> { vec![] } diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index ab77d541c..f4a9ff7ff 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -196,7 +196,7 @@ libm_macros::for_each_function! 
{ expm1 | expm1f => exp_m1, fabs | fabsf => abs, fdim | fdimf | fdimf16 | fdimf128 => positive_diff, - fma | fmaf => mul_add, + fma | fmaf | fmaf128 => mul_add, fmax | fmaxf | fmaxf16 | fmaxf128 => max, fmin | fminf | fminf16 | fminf128 => min, lgamma | lgammaf => ln_gamma, diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 596f91fe1..20aa96b6a 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -560,3 +560,5 @@ impl MaybeOverride<(f128, i32)> for SpecialCase {} impl MaybeOverride<(f32, f32, f32)> for SpecialCase {} impl MaybeOverride<(f64, f64, f64)> for SpecialCase {} +#[cfg(f128_enabled)] +impl MaybeOverride<(f128, f128, f128)> for SpecialCase {} diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 927cb25af..7fa77e832 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -99,6 +99,7 @@ libm_macros::for_each_function! 
{ fdimf16, floorf128, floorf16, + fmaf128, fmaxf128, fmaxf16, fminf128, diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index e5d6f374a..0f845a1c4 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -96,6 +96,7 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { | fdimf16 | floorf128 | floorf16 + | fmaf128 | fmaxf128 | fmaxf16 | fminf128 diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 243862075..5742ed585 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -356,6 +356,13 @@ ], "type": "f32" }, + "fmaf128": { + "sources": [ + "src/math/fmaf128.rs", + "src/math/generic/fma.rs" + ], + "type": "f128" + }, "fmax": { "sources": [ "src/math/fmax.rs", diff --git a/libm/etc/function-list.txt b/libm/etc/function-list.txt index c92eaf9e2..1c9c5e3bc 100644 --- a/libm/etc/function-list.txt +++ b/libm/etc/function-list.txt @@ -53,6 +53,7 @@ floorf128 floorf16 fma fmaf +fmaf128 fmax fmaxf fmaxf128 diff --git a/libm/src/libm_helper.rs b/libm/src/libm_helper.rs index 0768839c7..68f1fb362 100644 --- a/libm/src/libm_helper.rs +++ b/libm/src/libm_helper.rs @@ -208,6 +208,7 @@ libm_helper! { (fn fabs(x: f128) -> (f128); => fabsf128); (fn fdim(x: f128, y: f128) -> (f128); => fdimf128); (fn floor(x: f128) -> (f128); => floorf128); + (fn fmaf128(x: f128, y: f128, z: f128) -> (f128); => fmaf128); (fn fmax(x: f128, y: f128) -> (f128); => fmaxf128); (fn fmin(x: f128, y: f128) -> (f128); => fminf128); (fn fmod(x: f128, y: f128) -> (f128); => fmodf128); diff --git a/libm/src/math/fmaf128.rs b/libm/src/math/fmaf128.rs new file mode 100644 index 000000000..50f7360de --- /dev/null +++ b/libm/src/math/fmaf128.rs @@ -0,0 +1,7 @@ +/// Fused multiply add (f128) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 { + return super::generic::fma(x, y, z); +} diff --git a/libm/src/math/generic/fma.rs b/libm/src/math/generic/fma.rs index b0e2117ea..ac53acadf 100644 --- a/libm/src/math/generic/fma.rs +++ b/libm/src/math/generic/fma.rs @@ -1,10 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */ + use core::{f32, f64}; use super::super::support::{DInt, HInt, IntTy}; use super::super::{CastFrom, CastInto, Float, Int, MinInt}; -const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1; - /// Fused multiply-add that works when there is not a larger float size available. Currently this /// is still specialized only for `f64`. Computes `(x * y) + z`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] @@ -18,79 +19,99 @@ where { let one = IntTy::::ONE; let zero = IntTy::::ZERO; - let magic = F::from_parts(false, F::BITS - 1 + F::EXP_BIAS, zero); - /* normalize so top 10bits and last bit are 0 */ + // Normalize such that the top of the mantissa is zero and we have a guard bit. let nx = Norm::from_float(x); let ny = Norm::from_float(y); let nz = Norm::from_float(z); - if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN { + if nx.is_zero_nan_inf() || ny.is_zero_nan_inf() { + // Value will overflow, defer to non-fused operations. return x * y + z; } - if nz.e >= ZEROINFNAN { - if nz.e > ZEROINFNAN { - /* z==0 */ + + if nz.is_zero_nan_inf() { + if nz.is_zero() { + // Empty add component means we only need to multiply. return x * y; } + // `z` is NaN or infinity, which sets the result. 
return z; } - /* mul: r = x*y */ + // multiply: r = x * y let zhi: F::Int; let zlo: F::Int; let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi(); - /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */ - - /* align exponents */ + // Exponent result of multiplication let mut e: i32 = nx.e + ny.e; + // Needed shift to align `z` to the multiplication result let mut d: i32 = nz.e - e; let sbits = F::BITS as i32; - /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */ + // Scale `z`. Shift `z <<= kz`, `r >>= kr`, so `kz+kr == d`, set `e = e+kr` (== ez-kz) if d > 0 { + // The magnitude of `z` is larger than `x * y` if d < sbits { + // Maximum shift of one `F::BITS` means shifted `z` will fit into `2 * F::BITS`. Shift + // it into `(zhi, zlo)`. No exponent adjustment necessary. zlo = nz.m << d; zhi = nz.m >> (sbits - d); } else { + // Shift larger than `sbits`, `z` only needs the top half `zhi`. Place it there (acts + // as a shift by `sbits`). zlo = zero; zhi = nz.m; - e = nz.e - sbits; d -= sbits; + + // `z`'s exponent is large enough that it now needs to be taken into account. + e = nz.e - sbits; + if d == 0 { + // Exactly `sbits`, nothing to do } else if d < sbits { - rlo = (rhi << (sbits - d)) - | (rlo >> d) - | IntTy::::from((rlo << (sbits - d)) != zero); + // Remaining shift fits within `sbits`. Leave `z` in place, shift `x * y` + rlo = (rhi << (sbits - d)) | (rlo >> d); + // Set the sticky bit + rlo |= IntTy::::from((rlo << (sbits - d)) != zero); rhi = rhi >> d; } else { + // `z`'s magnitude is enough that `x * y` is irrelevant. It was nonzero, so set + // the sticky bit. rlo = one; rhi = zero; } } } else { + // `z`'s magnitude once shifted fits entirely within `zlo` zhi = zero; d = -d; if d == 0 { + // No shift needed zlo = nz.m; } else if d < sbits { - zlo = (nz.m >> d) | IntTy::::from((nz.m << (sbits - d)) != zero); + // Shift s.t. 
`nz.m` fits into `zlo` + let sticky = IntTy::::from((nz.m << (sbits - d)) != zero); + zlo = (nz.m >> d) | sticky; } else { + // Would be entirely shifted out, only set the sticky bit zlo = one; } } - /* add */ + /* addition */ + let mut neg = nx.neg ^ ny.neg; let samesign: bool = !neg ^ nz.neg; - let mut nonzero: i32 = 1; + let mut rhi_nonzero = true; + if samesign { - /* r += z */ + // r += z rlo = rlo.wrapping_add(zlo); rhi += zhi + IntTy::::from(rlo < zlo); } else { - /* r -= z */ + // r -= z let (res, borrow) = rlo.overflowing_sub(zlo); rlo = res; rhi = rhi.wrapping_sub(zhi.wrapping_add(IntTy::::from(borrow))); @@ -99,129 +120,226 @@ where rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::::from(rlo != zero); neg = !neg; } - nonzero = (rhi != zero) as i32; + rhi_nonzero = rhi != zero; } - /* set rhi to top 63bit of the result (last bit is sticky) */ - if nonzero != 0 { + /* Construct result */ + + // Shift result into `rhi`, left-aligned. Last bit is sticky + if rhi_nonzero { + // `d` > 0, need to shift both `rhi` and `rlo` into result e += sbits; d = rhi.leading_zeros() as i32 - 1; - /* note: d > 0 */ - rhi = (rhi << d) | (rlo >> (sbits - d)) | IntTy::::from((rlo << d) != zero); + rhi = (rhi << d) | (rlo >> (sbits - d)); + // Update sticky + rhi |= IntTy::::from((rlo << d) != zero); } else if rlo != zero { + // `rhi` is zero, `rlo` is the entire result and needs to be shifted d = rlo.leading_zeros() as i32 - 1; if d < 0 { + // Shift and set sticky rhi = (rlo >> 1) | (rlo & one); } else { rhi = rlo << d; } } else { - /* exact +-0 */ + // exact +/- 0.0 return x * y + z; } e -= d; - /* convert to double */ - let mut i: F::SignedInt = rhi.signed(); /* i is in [1<<62,(1<<63)-1] */ + // Use int->float conversion to populate the significand. 
+ // i is in [1 << (BITS - 2), (1 << (BITS - 1)) - 1] + let mut i: F::SignedInt = rhi.signed(); + if neg { i = -i; } - let mut r: F = F::cast_from_lossy(i); /* |r| is in [0x1p62,0x1p63] */ + // `|r|` is in `[0x1p62,0x1p63]` for `f64` + let mut r: F = F::cast_from_lossy(i); + + /* Account for subnormal and rounding */ + + // Unbiased exponent for the maximum value of `r` + let max_pow = F::BITS - 1 + F::EXP_BIAS; - if e < -(F::EXP_BIAS as i32 - 1) - (sbits - 2) { - /* result is subnormal before rounding */ - if e == -(F::EXP_BIAS as i32 - 1) - (sbits - 1) { - let mut c: F = magic; + if e < -(max_pow as i32 - 2) { + // Result is subnormal before rounding + if e == -(max_pow as i32 - 1) { + let mut c = F::from_parts(false, max_pow, zero); if neg { c = -c; } + if r == c { - /* min normal after rounding, underflow depends - * on arch behaviour which can be imitated by - * a double to float conversion */ - return r.raise_underflow(); + // Min normal after rounding, + return r.raise_underflow_ret_self(); } - /* one bit is lost when scaled, add another top bit to - * only round once at conversion if it is inexact */ - if (rhi << F::SIG_BITS) != zero { - let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << 62); + + if (rhi << (F::SIG_BITS + 1)) != zero { + // Account for truncated bits. One bit will be lost in the `scalbn` call, add + // another top bit to avoid double rounding if inexact. 
+ let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << (F::BITS - 2)); i = iu.signed(); + if neg { i = -i; } + r = F::cast_from_lossy(i); - r = F::cast_from(2i8) * r - c; /* remove top bit */ - /* raise underflow portably, such that it - * cannot be optimized away */ - r += r.raise_underflow2(); + // Remove the top bit + r = F::cast_from(2i8) * r - c; + r += r.raise_underflow_ret_zero(); } } else { - /* only round once when scaled */ - d = 10; - i = (((rhi >> d) | IntTy::::from(rhi << (F::BITS as i32 - d) != zero)) << d) - .signed(); + // Only round once when scaled + d = F::EXP_BITS as i32 - 1; + let sticky = IntTy::::from(rhi << (F::BITS as i32 - d) != zero); + i = (((rhi >> d) | sticky) << d).signed(); + if neg { i = -i; } - r = F::cast_from(i); + + r = F::cast_from_lossy(i); } } + // Use our exponent to scale the final value. super::scalbn(r, e) } /// Representation of `F` that has handled subnormals. +#[derive(Clone, Copy, Debug)] struct Norm { - /// Normalized significand with one guard bit. + /// Normalized significand with one guard bit, unsigned. m: F::Int, - /// Unbiased exponent, normalized. + /// Exponent of the mantissa such that `m * 2^e = x`. Accounts for the shift in the mantissa + /// and the guard bit; that is, 1.0 will normalize as `m = 1 << 53` and `e = -53`. e: i32, neg: bool, } impl Norm { + /// Unbias the exponent and account for the mantissa's precision, including the guard bit. + const EXP_UNBIAS: u32 = F::EXP_BIAS + F::SIG_BITS + 1; + + /// Values greater than this had a saturated exponent (infinity or NaN), OR were zero and we + /// adjusted the exponent such that it exceeds this threashold. 
+ const ZERO_INF_NAN: u32 = F::EXP_SAT - Self::EXP_UNBIAS; + fn from_float(x: F) -> Self { let mut ix = x.to_bits(); let mut e = x.exp() as i32; let neg = x.is_sign_negative(); if e == 0 { // Normalize subnormals by multiplication - let magic = F::from_parts(false, F::BITS - 1 + F::EXP_BIAS, F::Int::ZERO); - let scaled = x * magic; + let scale_i = F::BITS - 1; + let scale_f = F::from_parts(false, scale_i + F::EXP_BIAS, F::Int::ZERO); + let scaled = x * scale_f; ix = scaled.to_bits(); e = scaled.exp() as i32; - e = if e != 0 { e - (F::BITS as i32 - 1) } else { 0x800 }; + e = if e == 0 { + // If the exponent is still zero, the input was zero. Artifically set this value + // such that the final `e` will exceed `ZERO_INF_NAN`. + 1 << F::EXP_BITS + } else { + // Otherwise, account for the scaling we just did. + e - scale_i as i32 + }; } - e -= F::EXP_BIAS as i32 + 52 + 1; + e -= Self::EXP_UNBIAS as i32; + // Absolute value, set the implicit bit, and shift to create a guard bit ix &= F::SIG_MASK; ix |= F::IMPLICIT_BIT; - ix <<= 1; // add a guard bit + ix <<= 1; Self { m: ix, e, neg } } + + /// True if the value was zero, infinity, or NaN. + fn is_zero_nan_inf(self) -> bool { + self.e >= Self::ZERO_INF_NAN as i32 + } + + /// The only value we have + fn is_zero(self) -> bool { + // The only exponent that strictly exceeds this value is our sentinel value for zero. + self.e > Self::ZERO_INF_NAN as i32 + } } /// Type-specific helpers that are not needed outside of fma. 
pub trait FmaHelper { - fn raise_underflow(self) -> Self; - fn raise_underflow2(self) -> Self; + fn raise_underflow_ret_self(self) -> Self; + fn raise_underflow_ret_zero(self) -> Self; } impl FmaHelper for f64 { - fn raise_underflow(self) -> Self { - let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63 - let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * self) as f32; + fn raise_underflow_ret_self(self) -> Self { + /* min normal after rounding, underflow depends + * on arch behaviour which can be imitated by + * a double to float conversion */ + let fltmin: f32 = (hf64!("0x0.ffffff8p-63") * f32::MIN_POSITIVE as f64 * self) as f32; f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64 } - fn raise_underflow2(self) -> Self { + fn raise_underflow_ret_zero(self) -> Self { /* raise underflow portably, such that it * cannot be optimized away */ let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * self; (tiny * tiny) * (self - self) } } + +#[cfg(f128_enabled)] +impl FmaHelper for f128 { + fn raise_underflow_ret_self(self) -> Self { + self + } + + fn raise_underflow_ret_zero(self) -> Self { + f128::ZERO + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn spec_test() + where + F: Float + FmaHelper, + F: CastFrom, + F: CastFrom, + F::Int: HInt, + u32: CastInto, + { + let x = F::from_bits(F::Int::ONE); + let y = F::from_bits(F::Int::ONE); + let z = F::ZERO; + + // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of + // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the + // exact result" + assert_biteq!(fma(x, y, z), F::ZERO); + assert_biteq!(fma(x, -y, z), F::NEG_ZERO); + assert_biteq!(fma(-x, y, z), F::NEG_ZERO); + assert_biteq!(fma(-x, -y, z), F::ZERO); + } + + #[test] + fn spec_test_f64() { + spec_test::(); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_test_f128() { + spec_test::(); + } +} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs 
index 7ad808cf7..677ed8d6e 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -385,6 +385,7 @@ cfg_if! { mod fabsf128; mod fdimf128; mod floorf128; + mod fmaf128; mod fmaxf128; mod fminf128; mod fmodf128; @@ -402,6 +403,7 @@ cfg_if! { pub use self::fabsf128::fabsf128; pub use self::fdimf128::fdimf128; pub use self::floorf128::floorf128; + pub use self::fmaf128::fmaf128; pub use self::fmaxf128::fmaxf128; pub use self::fminf128::fminf128; pub use self::fmodf128::fmodf128; From c70973ef315f1874ef601db0abd04c66ffb5b172 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 7 Feb 2025 00:47:00 +0000 Subject: [PATCH 1166/1459] Add better edge case testing for `scalbn` Include integer values around the minimum and maximum exponents which require different behavior in the scale functions. --- libm/crates/libm-test/src/gen/edge_cases.rs | 100 +++++++++++++++----- libm/src/math/generic/scalbn.rs | 4 +- libm/src/math/support/float_traits.rs | 21 ++++ 3 files changed, 101 insertions(+), 24 deletions(-) diff --git a/libm/crates/libm-test/src/gen/edge_cases.rs b/libm/crates/libm-test/src/gen/edge_cases.rs index 8de954ae3..8da635114 100644 --- a/libm/crates/libm-test/src/gen/edge_cases.rs +++ b/libm/crates/libm-test/src/gen/edge_cases.rs @@ -1,11 +1,11 @@ //! A generator that checks a handful of cases near infinities, zeros, asymptotes, and NaNs. -use libm::support::{Float, Int}; +use libm::support::{CastInto, Float, Int}; use crate::domain::get_domain; use crate::gen::KnownSize; use crate::run_cfg::{check_near_count, check_point_count}; -use crate::{CheckCtx, FloatExt, MathOp, test_log}; +use crate::{BaseName, CheckCtx, FloatExt, FloatTy, MathOp, test_log}; /// Generate a sequence of edge cases, e.g. numbers near zeroes and infiniteis. pub trait EdgeCaseInput { @@ -78,7 +78,7 @@ where (ret.into_iter(), count) } -/// Add `AROUND` values starting at and including `x` and counting up. 
Uses the smallest possible +/// Add `points` values starting at and including `x` and counting up. Uses the smallest possible /// increments (1 ULP). fn count_up(mut x: F, points: u64, values: &mut Vec) { assert!(!x.is_nan()); @@ -91,7 +91,7 @@ fn count_up(mut x: F, points: u64, values: &mut Vec) { } } -/// Add `AROUND` values starting at and including `x` and counting down. Uses the smallest possible +/// Add `points` values starting at and including `x` and counting down. Uses the smallest possible /// increments (1 ULP). fn count_down(mut x: F, points: u64, values: &mut Vec) { assert!(!x.is_nan()); @@ -107,31 +107,87 @@ fn count_down(mut x: F, points: u64, values: &mut Vec) { /// Create a list of values around interesting integer points (min, zero, max). pub fn int_edge_cases( ctx: &CheckCtx, - _argnum: usize, -) -> (impl Iterator + Clone, u64) { + argnum: usize, +) -> (impl Iterator + Clone, u64) +where + i32: CastInto, +{ let mut values = Vec::new(); let near_points = check_near_count(ctx); - for up_from in [I::MIN, I::ZERO] { - let mut x = up_from; - for _ in 0..near_points { - values.push(x); - x += I::ONE; - } - } - - for down_from in [I::ZERO, I::MAX] { - let mut x = down_from; - for _ in 0..near_points { - values.push(x); - x -= I::ONE; - } + // Check around max/min and zero + int_count_around(I::MIN, near_points, &mut values); + int_count_around(I::MAX, near_points, &mut values); + int_count_around(I::ZERO, near_points, &mut values); + int_count_around(I::ZERO, near_points, &mut values); + + if matches!(ctx.base_name, BaseName::Scalbn | BaseName::Ldexp) { + assert_eq!(argnum, 1, "scalbn integer argument should be arg1"); + let (emax, emin, emin_sn) = match ctx.fn_ident.math_op().float_ty { + FloatTy::F16 => { + #[cfg(not(f16_enabled))] + unreachable!(); + #[cfg(f16_enabled)] + (f16::EXP_MAX, f16::EXP_MIN, f16::EXP_MIN_SUBNORM) + } + FloatTy::F32 => (f32::EXP_MAX, f32::EXP_MIN, f32::EXP_MIN_SUBNORM), + FloatTy::F64 => (f64::EXP_MAX, f64::EXP_MIN, 
f64::EXP_MIN_SUBNORM), + FloatTy::F128 => { + #[cfg(not(f128_enabled))] + unreachable!(); + #[cfg(f128_enabled)] + (f128::EXP_MAX, f128::EXP_MIN, f128::EXP_MIN_SUBNORM) + } + }; + + // `scalbn`/`ldexp` have their trickiest behavior around exponent limits + int_count_around(emax.cast(), near_points, &mut values); + int_count_around(emin.cast(), near_points, &mut values); + int_count_around(emin_sn.cast(), near_points, &mut values); + int_count_around((-emin_sn).cast(), near_points, &mut values); + + // Also check values that cause the maximum possible difference in exponents + int_count_around((emax - emin).cast(), near_points, &mut values); + int_count_around((emin - emax).cast(), near_points, &mut values); + int_count_around((emax - emin_sn).cast(), near_points, &mut values); + int_count_around((emin_sn - emax).cast(), near_points, &mut values); } values.sort(); values.dedup(); - let len = values.len().try_into().unwrap(); - (values.into_iter(), len) + let count = values.len().try_into().unwrap(); + + test_log(&format!( + "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {count} edge cases", + gen_kind = ctx.gen_kind, + basis = ctx.basis, + fn_ident = ctx.fn_ident, + arg = argnum + 1, + args = ctx.input_count(), + )); + + (values.into_iter(), count) +} + +/// Add `points` values both up and down, starting at and including `x`. +fn int_count_around(x: I, points: u64, values: &mut Vec) { + let mut current = x; + for _ in 0..points { + values.push(current); + current = match current.checked_add(I::ONE) { + Some(v) => v, + None => break, + }; + } + + current = x; + for _ in 0..points { + values.push(current); + current = match current.checked_sub(I::ONE) { + Some(v) => v, + None => break, + }; + } } macro_rules! 
impl_edge_case_input { diff --git a/libm/src/math/generic/scalbn.rs b/libm/src/math/generic/scalbn.rs index f15cb75d6..5ba7f2ab2 100644 --- a/libm/src/math/generic/scalbn.rs +++ b/libm/src/math/generic/scalbn.rs @@ -28,8 +28,8 @@ where let sig_total_bits = F::SIG_BITS + 1; // Maximum and minimum values when biased - let exp_max: i32 = F::EXP_BIAS as i32; - let exp_min = -(exp_max - 1); + let exp_max = F::EXP_MAX; + let exp_min = F::EXP_MIN; // 2 ^ Emax, maximum positive with null significand (0x1p1023 for f64) let f_exp_max = F::from_parts(false, F::EXP_BIAS << 1, zero); diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 328b70610..d6ce13f69 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -59,6 +59,15 @@ pub trait Float: /// The exponent bias value const EXP_BIAS: u32 = Self::EXP_SAT >> 1; + /// Maximum unbiased exponent value. + const EXP_MAX: i32 = Self::EXP_BIAS as i32; + + /// Minimum *NORMAL* unbiased exponent value. + const EXP_MIN: i32 = -(Self::EXP_MAX - 1); + + /// Minimum subnormal exponent value. 
+ const EXP_MIN_SUBNORM: i32 = Self::EXP_MIN - Self::SIG_BITS as i32; + /// A mask for the sign bit const SIGN_MASK: Self::Int; @@ -274,6 +283,9 @@ mod tests { // Constants assert_eq!(f16::EXP_SAT, 0b11111); assert_eq!(f16::EXP_BIAS, 15); + assert_eq!(f16::EXP_MAX, 15); + assert_eq!(f16::EXP_MIN, -14); + assert_eq!(f16::EXP_MIN_SUBNORM, -24); // `exp_unbiased` assert_eq!(f16::FRAC_PI_2.exp_unbiased(), 0); @@ -296,6 +308,9 @@ mod tests { // Constants assert_eq!(f32::EXP_SAT, 0b11111111); assert_eq!(f32::EXP_BIAS, 127); + assert_eq!(f32::EXP_MAX, 127); + assert_eq!(f32::EXP_MIN, -126); + assert_eq!(f32::EXP_MIN_SUBNORM, -149); // `exp_unbiased` assert_eq!(f32::FRAC_PI_2.exp_unbiased(), 0); @@ -319,6 +334,9 @@ mod tests { // Constants assert_eq!(f64::EXP_SAT, 0b11111111111); assert_eq!(f64::EXP_BIAS, 1023); + assert_eq!(f64::EXP_MAX, 1023); + assert_eq!(f64::EXP_MIN, -1022); + assert_eq!(f64::EXP_MIN_SUBNORM, -1074); // `exp_unbiased` assert_eq!(f64::FRAC_PI_2.exp_unbiased(), 0); @@ -343,6 +361,9 @@ mod tests { // Constants assert_eq!(f128::EXP_SAT, 0b111111111111111); assert_eq!(f128::EXP_BIAS, 16383); + assert_eq!(f128::EXP_MAX, 16383); + assert_eq!(f128::EXP_MIN, -16382); + assert_eq!(f128::EXP_MIN_SUBNORM, -16494); // `exp_unbiased` assert_eq!(f128::FRAC_PI_2.exp_unbiased(), 0); From 732b7267fb46a2dbdd8fd15c0ffdfb946f124fd5 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 7 Feb 2025 01:23:19 +0000 Subject: [PATCH 1167/1459] Run standard tests before running integration tests To ensure we don't waste time running extensive tests when there is an easily identifiable failure, run the normal test suite for relevant functions before starting extensive tests. 
--- libm/.github/workflows/main.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index e1d263dea..de131639b 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -273,6 +273,9 @@ jobs: exit fi + # Run the non-extensive tests first to catch any easy failures + cargo t --profile release-checked -- "$CHANGED" + LIBM_EXTENSIVE_TESTS="$CHANGED" cargo t \ --features build-mpfr,unstable,force-soft-floats \ --profile release-checked \ From 2bac70142048d2ecccf8c2c4884a008effab23de Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 7 Feb 2025 01:05:38 +0000 Subject: [PATCH 1168/1459] Check more subnormal values during edge cases tests Add checks at the max subnormal value and a couple values scattered throughout the subnormal range. This helped identify a bug in `fmaf128`. As part of this, slightly reduce the amount of edge cases checked without optimizations because the change makes it become noticeable. --- libm/crates/libm-test/src/gen/edge_cases.rs | 20 +++++++++++++++++++- libm/crates/libm-test/src/run_cfg.rs | 2 +- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/libm/crates/libm-test/src/gen/edge_cases.rs b/libm/crates/libm-test/src/gen/edge_cases.rs index 8da635114..69b59a105 100644 --- a/libm/crates/libm-test/src/gen/edge_cases.rs +++ b/libm/crates/libm-test/src/gen/edge_cases.rs @@ -1,9 +1,10 @@ //! A generator that checks a handful of cases near infinities, zeros, asymptotes, and NaNs.
-use libm::support::{CastInto, Float, Int}; +use libm::support::{CastInto, Float, Int, MinInt}; use crate::domain::get_domain; use crate::gen::KnownSize; +use crate::op::OpITy; use crate::run_cfg::{check_near_count, check_point_count}; use crate::{BaseName, CheckCtx, FloatExt, FloatTy, MathOp, test_log}; @@ -21,6 +22,7 @@ where Op: MathOp, { let mut ret = Vec::new(); + let one = OpITy::::ONE; let values = &mut ret; let domain = get_domain::<_, i8>(ctx.fn_ident, argnum).unwrap_float(); let domain_start = domain.range_start(); @@ -51,6 +53,22 @@ where values.push(Op::FTy::NAN); values.extend(Op::FTy::consts().iter()); + // Check around the maximum subnormal value + let sub_max = Op::FTy::from_bits(Op::FTy::SIG_MASK); + count_up(sub_max, near_points, values); + count_down(sub_max, near_points, values); + count_up(-sub_max, near_points, values); + count_down(-sub_max, near_points, values); + + // Check a few values around the subnormal range + for shift in (0..Op::FTy::SIG_BITS).step_by(Op::FTy::SIG_BITS as usize / 5) { + let v = Op::FTy::from_bits(one << shift); + count_up(v, 2, values); + count_down(v, 2, values); + count_up(-v, 2, values); + count_down(-v, 2, values); + } + // Check around asymptotes if let Some(f) = domain.check_points { let iter = f(); diff --git a/libm/crates/libm-test/src/run_cfg.rs b/libm/crates/libm-test/src/run_cfg.rs index 5728c3b2e..4dd43bdf3 100644 --- a/libm/crates/libm-test/src/run_cfg.rs +++ b/libm/crates/libm-test/src/run_cfg.rs @@ -342,7 +342,7 @@ pub fn check_near_count(ctx: &CheckCtx) -> u64 { x => panic!("unexpected argument count {x}"), } } else { - 10 + 8 } } From e22f6ba2b2fab8961a9c8daffc266633cb967d0b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 7 Feb 2025 00:52:56 +0000 Subject: [PATCH 1169/1459] fmaf128: fix exponent calculation for subnormals When `fmaf128` was introduced in [1], it included a bug where `self` gets returned rather than the expected minimum positive value. Resolve this and add a regression test. 
[1]: https://github.com/rust-lang/libm/pull/494 --- libm/crates/libm-test/src/gen/case_list.rs | 25 ++++++++++++++++------ libm/src/math/generic/fma.rs | 13 ++++++----- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/libm/crates/libm-test/src/gen/case_list.rs b/libm/crates/libm-test/src/gen/case_list.rs index 302d5c391..23226d5c2 100644 --- a/libm/crates/libm-test/src/gen/case_list.rs +++ b/libm/crates/libm-test/src/gen/case_list.rs @@ -269,15 +269,26 @@ fn fmaf128_cases() -> Vec> { let mut v = vec![]; TestCase::append_pairs( &mut v, - &[( - // Tricky rounding case that previously failed in extensive tests + &[ + ( + // Tricky rounding case that previously failed in extensive tests + ( + hf128!("-0x1.1966cc01966cc01966cc01966f06p-25"), + hf128!("-0x1.669933fe69933fe69933fe6997c9p-16358"), + hf128!("-0x0.000000000000000000000000048ap-16382"), + ), + Some(hf128!("0x0.c5171470a3ff5e0f68d751491b18p-16382")), + ), ( - hf128!("-0x1.1966cc01966cc01966cc01966f06p-25"), - hf128!("-0x1.669933fe69933fe69933fe6997c9p-16358"), - hf128!("-0x0.000000000000000000000000048ap-16382"), + // Subnormal edge case that caused a failure + ( + hf128!("0x0.7ffffffffffffffffffffffffff7p-16382"), + hf128!("0x1.ffffffffffffffffffffffffffffp-1"), + hf128!("0x0.8000000000000000000000000009p-16382"), + ), + Some(hf128!("0x1.0000000000000000000000000000p-16382")), ), - Some(hf128!("0x0.c5171470a3ff5e0f68d751491b18p-16382")), - )], + ], ); v } diff --git a/libm/src/math/generic/fma.rs b/libm/src/math/generic/fma.rs index ac53acadf..4c6f1fad6 100644 --- a/libm/src/math/generic/fma.rs +++ b/libm/src/math/generic/fma.rs @@ -146,6 +146,7 @@ where // exact +/- 0.0 return x * y + z; } + e -= d; // Use int->float conversion to populate the significand. 
@@ -174,7 +175,7 @@ where if r == c { // Min normal after rounding, - return r.raise_underflow_ret_self(); + return r.raise_underflow_as_min_positive(); } if (rhi << (F::SIG_BITS + 1)) != zero { @@ -275,12 +276,14 @@ impl Norm { /// Type-specific helpers that are not needed outside of fma. pub trait FmaHelper { - fn raise_underflow_ret_self(self) -> Self; + /// Raise underflow and return the minimum positive normal value with the sign of `self`. + fn raise_underflow_as_min_positive(self) -> Self; + /// Raise underflow and return zero. fn raise_underflow_ret_zero(self) -> Self; } impl FmaHelper for f64 { - fn raise_underflow_ret_self(self) -> Self { + fn raise_underflow_as_min_positive(self) -> Self { /* min normal after rounding, underflow depends * on arch behaviour which can be imitated by * a double to float conversion */ @@ -298,8 +301,8 @@ impl FmaHelper for f64 { #[cfg(f128_enabled)] impl FmaHelper for f128 { - fn raise_underflow_ret_self(self) -> Self { - self + fn raise_underflow_as_min_positive(self) -> Self { + f128::MIN_POSITIVE.copysign(self) } fn raise_underflow_ret_zero(self) -> Self { From fe5aaf450d4d21afd62d719c1e0b264f697d677b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 7 Feb 2025 02:36:58 +0000 Subject: [PATCH 1170/1459] Remove or reduce the scope of `allow(unused)` where possible Now that we have more in this crate making use of traits, try to be more specific about what is actually unused. --- libm/crates/libm-macros/tests/basic.rs | 13 +++---------- libm/src/math/support/big.rs | 11 +++++------ libm/src/math/support/float_traits.rs | 17 ++++++++++------- libm/src/math/support/int_traits.rs | 7 ++----- libm/src/math/support/mod.rs | 1 - 5 files changed, 20 insertions(+), 29 deletions(-) diff --git a/libm/crates/libm-macros/tests/basic.rs b/libm/crates/libm-macros/tests/basic.rs index 0aa417f13..5314e84bb 100644 --- a/libm/crates/libm-macros/tests/basic.rs +++ b/libm/crates/libm-macros/tests/basic.rs @@ -18,20 +18,14 @@ macro_rules! 
basic { fn_extra: $fn_extra:expr, ) => { $(#[$attr])* - mod $fn_name { - #[allow(unused)] + #[allow(dead_code)] + pub mod $fn_name { type FTy= $FTy; - #[allow(unused)] type CFnTy<'a> = $CFn; - #[allow(unused)] type RustFnTy = $RustFn; - #[allow(unused)] type RustArgsTy = $RustArgs; - #[allow(unused)] type RustRetTy = $RustRet; - #[allow(unused)] const A: &[&str] = &[$($extra_tt)*]; - #[allow(unused)] fn foo(a: f32) -> f32 { $fn_extra(a) } @@ -92,10 +86,9 @@ macro_rules! specified_types { attrs: [$($attr:meta),*], ) => { $(#[$attr])* + #[allow(dead_code)] mod $fn_name { - #[allow(unused)] type RustFnTy = $RustFn; - #[allow(unused)] type RustArgsTy = $RustArgs; } }; diff --git a/libm/src/math/support/big.rs b/libm/src/math/support/big.rs index e0f5e5263..bf47d2001 100644 --- a/libm/src/math/support/big.rs +++ b/libm/src/math/support/big.rs @@ -1,11 +1,9 @@ //! Integers used for wide operations, larger than `u128`. -#![allow(unused)] - #[cfg(test)] mod tests; -use core::{fmt, ops}; +use core::ops; use super::{DInt, HInt, Int, MinInt}; @@ -13,7 +11,6 @@ const WORD_LO_MASK: u64 = 0x00000000ffffffff; const WORD_HI_MASK: u64 = 0xffffffff00000000; const WORD_FULL_MASK: u64 = 0xffffffffffffffff; const U128_LO_MASK: u128 = u64::MAX as u128; -const U128_HI_MASK: u128 = (u64::MAX as u128) << 64; /// A 256-bit unsigned integer represented as 4 64-bit limbs. /// @@ -23,6 +20,7 @@ const U128_HI_MASK: u128 = (u64::MAX as u128) << 64; pub struct u256(pub [u64; 4]); impl u256 { + #[cfg(test)] pub const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX]); /// Reinterpret as a signed integer @@ -40,6 +38,7 @@ pub struct i256(pub [u64; 4]); impl i256 { /// Reinterpret as an unsigned integer + #[cfg(test)] pub fn unsigned(self) -> u256 { u256(self.0) } @@ -96,7 +95,7 @@ macro_rules! 
impl_common { impl ops::Shl for $ty { type Output = Self; - fn shl(self, rhs: u32) -> Self::Output { + fn shl(self, _rhs: u32) -> Self::Output { unimplemented!("only used to meet trait bounds") } } @@ -256,7 +255,7 @@ impl HInt for i128 { self.unsigned().zero_widen_mul(rhs.unsigned()).signed() } - fn widen_mul(self, rhs: Self) -> Self::D { + fn widen_mul(self, _rhs: Self) -> Self::D { unimplemented!("signed i128 widening multiply is not used") } diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index d6ce13f69..3b27f8de5 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -3,7 +3,7 @@ use core::{fmt, mem, ops}; use super::int_traits::{CastFrom, Int, MinInt}; /// Trait for some basic operations on floats -#[allow(dead_code)] +// #[allow(dead_code)] pub trait Float: Copy + fmt::Debug @@ -84,11 +84,13 @@ pub trait Float: fn to_bits(self) -> Self::Int; /// Returns `self` transmuted to `Self::SignedInt` + #[allow(dead_code)] fn to_bits_signed(self) -> Self::SignedInt { self.to_bits().signed() } /// Check bitwise equality. + #[allow(dead_code)] fn biteq(self, rhs: Self) -> bool { self.to_bits() == rhs.to_bits() } @@ -98,6 +100,7 @@ pub trait Float: /// /// This method returns `true` if two NaNs are compared. Use [`biteq`](Self::biteq) instead /// if `NaN` should not be treated separately. + #[allow(dead_code)] fn eq_repr(self, rhs: Self) -> bool { if self.is_nan() && rhs.is_nan() { true } else { self.biteq(rhs) } } @@ -117,6 +120,7 @@ pub trait Float: } /// Returns if `self` is subnormal. + #[allow(dead_code)] fn is_subnormal(self) -> bool { (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO } @@ -132,15 +136,11 @@ pub trait Float: } /// Returns the significand with no implicit bit (or the "fractional" part) + #[allow(dead_code)] fn frac(self) -> Self::Int { self.to_bits() & Self::SIG_MASK } - /// Returns the significand with implicit bit. 
- fn imp_frac(self) -> Self::Int { - self.frac() | Self::IMPLICIT_BIT - } - /// Returns a `Self::Int` transmuted back to `Self` fn from_bits(a: Self::Int) -> Self; @@ -154,22 +154,25 @@ pub trait Float: ) } + #[allow(dead_code)] fn abs(self) -> Self; /// Returns a number composed of the magnitude of self and the sign of sign. + #[allow(dead_code)] fn copysign(self, other: Self) -> Self; /// Returns (normalized exponent, normalized significand) + #[allow(dead_code)] fn normalize(significand: Self::Int) -> (i32, Self::Int); /// Returns a number that represents the sign of self. + #[allow(dead_code)] fn signum(self) -> Self { if self.is_nan() { self } else { Self::ONE.copysign(self) } } } /// Access the associated `Int` type from a float (helper to avoid ambiguous associated types). -#[allow(dead_code)] pub type IntTy = ::Int; macro_rules! float_impl { diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index 793a0f306..d34797764 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -1,7 +1,6 @@ use core::{cmp, fmt, ops}; /// Minimal integer implementations needed on all integer types, including wide integers. -#[allow(dead_code)] pub trait MinInt: Copy + fmt::Debug @@ -261,7 +260,6 @@ int_impl!(i128, u128); /// Trait for integers twice the bit width of another integer. This is implemented for all /// primitives except for `u8`, because there is not a smaller primitive. -#[allow(unused)] pub trait DInt: MinInt { /// Integer that is half the bit width of the integer this trait is implemented for type H: HInt; @@ -275,6 +273,7 @@ pub trait DInt: MinInt { (self.lo(), self.hi()) } /// Constructs an integer using lower and higher half parts + #[allow(unused)] fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { lo.zero_widen() | hi.widen_hi() } @@ -282,7 +281,6 @@ pub trait DInt: MinInt { /// Trait for integers half the bit width of another integer. 
This is implemented for all /// primitives except for `u128`, because it there is not a larger primitive. -#[allow(unused)] pub trait HInt: Int { /// Integer that is double the bit width of the integer this trait is implemented for type D: DInt + MinInt; @@ -297,6 +295,7 @@ pub trait HInt: Int { /// around problems with associated type bounds (such as `Int`) being unstable fn zero_widen(self) -> Self::D; /// Widens the integer to have double bit width and shifts the integer into the higher bits + #[allow(unused)] fn widen_hi(self) -> Self::D; /// Widening multiplication with zero widening. This cannot overflow. fn zero_widen_mul(self, rhs: Self) -> Self::D; @@ -360,7 +359,6 @@ impl_h_int!( ); /// Trait to express (possibly lossy) casting of integers -#[allow(unused)] pub trait CastInto: Copy { /// By default, casts should be exact. fn cast(self) -> T; @@ -369,7 +367,6 @@ pub trait CastInto: Copy { fn cast_lossy(self) -> T; } -#[allow(unused)] pub trait CastFrom: Copy { /// By default, casts should be exact. fn cast_from(value: T) -> Self; diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index 68f1e49e5..d3c932b97 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -5,7 +5,6 @@ mod float_traits; pub mod hex_float; mod int_traits; -#[allow(unused_imports)] pub use float_traits::{Float, IntTy}; pub(crate) use float_traits::{f32_from_bits, f64_from_bits}; #[cfg(f16_enabled)] From 24021c3684e2d02356ac65ec36031e0e4f7edff0 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 7 Feb 2025 03:41:05 +0000 Subject: [PATCH 1171/1459] Convert `fmaf` to a generic implementation Introduce a version of generic `fma` that works when there is a larger hardware-backed float type available to compute the result with more precision. This is currently used only for `f32`, but with some minor adjustments it should work for `f16` as well. 
--- libm/src/math/fmaf.rs | 96 +-------------------------- libm/src/math/generic/fma.rs | 67 ++++++++++++++++++- libm/src/math/generic/mod.rs | 2 +- libm/src/math/mod.rs | 2 +- libm/src/math/support/float_traits.rs | 58 ++++++++++++++++ libm/src/math/support/mod.rs | 3 +- 6 files changed, 129 insertions(+), 99 deletions(-) diff --git a/libm/src/math/fmaf.rs b/libm/src/math/fmaf.rs index 79371c836..40d7f40d6 100644 --- a/libm/src/math/fmaf.rs +++ b/libm/src/math/fmaf.rs @@ -1,103 +1,11 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */ -/*- - * Copyright (c) 2005-2011 David Schultz - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -use core::f32; -use core::ptr::read_volatile; - -use super::fenv::{ - FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, feclearexcept, fegetround, feraiseexcept, fetestexcept, -}; - -/* - * Fused multiply-add: Compute x * y + z with a single rounding error. - * - * A double has more than twice as much precision than a float, so - * direct double-precision arithmetic suffices, except where double - * rounding occurs. - */ - /// Floating multiply add (f32) /// /// Computes `(x*y)+z`, rounded as one ternary operation: /// Computes the value (as if) to infinite precision and rounds once to the result format, /// according to the rounding mode characterized by the value of FLT_ROUNDS. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 { - let xy: f64; - let mut result: f64; - let mut ui: u64; - let e: i32; - - xy = x as f64 * y as f64; - result = xy + z as f64; - ui = result.to_bits(); - e = (ui >> 52) as i32 & 0x7ff; - /* Common case: The double precision result is fine. */ - if ( - /* not a halfway case */ - ui & 0x1fffffff) != 0x10000000 || - /* NaN */ - e == 0x7ff || - /* exact */ - (result - xy == z as f64 && result - z as f64 == xy) || - /* not round-to-nearest */ - fegetround() != FE_TONEAREST - { - /* - underflow may not be raised correctly, example: - fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) - */ - if ((0x3ff - 149)..(0x3ff - 126)).contains(&e) && fetestexcept(FE_INEXACT) != 0 { - feclearexcept(FE_INEXACT); - // prevent `xy + vz` from being CSE'd with `xy + z` above - let vz: f32 = unsafe { read_volatile(&z) }; - result = xy + vz as f64; - if fetestexcept(FE_INEXACT) != 0 { - feraiseexcept(FE_UNDERFLOW); - } else { - feraiseexcept(FE_INEXACT); - } - } - z = result as f32; - return z; - } - - /* - * If result is inexact, and exactly halfway between two float values, - * we need to adjust the low-order bit in the direction of the error. 
- */ - let neg = ui >> 63 != 0; - let err = if neg == (z as f64 > xy) { xy - result + z as f64 } else { z as f64 - result + xy }; - if neg == (err < 0.0) { - ui += 1; - } else { - ui -= 1; - } - f64::from_bits(ui) as f32 +pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { + super::generic::fma_wide(x, y, z) } #[cfg(test)] diff --git a/libm/src/math/generic/fma.rs b/libm/src/math/generic/fma.rs index 4c6f1fad6..a40d7aaaf 100644 --- a/libm/src/math/generic/fma.rs +++ b/libm/src/math/generic/fma.rs @@ -1,10 +1,13 @@ /* SPDX-License-Identifier: MIT */ -/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */ +/* origin: musl src/math/{fma,fmaf}.c. Ported to generic Rust algorithm in 2025, TG. */ use core::{f32, f64}; +use super::super::fenv::{ + FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, feclearexcept, fegetround, feraiseexcept, fetestexcept, +}; use super::super::support::{DInt, HInt, IntTy}; -use super::super::{CastFrom, CastInto, Float, Int, MinInt}; +use super::super::{CastFrom, CastInto, DFloat, Float, HFloat, Int, MinInt}; /// Fused multiply-add that works when there is not a larger float size available. Currently this /// is still specialized only for `f64`. Computes `(x * y) + z`. @@ -212,6 +215,66 @@ where super::scalbn(r, e) } +/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`, +/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding. +pub fn fma_wide(x: F, y: F, z: F) -> F +where + F: Float + HFloat, + B: Float + DFloat, + B::Int: CastInto, + i32: CastFrom, +{ + let one = IntTy::::ONE; + + let xy: B = x.widen() * y.widen(); + let mut result: B = xy + z.widen(); + let mut ui: B::Int = result.to_bits(); + let re = result.exp(); + let zb: B = z.widen(); + + let prec_diff = B::SIG_BITS - F::SIG_BITS; + let excess_prec = ui & ((one << prec_diff) - one); + let halfway = one << (prec_diff - 1); + + // Common case: the larger precision is fine if... 
+ // This is not a halfway case + if excess_prec != halfway + // Or the result is NaN + || re == B::EXP_SAT + // Or the result is exact + || (result - xy == zb && result - zb == xy) + // Or the mode is something other than round to nearest + || fegetround() != FE_TONEAREST + { + let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32; + let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32; + + if (min_inexact_exp..max_inexact_exp).contains(&re) && fetestexcept(FE_INEXACT) != 0 { + feclearexcept(FE_INEXACT); + // prevent `xy + vz` from being CSE'd with `xy + z` above + let vz: F = force_eval!(z); + result = xy + vz.widen(); + if fetestexcept(FE_INEXACT) != 0 { + feraiseexcept(FE_UNDERFLOW); + } else { + feraiseexcept(FE_INEXACT); + } + } + + return result.narrow(); + } + + let neg = ui >> (B::BITS - 1) != IntTy::::ZERO; + let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy }; + if neg == (err < B::ZERO) { + ui += one; + } else { + ui -= one; + } + + B::from_bits(ui).narrow() +} + /// Representation of `F` that has handled subnormals. 
#[derive(Clone, Copy, Debug)] struct Norm { diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index e19cc83a9..b34d3dfae 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -18,7 +18,7 @@ pub use copysign::copysign; pub use fabs::fabs; pub use fdim::fdim; pub use floor::floor; -pub use fma::fma; +pub use fma::{fma, fma_wide}; pub use fmax::fmax; pub use fmin::fmin; pub use fmod::fmod; diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 677ed8d6e..e32045021 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -121,7 +121,7 @@ use self::rem_pio2::rem_pio2; use self::rem_pio2_large::rem_pio2_large; use self::rem_pio2f::rem_pio2f; #[allow(unused_imports)] -use self::support::{CastFrom, CastInto, DInt, Float, HInt, Int, IntTy, MinInt}; +use self::support::{CastFrom, CastInto, DFloat, DInt, Float, HFloat, HInt, Int, IntTy, MinInt}; // Public modules mod acos; diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 3b27f8de5..ee83c793d 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -276,6 +276,64 @@ pub const fn f64_from_bits(bits: u64) -> f64 { unsafe { mem::transmute::(bits) } } +/// Trait for floats twice the bit width of another float. +pub trait DFloat: Float { + /// Float that is half the bit width of the float this trait is implemented for. + type H: HFloat; + + /// Narrow the float type. + fn narrow(self) -> Self::H; +} + +/// Trait for floats half the bit width of another float. +pub trait HFloat: Float { + /// Float that is double the bit width of the float this trait is implemented for. + type D: DFloat; + + /// Widen the float type. + fn widen(self) -> Self::D; +} + +macro_rules! impl_d_float { + ($($X:ident $D:ident),*) => { + $( + impl DFloat for $D { + type H = $X; + + fn narrow(self) -> Self::H { + self as $X + } + } + )* + }; +} + +macro_rules!
impl_h_float { + ($($H:ident $X:ident),*) => { + $( + impl HFloat for $H { + type D = $X; + + fn widen(self) -> Self::D { + self as $X + } + } + )* + }; +} + +impl_d_float!(f32 f64); +#[cfg(f16_enabled)] +impl_d_float!(f16 f32); +#[cfg(f128_enabled)] +impl_d_float!(f64 f128); + +impl_h_float!(f32 f64); +#[cfg(f16_enabled)] +impl_h_float!(f16 f32); +#[cfg(f128_enabled)] +impl_h_float!(f64 f128); + #[cfg(test)] mod tests { use super::*; diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index d3c932b97..9eebd4403 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -5,7 +5,8 @@ mod float_traits; pub mod hex_float; mod int_traits; -pub use float_traits::{Float, IntTy}; +#[allow(unused_imports)] +pub use float_traits::{DFloat, Float, HFloat, IntTy}; pub(crate) use float_traits::{f32_from_bits, f64_from_bits}; #[cfg(f16_enabled)] #[allow(unused_imports)] From b15fa0fb2402610366a0e8fff82e6052cb2c87a8 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 7 Feb 2025 21:25:03 +0000 Subject: [PATCH 1172/1459] Uncomment some hex float tests that should work now --- libm/src/math/support/hex_float.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs index ebc4f7c64..99ad8bec3 100644 --- a/libm/src/math/support/hex_float.rs +++ b/libm/src/math/support/hex_float.rs @@ -1,7 +1,5 @@ //! Utilities for working with hex float formats. 
-#![allow(dead_code)] // FIXME: remove once this gets used - use core::fmt; use super::{Float, f32_from_bits, f64_from_bits}; @@ -13,6 +11,7 @@ pub const fn hf16(s: &str) -> f16 { } /// Construct a 32-bit float from hex float representation (C-style) +#[allow(unused)] pub const fn hf32(s: &str) -> f32 { f32_from_bits(parse_any(s, 32, 23) as u32) } @@ -548,14 +547,12 @@ mod parse_tests { #[test] fn test_macros() { - // FIXME(msrv): enable once parsing works - // #[cfg(f16_enabled)] - // assert_eq!(hf16!("0x1.ffp+8").to_bits(), 0x5ffc_u16); + #[cfg(f16_enabled)] + assert_eq!(hf16!("0x1.ffp+8").to_bits(), 0x5ffc_u16); assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000_u32); assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000_u64); - // FIXME(msrv): enable once parsing works - // #[cfg(f128_enabled)] - // assert_eq!(hf128!("0x1.ffep+8").to_bits(), 0x4007ffe0000000000000000000000000_u128); + #[cfg(f128_enabled)] + assert_eq!(hf128!("0x1.ffep+8").to_bits(), 0x4007ffe0000000000000000000000000_u128); } } From b0698f66794451ec286e5df35fe86a3111d42c53 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 7 Feb 2025 22:28:19 +0000 Subject: [PATCH 1173/1459] Work around iai-callgrind apt failures Usually `cargo binstall iai-callgrind-runner` handles apt dependencies. However, the following has been happening: Err:11 mirror+file:/etc/apt/apt-mirrors.txt noble-updates/main amd64 libc6-dbg amd64 2.39-0ubuntu8.3 404 Not Found [IP: 40.81.13.82 80] E: Failed to fetch mirror+file:/etc/apt/apt-mirrors.txt/pool/main/g/glibc/libc6-dbg_2.39-0ubuntu8.3_amd64.deb 404 Not Found [IP: 40.81.13.82 80] Fetched 19.8 MB in 6s (3138 kB/s) E: Unable to fetch some archives, maybe run apt-get update or try with --fix-missing? Installing the dependencies manually seems to resolve the issue.
--- libm/.github/workflows/main.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index de131639b..265702965 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -158,6 +158,8 @@ jobs: - name: Set up dependencies run: | + sudo apt update + sudo apt install -y valgrind gdb libc6-dbg # Needed for iai-callgrind rustup update "$BENCHMARK_RUSTC" --no-self-update rustup default "$BENCHMARK_RUSTC" # Install the version of iai-callgrind-runner that is specified in Cargo.toml From de411d19b71294858a156e3d973cb3c36c96a982 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 25 Jan 2025 00:29:04 +0000 Subject: [PATCH 1174/1459] Add an enum representation of rounding mode We only round using nearest, but some incoming code has more handling of rounding modes that would be nice to `match` on. Rather than checking integer values, add an enum representation. --- libm/src/math/fenv.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/libm/src/math/fenv.rs b/libm/src/math/fenv.rs index c91272e82..328c9f346 100644 --- a/libm/src/math/fenv.rs +++ b/libm/src/math/fenv.rs @@ -5,6 +5,9 @@ pub(crate) const FE_UNDERFLOW: i32 = 0; pub(crate) const FE_INEXACT: i32 = 0; pub(crate) const FE_TONEAREST: i32 = 0; +pub(crate) const FE_DOWNWARD: i32 = 1; +pub(crate) const FE_UPWARD: i32 = 2; +pub(crate) const FE_TOWARDZERO: i32 = 3; #[inline] pub(crate) fn feclearexcept(_mask: i32) -> i32 { @@ -25,3 +28,22 @@ pub(crate) fn fetestexcept(_mask: i32) -> i32 { pub(crate) fn fegetround() -> i32 { FE_TONEAREST } + +#[derive(Clone, Copy, Debug, PartialEq)] +pub(crate) enum Rounding { + Nearest = FE_TONEAREST as isize, + Downward = FE_DOWNWARD as isize, + Upward = FE_UPWARD as isize, + ToZero = FE_TOWARDZERO as isize, +} + +impl Rounding { + pub(crate) fn get() -> Self { + match fegetround() { + x if x == FE_DOWNWARD => Self::Downward, + x if x == FE_UPWARD => 
Self::Upward, + x if x == FE_TOWARDZERO => Self::ToZero, + _ => Self::Nearest, + } + } +} From 75a7f3df3ed7caa383507b43f58b2c95527d91a6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 25 Oct 2024 03:56:09 -0500 Subject: [PATCH 1175/1459] Port the CORE-MATH version of `cbrt` Replace our current implementation with one that is correctly rounded. Source: https://gitlab.inria.fr/core-math/core-math/-/blob/81d447bb1c46592291bec3476bc24fa2c2688c67/src/binary64/cbrt/cbrt.c --- libm/src/math/cbrt.rs | 319 ++++++++++++++++++++++++++++-------------- 1 file changed, 216 insertions(+), 103 deletions(-) diff --git a/libm/src/math/cbrt.rs b/libm/src/math/cbrt.rs index b4e77eaa2..fbf81f77d 100644 --- a/libm/src/math/cbrt.rs +++ b/libm/src/math/cbrt.rs @@ -1,113 +1,226 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - * - * Optimized by Bruce D. Evans. +/* SPDX-License-Identifier: MIT */ +/* origin: core-math/src/binary64/cbrt/cbrt.c + * Copyright (c) 2021-2022 Alexei Sibidanov. + * Ported to Rust in 2025 by Trevor Gross. */ -/* cbrt(x) - * Return cube root of x - */ - -use core::f64; -const B1: u32 = 715094163; /* B1 = (1023-1023/3-0.03306235651)*2**20 */ -const B2: u32 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */ +use super::Float; +use super::fenv::Rounding; +use super::support::cold_path; -/* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). 
*/ -const P0: f64 = 1.87595182427177009643; /* 0x3ffe03e6, 0x0f61e692 */ -const P1: f64 = -1.88497979543377169875; /* 0xbffe28e0, 0x92f02420 */ -const P2: f64 = 1.621429720105354466140; /* 0x3ff9f160, 0x4a49d6c2 */ -const P3: f64 = -0.758397934778766047437; /* 0xbfe844cb, 0xbee751d9 */ -const P4: f64 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */ - -// Cube root (f64) -/// -/// Computes the cube root of the argument. +/// Compute the cube root of the argument. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cbrt(x: f64) -> f64 { - let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 - - let mut ui: u64 = x.to_bits(); - let mut r: f64; - let s: f64; - let mut t: f64; - let w: f64; - let mut hx: u32 = (ui >> 32) as u32 & 0x7fffffff; - - if hx >= 0x7ff00000 { - /* cbrt(NaN,INF) is itself */ - return x + x; + const ESCALE: [f64; 3] = [ + 1.0, + hf64!("0x1.428a2f98d728bp+0"), /* 2^(1/3) */ + hf64!("0x1.965fea53d6e3dp+0"), /* 2^(2/3) */ + ]; + + /* the polynomial c0+c1*x+c2*x^2+c3*x^3 approximates x^(1/3) on [1,2] + with maximal error < 9.2e-5 (attained at x=2) */ + const C: [f64; 4] = [ + hf64!("0x1.1b0babccfef9cp-1"), + hf64!("0x1.2c9a3e94d1da5p-1"), + hf64!("-0x1.4dc30b1a1ddbap-3"), + hf64!("0x1.7a8d3e4ec9b07p-6"), + ]; + + let u0: f64 = hf64!("0x1.5555555555555p-2"); + let u1: f64 = hf64!("0x1.c71c71c71c71cp-3"); + + let rsc = [1.0, -1.0, 0.5, -0.5, 0.25, -0.25]; + + let off = [hf64!("0x1p-53"), 0.0, 0.0, 0.0]; + + let rm = Rounding::get(); + + /* rm=0 for rounding to nearest, and other values for directed roundings */ + let hx: u64 = x.to_bits(); + let mut mant: u64 = hx & f64::SIG_MASK; + let sign: u64 = hx >> 63; + + let mut e: u32 = (hx >> f64::SIG_BITS) as u32 & f64::EXP_SAT; + + if ((e + 1) & f64::EXP_SAT) < 2 { + cold_path(); + + let ix: u64 = hx & !f64::SIGN_MASK; + + /* 0, inf, nan: we return x + x instead of simply x, + to that for x a signaling NaN, it correctly triggers + the invalid exception. 
*/ + if e == f64::EXP_SAT || ix == 0 { + return x + x; + } + + let nz = ix.leading_zeros() - 11; /* subnormal */ + mant <<= nz; + mant &= f64::SIG_MASK; + e = e.wrapping_sub(nz - 1); + } + + e = e.wrapping_add(3072); + let cvt1: u64 = mant | (0x3ffu64 << 52); + let mut cvt5: u64 = cvt1; + + let et: u32 = e / 3; + let it: u32 = e % 3; + + /* 2^(3k+it) <= x < 2^(3k+it+1), with 0 <= it <= 3 */ + cvt5 += u64::from(it) << f64::SIG_BITS; + cvt5 |= sign << 63; + let zz: f64 = f64::from_bits(cvt5); + + /* cbrt(x) = cbrt(zz)*2^(et-1365) where 1 <= zz < 8 */ + let mut isc: u64 = ESCALE[it as usize].to_bits(); // todo: index + isc |= sign << 63; + let cvt2: u64 = isc; + let z: f64 = f64::from_bits(cvt1); + + /* cbrt(zz) = cbrt(z)*isc, where isc encodes 1, 2^(1/3) or 2^(2/3), + and 1 <= z < 2 */ + let r: f64 = 1.0 / z; + let rr: f64 = r * rsc[((it as usize) << 1) | sign as usize]; + let z2: f64 = z * z; + let c0: f64 = C[0] + z * C[1]; + let c2: f64 = C[2] + z * C[3]; + let mut y: f64 = c0 + z2 * c2; + let mut y2: f64 = y * y; + + /* y is an approximation of z^(1/3) */ + let mut h: f64 = y2 * (y * r) - 1.0; + + /* h determines the error between y and z^(1/3) */ + y -= (h * y) * (u0 - u1 * h); + + /* The correction y -= (h*y)*(u0 - u1*h) corresponds to a cubic variant + of Newton's method, with the function f(y) = 1-z/y^3. */ + y *= f64::from_bits(cvt2); + + /* Now y is an approximation of zz^(1/3), + * and rr an approximation of 1/zz. We now perform another iteration of + * Newton-Raphson, this time with a linear approximation only. 
*/ + y2 = y * y; + let mut y2l: f64 = fmaf64(y, y, -y2); + + /* y2 + y2l = y^2 exactly */ + let mut y3: f64 = y2 * y; + let mut y3l: f64 = fmaf64(y, y2, -y3) + y * y2l; + + /* y3 + y3l approximates y^3 with about 106 bits of accuracy */ + h = ((y3 - zz) + y3l) * rr; + let mut dy: f64 = h * (y * u0); + + /* the approximation of zz^(1/3) is y - dy */ + let mut y1: f64 = y - dy; + dy = (y - y1) - dy; + + /* the approximation of zz^(1/3) is now y1 + dy, where |dy| < 1/2 ulp(y) + * (for rounding to nearest) */ + let mut ady: f64 = dy.abs(); + + /* For directed roundings, ady0 is tiny when dy is tiny, or ady0 is near + * from ulp(1); + * for rounding to nearest, ady0 is tiny when dy is near from 1/2 ulp(1), + * or from 3/2 ulp(1). */ + let mut ady0: f64 = (ady - off[rm as usize]).abs(); + let mut ady1: f64 = (ady - (hf64!("0x1p-52") + off[rm as usize])).abs(); + + if ady0 < hf64!("0x1p-75") || ady1 < hf64!("0x1p-75") { + cold_path(); + + y2 = y1 * y1; + y2l = fmaf64(y1, y1, -y2); + y3 = y2 * y1; + y3l = fmaf64(y1, y2, -y3) + y1 * y2l; + h = ((y3 - zz) + y3l) * rr; + dy = h * (y1 * u0); + y = y1 - dy; + dy = (y1 - y) - dy; + y1 = y; + ady = dy.abs(); + ady0 = (ady - off[rm as usize]).abs(); + ady1 = (ady - (hf64!("0x1p-52") + off[rm as usize])).abs(); + + if ady0 < hf64!("0x1p-98") || ady1 < hf64!("0x1p-98") { + cold_path(); + let azz: f64 = zz.abs(); + + // ~ 0x1.79d15d0e8d59b80000000000000ffc3dp+0 + if azz == hf64!("0x1.9b78223aa307cp+1") { + y1 = hf64!("0x1.79d15d0e8d59cp+0").copysign(zz); + } + + // ~ 0x1.de87aa837820e80000000000001c0f08p+0 + if azz == hf64!("0x1.a202bfc89ddffp+2") { + y1 = hf64!("0x1.de87aa837820fp+0").copysign(zz); + } + + if rm != Rounding::Nearest { + let wlist = [ + (hf64!("0x1.3a9ccd7f022dbp+0"), hf64!("0x1.1236160ba9b93p+0")), // ~ 0x1.1236160ba9b930000000000001e7e8fap+0 + (hf64!("0x1.7845d2faac6fep+0"), hf64!("0x1.23115e657e49cp+0")), // ~ 0x1.23115e657e49c0000000000001d7a799p+0 + (hf64!("0x1.d1ef81cbbbe71p+0"), hf64!("0x1.388fb44cdcf5ap+0")), 
// ~ 0x1.388fb44cdcf5a0000000000002202c55p+0 + (hf64!("0x1.0a2014f62987cp+1"), hf64!("0x1.46bcbf47dc1e8p+0")), // ~ 0x1.46bcbf47dc1e8000000000000303aa2dp+0 + (hf64!("0x1.fe18a044a5501p+1"), hf64!("0x1.95decfec9c904p+0")), // ~ 0x1.95decfec9c9040000000000000159e8ep+0 + (hf64!("0x1.a6bb8c803147bp+2"), hf64!("0x1.e05335a6401dep+0")), // ~ 0x1.e05335a6401de00000000000027ca017p+0 + (hf64!("0x1.ac8538a031cbdp+2"), hf64!("0x1.e281d87098de8p+0")), // ~ 0x1.e281d87098de80000000000000ee9314p+0 + ]; + + for (a, b) in wlist { + if azz == a { + let tmp = if rm as u64 + sign == 2 { hf64!("0x1p-52") } else { 0.0 }; + y1 = (b + tmp).copysign(zz); + } + } + } + } + } + + let mut cvt3: u64 = y1.to_bits(); + cvt3 = cvt3.wrapping_add(((et.wrapping_sub(342).wrapping_sub(1023)) as u64) << 52); + let m0: u64 = cvt3 << 30; + let m1 = m0 >> 63; + + if (m0 ^ m1) <= (1u64 << 30) { + cold_path(); + + let mut cvt4: u64 = y1.to_bits(); + cvt4 = (cvt4 + (164 << 15)) & 0xffffffffffff0000u64; + + if ((f64::from_bits(cvt4) - y1) - dy).abs() < hf64!("0x1p-60") || (zz).abs() == 1.0 { + cvt3 = (cvt3 + (1u64 << 15)) & 0xffffffffffff0000u64; + } } - /* - * Rough cbrt to 5 bits: - * cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3) - * where e is integral and >= 0, m is real and in [0, 1), and "/" and - * "%" are integer division and modulus with rounding towards minus - * infinity. The RHS is always >= the LHS and has a maximum relative - * error of about 1 in 16. Adding a bias of -0.03306235651 to the - * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE - * floating point representation, for finite positive normal values, - * ordinary integer divison of the value in bits magically gives - * almost exactly the RHS of the above provided we first subtract the - * exponent bias (1023 for doubles) and later add it back. We do the - * subtraction virtually to keep e >= 0 so that ordinary integer - * division rounds towards minus infinity; this is also efficient. 
- */ - if hx < 0x00100000 { - /* zero or subnormal? */ - ui = (x * x1p54).to_bits(); - hx = (ui >> 32) as u32 & 0x7fffffff; - if hx == 0 { - return x; /* cbrt(0) is itself */ + f64::from_bits(cvt3) +} + +fn fmaf64(x: f64, y: f64, z: f64) -> f64 { + #[cfg(intrinsics_enabled)] + { + return unsafe { core::intrinsics::fmaf64(x, y, z) }; + } + + #[cfg(not(intrinsics_enabled))] + { + return super::fma(x, y, z); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn spot_checks() { + if !cfg!(x86_no_sse) { + // Exposes a rounding mode problem. Ignored on i586 because of inaccurate FMA. + assert_biteq!( + cbrt(f64::from_bits(0xf7f792b28f600000)), + f64::from_bits(0xd29ce68655d962f3) + ); } - hx = hx / 3 + B2; - } else { - hx = hx / 3 + B1; } - ui &= 1 << 63; - ui |= (hx as u64) << 32; - t = f64::from_bits(ui); - - /* - * New cbrt to 23 bits: - * cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x) - * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r) - * to within 2**-23.5 when |r - 1| < 1/10. The rough approximation - * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this - * gives us bounds for r = t**3/x. - * - * Try to optimize for parallel evaluation as in __tanf.c. - */ - r = (t * t) * (t / x); - t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4)); - - /* - * Round t away from zero to 23 bits (sloppily except for ensuring that - * the result is larger in magnitude than cbrt(x) but not much more than - * 2 23-bit ulps larger). With rounding towards zero, the error bound - * would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps - * in the rounded t, the infinite-precision error in the Newton - * approximation barely affects third digit in the final error - * 0.667; the error in the rounded t can be up to about 3 23-bit ulps - * before the final error is larger than 0.667 ulps. 
- */ - ui = t.to_bits(); - ui = (ui + 0x80000000) & 0xffffffffc0000000; - t = f64::from_bits(ui); - - /* one step Newton iteration to 53 bits with error < 0.667 ulps */ - s = t * t; /* t*t is exact */ - r = x / s; /* error <= 0.5 ulps; |r| < |t| */ - w = t + t; /* t+t is exact */ - r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */ - t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */ - t } From 0803852b37d63bf73c6f2911819912989d9b3bdf Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 25 Jan 2025 00:31:49 +0000 Subject: [PATCH 1176/1459] Decrease the allowed error for `cbrt` With the correctly rounded implementation, we can reduce the ULP requirement for `cbrt` to zero. There is still an override required for `i586` because of the imprecise FMA. --- libm/crates/libm-test/src/precision.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 20aa96b6a..a85996539 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -41,7 +41,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { | Bn::Trunc => 0, // Operations that aren't required to be exact, but our implementations are. - Bn::Cbrt if ctx.fn_ident != Id::Cbrt => 0, + Bn::Cbrt => 0, // Bessel functions have large inaccuracies. Bn::J0 | Bn::J1 | Bn::Y0 | Bn::Y1 | Bn::Jn | Bn::Yn => 8_000_000, @@ -54,7 +54,6 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { Bn::Atan => 1, Bn::Atan2 => 2, Bn::Atanh => 2, - Bn::Cbrt => 1, Bn::Cos => 1, Bn::Cosh => 1, Bn::Erf => 1, @@ -92,6 +91,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { } match ctx.fn_ident { + Id::Cbrt => ulp = 2, // FIXME(#401): musl has an incorrect result here. 
Id::Fdim => ulp = 2, Id::Sincosf => ulp = 500, @@ -119,6 +119,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { Id::Asinh => ulp = 3, Id::Asinhf => ulp = 3, + Id::Cbrt => ulp = 1, Id::Exp10 | Id::Exp10f => ulp = 1_000_000, Id::Exp2 | Id::Exp2f => ulp = 10_000_000, Id::Log1p | Id::Log1pf => ulp = 2, From 7d0b45b534b2cdf5bdcc2e803f1135ce09d06071 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 8 Feb 2025 07:06:59 +0000 Subject: [PATCH 1177/1459] Add simple icount benchmarks for `u256` operations --- libm/crates/libm-test/benches/icount.rs | 101 ++++++++++++++++++++++++ libm/src/math/support/big.rs | 2 +- libm/src/math/support/mod.rs | 2 + 3 files changed, 104 insertions(+), 1 deletion(-) diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs index c41cef24e..232a3de38 100644 --- a/libm/crates/libm-test/benches/icount.rs +++ b/libm/crates/libm-test/benches/icount.rs @@ -1,8 +1,10 @@ //! Benchmarks that use `iai-cachegrind` to be reasonably CI-stable. use std::hint::black_box; +use std::ops::Shr; use iai_callgrind::{library_benchmark, library_benchmark_group, main}; +use libm::support::{HInt, u256}; use libm_test::gen::spaced; use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op}; @@ -51,8 +53,107 @@ libm_macros::for_each_function! 
{ callback: icount_benches, } +fn setup_u128_mul() -> Vec<(u128, u128)> { + let step = u128::MAX / 300; + let mut x = 0u128; + let mut y = 0u128; + let mut v = Vec::new(); + + loop { + 'inner: loop { + match y.checked_add(step) { + Some(new) => y = new, + None => break 'inner, + } + + v.push((x, y)) + } + + match x.checked_add(step) { + Some(new) => x = new, + None => break, + } + } + + v +} + +/* +fn setup_u256_add() -> Vec<(u256, u256)> { + let mut v = Vec::new(); + for (x, y) in setup_u128_mul() { + // square the u128 inputs to cover most of the u256 range + v.push((x.widen_mul(x), y.widen_mul(y))); + } + // Doesn't get covered by `u128:MAX^2` + v.push((u256::MAX, u256::MAX)); + v +} +*/ + +fn setup_u256_shift() -> Vec<(u256, u32)> { + let mut v = Vec::new(); + + for (x, _) in setup_u128_mul() { + let x2 = x.widen_mul(x); + for y in 0u32..256 { + v.push((x2, y)); + } + } + + v +} + +#[library_benchmark] +#[bench::linspace(setup_u128_mul())] +fn icount_bench_u128_widen_mul(cases: Vec<(u128, u128)>) { + let f = black_box(u128::zero_widen_mul); + for (x, y) in cases.iter().copied() { + f(x, y); + } +} + +library_benchmark_group!( + name = icount_bench_u128_widen_mul_group; + benchmarks = icount_bench_u128_widen_mul +); + +/* Not yet implemented +#[library_benchmark] +#[bench::linspace(setup_u256_add())] +fn icount_bench_u256_add(cases: Vec<(u256, u256)>) { + let f = black_box(u256::add); + for (x, y) in cases.iter().copied() { + f(x, y); + } +} + +library_benchmark_group!( + name = icount_bench_u256_add_group; + benchmarks = icount_bench_u256_add +); +*/ + +#[library_benchmark] +#[bench::linspace(setup_u256_shift())] +fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) { + let f = black_box(u256::shr); + for (x, y) in cases.iter().copied() { + f(x, y); + } +} + +library_benchmark_group!( + name = icount_bench_u256_shr_group; + benchmarks = icount_bench_u256_shr +); + main!( library_benchmark_groups = + // u256-related benchmarks + icount_bench_u128_widen_mul_group, + 
// icount_bench_u256_add_group, + icount_bench_u256_shr_group, // verify-apilist-start // verify-sorted-start icount_bench_acos_group, diff --git a/libm/src/math/support/big.rs b/libm/src/math/support/big.rs index bf47d2001..7a437b67a 100644 --- a/libm/src/math/support/big.rs +++ b/libm/src/math/support/big.rs @@ -20,7 +20,7 @@ const U128_LO_MASK: u128 = u64::MAX as u128; pub struct u256(pub [u64; 4]); impl u256 { - #[cfg(test)] + #[allow(unused)] pub const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX]); /// Reinterpret as a signed integer diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index 9eebd4403..28e9fd413 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -5,6 +5,8 @@ mod float_traits; pub mod hex_float; mod int_traits; +#[allow(unused_imports)] +pub use big::{i256, u256}; #[allow(unused_imports)] pub use float_traits::{DFloat, Float, HFloat, IntTy}; pub(crate) use float_traits::{f32_from_bits, f64_from_bits}; From 7e3bd208443b5a33a355d348c806a62ce5788e65 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 8 Feb 2025 09:39:05 +0000 Subject: [PATCH 1178/1459] Change how operators are `black_box`ed For some reason, the upcoming limb changes in [1] seem to ignore the black boxing when applied to the operator function. Changing to instead black box the inputs appears to fix this. [1]: https://github.com/rust-lang/libm/pull/503 --- libm/crates/libm-test/benches/icount.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs index 232a3de38..9fac52e0b 100644 --- a/libm/crates/libm-test/benches/icount.rs +++ b/libm/crates/libm-test/benches/icount.rs @@ -1,7 +1,6 @@ //! Benchmarks that use `iai-cachegrind` to be reasonably CI-stable. 
use std::hint::black_box; -use std::ops::Shr; use iai_callgrind::{library_benchmark, library_benchmark_group, main}; use libm::support::{HInt, u256}; @@ -107,9 +106,8 @@ fn setup_u256_shift() -> Vec<(u256, u32)> { #[library_benchmark] #[bench::linspace(setup_u128_mul())] fn icount_bench_u128_widen_mul(cases: Vec<(u128, u128)>) { - let f = black_box(u128::zero_widen_mul); for (x, y) in cases.iter().copied() { - f(x, y); + black_box(black_box(x).zero_widen_mul(black_box(y))); } } @@ -122,9 +120,8 @@ library_benchmark_group!( #[library_benchmark] #[bench::linspace(setup_u256_add())] fn icount_bench_u256_add(cases: Vec<(u256, u256)>) { - let f = black_box(u256::add); for (x, y) in cases.iter().copied() { - f(x, y); + black_box(black_box(x) + black_box(y)); } } @@ -137,9 +134,8 @@ library_benchmark_group!( #[library_benchmark] #[bench::linspace(setup_u256_shift())] fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) { - let f = black_box(u256::shr); for (x, y) in cases.iter().copied() { - f(x, y); + black_box(black_box(x) >> black_box(y)); } } From 4e8deb356301ac4c675ad15df47ff05fea767f3b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 8 Feb 2025 09:48:14 +0000 Subject: [PATCH 1179/1459] Replace an `assert!` with `debug_assert!` in `u256::shr` The implementation came from the `compiler_builtins` port but this should be weakened to match other integer types. 
--- libm/src/math/support/big.rs | 5 ++++- libm/src/math/support/big/tests.rs | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/libm/src/math/support/big.rs b/libm/src/math/support/big.rs index 7a437b67a..017e9455e 100644 --- a/libm/src/math/support/big.rs +++ b/libm/src/math/support/big.rs @@ -109,7 +109,10 @@ impl ops::Shr for u256 { type Output = Self; fn shr(self, rhs: u32) -> Self::Output { - assert!(rhs < Self::BITS, "attempted to shift right with overflow"); + debug_assert!(rhs < Self::BITS, "attempted to shift right with overflow"); + if rhs >= Self::BITS { + return Self::ZERO; + } if rhs == 0 { return self; diff --git a/libm/src/math/support/big/tests.rs b/libm/src/math/support/big/tests.rs index f95f82973..815a62dfe 100644 --- a/libm/src/math/support/big/tests.rs +++ b/libm/src/math/support/big/tests.rs @@ -108,3 +108,22 @@ fn shr_u128() { } assert!(errors.is_empty()); } + +#[test] +#[should_panic] +#[cfg(debug_assertions)] +// FIXME(ppc): ppc64le seems to have issues with `should_panic` tests. 
+#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +fn shr_u256_overflow() { + // Like regular shr, panic on overflow with debug assertions + let _ = u256::MAX >> 256; +} + +#[test] +#[cfg(not(debug_assertions))] +fn shr_u256_overflow() { + // No panic without debug assertions + assert_eq!(u256::MAX >> 256, u256::ZERO); + assert_eq!(u256::MAX >> 257, u256::ZERO); + assert_eq!(u256::MAX >> u32::MAX, u256::ZERO); +} From 8bac7ce3bf69ecac1e5fbe1443c59cc9e4a1ac02 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 10 Feb 2025 02:05:41 +0000 Subject: [PATCH 1180/1459] Increase the tolerance for `jn` and `yn` These still fail random tests, e.g.: called `Result::unwrap()` on an `Err` value: jn Caused by: 0: input: (1068, -16013.98381387313) as hex: (, -0x1.f46fded9ced39p+13) as bits: (0x0000042c, 0xc0cf46fded9ced39) expected: 6.7603314308122506e-6 0x1.c5ad9c102d413p-18 0x3edc5ad9c102d413 actual: 6.7603314308006335e-6 0x1.c5ad9c1029e80p-18 0x3edc5ad9c1029e80 1: ulp 13715 > 4000 Caused by: 0: input: (195, 42147.94) as hex: (, 0x1.4947e2p+15) as bits: (0x000000c3, 0x4724a3f1) expected: -2.13669e-7 -0x1.cad9c6p-23 0xb4656ce3 actual: -2.1376937e-7 -0x1.cb10f4p-23 0xb465887a 1: ulp 7063 > 4000 Caused by: 0: input: (194, 740.1916) as hex: (, 0x1.721886p+9) as bits: (0x000000c2, 0x44390c43) expected: 1.212096e-6 0x1.455e9ap-20 0x35a2af4d actual: 1.2172386e-6 0x1.46c000p-20 0x35a36000 1: ulp 45235 > 10000 Increase allowed precision to avoid spurious failures. 
--- libm/crates/libm-test/src/precision.rs | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index a85996539..2f55ad22e 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -523,18 +523,7 @@ fn int_float_common( && actual == F2::ZERO && expected == F2::ZERO { - return XFAIL("mpfr b"); - } - - // Our bessel functions blow up with large N values - if ctx.basis == Musl && (ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn) { - if input.0 > 4000 { - return XFAIL_NOCHECK; - } else if input.0 > 2000 { - return CheckAction::AssertWithUlp(20_000); - } else if input.0 > 1000 { - return CheckAction::AssertWithUlp(4_000); - } + return XFAIL("we disagree with MPFR on the sign of zero"); } // Values near infinity sometimes get cut off for us. `ynf(681, 509.90924) = -inf` but should @@ -549,6 +538,19 @@ fn int_float_common( return XFAIL_NOCHECK; } + // Our bessel functions blow up with large N values + if ctx.basis == Musl && (ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn) { + if cfg!(x86_no_sse) { + // Precision is especially bad on i586, not worth checking. + return XFAIL_NOCHECK; + } + + if input.0 > 4000 { + return XFAIL_NOCHECK; + } else if input.0 > 100 { + return CheckAction::AssertWithUlp(1_000_000); + } + } DEFAULT } From 5fd5f917432816994cf1935c6ba3f0cc25eedd88 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 9 Feb 2025 22:40:20 +0000 Subject: [PATCH 1181/1459] ci: Pin the nightly toolchain for i686-pc-windows-gnu Pin i686-pc-windows-gnu to nightly-2025-02-07 until [1] is resolved. 
[1]: https://github.com/rust-lang/rust/issues/136795 --- libm/.github/workflows/main.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index 265702965..f066f4a8c 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -68,7 +68,8 @@ jobs: os: windows-2025 - target: i686-pc-windows-gnu os: windows-2025 - channel: nightly-i686-gnu + # FIXME: pinned due to https://github.com/rust-lang/rust/issues/136795 + channel: nightly-2025-02-07-i686-gnu - target: x86_64-pc-windows-gnu os: windows-2025 channel: nightly-x86_64-gnu From 70db1867942f3a08599e05963b052cd8b9d37422 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 8 Feb 2025 04:09:59 +0000 Subject: [PATCH 1182/1459] Implement `u256` with two `u128`s rather than `u64` This produces better assembly, e.g. on aarch64: .globl libm::u128_wmul .p2align 2 libm::u128_wmul: Lfunc_begin124: .cfi_startproc mul x9, x2, x0 umulh x10, x2, x0 umulh x11, x3, x0 mul x12, x3, x0 umulh x13, x2, x1 mul x14, x2, x1 umulh x15, x3, x1 mul x16, x3, x1 adds x10, x10, x14 cinc x13, x13, hs adds x13, x13, x16 cinc x14, x15, hs adds x10, x10, x12 cinc x11, x11, hs adds x11, x13, x11 stp x9, x10, [x8] cinc x9, x14, hs stp x11, x9, [x8, rust-lang/libm#16] ret The original was ~70 instructions so the improvement is significant. With these changes, the result is reasonably close to what LLVM generates using `u256` operands [1]. 
[1]: https://llvm.godbolt.org/z/re1aGdaqY --- libm/crates/libm-test/benches/icount.rs | 6 +- libm/crates/libm-test/src/gen/random.rs | 2 +- libm/crates/libm-test/src/lib.rs | 5 +- libm/crates/libm-test/src/run_cfg.rs | 30 +++- libm/crates/libm-test/tests/u256.rs | 147 ++++++++++++++++ libm/src/math/support/big.rs | 217 +++++++++--------------- libm/src/math/support/big/tests.rs | 79 +++++---- 7 files changed, 298 insertions(+), 188 deletions(-) create mode 100644 libm/crates/libm-test/tests/u256.rs diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs index 9fac52e0b..be85dd567 100644 --- a/libm/crates/libm-test/benches/icount.rs +++ b/libm/crates/libm-test/benches/icount.rs @@ -77,7 +77,6 @@ fn setup_u128_mul() -> Vec<(u128, u128)> { v } -/* fn setup_u256_add() -> Vec<(u256, u256)> { let mut v = Vec::new(); for (x, y) in setup_u128_mul() { @@ -88,7 +87,6 @@ fn setup_u256_add() -> Vec<(u256, u256)> { v.push((u256::MAX, u256::MAX)); v } -*/ fn setup_u256_shift() -> Vec<(u256, u32)> { let mut v = Vec::new(); @@ -116,7 +114,6 @@ library_benchmark_group!( benchmarks = icount_bench_u128_widen_mul ); -/* Not yet implemented #[library_benchmark] #[bench::linspace(setup_u256_add())] fn icount_bench_u256_add(cases: Vec<(u256, u256)>) { @@ -129,7 +126,6 @@ library_benchmark_group!( name = icount_bench_u256_add_group; benchmarks = icount_bench_u256_add ); -*/ #[library_benchmark] #[bench::linspace(setup_u256_shift())] @@ -148,7 +144,7 @@ main!( library_benchmark_groups = // u256-related benchmarks icount_bench_u128_widen_mul_group, - // icount_bench_u256_add_group, + icount_bench_u256_add_group, icount_bench_u256_shr_group, // verify-apilist-start // verify-sorted-start diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs index 5b127f38d..c2cd172d1 100644 --- a/libm/crates/libm-test/src/gen/random.rs +++ b/libm/crates/libm-test/src/gen/random.rs @@ -14,7 +14,7 @@ use crate::run_cfg::{int_range, 
iteration_count}; pub(crate) const SEED_ENV: &str = "LIBM_SEED"; -pub(crate) static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| { +pub static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| { let s = env::var(SEED_ENV).unwrap_or_else(|_| { let mut rng = rand::thread_rng(); (0..32).map(|_| rng.sample(Alphanumeric) as char).collect() diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index d2fef2325..824f09a33 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -29,7 +29,10 @@ pub use op::{ }; pub use precision::{MaybeOverride, SpecialCase, default_ulp}; use run_cfg::extensive_max_iterations; -pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, skip_extensive_test}; +pub use run_cfg::{ + CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, bigint_fuzz_iteration_count, + skip_extensive_test, +}; pub use test_traits::{CheckOutput, Hex, TupleCall}; /// Result type for tests is usually from `anyhow`. Most times there is no success value to diff --git a/libm/crates/libm-test/src/run_cfg.rs b/libm/crates/libm-test/src/run_cfg.rs index 4dd43bdf3..6b2689976 100644 --- a/libm/crates/libm-test/src/run_cfg.rs +++ b/libm/crates/libm-test/src/run_cfg.rs @@ -158,14 +158,6 @@ impl TestEnv { let op = id.math_op(); let will_run_mp = cfg!(feature = "build-mpfr"); - - // Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run in QEMU. Start - // with a reduced number on these platforms. 
- let slow_on_ci = crate::emulated() - || usize::BITS < 64 - || cfg!(all(target_arch = "x86_64", target_vendor = "apple")); - let slow_platform = slow_on_ci && crate::ci(); - let large_float_ty = match op.float_ty { FloatTy::F16 | FloatTy::F32 => false, FloatTy::F64 | FloatTy::F128 => true, @@ -176,7 +168,7 @@ impl TestEnv { let input_count = op.rust_sig.args.len(); Self { - slow_platform, + slow_platform: slow_platform(), large_float_ty, should_run_extensive: will_run_extensive, mp_tests_enabled: will_run_mp, @@ -185,6 +177,17 @@ impl TestEnv { } } +/// Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run in QEMU. Start +/// with a reduced number on these platforms. +fn slow_platform() -> bool { + let slow_on_ci = crate::emulated() + || usize::BITS < 64 + || cfg!(all(target_arch = "x86_64", target_vendor = "apple")); + + // If not running in CI, there is no need to reduce iteration count. + slow_on_ci && crate::ci() +} + /// The number of iterations to run for a given test. pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { let t_env = TestEnv::from_env(ctx); @@ -351,3 +354,12 @@ pub fn skip_extensive_test(ctx: &CheckCtx) -> bool { let t_env = TestEnv::from_env(ctx); !t_env.should_run_extensive } + +/// The number of iterations to run for `u256` fuzz tests. +pub fn bigint_fuzz_iteration_count() -> u64 { + if !cfg!(optimizations_enabled) { + return 1000; + } + + if slow_platform() { 100_000 } else { 5_000_000 } +} diff --git a/libm/crates/libm-test/tests/u256.rs b/libm/crates/libm-test/tests/u256.rs new file mode 100644 index 000000000..4174820c0 --- /dev/null +++ b/libm/crates/libm-test/tests/u256.rs @@ -0,0 +1,147 @@ +//! Test the u256 implementation. the ops already get exercised reasonably well through the `f128` +//! routines, so this only does a few million fuzz iterations against GMP. 
+ +#![cfg(feature = "build-mpfr")] + +use std::sync::LazyLock; + +use libm::support::{HInt, u256}; +type BigInt = rug::Integer; + +use libm_test::bigint_fuzz_iteration_count; +use libm_test::gen::random::SEED; +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaCha8Rng; +use rug::Assign; +use rug::integer::Order; +use rug::ops::NotAssign; + +static BIGINT_U256_MAX: LazyLock = + LazyLock::new(|| BigInt::from_digits(&[u128::MAX, u128::MAX], Order::Lsf)); + +/// Copied from the test module. +fn hexu(v: u256) -> String { + format!("0x{:032x}{:032x}", v.hi, v.lo) +} + +fn random_u256(rng: &mut ChaCha8Rng) -> u256 { + let lo: u128 = rng.gen(); + let hi: u128 = rng.gen(); + u256 { lo, hi } +} + +fn assign_bigint(bx: &mut BigInt, x: u256) { + bx.assign_digits(&[x.lo, x.hi], Order::Lsf); +} + +fn from_bigint(bx: &mut BigInt) -> u256 { + // Truncate so the result fits into `[u128; 2]`. This makes all ops overflowing. + *bx &= &*BIGINT_U256_MAX; + let mut bres = [0u128, 0]; + bx.write_digits(&mut bres, Order::Lsf); + bx.assign(0); + u256 { lo: bres[0], hi: bres[1] } +} + +fn check_one( + x: impl FnOnce() -> String, + y: impl FnOnce() -> Option, + actual: u256, + expected: &mut BigInt, +) { + let expected = from_bigint(expected); + if actual != expected { + let xmsg = x(); + let ymsg = y().map(|y| format!("y: {y}\n")).unwrap_or_default(); + panic!( + "Results do not match\n\ + input: {xmsg}\n\ + {ymsg}\ + actual: {}\n\ + expected: {}\ + ", + hexu(actual), + hexu(expected), + ) + } +} + +#[test] +fn mp_u256_bitor() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx = BigInt::new(); + let mut by = BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x = random_u256(&mut rng); + let y = random_u256(&mut rng); + assign_bigint(&mut bx, x); + assign_bigint(&mut by, y); + let actual = x | y; + bx |= &by; + check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx); + } +} + +#[test] +fn mp_u256_not() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx 
= BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x = random_u256(&mut rng); + assign_bigint(&mut bx, x); + let actual = !x; + bx.not_assign(); + check_one(|| hexu(x), || None, actual, &mut bx); + } +} + +#[test] +fn mp_u256_add() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx = BigInt::new(); + let mut by = BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x = random_u256(&mut rng); + let y = random_u256(&mut rng); + assign_bigint(&mut bx, x); + assign_bigint(&mut by, y); + let actual = x + y; + bx += &by; + check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx); + } +} + +#[test] +fn mp_u256_shr() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx = BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x = random_u256(&mut rng); + let shift: u32 = rng.gen_range(0..255); + assign_bigint(&mut bx, x); + let actual = x >> shift; + bx >>= shift; + check_one(|| hexu(x), || Some(shift.to_string()), actual, &mut bx); + } +} + +#[test] +fn mp_u256_widen_mul() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx = BigInt::new(); + let mut by = BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x: u128 = rng.gen(); + let y: u128 = rng.gen(); + bx.assign(x); + by.assign(y); + let actual = x.widen_mul(y); + bx *= &by; + check_one(|| format!("{x:#034x}"), || Some(format!("{y:#034x}")), actual, &mut bx); + } +} diff --git a/libm/src/math/support/big.rs b/libm/src/math/support/big.rs index 017e9455e..eae08238e 100644 --- a/libm/src/math/support/big.rs +++ b/libm/src/math/support/big.rs @@ -7,40 +7,39 @@ use core::ops; use super::{DInt, HInt, Int, MinInt}; -const WORD_LO_MASK: u64 = 0x00000000ffffffff; -const WORD_HI_MASK: u64 = 0xffffffff00000000; -const WORD_FULL_MASK: u64 = 0xffffffffffffffff; const U128_LO_MASK: u128 = u64::MAX as u128; -/// A 256-bit unsigned integer represented as 4 64-bit limbs. 
-/// -/// Each limb is a native-endian number, but the array is little-limb-endian. +/// A 256-bit unsigned integer represented as two 128-bit native-endian limbs. #[allow(non_camel_case_types)] #[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] -pub struct u256(pub [u64; 4]); +pub struct u256 { + pub lo: u128, + pub hi: u128, +} impl u256 { - #[allow(unused)] - pub const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX]); + #[cfg(any(test, feature = "unstable-public-internals"))] + pub const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX }; /// Reinterpret as a signed integer pub fn signed(self) -> i256 { - i256(self.0) + i256 { lo: self.lo, hi: self.hi } } } -/// A 256-bit signed integer represented as 4 64-bit limbs. -/// -/// Each limb is a native-endian number, but the array is little-limb-endian. +/// A 256-bit signed integer represented as two 128-bit native-endian limbs. #[allow(non_camel_case_types)] #[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] -pub struct i256(pub [u64; 4]); +pub struct i256 { + pub lo: u128, + pub hi: u128, +} impl i256 { /// Reinterpret as an unsigned integer - #[cfg(test)] + #[cfg(any(test, feature = "unstable-public-internals"))] pub fn unsigned(self) -> u256 { - u256(self.0) + u256 { lo: self.lo, hi: self.hi } } } @@ -51,10 +50,10 @@ impl MinInt for u256 { const SIGNED: bool = false; const BITS: u32 = 256; - const ZERO: Self = Self([0u64; 4]); - const ONE: Self = Self([1, 0, 0, 0]); - const MIN: Self = Self([0u64; 4]); - const MAX: Self = Self([u64::MAX; 4]); + const ZERO: Self = Self { lo: 0, hi: 0 }; + const ONE: Self = Self { lo: 1, hi: 0 }; + const MIN: Self = Self { lo: 0, hi: 0 }; + const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX }; } impl MinInt for i256 { @@ -64,10 +63,10 @@ impl MinInt for i256 { const SIGNED: bool = false; const BITS: u32 = 256; - const ZERO: Self = Self([0u64; 4]); - const ONE: Self = Self([1, 0, 0, 0]); - const MIN: Self = Self([0, 0, 0, 1 << 63]); - const MAX: Self = 
Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX << 1]); + const ZERO: Self = Self { lo: 0, hi: 0 }; + const ONE: Self = Self { lo: 1, hi: 0 }; + const MIN: Self = Self { lo: 0, hi: 1 << 127 }; + const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX << 1 }; } macro_rules! impl_common { @@ -76,10 +75,8 @@ macro_rules! impl_common { type Output = Self; fn bitor(mut self, rhs: Self) -> Self::Output { - self.0[0] |= rhs.0[0]; - self.0[1] |= rhs.0[1]; - self.0[2] |= rhs.0[2]; - self.0[3] |= rhs.0[3]; + self.lo |= rhs.lo; + self.hi |= rhs.hi; self } } @@ -87,8 +84,10 @@ macro_rules! impl_common { impl ops::Not for $ty { type Output = Self; - fn not(self) -> Self::Output { - Self([!self.0[0], !self.0[1], !self.0[2], !self.0[3]]) + fn not(mut self) -> Self::Output { + self.lo = !self.lo; + self.hi = !self.hi; + self } } @@ -105,10 +104,21 @@ macro_rules! impl_common { impl_common!(i256); impl_common!(u256); +impl ops::Add for u256 { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + let (lo, carry) = self.lo.overflowing_add(rhs.lo); + let hi = self.hi.wrapping_add(carry as u128).wrapping_add(rhs.hi); + + Self { lo, hi } + } +} + impl ops::Shr for u256 { type Output = Self; - fn shr(self, rhs: u32) -> Self::Output { + fn shr(mut self, rhs: u32) -> Self::Output { debug_assert!(rhs < Self::BITS, "attempted to shift right with overflow"); if rhs >= Self::BITS { return Self::ZERO; @@ -118,57 +128,28 @@ impl ops::Shr for u256 { return self; } - let mut ret = self; - let byte_shift = rhs / 64; - let bit_shift = rhs % 64; - - for idx in 0..4 { - let base_idx = idx + byte_shift as usize; - - // FIXME(msrv): could be let...else. 
- let base = match ret.0.get(base_idx) { - Some(v) => v, - None => { - ret.0[idx] = 0; - continue; - } - }; - - let mut new_val = base >> bit_shift; - - if let Some(new) = ret.0.get(base_idx + 1) { - new_val |= new.overflowing_shl(64 - bit_shift).0; - } + if rhs < 128 { + self.lo >>= rhs; + self.lo |= self.hi << (128 - rhs); + } else { + self.lo = self.hi >> (rhs - 128); + } - ret.0[idx] = new_val; + if rhs < 128 { + self.hi >>= rhs; + } else { + self.hi = 0; } - ret + self } } -macro_rules! word { - (1, $val:expr) => { - (($val >> (32 * 3)) & Self::from(WORD_LO_MASK)) as u64 - }; - (2, $val:expr) => { - (($val >> (32 * 2)) & Self::from(WORD_LO_MASK)) as u64 - }; - (3, $val:expr) => { - (($val >> (32 * 1)) & Self::from(WORD_LO_MASK)) as u64 - }; - (4, $val:expr) => { - (($val >> (32 * 0)) & Self::from(WORD_LO_MASK)) as u64 - }; -} - impl HInt for u128 { type D = u256; fn widen(self) -> Self::D { - let w0 = self & u128::from(u64::MAX); - let w1 = (self >> u64::BITS) & u128::from(u64::MAX); - u256([w0 as u64, w1 as u64, 0, 0]) + u256 { lo: self, hi: 0 } } fn zero_widen(self) -> Self::D { @@ -176,57 +157,24 @@ impl HInt for u128 { } fn zero_widen_mul(self, rhs: Self) -> Self::D { - let product11: u64 = word!(1, self) * word!(1, rhs); - let product12: u64 = word!(1, self) * word!(2, rhs); - let product13: u64 = word!(1, self) * word!(3, rhs); - let product14: u64 = word!(1, self) * word!(4, rhs); - let product21: u64 = word!(2, self) * word!(1, rhs); - let product22: u64 = word!(2, self) * word!(2, rhs); - let product23: u64 = word!(2, self) * word!(3, rhs); - let product24: u64 = word!(2, self) * word!(4, rhs); - let product31: u64 = word!(3, self) * word!(1, rhs); - let product32: u64 = word!(3, self) * word!(2, rhs); - let product33: u64 = word!(3, self) * word!(3, rhs); - let product34: u64 = word!(3, self) * word!(4, rhs); - let product41: u64 = word!(4, self) * word!(1, rhs); - let product42: u64 = word!(4, self) * word!(2, rhs); - let product43: u64 = word!(4, 
self) * word!(3, rhs); - let product44: u64 = word!(4, self) * word!(4, rhs); - - let sum0: u128 = u128::from(product44); - let sum1: u128 = u128::from(product34) + u128::from(product43); - let sum2: u128 = u128::from(product24) + u128::from(product33) + u128::from(product42); - let sum3: u128 = u128::from(product14) - + u128::from(product23) - + u128::from(product32) - + u128::from(product41); - let sum4: u128 = u128::from(product13) + u128::from(product22) + u128::from(product31); - let sum5: u128 = u128::from(product12) + u128::from(product21); - let sum6: u128 = u128::from(product11); - - let r0: u128 = - (sum0 & u128::from(WORD_FULL_MASK)) + ((sum1 & u128::from(WORD_LO_MASK)) << 32); - let r1: u128 = (sum0 >> 64) - + ((sum1 >> 32) & u128::from(WORD_FULL_MASK)) - + (sum2 & u128::from(WORD_FULL_MASK)) - + ((sum3 << 32) & u128::from(WORD_HI_MASK)); - - let (lo, carry) = r0.overflowing_add(r1 << 64); - let hi = (r1 >> 64) - + (sum1 >> 96) - + (sum2 >> 64) - + (sum3 >> 32) - + sum4 - + (sum5 << 32) - + (sum6 << 64) - + u128::from(carry); - - u256([ - (lo & U128_LO_MASK) as u64, - ((lo >> 64) & U128_LO_MASK) as u64, - (hi & U128_LO_MASK) as u64, - ((hi >> 64) & U128_LO_MASK) as u64, - ]) + let l0 = self & U128_LO_MASK; + let l1 = rhs & U128_LO_MASK; + let h0 = self >> 64; + let h1 = rhs >> 64; + + let p_ll: u128 = l0.overflowing_mul(l1).0; + let p_lh: u128 = l0.overflowing_mul(h1).0; + let p_hl: u128 = h0.overflowing_mul(l1).0; + let p_hh: u128 = h0.overflowing_mul(h1).0; + + let s0 = p_hl + (p_ll >> 64); + let s1 = (p_ll & U128_LO_MASK) + (s0 << 64); + let s2 = p_lh + (s1 >> 64); + + let lo = (p_ll & U128_LO_MASK) + (s2 << 64); + let hi = p_hh + (s0 >> 64) + (s2 >> 64); + + u256 { lo, hi } } fn widen_mul(self, rhs: Self) -> Self::D { @@ -244,8 +192,7 @@ impl HInt for i128 { fn widen(self) -> Self::D { let mut ret = self.unsigned().zero_widen().signed(); if self.is_negative() { - ret.0[2] = u64::MAX; - ret.0[3] = u64::MAX; + ret.hi = u128::MAX; } ret } @@ -271,17 
+218,11 @@ impl DInt for u256 { type H = u128; fn lo(self) -> Self::H { - let mut tmp = [0u8; 16]; - tmp[..8].copy_from_slice(&self.0[0].to_le_bytes()); - tmp[8..].copy_from_slice(&self.0[1].to_le_bytes()); - u128::from_le_bytes(tmp) + self.lo } fn hi(self) -> Self::H { - let mut tmp = [0u8; 16]; - tmp[..8].copy_from_slice(&self.0[2].to_le_bytes()); - tmp[8..].copy_from_slice(&self.0[3].to_le_bytes()); - u128::from_le_bytes(tmp) + self.hi } } @@ -289,16 +230,10 @@ impl DInt for i256 { type H = i128; fn lo(self) -> Self::H { - let mut tmp = [0u8; 16]; - tmp[..8].copy_from_slice(&self.0[0].to_le_bytes()); - tmp[8..].copy_from_slice(&self.0[1].to_le_bytes()); - i128::from_le_bytes(tmp) + self.lo as i128 } fn hi(self) -> Self::H { - let mut tmp = [0u8; 16]; - tmp[..8].copy_from_slice(&self.0[2].to_le_bytes()); - tmp[8..].copy_from_slice(&self.0[3].to_le_bytes()); - i128::from_le_bytes(tmp) + self.hi as i128 } } diff --git a/libm/src/math/support/big/tests.rs b/libm/src/math/support/big/tests.rs index 815a62dfe..6d06c700a 100644 --- a/libm/src/math/support/big/tests.rs +++ b/libm/src/math/support/big/tests.rs @@ -9,33 +9,30 @@ const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff; /// Print a `u256` as hex since we can't add format implementations fn hexu(v: u256) -> String { - format!("0x{:016x}{:016x}{:016x}{:016x}", v.0[3], v.0[2], v.0[1], v.0[0]) + format!("0x{:032x}{:032x}", v.hi, v.lo) } #[test] fn widen_u128() { - assert_eq!(u128::MAX.widen(), u256([u64::MAX, u64::MAX, 0, 0])); - assert_eq!(LOHI_SPLIT.widen(), u256([u64::MAX, 0xaaaaaaaaaaaaaaaa, 0, 0])); + assert_eq!(u128::MAX.widen(), u256 { lo: u128::MAX, hi: 0 }); + assert_eq!(LOHI_SPLIT.widen(), u256 { lo: LOHI_SPLIT, hi: 0 }); } #[test] fn widen_i128() { assert_eq!((-1i128).widen(), u256::MAX.signed()); - assert_eq!( - (LOHI_SPLIT as i128).widen(), - i256([u64::MAX, 0xaaaaaaaaaaaaaaaa, u64::MAX, u64::MAX]) - ); + assert_eq!((LOHI_SPLIT as i128).widen(), i256 { lo: LOHI_SPLIT, hi: u128::MAX }); 
assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen()); } #[test] fn widen_mul_u128() { let tests = [ - (u128::MAX / 2, 2_u128, u256([u64::MAX - 1, u64::MAX, 0, 0])), - (u128::MAX, 2_u128, u256([u64::MAX - 1, u64::MAX, 1, 0])), - (u128::MAX, u128::MAX, u256([1, 0, u64::MAX - 1, u64::MAX])), - (u128::MIN, u128::MIN, u256::ZERO), - (1234, 0, u256::ZERO), + (u128::MAX / 2, 2_u128, u256 { lo: u128::MAX - 1, hi: 0 }), + (u128::MAX, 2_u128, u256 { lo: u128::MAX - 1, hi: 1 }), + (u128::MAX, u128::MAX, u256 { lo: 1, hi: u128::MAX - 1 }), + (0, 0, u256::ZERO), + (1234u128, 0, u256::ZERO), (0, 1234, u256::ZERO), ]; @@ -50,20 +47,27 @@ fn widen_mul_u128() { } for (i, a, b, exp, res) in &errors { - eprintln!("FAILURE ({i}): {a:#034x} * {b:#034x} = {} got {}", hexu(*exp), hexu(*res)); + eprintln!( + "\ + FAILURE ({i}): {a:#034x} * {b:#034x}\n\ + expected: {}\n\ + got: {}\ + ", + hexu(*exp), + hexu(*res) + ); } assert!(errors.is_empty()); } #[test] -fn not_u128() { +fn not_u256() { assert_eq!(!u256::ZERO, u256::MAX); } #[test] -fn shr_u128() { +fn shr_u256() { let only_low = [1, u16::MAX.into(), u32::MAX.into(), u64::MAX.into(), u128::MAX]; - let mut errors = Vec::new(); for a in only_low { @@ -80,20 +84,24 @@ fn shr_u128() { } let check = [ - (u256::MAX, 1, u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1])), - (u256::MAX, 5, u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 5])), - (u256::MAX, 63, u256([u64::MAX, u64::MAX, u64::MAX, 1])), - (u256::MAX, 64, u256([u64::MAX, u64::MAX, u64::MAX, 0])), - (u256::MAX, 65, u256([u64::MAX, u64::MAX, u64::MAX >> 1, 0])), - (u256::MAX, 127, u256([u64::MAX, u64::MAX, 1, 0])), - (u256::MAX, 128, u256([u64::MAX, u64::MAX, 0, 0])), - (u256::MAX, 129, u256([u64::MAX, u64::MAX >> 1, 0, 0])), - (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])), - (u256::MAX, 192, u256([u64::MAX, 0, 0, 0])), - (u256::MAX, 193, u256([u64::MAX >> 1, 0, 0, 0])), - (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])), - (u256::MAX, 254, u256([0b11, 0, 0, 0])), - 
(u256::MAX, 255, u256([1, 0, 0, 0])), + (u256::MAX, 1, u256 { lo: u128::MAX, hi: u128::MAX >> 1 }), + (u256::MAX, 5, u256 { lo: u128::MAX, hi: u128::MAX >> 5 }), + (u256::MAX, 63, u256 { lo: u128::MAX, hi: u64::MAX as u128 | (1 << 64) }), + (u256::MAX, 64, u256 { lo: u128::MAX, hi: u64::MAX as u128 }), + (u256::MAX, 65, u256 { lo: u128::MAX, hi: (u64::MAX >> 1) as u128 }), + (u256::MAX, 127, u256 { lo: u128::MAX, hi: 1 }), + (u256::MAX, 128, u256 { lo: u128::MAX, hi: 0 }), + (u256::MAX, 129, u256 { lo: u128::MAX >> 1, hi: 0 }), + (u256::MAX, 191, u256 { lo: u64::MAX as u128 | 1 << 64, hi: 0 }), + (u256::MAX, 192, u256 { lo: u64::MAX as u128, hi: 0 }), + (u256::MAX, 193, u256 { lo: u64::MAX as u128 >> 1, hi: 0 }), + (u256::MAX, 254, u256 { lo: 0b11, hi: 0 }), + (u256::MAX, 255, u256 { lo: 1, hi: 0 }), + ( + u256 { hi: LOHI_SPLIT, lo: 0 }, + 64, + u256 { lo: 0xffffffffffffffff0000000000000000, hi: 0xaaaaaaaaaaaaaaaa }, + ), ]; for (input, shift, expected) in check { @@ -104,7 +112,16 @@ fn shr_u128() { } for (a, b, res, expected) in &errors { - eprintln!("FAILURE: {} >> {b} = {} got {}", hexu(*a), hexu(*expected), hexu(*res),); + eprintln!( + "\ + FAILURE: {} >> {b}\n\ + expected: {}\n\ + got: {}\ + ", + hexu(*a), + hexu(*expected), + hexu(*res) + ); } assert!(errors.is_empty()); } From 59147bd242afdd0615d3f9dee29d81ec19deac1b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 10 Feb 2025 08:17:57 +0000 Subject: [PATCH 1183/1459] Introduce a trait constant for the minimum positive normal value --- libm/crates/libm-test/src/f8_impl.rs | 1 + libm/src/math/support/float_traits.rs | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/libm/crates/libm-test/src/f8_impl.rs b/libm/crates/libm-test/src/f8_impl.rs index 5dce9be18..56ea0b729 100644 --- a/libm/crates/libm-test/src/f8_impl.rs +++ b/libm/crates/libm-test/src/f8_impl.rs @@ -32,6 +32,7 @@ impl Float for f8 { const INFINITY: Self = Self(0b0_1111_000); const NEG_INFINITY: Self = Self(0b1_1111_000); const 
NAN: Self = Self(0b0_1111_100); + const MIN_POSITIVE_NORMAL: Self = Self(1 << Self::SIG_BITS); // FIXME: incorrect values const EPSILON: Self = Self::ZERO; const PI: Self = Self::ZERO; diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index ee83c793d..42ce31484 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -41,6 +41,8 @@ pub trait Float: const NEG_PI: Self; const FRAC_PI_2: Self; + const MIN_POSITIVE_NORMAL: Self; + /// The bitwidth of the float type const BITS: u32; @@ -200,6 +202,9 @@ macro_rules! float_impl { const MIN: Self = $from_bits(Self::Int::MAX & !(1 << Self::SIG_BITS)); const EPSILON: Self = <$ty>::EPSILON; + // Exponent is a 1 in the LSB + const MIN_POSITIVE_NORMAL: Self = $from_bits(1 << Self::SIG_BITS); + const PI: Self = core::$ty::consts::PI; const NEG_PI: Self = -Self::PI; const FRAC_PI_2: Self = core::$ty::consts::FRAC_PI_2; @@ -358,6 +363,7 @@ mod tests { // results for zero and subnormals. assert_eq!(f16::ZERO.exp_unbiased(), -15); assert_eq!(f16::from_bits(0x1).exp_unbiased(), -15); + assert_eq!(f16::MIN_POSITIVE, f16::MIN_POSITIVE_NORMAL); // `from_parts` assert_biteq!(f16::from_parts(true, f16::EXP_BIAS, 0), -1.0f16); @@ -383,6 +389,7 @@ mod tests { // results for zero and subnormals. assert_eq!(f32::ZERO.exp_unbiased(), -127); assert_eq!(f32::from_bits(0x1).exp_unbiased(), -127); + assert_eq!(f32::MIN_POSITIVE, f32::MIN_POSITIVE_NORMAL); // `from_parts` assert_biteq!(f32::from_parts(true, f32::EXP_BIAS, 0), -1.0f32); @@ -409,6 +416,7 @@ mod tests { // results for zero and subnormals. assert_eq!(f64::ZERO.exp_unbiased(), -1023); assert_eq!(f64::from_bits(0x1).exp_unbiased(), -1023); + assert_eq!(f64::MIN_POSITIVE, f64::MIN_POSITIVE_NORMAL); // `from_parts` assert_biteq!(f64::from_parts(true, f64::EXP_BIAS, 0), -1.0f64); @@ -436,6 +444,7 @@ mod tests { // results for zero and subnormals. 
assert_eq!(f128::ZERO.exp_unbiased(), -16383); assert_eq!(f128::from_bits(0x1).exp_unbiased(), -16383); + assert_eq!(f128::MIN_POSITIVE, f128::MIN_POSITIVE_NORMAL); // `from_parts` assert_biteq!(f128::from_parts(true, f128::EXP_BIAS, 0), -1.0f128); From d197af6f4365ff937b101ccb35e87436c9ecaa80 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 10 Feb 2025 09:17:54 +0000 Subject: [PATCH 1184/1459] Migrate away from nonfunctional `fenv` stubs Many routines have some form of handling for rounding mode and floating point exceptions, which are implemented via a combination of stubs and `force_eval!` use. This is suboptimal, however, because: 1. Rust does not interact with the floating point environment, so most of this code does nothing. 2. The parts of the code that are not dead are not testable. 3. `force_eval!` blocks optimizations, which is unnecessary because we do not rely on its side effects. We cannot ensure correct rounding and exception handling in all cases without some form of arithmetic operations that are aware of this behavior. However, the cases where rounding mode is explicitly handled or exceptions are explicitly raised are testable. Make this possible here for functions that depend on `math::fenv` by moving the implementation to a nonpublic function that takes a `Round` and returns a `Status`. 
Link: https://github.com/rust-lang/libm/issues/480 --- libm/src/math/cbrt.rs | 25 ++++--- libm/src/math/fenv.rs | 49 ------------- libm/src/math/generic/fma.rs | 133 ++++++++++++++++++---------------- libm/src/math/generic/sqrt.rs | 48 +++++++++--- libm/src/math/mod.rs | 1 - libm/src/math/support/env.rs | 118 ++++++++++++++++++++++++++++++ libm/src/math/support/mod.rs | 7 +- 7 files changed, 240 insertions(+), 141 deletions(-) delete mode 100644 libm/src/math/fenv.rs create mode 100644 libm/src/math/support/env.rs diff --git a/libm/src/math/cbrt.rs b/libm/src/math/cbrt.rs index fbf81f77d..8560d37ab 100644 --- a/libm/src/math/cbrt.rs +++ b/libm/src/math/cbrt.rs @@ -5,12 +5,15 @@ */ use super::Float; -use super::fenv::Rounding; -use super::support::cold_path; +use super::support::{FpResult, Round, cold_path}; /// Compute the cube root of the argument. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cbrt(x: f64) -> f64 { + cbrt_round(x, Round::Nearest).val +} + +pub fn cbrt_round(x: f64, round: Round) -> FpResult { const ESCALE: [f64; 3] = [ 1.0, hf64!("0x1.428a2f98d728bp+0"), /* 2^(1/3) */ @@ -33,8 +36,6 @@ pub fn cbrt(x: f64) -> f64 { let off = [hf64!("0x1p-53"), 0.0, 0.0, 0.0]; - let rm = Rounding::get(); - /* rm=0 for rounding to nearest, and other values for directed roundings */ let hx: u64 = x.to_bits(); let mut mant: u64 = hx & f64::SIG_MASK; @@ -51,7 +52,7 @@ pub fn cbrt(x: f64) -> f64 { to that for x a signaling NaN, it correctly triggers the invalid exception. */ if e == f64::EXP_SAT || ix == 0 { - return x + x; + return FpResult::ok(x + x); } let nz = ix.leading_zeros() - 11; /* subnormal */ @@ -124,8 +125,8 @@ pub fn cbrt(x: f64) -> f64 { * from ulp(1); * for rounding to nearest, ady0 is tiny when dy is near from 1/2 ulp(1), * or from 3/2 ulp(1). 
*/ - let mut ady0: f64 = (ady - off[rm as usize]).abs(); - let mut ady1: f64 = (ady - (hf64!("0x1p-52") + off[rm as usize])).abs(); + let mut ady0: f64 = (ady - off[round as usize]).abs(); + let mut ady1: f64 = (ady - (hf64!("0x1p-52") + off[round as usize])).abs(); if ady0 < hf64!("0x1p-75") || ady1 < hf64!("0x1p-75") { cold_path(); @@ -140,8 +141,8 @@ pub fn cbrt(x: f64) -> f64 { dy = (y1 - y) - dy; y1 = y; ady = dy.abs(); - ady0 = (ady - off[rm as usize]).abs(); - ady1 = (ady - (hf64!("0x1p-52") + off[rm as usize])).abs(); + ady0 = (ady - off[round as usize]).abs(); + ady1 = (ady - (hf64!("0x1p-52") + off[round as usize])).abs(); if ady0 < hf64!("0x1p-98") || ady1 < hf64!("0x1p-98") { cold_path(); @@ -157,7 +158,7 @@ pub fn cbrt(x: f64) -> f64 { y1 = hf64!("0x1.de87aa837820fp+0").copysign(zz); } - if rm != Rounding::Nearest { + if round != Round::Nearest { let wlist = [ (hf64!("0x1.3a9ccd7f022dbp+0"), hf64!("0x1.1236160ba9b93p+0")), // ~ 0x1.1236160ba9b930000000000001e7e8fap+0 (hf64!("0x1.7845d2faac6fep+0"), hf64!("0x1.23115e657e49cp+0")), // ~ 0x1.23115e657e49c0000000000001d7a799p+0 @@ -170,7 +171,7 @@ pub fn cbrt(x: f64) -> f64 { for (a, b) in wlist { if azz == a { - let tmp = if rm as u64 + sign == 2 { hf64!("0x1p-52") } else { 0.0 }; + let tmp = if round as u64 + sign == 2 { hf64!("0x1p-52") } else { 0.0 }; y1 = (b + tmp).copysign(zz); } } @@ -194,7 +195,7 @@ pub fn cbrt(x: f64) -> f64 { } } - f64::from_bits(cvt3) + FpResult::ok(f64::from_bits(cvt3)) } fn fmaf64(x: f64, y: f64, z: f64) -> f64 { diff --git a/libm/src/math/fenv.rs b/libm/src/math/fenv.rs deleted file mode 100644 index 328c9f346..000000000 --- a/libm/src/math/fenv.rs +++ /dev/null @@ -1,49 +0,0 @@ -// src: musl/src/fenv/fenv.c -/* Dummy functions for archs lacking fenv implementation */ - -pub(crate) const FE_UNDERFLOW: i32 = 0; -pub(crate) const FE_INEXACT: i32 = 0; - -pub(crate) const FE_TONEAREST: i32 = 0; -pub(crate) const FE_DOWNWARD: i32 = 1; -pub(crate) const FE_UPWARD: i32 = 2; 
-pub(crate) const FE_TOWARDZERO: i32 = 3; - -#[inline] -pub(crate) fn feclearexcept(_mask: i32) -> i32 { - 0 -} - -#[inline] -pub(crate) fn feraiseexcept(_mask: i32) -> i32 { - 0 -} - -#[inline] -pub(crate) fn fetestexcept(_mask: i32) -> i32 { - 0 -} - -#[inline] -pub(crate) fn fegetround() -> i32 { - FE_TONEAREST -} - -#[derive(Clone, Copy, Debug, PartialEq)] -pub(crate) enum Rounding { - Nearest = FE_TONEAREST as isize, - Downward = FE_DOWNWARD as isize, - Upward = FE_UPWARD as isize, - ToZero = FE_TOWARDZERO as isize, -} - -impl Rounding { - pub(crate) fn get() -> Self { - match fegetround() { - x if x == FE_DOWNWARD => Self::Downward, - x if x == FE_UPWARD => Self::Upward, - x if x == FE_TOWARDZERO => Self::ToZero, - _ => Self::Nearest, - } - } -} diff --git a/libm/src/math/generic/fma.rs b/libm/src/math/generic/fma.rs index a40d7aaaf..821aee090 100644 --- a/libm/src/math/generic/fma.rs +++ b/libm/src/math/generic/fma.rs @@ -1,12 +1,7 @@ /* SPDX-License-Identifier: MIT */ /* origin: musl src/math/{fma,fmaf}.c. Ported to generic Rust algorithm in 2025, TG. */ -use core::{f32, f64}; - -use super::super::fenv::{ - FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, feclearexcept, fegetround, feraiseexcept, fetestexcept, -}; -use super::super::support::{DInt, HInt, IntTy}; +use super::super::support::{DInt, FpResult, HInt, IntTy, Round, Status}; use super::super::{CastFrom, CastInto, DFloat, Float, HFloat, Int, MinInt}; /// Fused multiply-add that works when there is not a larger float size available. 
Currently this @@ -14,7 +9,18 @@ use super::super::{CastFrom, CastInto, DFloat, Float, HFloat, Int, MinInt}; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fma(x: F, y: F, z: F) -> F where - F: Float + FmaHelper, + F: Float, + F: CastFrom, + F: CastFrom, + F::Int: HInt, + u32: CastInto, +{ + fma_round(x, y, z, Round::Nearest).val +} + +pub fn fma_round(x: F, y: F, z: F, _round: Round) -> FpResult +where + F: Float, F: CastFrom, F: CastFrom, F::Int: HInt, @@ -30,16 +36,16 @@ where if nx.is_zero_nan_inf() || ny.is_zero_nan_inf() { // Value will overflow, defer to non-fused operations. - return x * y + z; + return FpResult::ok(x * y + z); } if nz.is_zero_nan_inf() { if nz.is_zero() { // Empty add component means we only need to multiply. - return x * y; + return FpResult::ok(x * y); } // `z` is NaN or infinity, which sets the result. - return z; + return FpResult::ok(z); } // multiply: r = x * y @@ -147,7 +153,7 @@ where } } else { // exact +/- 0.0 - return x * y + z; + return FpResult::ok(x * y + z); } e -= d; @@ -168,6 +174,8 @@ where // Unbiased exponent for the maximum value of `r` let max_pow = F::BITS - 1 + F::EXP_BIAS; + let mut status = Status::OK; + if e < -(max_pow as i32 - 2) { // Result is subnormal before rounding if e == -(max_pow as i32 - 1) { @@ -178,7 +186,9 @@ where if r == c { // Min normal after rounding, - return r.raise_underflow_as_min_positive(); + status.set_underflow(true); + r = F::MIN_POSITIVE_NORMAL.copysign(r); + return FpResult::new(r, status); } if (rhi << (F::SIG_BITS + 1)) != zero { @@ -195,7 +205,7 @@ where // Remove the top bit r = F::cast_from(2i8) * r - c; - r += r.raise_underflow_ret_zero(); + status.set_underflow(true); } } else { // Only round once when scaled @@ -212,12 +222,22 @@ where } // Use our exponent to scale the final value. - super::scalbn(r, e) + FpResult::new(super::scalbn(r, e), status) } /// Fma implementation when a hardware-backed larger float type is available. 
For `f32` and `f64`, /// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding. pub fn fma_wide(x: F, y: F, z: F) -> F +where + F: Float + HFloat, + B: Float + DFloat, + B::Int: CastInto, + i32: CastFrom, +{ + fma_wide_round(x, y, z, Round::Nearest).val +} + +pub fn fma_wide_round(x: F, y: F, z: F, round: Round) -> FpResult where F: Float + HFloat, B: Float + DFloat, @@ -244,24 +264,26 @@ where // Or the result is exact || (result - xy == zb && result - zb == xy) // Or the mode is something other than round to nearest - || fegetround() != FE_TONEAREST + || round != Round::Nearest { let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32; let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32; - if (min_inexact_exp..max_inexact_exp).contains(&re) && fetestexcept(FE_INEXACT) != 0 { - feclearexcept(FE_INEXACT); - // prevent `xy + vz` from being CSE'd with `xy + z` above - let vz: F = force_eval!(z); - result = xy + vz.widen(); - if fetestexcept(FE_INEXACT) != 0 { - feraiseexcept(FE_UNDERFLOW); + let mut status = Status::OK; + + if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() { + // This branch is never hit; requires previous operations to set a status + status.set_inexact(false); + + result = xy + z.widen(); + if status.inexact() { + status.set_underflow(true); } else { - feraiseexcept(FE_INEXACT); + status.set_inexact(true); } } - return result.narrow(); + return FpResult { val: result.narrow(), status }; } let neg = ui >> (B::BITS - 1) != IntTy::::ZERO; @@ -272,7 +294,7 @@ where ui -= one; } - B::from_bits(ui).narrow() + FpResult::ok(B::from_bits(ui).narrow()) } /// Representation of `F` that has handled subnormals. @@ -337,49 +359,13 @@ impl Norm { } } -/// Type-specific helpers that are not needed outside of fma. -pub trait FmaHelper { - /// Raise underflow and return the minimum positive normal value with the sign of `self`. 
- fn raise_underflow_as_min_positive(self) -> Self; - /// Raise underflow and return zero. - fn raise_underflow_ret_zero(self) -> Self; -} - -impl FmaHelper for f64 { - fn raise_underflow_as_min_positive(self) -> Self { - /* min normal after rounding, underflow depends - * on arch behaviour which can be imitated by - * a double to float conversion */ - let fltmin: f32 = (hf64!("0x0.ffffff8p-63") * f32::MIN_POSITIVE as f64 * self) as f32; - f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64 - } - - fn raise_underflow_ret_zero(self) -> Self { - /* raise underflow portably, such that it - * cannot be optimized away */ - let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * self; - (tiny * tiny) * (self - self) - } -} - -#[cfg(f128_enabled)] -impl FmaHelper for f128 { - fn raise_underflow_as_min_positive(self) -> Self { - f128::MIN_POSITIVE.copysign(self) - } - - fn raise_underflow_ret_zero(self) -> Self { - f128::ZERO - } -} - #[cfg(test)] mod tests { use super::*; fn spec_test() where - F: Float + FmaHelper, + F: Float, F: CastFrom, F: CastFrom, F::Int: HInt, @@ -401,6 +387,29 @@ mod tests { #[test] fn spec_test_f64() { spec_test::(); + + let expect_underflow = [ + ( + hf64!("0x1.0p-1070"), + hf64!("0x1.0p-1070"), + hf64!("0x1.ffffffffffffp-1023"), + hf64!("0x0.ffffffffffff8p-1022"), + ), + ( + // FIXME: we raise underflow but this should only be inexact (based on C and + // `rustc_apfloat`). + hf64!("0x1.0p-1070"), + hf64!("0x1.0p-1070"), + hf64!("-0x1.0p-1022"), + hf64!("-0x1.0p-1022"), + ), + ]; + + for (x, y, z, res) in expect_underflow { + let FpResult { val, status } = fma_round(x, y, z, Round::Nearest); + assert_biteq!(val, res); + assert_eq!(status, Status::UNDERFLOW); + } } #[test] diff --git a/libm/src/math/generic/sqrt.rs b/libm/src/math/generic/sqrt.rs index 90d6c01e9..fdd612493 100644 --- a/libm/src/math/generic/sqrt.rs +++ b/libm/src/math/generic/sqrt.rs @@ -41,10 +41,23 @@ //! 
Goldschmidt has the advantage over Newton-Raphson that `sqrt(x)` and `1/sqrt(x)` are //! computed at the same time, i.e. there is no need to calculate `1/sqrt(x)` and invert it. -use super::super::support::{IntTy, cold_path, raise_invalid}; +use super::super::support::{FpResult, IntTy, Round, Status, cold_path}; use super::super::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt}; pub fn sqrt(x: F) -> F +where + F: Float + SqrtHelper, + F::Int: HInt, + F::Int: From, + F::Int: From, + F::Int: CastInto, + F::Int: CastInto, + u32: CastInto, +{ + sqrt_round(x, Round::Nearest).val +} + +pub fn sqrt_round(x: F, _round: Round) -> FpResult where F: Float + SqrtHelper, F::Int: HInt, @@ -78,17 +91,17 @@ where // +/-0 if ix << 1 == zero { - return x; + return FpResult::ok(x); } // Positive infinity if ix == F::EXP_MASK { - return x; + return FpResult::ok(x); } // NaN or negative if ix > F::EXP_MASK { - return raise_invalid(x); + return FpResult::new(F::NAN, Status::INVALID); } // Normalize subnormals by multiplying by 1.0 << SIG_BITS (e.g. 0x1p52 for doubles). @@ -215,7 +228,7 @@ where y = y + t; } - y + FpResult::ok(y) } /// Multiply at the wider integer size, returning the high half. @@ -329,7 +342,7 @@ impl SqrtHelper for f128 { /// A U0.16 representation of `1/sqrt(x)`. /// -// / The index is a 7-bit number consisting of a single exponent bit and 6 bits of significand. +/// The index is a 7-bit number consisting of a single exponent bit and 6 bits of significand. #[rustfmt::skip] static RSQRT_TAB: [u16; 128] = [ 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43, @@ -354,7 +367,7 @@ static RSQRT_TAB: [u16; 128] = [ mod tests { use super::*; - /// Test against edge cases from https://en.cppreference.com/w/cpp/numeric/math/sqrt + /// Test behavior specified in IEEE 754 `squareRoot`. 
fn spec_test() where F: Float + SqrtHelper, @@ -365,11 +378,22 @@ mod tests { F::Int: CastInto, u32: CastInto, { - // Not Asserted: FE_INVALID exception is raised if argument is negative. - assert!(sqrt(F::NEG_ONE).is_nan()); - assert!(sqrt(F::NAN).is_nan()); - for f in [F::ZERO, F::NEG_ZERO, F::INFINITY].iter().copied() { - assert_biteq!(sqrt(f), f); + // Values that should return a NaN and raise invalid + let nan = [F::NEG_INFINITY, F::NEG_ONE, F::NAN, F::MIN]; + + // Values that return unaltered + let roundtrip = [F::ZERO, F::NEG_ZERO, F::INFINITY]; + + for x in nan { + let FpResult { val, status } = sqrt_round(x, Round::Nearest); + assert!(val.is_nan()); + assert!(status == Status::INVALID); + } + + for x in roundtrip { + let FpResult { val, status } = sqrt_round(x, Round::Nearest); + assert_biteq!(val, x); + assert!(status == Status::OK); } } diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index e32045021..ae4a278f2 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -94,7 +94,6 @@ cfg_if! { // Private modules mod arch; mod expo2; -mod fenv; mod k_cos; mod k_cosf; mod k_expo2; diff --git a/libm/src/math/support/env.rs b/libm/src/math/support/env.rs new file mode 100644 index 000000000..7244381da --- /dev/null +++ b/libm/src/math/support/env.rs @@ -0,0 +1,118 @@ +//! Support for rounding directions and status flags as specified by IEEE 754. +//! +//! Rust does not support the floating point environment so rounding mode is passed as an argument +//! and status flags are returned as part of the result. There is currently not much support for +//! this; most existing ports from musl use a form of `force_eval!` to raise exceptions, but this +//! has no side effects in Rust. Further, correct behavior relies on elementary operations making +//! use of the correct rounding and raising relevant exceptions, which is not the case for Rust. +//! +//! This module exists so no functionality is lost when porting algorithms that respect floating +//! 
point environment, and so that some functionality may be tested (that which does not rely on +//! side effects from elementary operations). Full support would require wrappers around basic +//! operations, but there is no plan to add this at the current time. + +/// A value combined with a floating point status. +pub struct FpResult { + pub val: T, + #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] + pub status: Status, +} + +impl FpResult { + pub fn new(val: T, status: Status) -> Self { + Self { val, status } + } + + /// Return `val` with `Status::OK`. + pub fn ok(val: T) -> Self { + Self { val, status: Status::OK } + } +} + +/// IEEE 754 rounding mode, excluding the optional `roundTiesToAway` version of nearest. +/// +/// Integer representation comes from what CORE-MATH uses for indexing. +#[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Round { + /// IEEE 754 nearest, `roundTiesToEven`. + Nearest = 0, + /// IEEE 754 `roundTowardNegative`. + Negative = 1, + /// IEEE 754 `roundTowardPositive`. + Positive = 2, + /// IEEE 754 `roundTowardZero`. + Zero = 3, +} + +/// IEEE 754 exception status flags. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Status(u8); + +impl Status { + /// Default status indicating no errors. + pub const OK: Self = Self(0); + + /// No definable result. + /// + /// Includes: + /// - Any ops on sNaN, with a few exceptions. + /// - `0 * inf`, `inf * 0`. + /// - `fma(0, inf, c)` or `fma(inf, 0, c)`, possibly excluding `c = qNaN`. + /// - `+inf + -inf` and similar (includes subtraction and fma). + /// - `0.0 / 0.0`, `inf / inf` + /// - `remainder(x, y)` if `y == 0.0` or `x == inf`, and neither is NaN. + /// - `sqrt(x)` with `x < 0.0`. + pub const INVALID: Self = Self(1); + + /// Division by zero. + /// + /// The default result for division is +/-inf based on operand sign. For `logB`, the default + /// result is -inf. 
+ /// `x / y` when `x != 0.0` and `y == 0.0`, + + #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] + pub const DIVIDE_BY_ZERO: Self = Self(1 << 2); + + /// The result exceeds the maximum finite value. + /// + /// The default result depends on rounding mode. `Nearest*` rounds to +/- infinity, sign based + /// on the intermediate result. `Zero` rounds to the signed maximum finite. `Positive` and + /// `Negative` round to signed maximum finite in one direction, signed infinity in the other. + #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] + pub const OVERFLOW: Self = Self(1 << 3); + + /// The result is subnormal and lost precision. + pub const UNDERFLOW: Self = Self(1 << 4); + + /// The finite-precision result does not match that of infinite precision, and the reason + /// is not represented by one of the other flags. + pub const INEXACT: Self = Self(1 << 5); + + /// True if `UNDERFLOW` is set. + #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] + pub fn underflow(self) -> bool { + self.0 & Self::UNDERFLOW.0 != 0 + } + + pub fn set_underflow(&mut self, val: bool) { + self.set_flag(val, Self::UNDERFLOW); + } + + /// True if `INEXACT` is set. 
+ pub fn inexact(self) -> bool { + self.0 & Self::INEXACT.0 != 0 + } + + pub fn set_inexact(&mut self, val: bool) { + self.set_flag(val, Self::INEXACT); + } + + fn set_flag(&mut self, val: bool, mask: Self) { + if val { + self.0 |= mask.0; + } else { + self.0 &= !mask.0; + } + } +} diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index 28e9fd413..ee3f2bbdf 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -1,12 +1,14 @@ #[macro_use] pub mod macros; mod big; +mod env; mod float_traits; pub mod hex_float; mod int_traits; #[allow(unused_imports)] pub use big::{i256, u256}; +pub use env::{FpResult, Round, Status}; #[allow(unused_imports)] pub use float_traits::{DFloat, Float, HFloat, IntTy}; pub(crate) use float_traits::{f32_from_bits, f64_from_bits}; @@ -25,8 +27,3 @@ pub fn cold_path() { #[cfg(intrinsics_enabled)] core::intrinsics::cold_path(); } - -/// Return `x`, first raising `FE_INVALID`. -pub fn raise_invalid(x: F) -> F { - (x - x) / (x - x) -} From b81598237767e77e4f5fcec85a50dbdf34bed27e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 10 Feb 2025 12:01:16 +0000 Subject: [PATCH 1185/1459] Eliminate the use of `force_eval!` in `ceil`, `floor`, and `trunc` --- libm/src/math/generic/ceil.rs | 91 ++++++++++++++++++++++++++++------ libm/src/math/generic/floor.rs | 77 +++++++++++++++++++++------- libm/src/math/generic/trunc.rs | 89 +++++++++++++++++++++++++++++++-- 3 files changed, 220 insertions(+), 37 deletions(-) diff --git a/libm/src/math/generic/ceil.rs b/libm/src/math/generic/ceil.rs index 971a4d3d8..bf7e1d8e2 100644 --- a/libm/src/math/generic/ceil.rs +++ b/libm/src/math/generic/ceil.rs @@ -7,9 +7,14 @@ //! performance seems to be better (based on icount) and it does not seem to experience rounding //! errors on i386. 
+use super::super::support::{FpResult, Status}; use super::super::{Float, Int, IntTy, MinInt}; pub fn ceil(x: F) -> F { + ceil_status(x).val +} + +pub fn ceil_status(x: F) -> FpResult { let zero = IntTy::::ZERO; let mut ix = x.to_bits(); @@ -17,20 +22,20 @@ pub fn ceil(x: F) -> F { // If the represented value has no fractional part, no truncation is needed. if e >= F::SIG_BITS as i32 { - return x; + return FpResult::ok(x); } - if e >= 0 { + let status; + let res = if e >= 0 { // |x| >= 1.0 - let m = F::SIG_MASK >> e.unsigned(); if (ix & m) == zero { // Portion to be masked is already zero; no adjustment needed. - return x; + return FpResult::ok(x); } // Otherwise, raise an inexact exception. - force_eval!(x + F::MAX); + status = Status::INEXACT; if x.is_sign_positive() { ix += m; @@ -40,7 +45,11 @@ pub fn ceil(x: F) -> F { F::from_bits(ix) } else { // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0). - force_eval!(x + F::MAX); + if ix & F::SIG_MASK == F::Int::ZERO { + status = Status::OK; + } else { + status = Status::INEXACT; + } if x.is_sign_negative() { // -1.0 < x <= -0.0; rounding up goes toward -0.0. @@ -52,18 +61,30 @@ pub fn ceil(x: F) -> F { // +0.0 remains unchanged x } - } + }; + + FpResult::new(res, status) } #[cfg(test)] mod tests { use super::*; + use crate::support::Hexf; /// Test against https://en.cppreference.com/w/cpp/numeric/math/ceil - fn spec_test() { - // Not Asserted: that the current rounding mode has no effect. 
- for f in [F::ZERO, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY].iter().copied() { - assert_biteq!(ceil(f), f); + fn spec_test(cases: &[(F, F, Status)]) { + let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY]; + + for x in roundtrip { + let FpResult { val, status } = ceil_status(x); + assert_biteq!(val, x, "{}", Hexf(x)); + assert_eq!(status, Status::OK, "{}", Hexf(x)); + } + + for &(x, res, res_stat) in cases { + let FpResult { val, status } = ceil_status(x); + assert_biteq!(val, res, "{}", Hexf(x)); + assert_eq!(status, res_stat, "{}", Hexf(x)); } } @@ -72,7 +93,17 @@ mod tests { #[test] #[cfg(f16_enabled)] fn spec_tests_f16() { - spec_test::(); + let cases = [ + (0.1, 1.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 1.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 2.0, Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 2.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); } #[test] @@ -83,7 +114,17 @@ mod tests { #[test] fn spec_tests_f32() { - spec_test::(); + let cases = [ + (0.1, 1.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 1.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 2.0, Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 2.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); } #[test] @@ -94,12 +135,32 @@ mod tests { #[test] fn spec_tests_f64() { - spec_test::(); + let cases = [ + (0.1, 1.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 1.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 2.0, Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 2.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); } #[test] #[cfg(f128_enabled)] fn spec_tests_f128() { - spec_test::(); + let cases = [ + (0.1, 1.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 1.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 2.0, 
Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 2.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); } } diff --git a/libm/src/math/generic/floor.rs b/libm/src/math/generic/floor.rs index 6754c08f8..779955164 100644 --- a/libm/src/math/generic/floor.rs +++ b/libm/src/math/generic/floor.rs @@ -7,9 +7,14 @@ //! performance seems to be better (based on icount) and it does not seem to experience rounding //! errors on i386. +use super::super::support::{FpResult, Status}; use super::super::{Float, Int, IntTy, MinInt}; pub fn floor(x: F) -> F { + floor_status(x).val +} + +pub fn floor_status(x: F) -> FpResult { let zero = IntTy::::ZERO; let mut ix = x.to_bits(); @@ -17,20 +22,20 @@ pub fn floor(x: F) -> F { // If the represented value has no fractional part, no truncation is needed. if e >= F::SIG_BITS as i32 { - return x; + return FpResult::ok(x); } - if e >= 0 { + let status; + let res = if e >= 0 { // |x| >= 1.0 - let m = F::SIG_MASK >> e.unsigned(); if ix & m == zero { // Portion to be masked is already zero; no adjustment needed. - return x; + return FpResult::ok(x); } // Otherwise, raise an inexact exception. - force_eval!(x + F::MAX); + status = Status::INEXACT; if x.is_sign_negative() { ix += m; @@ -39,8 +44,12 @@ pub fn floor(x: F) -> F { ix &= !m; F::from_bits(ix) } else { - // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0). - force_eval!(x + F::MAX); + // |x| < 1.0, raise an inexact exception since truncation will happen. + if ix & F::SIG_MASK == F::Int::ZERO { + status = Status::OK; + } else { + status = Status::INEXACT; + } if x.is_sign_positive() { // 0.0 <= x < 1.0; rounding down goes toward +0.0. 
@@ -52,27 +61,40 @@ pub fn floor(x: F) -> F { // -0.0 remains unchanged x } - } + }; + + FpResult::new(res, status) } #[cfg(test)] mod tests { use super::*; + use crate::support::Hexf; /// Test against https://en.cppreference.com/w/cpp/numeric/math/floor - fn spec_test() { - // Not Asserted: that the current rounding mode has no effect. - for f in [F::ZERO, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY].iter().copied() { - assert_biteq!(floor(f), f); + fn spec_test(cases: &[(F, F, Status)]) { + let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY]; + + for x in roundtrip { + let FpResult { val, status } = floor_status(x); + assert_biteq!(val, x, "{}", Hexf(x)); + assert_eq!(status, Status::OK, "{}", Hexf(x)); + } + + for &(x, res, res_stat) in cases { + let FpResult { val, status } = floor_status(x); + assert_biteq!(val, res, "{}", Hexf(x)); + assert_eq!(status, res_stat, "{}", Hexf(x)); } } - /* Skipping f16 / f128 "sanity_check"s due to rejected literal lexing at MSRV */ + /* Skipping f16 / f128 "sanity_check"s and spec cases due to rejected literal lexing at MSRV */ #[test] #[cfg(f16_enabled)] fn spec_tests_f16() { - spec_test::(); + let cases = []; + spec_test::(&cases); } #[test] @@ -84,7 +106,17 @@ mod tests { #[test] fn spec_tests_f32() { - spec_test::(); + let cases = [ + (0.1, 0.0, Status::INEXACT), + (-0.1, -1.0, Status::INEXACT), + (0.9, 0.0, Status::INEXACT), + (-0.9, -1.0, Status::INEXACT), + (1.1, 1.0, Status::INEXACT), + (-1.1, -2.0, Status::INEXACT), + (1.9, 1.0, Status::INEXACT), + (-1.9, -2.0, Status::INEXACT), + ]; + spec_test::(&cases); } #[test] @@ -95,12 +127,23 @@ mod tests { #[test] fn spec_tests_f64() { - spec_test::(); + let cases = [ + (0.1, 0.0, Status::INEXACT), + (-0.1, -1.0, Status::INEXACT), + (0.9, 0.0, Status::INEXACT), + (-0.9, -1.0, Status::INEXACT), + (1.1, 1.0, Status::INEXACT), + (-1.1, -2.0, Status::INEXACT), + (1.9, 1.0, Status::INEXACT), + (-1.9, -2.0, Status::INEXACT), + ]; + 
spec_test::(&cases); } #[test] #[cfg(f128_enabled)] fn spec_tests_f128() { - spec_test::(); + let cases = []; + spec_test::(&cases); } } diff --git a/libm/src/math/generic/trunc.rs b/libm/src/math/generic/trunc.rs index ca5f1bdd6..0fb3fa5ad 100644 --- a/libm/src/math/generic/trunc.rs +++ b/libm/src/math/generic/trunc.rs @@ -1,15 +1,20 @@ /* SPDX-License-Identifier: MIT * origin: musl src/math/trunc.c */ +use super::super::support::{FpResult, Status}; use super::super::{Float, Int, IntTy, MinInt}; pub fn trunc(x: F) -> F { + trunc_status(x).val +} + +pub fn trunc_status(x: F) -> FpResult { let mut xi: F::Int = x.to_bits(); let e: i32 = x.exp_unbiased(); // C1: The represented value has no fractional part, so no truncation is needed if e >= F::SIG_BITS as i32 { - return x; + return FpResult::ok(x); } let mask = if e < 0 { @@ -23,22 +28,68 @@ pub fn trunc(x: F) -> F { // C4: If the to-be-masked-out portion is already zero, we have an exact result if (xi & !mask) == IntTy::::ZERO { - return x; + return FpResult::ok(x); } // C5: Otherwise the result is inexact and we will truncate. Raise `FE_INEXACT`, mask the // result, and return. 
- force_eval!(x + F::MAX); + + let status = if xi & F::SIG_MASK == F::Int::ZERO { Status::OK } else { Status::INEXACT }; xi &= mask; - F::from_bits(xi) + FpResult::new(F::from_bits(xi), status) } #[cfg(test)] mod tests { use super::*; + use crate::support::Hexf; + + fn spec_test(cases: &[(F, F, Status)]) { + let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY]; + + for x in roundtrip { + let FpResult { val, status } = trunc_status(x); + assert_biteq!(val, x, "{}", Hexf(x)); + assert_eq!(status, Status::OK, "{}", Hexf(x)); + } + + for &(x, res, res_stat) in cases { + let FpResult { val, status } = trunc_status(x); + assert_biteq!(val, res, "{}", Hexf(x)); + assert_eq!(status, res_stat, "{}", Hexf(x)); + } + } + + /* Skipping f16 / f128 "sanity_check"s and spec cases due to rejected literal lexing at MSRV */ + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + let cases = []; + spec_test::(&cases); + } + + #[test] + fn sanity_check_f32() { + assert_eq!(trunc(0.5f32), 0.0); + assert_eq!(trunc(1.1f32), 1.0); + assert_eq!(trunc(2.9f32), 2.0); + } #[test] - fn sanity_check() { + fn spec_tests_f32() { + let cases = [ + (0.1, 0.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 0.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 1.0, Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 1.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); + assert_biteq!(trunc(1.1f32), 1.0); assert_biteq!(trunc(1.1f64), 1.0); @@ -54,4 +105,32 @@ mod tests { assert_biteq!(trunc(hf32!("-0x1p-1")), -0.0); assert_biteq!(trunc(hf64!("-0x1p-1")), -0.0); } + + #[test] + fn sanity_check_f64() { + assert_eq!(trunc(1.1f64), 1.0); + assert_eq!(trunc(2.9f64), 2.0); + } + + #[test] + fn spec_tests_f64() { + let cases = [ + (0.1, 0.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 0.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 1.0, Status::INEXACT), + (-1.1, -1.0, 
Status::INEXACT), + (1.9, 1.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + let cases = []; + spec_test::(&cases); + } } From 401e43e458a3f7e6ed2bf000f3528d911e52eeff Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 10 Feb 2025 05:26:50 +0000 Subject: [PATCH 1186/1459] Small refactor of bigint tests Print errors immediately rather than deferring to the end, so any debug output shows up immediately before the relevant failed test. --- libm/src/math/support/big/tests.rs | 65 ++++++++++++++++-------------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/libm/src/math/support/big/tests.rs b/libm/src/math/support/big/tests.rs index 6d06c700a..2c71191ba 100644 --- a/libm/src/math/support/big/tests.rs +++ b/libm/src/math/support/big/tests.rs @@ -1,6 +1,5 @@ extern crate std; use std::string::String; -use std::vec::Vec; use std::{eprintln, format}; use super::{HInt, MinInt, i256, u256}; @@ -36,28 +35,30 @@ fn widen_mul_u128() { (0, 1234, u256::ZERO), ]; - let mut errors = Vec::new(); - for (i, (a, b, exp)) in tests.iter().copied().enumerate() { - let res = a.widen_mul(b); - let res_z = a.zero_widen_mul(b); - assert_eq!(res, res_z); - if res != exp { - errors.push((i, a, b, exp, res)); - } - } - - for (i, a, b, exp, res) in &errors { + let mut has_errors = false; + let mut add_error = |i, a, b, expected, actual| { + has_errors = true; eprintln!( "\ FAILURE ({i}): {a:#034x} * {b:#034x}\n\ expected: {}\n\ got: {}\ ", - hexu(*exp), - hexu(*res) + hexu(expected), + hexu(actual) ); + }; + + for (i, (a, b, exp)) in tests.iter().copied().enumerate() { + let res = a.widen_mul(b); + let res_z = a.zero_widen_mul(b); + assert_eq!(res, res_z); + if res != exp { + add_error(i, a, b, exp, res); + } } - assert!(errors.is_empty()); + + assert!(!has_errors); } #[test] @@ -68,7 +69,21 @@ fn not_u256() { #[test] fn shr_u256() { let only_low = [1, u16::MAX.into(), 
u32::MAX.into(), u64::MAX.into(), u128::MAX]; - let mut errors = Vec::new(); + let mut has_errors = false; + + let mut add_error = |a, b, expected, actual| { + has_errors = true; + eprintln!( + "\ + FAILURE: {} >> {b}\n\ + expected: {}\n\ + actual: {}\ + ", + hexu(a), + hexu(expected), + hexu(actual), + ); + }; for a in only_low { for perturb in 0..10 { @@ -77,7 +92,7 @@ fn shr_u256() { let res = a.widen() >> shift; let expected = (a >> shift).widen(); if res != expected { - errors.push((a.widen(), shift, res, expected)); + add_error(a.widen(), shift, expected, res); } } } @@ -107,23 +122,11 @@ fn shr_u256() { for (input, shift, expected) in check { let res = input >> shift; if res != expected { - errors.push((input, shift, res, expected)); + add_error(input, shift, expected, res); } } - for (a, b, res, expected) in &errors { - eprintln!( - "\ - FAILURE: {} >> {b}\n\ - expected: {}\n\ - got: {}\ - ", - hexu(*a), - hexu(*expected), - hexu(*res) - ); - } - assert!(errors.is_empty()); + assert!(!has_errors); } #[test] From d809d64c3d3db1b42586d8b31249d10129c7917a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 10 Feb 2025 19:56:52 +0000 Subject: [PATCH 1187/1459] Combine `fmin{,f,f16,f128}` and `fmax{,f,f16,128}` into a single file These don't have much content since they now use the generic implementation. There will be more similar functions in the near future (fminimum, fmaximum, fminimum_num, fmaximum_num); start the pattern of combining similar functions now so we don't have to eventually maintain similar docs across 24 different files. 
--- libm/etc/function-definitions.json | 16 +++++----- libm/src/math/fmax.rs | 5 --- libm/src/math/fmaxf.rs | 5 --- libm/src/math/fmaxf128.rs | 5 --- libm/src/math/fmaxf16.rs | 5 --- libm/src/math/fmin.rs | 5 --- libm/src/math/fmin_fmax.rs | 51 ++++++++++++++++++++++++++++++ libm/src/math/fminf.rs | 5 --- libm/src/math/fminf128.rs | 5 --- libm/src/math/fminf16.rs | 5 --- libm/src/math/mod.rs | 22 ++++--------- 11 files changed, 65 insertions(+), 64 deletions(-) delete mode 100644 libm/src/math/fmax.rs delete mode 100644 libm/src/math/fmaxf.rs delete mode 100644 libm/src/math/fmaxf128.rs delete mode 100644 libm/src/math/fmaxf16.rs delete mode 100644 libm/src/math/fmin.rs create mode 100644 libm/src/math/fmin_fmax.rs delete mode 100644 libm/src/math/fminf.rs delete mode 100644 libm/src/math/fminf128.rs delete mode 100644 libm/src/math/fminf16.rs diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 5742ed585..d3e51f29a 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -365,56 +365,56 @@ }, "fmax": { "sources": [ - "src/math/fmax.rs", + "src/math/fmin_fmax.rs", "src/math/generic/fmax.rs" ], "type": "f64" }, "fmaxf": { "sources": [ - "src/math/fmaxf.rs", + "src/math/fmin_fmax.rs", "src/math/generic/fmax.rs" ], "type": "f32" }, "fmaxf128": { "sources": [ - "src/math/fmaxf128.rs", + "src/math/fmin_fmax.rs", "src/math/generic/fmax.rs" ], "type": "f128" }, "fmaxf16": { "sources": [ - "src/math/fmaxf16.rs", + "src/math/fmin_fmax.rs", "src/math/generic/fmax.rs" ], "type": "f16" }, "fmin": { "sources": [ - "src/math/fmin.rs", + "src/math/fmin_fmax.rs", "src/math/generic/fmin.rs" ], "type": "f64" }, "fminf": { "sources": [ - "src/math/fminf.rs", + "src/math/fmin_fmax.rs", "src/math/generic/fmin.rs" ], "type": "f32" }, "fminf128": { "sources": [ - "src/math/fminf128.rs", + "src/math/fmin_fmax.rs", "src/math/generic/fmin.rs" ], "type": "f128" }, "fminf16": { "sources": [ - "src/math/fminf16.rs", + 
"src/math/fmin_fmax.rs", "src/math/generic/fmin.rs" ], "type": "f16" diff --git a/libm/src/math/fmax.rs b/libm/src/math/fmax.rs deleted file mode 100644 index d5d9b513b..000000000 --- a/libm/src/math/fmax.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Return the greater of two arguments or, if either argument is NaN, the other argument. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmax(x: f64, y: f64) -> f64 { - super::generic::fmax(x, y) -} diff --git a/libm/src/math/fmaxf.rs b/libm/src/math/fmaxf.rs deleted file mode 100644 index 3197d5cf2..000000000 --- a/libm/src/math/fmaxf.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Return the greater of two arguments or, if either argument is NaN, the other argument. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmaxf(x: f32, y: f32) -> f32 { - super::generic::fmax(x, y) -} diff --git a/libm/src/math/fmaxf128.rs b/libm/src/math/fmaxf128.rs deleted file mode 100644 index bace9ab53..000000000 --- a/libm/src/math/fmaxf128.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Return the greater of two arguments or, if either argument is NaN, the other argument. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmaxf128(x: f128, y: f128) -> f128 { - super::generic::fmax(x, y) -} diff --git a/libm/src/math/fmaxf16.rs b/libm/src/math/fmaxf16.rs deleted file mode 100644 index fea15be8f..000000000 --- a/libm/src/math/fmaxf16.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Return the greater of two arguments or, if either argument is NaN, the other argument. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmaxf16(x: f16, y: f16) -> f16 { - super::generic::fmax(x, y) -} diff --git a/libm/src/math/fmin.rs b/libm/src/math/fmin.rs deleted file mode 100644 index df8ff7c32..000000000 --- a/libm/src/math/fmin.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Return the lesser of two arguments or, if either argument is NaN, the other argument. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmin(x: f64, y: f64) -> f64 { - super::generic::fmin(x, y) -} diff --git a/libm/src/math/fmin_fmax.rs b/libm/src/math/fmin_fmax.rs new file mode 100644 index 000000000..97912e758 --- /dev/null +++ b/libm/src/math/fmin_fmax.rs @@ -0,0 +1,51 @@ +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminf16(x: f16, y: f16) -> f16 { + super::generic::fmin(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminf(x: f32, y: f32) -> f32 { + super::generic::fmin(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmin(x: f64, y: f64) -> f64 { + super::generic::fmin(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminf128(x: f128, y: f128) -> f128 { + super::generic::fmin(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaxf16(x: f16, y: f16) -> f16 { + super::generic::fmax(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaxf(x: f32, y: f32) -> f32 { + super::generic::fmax(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmax(x: f64, y: f64) -> f64 { + super::generic::fmax(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaxf128(x: f128, y: f128) -> f128 { + super::generic::fmax(x, y) +} diff --git a/libm/src/math/fminf.rs b/libm/src/math/fminf.rs deleted file mode 100644 index b2cdfe89d..000000000 --- a/libm/src/math/fminf.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Return the lesser of two arguments or, if either argument is NaN, the other argument. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fminf(x: f32, y: f32) -> f32 { - super::generic::fmin(x, y) -} diff --git a/libm/src/math/fminf128.rs b/libm/src/math/fminf128.rs deleted file mode 100644 index a9224c22a..000000000 --- a/libm/src/math/fminf128.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Return the lesser of two arguments or, if either argument is NaN, the other argument. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fminf128(x: f128, y: f128) -> f128 { - super::generic::fmin(x, y) -} diff --git a/libm/src/math/fminf16.rs b/libm/src/math/fminf16.rs deleted file mode 100644 index 6d936be34..000000000 --- a/libm/src/math/fminf16.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Return the lesser of two arguments or, if either argument is NaN, the other argument. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fminf16(x: f16, y: f16) -> f16 { - super::generic::fmin(x, y) -} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index ae4a278f2..ba0b933f1 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -165,10 +165,7 @@ mod floor; mod floorf; mod fma; mod fmaf; -mod fmax; -mod fmaxf; -mod fmin; -mod fminf; +mod fmin_fmax; mod fmod; mod fmodf; mod frexp; @@ -273,10 +270,7 @@ pub use self::floor::floor; pub use self::floorf::floorf; pub use self::fma::fma; pub use self::fmaf::fmaf; -pub use self::fmax::fmax; -pub use self::fmaxf::fmaxf; -pub use self::fmin::fmin; -pub use self::fminf::fminf; +pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf}; pub use self::fmod::fmod; pub use self::fmodf::fmodf; pub use self::frexp::frexp; @@ -346,8 +340,6 @@ cfg_if! { mod fabsf16; mod fdimf16; mod floorf16; - mod fmaxf16; - mod fminf16; mod fmodf16; mod ldexpf16; mod rintf16; @@ -363,8 +355,8 @@ cfg_if! { pub use self::fabsf16::fabsf16; pub use self::fdimf16::fdimf16; pub use self::floorf16::floorf16; - pub use self::fmaxf16::fmaxf16; - pub use self::fminf16::fminf16; + pub use self::fmin_fmax::fmaxf16; + pub use self::fmin_fmax::fminf16; pub use self::fmodf16::fmodf16; pub use self::ldexpf16::ldexpf16; pub use self::rintf16::rintf16; @@ -385,8 +377,6 @@ cfg_if! { mod fdimf128; mod floorf128; mod fmaf128; - mod fmaxf128; - mod fminf128; mod fmodf128; mod ldexpf128; mod rintf128; @@ -403,8 +393,8 @@ cfg_if! 
{ pub use self::fdimf128::fdimf128; pub use self::floorf128::floorf128; pub use self::fmaf128::fmaf128; - pub use self::fmaxf128::fmaxf128; - pub use self::fminf128::fminf128; + pub use self::fmin_fmax::fmaxf128; + pub use self::fmin_fmax::fminf128; pub use self::fmodf128::fmodf128; pub use self::ldexpf128::ldexpf128; pub use self::rintf128::rintf128; From f67196e8feaa7f8d37d02b9b925d79b88bb76436 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 10 Feb 2025 19:43:49 +0000 Subject: [PATCH 1188/1459] Add `fminimum`, `fmaximum`, `fminimum_num`, and `fmaximum_num` These functions represent new operations from IEEE 754-2019. Introduce them for all float sizes. --- libm/crates/libm-macros/src/shared.rs | 32 ++++- libm/crates/libm-test/benches/icount.rs | 16 +++ libm/crates/libm-test/benches/random.rs | 16 +++ libm/crates/libm-test/src/domain.rs | 4 + libm/crates/libm-test/src/gen/case_list.rs | 80 ++++++++++++- libm/crates/libm-test/src/mpfloat.rs | 54 ++++++++- libm/crates/libm-test/src/precision.rs | 4 + .../libm-test/tests/compare_built_musl.rs | 16 +++ libm/crates/util/src/main.rs | 16 +++ libm/etc/function-definitions.json | 112 ++++++++++++++++++ libm/etc/function-list.txt | 16 +++ libm/src/libm_helper.rs | 18 ++- libm/src/math/fmin_fmax.rs | 24 ++++ libm/src/math/fminimum_fmaximum.rs | 67 +++++++++++ libm/src/math/fminimum_fmaximum_num.rs | 67 +++++++++++ libm/src/math/generic/fmax.rs | 77 ++++++++++-- libm/src/math/generic/fmaximum.rs | 78 ++++++++++++ libm/src/math/generic/fmaximum_num.rs | 77 ++++++++++++ libm/src/math/generic/fmin.rs | 77 ++++++++++-- libm/src/math/generic/fminimum.rs | 78 ++++++++++++ libm/src/math/generic/fminimum_num.rs | 77 ++++++++++++ libm/src/math/generic/mod.rs | 8 ++ libm/src/math/mod.rs | 14 ++- 23 files changed, 997 insertions(+), 31 deletions(-) create mode 100644 libm/src/math/fminimum_fmaximum.rs create mode 100644 libm/src/math/fminimum_fmaximum_num.rs create mode 100644 libm/src/math/generic/fmaximum.rs create mode 100644 
libm/src/math/generic/fmaximum_num.rs create mode 100644 libm/src/math/generic/fminimum.rs create mode 100644 libm/src/math/generic/fminimum_num.rs diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index 48d19c50d..cb5a1d187 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ b/libm/crates/libm-macros/src/shared.rs @@ -47,7 +47,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F16, Signature { args: &[Ty::F16, Ty::F16], returns: &[Ty::F16] }, None, - &["copysignf16", "fdimf16", "fmaxf16", "fminf16", "fmodf16"], + &[ + "copysignf16", + "fdimf16", + "fmaxf16", + "fmaximum_numf16", + "fmaximumf16", + "fminf16", + "fminimum_numf16", + "fminimumf16", + "fmodf16", + ], ), ( // `(f32, f32) -> f32` @@ -59,7 +69,11 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] "copysignf", "fdimf", "fmaxf", + "fmaximum_numf", + "fmaximumf", "fminf", + "fminimum_numf", + "fminimumf", "fmodf", "hypotf", "nextafterf", @@ -77,7 +91,11 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] "copysign", "fdim", "fmax", + "fmaximum", + "fmaximum_num", "fmin", + "fminimum", + "fminimum_num", "fmod", "hypot", "nextafter", @@ -90,7 +108,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F128, Signature { args: &[Ty::F128, Ty::F128], returns: &[Ty::F128] }, None, - &["copysignf128", "fdimf128", "fmaxf128", "fminf128", "fmodf128"], + &[ + "copysignf128", + "fdimf128", + "fmaxf128", + "fmaximum_numf128", + "fmaximumf128", + "fminf128", + "fminimum_numf128", + "fminimumf128", + "fmodf128", + ], ), ( // `(f32, f32, f32) -> f32` diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs index be85dd567..e28f4973c 100644 --- a/libm/crates/libm-test/benches/icount.rs +++ b/libm/crates/libm-test/benches/icount.rs @@ -207,10 +207,26 @@ main!( icount_bench_fmaxf128_group, icount_bench_fmaxf16_group, 
icount_bench_fmaxf_group, + icount_bench_fmaximum_group, + icount_bench_fmaximum_num_group, + icount_bench_fmaximum_numf128_group, + icount_bench_fmaximum_numf16_group, + icount_bench_fmaximum_numf_group, + icount_bench_fmaximumf128_group, + icount_bench_fmaximumf16_group, + icount_bench_fmaximumf_group, icount_bench_fmin_group, icount_bench_fminf128_group, icount_bench_fminf16_group, icount_bench_fminf_group, + icount_bench_fminimum_group, + icount_bench_fminimum_num_group, + icount_bench_fminimum_numf128_group, + icount_bench_fminimum_numf16_group, + icount_bench_fminimum_numf_group, + icount_bench_fminimumf128_group, + icount_bench_fminimumf16_group, + icount_bench_fminimumf_group, icount_bench_fmod_group, icount_bench_fmodf128_group, icount_bench_fmodf16_group, diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 6e8a33479..6f6b05d95 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -130,8 +130,24 @@ libm_macros::for_each_function! 
{ | fmaf128 | fmaxf128 | fmaxf16 + | fmaximum + | fmaximum_num + | fmaximum_numf + | fmaximum_numf128 + | fmaximum_numf16 + | fmaximumf + | fmaximumf128 + | fmaximumf16 | fminf128 | fminf16 + | fminimum + | fminimum_num + | fminimum_numf + | fminimum_numf128 + | fminimum_numf16 + | fminimumf + | fminimumf128 + | fminimumf16 | fmodf128 | fmodf16 | ldexpf128 diff --git a/libm/crates/libm-test/src/domain.rs b/libm/crates/libm-test/src/domain.rs index 5d650c00a..c662e95b4 100644 --- a/libm/crates/libm-test/src/domain.rs +++ b/libm/crates/libm-test/src/domain.rs @@ -221,7 +221,11 @@ pub fn get_domain( BaseName::Floor => &EitherPrim::UNBOUNDED1[..], BaseName::Fma => &EitherPrim::UNBOUNDED3[..], BaseName::Fmax => &EitherPrim::UNBOUNDED2[..], + BaseName::Fmaximum => &EitherPrim::UNBOUNDED2[..], + BaseName::FmaximumNum => &EitherPrim::UNBOUNDED2[..], BaseName::Fmin => &EitherPrim::UNBOUNDED2[..], + BaseName::Fminimum => &EitherPrim::UNBOUNDED2[..], + BaseName::FminimumNum => &EitherPrim::UNBOUNDED2[..], BaseName::Fmod => &EitherPrim::UNBOUNDED2[..], BaseName::Hypot => &EitherPrim::UNBOUNDED2[..], BaseName::Ilogb => &EitherPrim::UNBOUNDED1[..], diff --git a/libm/crates/libm-test/src/gen/case_list.rs b/libm/crates/libm-test/src/gen/case_list.rs index 23226d5c2..49e731b88 100644 --- a/libm/crates/libm-test/src/gen/case_list.rs +++ b/libm/crates/libm-test/src/gen/case_list.rs @@ -293,7 +293,8 @@ fn fmaf128_cases() -> Vec> { v } -fn fmax_cases() -> Vec> { +#[cfg(f16_enabled)] +fn fmaxf16_cases() -> Vec> { vec![] } @@ -301,17 +302,53 @@ fn fmaxf_cases() -> Vec> { vec![] } +fn fmax_cases() -> Vec> { + vec![] +} + #[cfg(f128_enabled)] fn fmaxf128_cases() -> Vec> { vec![] } #[cfg(f16_enabled)] -fn fmaxf16_cases() -> Vec> { +fn fmaximumf16_cases() -> Vec> { vec![] } -fn fmin_cases() -> Vec> { +fn fmaximumf_cases() -> Vec> { + vec![] +} + +fn fmaximum_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fmaximumf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn 
fmaximum_numf16_cases() -> Vec> { + vec![] +} + +fn fmaximum_numf_cases() -> Vec> { + vec![] +} + +fn fmaximum_num_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fmaximum_numf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fminf16_cases() -> Vec> { vec![] } @@ -319,13 +356,48 @@ fn fminf_cases() -> Vec> { vec![] } +fn fmin_cases() -> Vec> { + vec![] +} + #[cfg(f128_enabled)] fn fminf128_cases() -> Vec> { vec![] } #[cfg(f16_enabled)] -fn fminf16_cases() -> Vec> { +fn fminimumf16_cases() -> Vec> { + vec![] +} + +fn fminimumf_cases() -> Vec> { + vec![] +} + +fn fminimum_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fminimumf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fminimum_numf16_cases() -> Vec> { + vec![] +} + +fn fminimum_numf_cases() -> Vec> { + vec![] +} + +fn fminimum_num_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fminimum_numf128_cases() -> Vec> { vec![] } diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index f4a9ff7ff..63cdebe4e 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -148,6 +148,14 @@ libm_macros::for_each_function! { floorf, floorf128, floorf16, + fmaximum, + fmaximumf, + fmaximumf128, + fmaximumf16, + fminimum, + fminimumf, + fminimumf128, + fminimumf16, fmod, fmodf, fmodf128, @@ -197,8 +205,10 @@ libm_macros::for_each_function! { fabs | fabsf => abs, fdim | fdimf | fdimf16 | fdimf128 => positive_diff, fma | fmaf | fmaf128 => mul_add, - fmax | fmaxf | fmaxf16 | fmaxf128 => max, - fmin | fminf | fminf16 | fminf128 => min, + fmax | fmaxf | fmaxf16 | fmaxf128 | + fmaximum_num | fmaximum_numf | fmaximum_numf16 | fmaximum_numf128 => max, + fmin | fminf | fminf16 | fminf128 | + fminimum_num | fminimum_numf | fminimum_numf16 | fminimum_numf128 => min, lgamma | lgammaf => ln_gamma, log | logf => ln, log1p | log1pf => ln_1p, @@ -446,6 +456,46 @@ macro_rules! 
impl_op_for_ty_all { } } + impl MpOp for crate::op::[< fmaximum $suffix >]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = if this.0.is_nan() || this.1.is_nan() { + this.0.assign($fty::NAN); + Ordering::Equal + } else { + this.0.max_round(&this.1, Nearest) + }; + prep_retval::(&mut this.0, ord) + } + } + + impl MpOp for crate::op::[< fminimum $suffix >]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = if this.0.is_nan() || this.1.is_nan() { + this.0.assign($fty::NAN); + Ordering::Equal + } else { + this.0.min_round(&this.1, Nearest) + }; + prep_retval::(&mut this.0, ord) + } + } + // `ldexp` and `scalbn` are the same for binary floating point, so just forward all // methods. impl MpOp for crate::op::[]::Routine { diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 2f55ad22e..1d916e572 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -25,7 +25,11 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { | Bn::Floor | Bn::Fma | Bn::Fmax + | Bn::Fmaximum + | Bn::FmaximumNum | Bn::Fmin + | Bn::Fminimum + | Bn::FminimumNum | Bn::Fmod | Bn::Frexp | Bn::Ilogb diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 7fa77e832..ffd7f1f60 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -102,8 +102,24 @@ libm_macros::for_each_function! 
{ fmaf128, fmaxf128, fmaxf16, + fmaximum, + fmaximum_num, + fmaximum_numf, + fmaximum_numf128, + fmaximum_numf16, + fmaximumf, + fmaximumf128, + fmaximumf16, fminf128, fminf16, + fminimum, + fminimum_num, + fminimum_numf, + fminimum_numf128, + fminimum_numf16, + fminimumf, + fminimumf128, + fminimumf16, fmodf128, fmodf16, ldexpf128, diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index 0f845a1c4..a519713c0 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -99,8 +99,24 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { | fmaf128 | fmaxf128 | fmaxf16 + | fmaximum + | fmaximum_num + | fmaximum_numf + | fmaximum_numf128 + | fmaximum_numf16 + | fmaximumf + | fmaximumf128 + | fmaximumf16 | fminf128 | fminf16 + | fminimum + | fminimum_num + | fminimum_numf + | fminimum_numf128 + | fminimum_numf16 + | fminimumf + | fminimumf128 + | fminimumf16 | fmodf128 | fmodf16 | ldexpf128 diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index d3e51f29a..008a47df2 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -391,6 +391,62 @@ ], "type": "f16" }, + "fmaximum": { + "sources": [ + "src/math/fminimum_fmaximum.rs", + "src/math/generic/fmaximum.rs" + ], + "type": "f64" + }, + "fmaximum_num": { + "sources": [ + "src/math/fminimum_fmaximum_num.rs", + "src/math/generic/fmaximum_num.rs" + ], + "type": "f64" + }, + "fmaximum_numf": { + "sources": [ + "src/math/fminimum_fmaximum_num.rs", + "src/math/generic/fmaximum_num.rs" + ], + "type": "f32" + }, + "fmaximum_numf128": { + "sources": [ + "src/math/fminimum_fmaximum_num.rs", + "src/math/generic/fmaximum_num.rs" + ], + "type": "f128" + }, + "fmaximum_numf16": { + "sources": [ + "src/math/fminimum_fmaximum_num.rs", + "src/math/generic/fmaximum_num.rs" + ], + "type": "f16" + }, + "fmaximumf": { + "sources": [ + "src/math/fminimum_fmaximum.rs", + "src/math/generic/fmaximum.rs" + ], + "type": "f32" + }, + 
"fmaximumf128": { + "sources": [ + "src/math/fminimum_fmaximum.rs", + "src/math/generic/fmaximum.rs" + ], + "type": "f128" + }, + "fmaximumf16": { + "sources": [ + "src/math/fminimum_fmaximum.rs", + "src/math/generic/fmaximum.rs" + ], + "type": "f16" + }, "fmin": { "sources": [ "src/math/fmin_fmax.rs", @@ -419,6 +475,62 @@ ], "type": "f16" }, + "fminimum": { + "sources": [ + "src/math/fminimum_fmaximum.rs", + "src/math/generic/fminimum.rs" + ], + "type": "f64" + }, + "fminimum_num": { + "sources": [ + "src/math/fminimum_fmaximum_num.rs", + "src/math/generic/fminimum_num.rs" + ], + "type": "f64" + }, + "fminimum_numf": { + "sources": [ + "src/math/fminimum_fmaximum_num.rs", + "src/math/generic/fminimum_num.rs" + ], + "type": "f32" + }, + "fminimum_numf128": { + "sources": [ + "src/math/fminimum_fmaximum_num.rs", + "src/math/generic/fminimum_num.rs" + ], + "type": "f128" + }, + "fminimum_numf16": { + "sources": [ + "src/math/fminimum_fmaximum_num.rs", + "src/math/generic/fminimum_num.rs" + ], + "type": "f16" + }, + "fminimumf": { + "sources": [ + "src/math/fminimum_fmaximum.rs", + "src/math/generic/fminimum.rs" + ], + "type": "f32" + }, + "fminimumf128": { + "sources": [ + "src/math/fminimum_fmaximum.rs", + "src/math/generic/fminimum.rs" + ], + "type": "f128" + }, + "fminimumf16": { + "sources": [ + "src/math/fminimum_fmaximum.rs", + "src/math/generic/fminimum.rs" + ], + "type": "f16" + }, "fmod": { "sources": [ "src/math/fmod.rs", diff --git a/libm/etc/function-list.txt b/libm/etc/function-list.txt index 1c9c5e3bc..90ca8f34e 100644 --- a/libm/etc/function-list.txt +++ b/libm/etc/function-list.txt @@ -58,10 +58,26 @@ fmax fmaxf fmaxf128 fmaxf16 +fmaximum +fmaximum_num +fmaximum_numf +fmaximum_numf128 +fmaximum_numf16 +fmaximumf +fmaximumf128 +fmaximumf16 fmin fminf fminf128 fminf16 +fminimum +fminimum_num +fminimum_numf +fminimum_numf128 +fminimum_numf16 +fminimumf +fminimumf128 +fminimumf16 fmod fmodf fmodf128 diff --git a/libm/src/libm_helper.rs 
b/libm/src/libm_helper.rs index 68f1fb362..489dbc0d4 100644 --- a/libm/src/libm_helper.rs +++ b/libm/src/libm_helper.rs @@ -137,7 +137,15 @@ libm_helper! { (fn floor(x: f64) -> (f64); => floor); (fn fma(x: f64, y: f64, z: f64) -> (f64); => fma); (fn fmax(x: f64, y: f64) -> (f64); => fmax); + (fn fmaximum(x: f64, y: f64) -> (f64); => fmaximum); + (fn fmaximum_num(x: f64, y: f64) -> (f64); => fmaximum_num); + (fn fmaximum_numf(x: f32, y: f32) -> (f32); => fmaximum_numf); + (fn fmaximumf(x: f32, y: f32) -> (f32); => fmaximumf); (fn fmin(x: f64, y: f64) -> (f64); => fmin); + (fn fminimum(x: f64, y: f64) -> (f64); => fminimum); + (fn fminimum_num(x: f64, y: f64) -> (f64); => fminimum_num); + (fn fminimum_numf(x: f32, y: f32) -> (f32); => fminimum_numf); + (fn fminimumf(x: f32, y: f32) -> (f32); => fminimumf); (fn fmod(x: f64, y: f64) -> (f64); => fmod); (fn frexp(x: f64) -> (f64, i32); => frexp); (fn hypot(x: f64, y: f64) -> (f64); => hypot); @@ -186,7 +194,11 @@ libm_helper! { (fn fdim(x: f16, y: f16) -> (f16); => fdimf16); (fn floorf(x: f16) -> (f16); => floorf16); (fn fmaxf(x: f16, y: f16) -> (f16); => fmaxf16); + (fn fmaximum_numf16(x: f16, y: f16) -> (f16); => fmaximum_numf16); + (fn fmaximumf16(x: f16, y: f16) -> (f16); => fmaximumf16); (fn fminf(x: f16, y: f16) -> (f16); => fminf16); + (fn fminimum_numf16(x: f16, y: f16) -> (f16); => fminimum_numf16); + (fn fminimumf16(x: f16, y: f16) -> (f16); => fminimumf16); (fn fmodf(x: f16, y: f16) -> (f16); => fmodf16); (fn ldexpf16(x: f16, n: i32) -> (f16); => ldexpf16); (fn rintf(x: f16) -> (f16); => rintf16); @@ -208,9 +220,13 @@ libm_helper! 
{ (fn fabs(x: f128) -> (f128); => fabsf128); (fn fdim(x: f128, y: f128) -> (f128); => fdimf128); (fn floor(x: f128) -> (f128); => floorf128); - (fn fmaf128(x: f128, y: f128, z: f128) -> (f128); => fmaf128); + (fn fmaf128(x: f128, y: f128, z: f128) -> (f128); => fmaf128); (fn fmax(x: f128, y: f128) -> (f128); => fmaxf128); + (fn fmaximum_numf128(x: f128, y: f128) -> (f128); => fmaximum_numf128); + (fn fmaximumf128(x: f128, y: f128) -> (f128); => fmaximumf128); (fn fmin(x: f128, y: f128) -> (f128); => fminf128); + (fn fminimum_numf128(x: f128, y: f128) -> (f128); => fminimum_numf128); + (fn fminimumf128(x: f128, y: f128) -> (f128); => fminimumf128); (fn fmod(x: f128, y: f128) -> (f128); => fmodf128); (fn ldexpf128(x: f128, n: i32) -> (f128); => ldexpf128); (fn rint(x: f128) -> (f128); => rintf128); diff --git a/libm/src/math/fmin_fmax.rs b/libm/src/math/fmin_fmax.rs index 97912e758..4f9136dbb 100644 --- a/libm/src/math/fmin_fmax.rs +++ b/libm/src/math/fmin_fmax.rs @@ -1,4 +1,7 @@ /// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). #[cfg(f16_enabled)] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fminf16(x: f16, y: f16) -> f16 { @@ -6,18 +9,27 @@ pub fn fminf16(x: f16, y: f16) -> f16 { } /// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fminf(x: f32, y: f32) -> f32 { super::generic::fmin(x, y) } /// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `minNum`. 
The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmin(x: f64, y: f64) -> f64 { super::generic::fmin(x, y) } /// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). #[cfg(f128_enabled)] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fminf128(x: f128, y: f128) -> f128 { @@ -25,6 +37,9 @@ pub fn fminf128(x: f128, y: f128) -> f128 { } /// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). #[cfg(f16_enabled)] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmaxf16(x: f16, y: f16) -> f16 { @@ -32,18 +47,27 @@ pub fn fmaxf16(x: f16, y: f16) -> f16 { } /// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmaxf(x: f32, y: f32) -> f32 { super::generic::fmax(x, y) } /// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmax(x: f64, y: f64) -> f64 { super::generic::fmax(x, y) } /// Return the greater of two arguments or, if either argument is NaN, the other argument. 
+/// +/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). #[cfg(f128_enabled)] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmaxf128(x: f128, y: f128) -> f128 { diff --git a/libm/src/math/fminimum_fmaximum.rs b/libm/src/math/fminimum_fmaximum.rs new file mode 100644 index 000000000..fd3c5ed10 --- /dev/null +++ b/libm/src/math/fminimum_fmaximum.rs @@ -0,0 +1,67 @@ +/// Return the lesser of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimumf16(x: f16, y: f16) -> f16 { + super::generic::fminimum(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimum(x: f64, y: f64) -> f64 { + super::generic::fminimum(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimumf(x: f32, y: f32) -> f32 { + super::generic::fminimum(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimumf128(x: f128, y: f128) -> f128 { + super::generic::fminimum(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
+#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximumf16(x: f16, y: f16) -> f16 { + super::generic::fmaximum(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximumf(x: f32, y: f32) -> f32 { + super::generic::fmaximum(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximum(x: f64, y: f64) -> f64 { + super::generic::fmaximum(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximumf128(x: f128, y: f128) -> f128 { + super::generic::fmaximum(x, y) +} diff --git a/libm/src/math/fminimum_fmaximum_num.rs b/libm/src/math/fminimum_fmaximum_num.rs new file mode 100644 index 000000000..640ddfc9b --- /dev/null +++ b/libm/src/math/fminimum_fmaximum_num.rs @@ -0,0 +1,67 @@ +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimum_numf16(x: f16, y: f16) -> f16 { + super::generic::fminimum_num(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimum_numf(x: f32, y: f32) -> f32 { + super::generic::fminimum_num(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimum_num(x: f64, y: f64) -> f64 { + super::generic::fminimum_num(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimum_numf128(x: f128, y: f128) -> f128 { + super::generic::fminimum_num(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximum_numf16(x: f16, y: f16) -> f16 { + super::generic::fmaximum_num(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximum_numf(x: f32, y: f32) -> f32 { + super::generic::fmaximum_num(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximum_num(x: f64, y: f64) -> f64 { + super::generic::fmaximum_num(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
+#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximum_numf128(x: f128, y: f128) -> f128 { + super::generic::fmaximum_num(x, y) +} diff --git a/libm/src/math/generic/fmax.rs b/libm/src/math/generic/fmax.rs index 97803052b..32613a46b 100644 --- a/libm/src/math/generic/fmax.rs +++ b/libm/src/math/generic/fmax.rs @@ -1,14 +1,73 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2011 `maxNum`. This has been superseded by IEEE 754-2019 `maximumNumber`. +//! +//! Per the spec, returns the canonicalized result of: +//! - `x` if `x > y` +//! - `y` if `y > x` +//! - The other number if one is NaN +//! - Otherwise, either `x` or `y`, canonicalized +//! - -0.0 and +0.0 may be disregarded (unlike newer operations) +//! +//! Excluded from our implementation is sNaN handling. +//! +//! More on the differences: [link]. +//! +//! [link]: https://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf + use super::super::Float; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmax(x: F, y: F) -> F { - // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the - // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it - // is either x or y, canonicalized (this means results might differ among implementations). - // When either x or y is a signalingNaN, then the result is according to 6.2. - // - // Since we do not support sNaN in Rust yet, we do not need to handle them. - // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by - // multiplying by 1.0. Should switch to the `canonicalize` when it works. 
- (if x.is_nan() || x < y { y } else { x }) * F::ONE + let res = if x.is_nan() || x < y { y } else { x }; + // Canonicalize + res * F::ONE +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Hexf, Int}; + + fn spec_test() { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ONE), + (F::ONE, F::ZERO, F::ONE), + (F::ZERO, F::NEG_ONE, F::ZERO), + (F::NEG_ONE, F::ZERO, F::ZERO), + (F::INFINITY, F::ZERO, F::INFINITY), + (F::NEG_INFINITY, F::ZERO, F::ZERO), + (F::NAN, F::ZERO, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NAN, F::NAN), + ]; + + for (x, y, res) in cases { + let val = fmax(x, y); + assert_biteq!(val, res, "fmax({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(); + } + + #[test] + fn spec_tests_f32() { + spec_test::(); + } + + #[test] + fn spec_tests_f64() { + spec_test::(); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(); + } } diff --git a/libm/src/math/generic/fmaximum.rs b/libm/src/math/generic/fmaximum.rs new file mode 100644 index 000000000..5f653ce94 --- /dev/null +++ b/libm/src/math/generic/fmaximum.rs @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2019 `maximum`. +//! +//! Per the spec, returns the canonicalized result of: +//! - `x` if `x > y` +//! - `y` if `y > x` +//! - qNaN if either operand is NaN +//! - Logic following +0.0 > -0.0 +//! +//! Excluded from our implementation is sNaN handling.
+ +use super::super::Float; + +pub fn fmaximum(x: F, y: F) -> F { + let res = if x.is_nan() { + x + } else if y.is_nan() { + y + } else if x > y || (y.to_bits() == F::NEG_ZERO.to_bits() && x.is_sign_positive()) { + x + } else { + y + }; + + // Canonicalize + res * F::ONE +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Hexf, Int}; + + fn spec_test() { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ONE), + (F::ONE, F::ZERO, F::ONE), + (F::ZERO, F::NEG_ONE, F::ZERO), + (F::NEG_ONE, F::ZERO, F::ZERO), + (F::INFINITY, F::ZERO, F::INFINITY), + (F::NEG_INFINITY, F::ZERO, F::ZERO), + (F::NAN, F::ZERO, F::NAN), + (F::ZERO, F::NAN, F::NAN), + (F::NAN, F::NAN, F::NAN), + (F::ZERO, F::NEG_ZERO, F::ZERO), + (F::NEG_ZERO, F::ZERO, F::ZERO), + ]; + + for (x, y, res) in cases { + let val = fmaximum(x, y); + assert_biteq!(val, res, "fmaximum({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(); + } + + #[test] + fn spec_tests_f32() { + spec_test::(); + } + + #[test] + fn spec_tests_f64() { + spec_test::(); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(); + } +} diff --git a/libm/src/math/generic/fmaximum_num.rs b/libm/src/math/generic/fmaximum_num.rs new file mode 100644 index 000000000..224660123 --- /dev/null +++ b/libm/src/math/generic/fmaximum_num.rs @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2019 `maximumNumber`. +//! +//! Per the spec, returns: +//! - `x` if `x > y` +//! - `y` if `y > x` +//! - Non-NaN if one operand is NaN +//! - Logic following +0.0 > -0.0 +//! - Either `x` or `y` if `x == y` and the signs are the same +//! - qNaN if both operands are NaN +//! +//! Excluded from our implementation is sNaN handling.
+ +use super::super::Float; + +pub fn fmaximum_num(x: F, y: F) -> F { + let res = + if x.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) { + y + } else { + x + }; + + // Canonicalize + res * F::ONE +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Hexf, Int}; + + fn spec_test() { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ONE), + (F::ONE, F::ZERO, F::ONE), + (F::ZERO, F::NEG_ONE, F::ZERO), + (F::NEG_ONE, F::ZERO, F::ZERO), + (F::INFINITY, F::ZERO, F::INFINITY), + (F::NEG_INFINITY, F::ZERO, F::ZERO), + (F::NAN, F::ZERO, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NAN, F::NAN), + (F::ZERO, F::NEG_ZERO, F::ZERO), + (F::NEG_ZERO, F::ZERO, F::ZERO), + ]; + + for (x, y, res) in cases { + let val = fmaximum_num(x, y); + assert_biteq!(val, res, "fmaximum_num({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(); + } + + #[test] + fn spec_tests_f32() { + spec_test::(); + } + + #[test] + fn spec_tests_f64() { + spec_test::(); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(); + } +} diff --git a/libm/src/math/generic/fmin.rs b/libm/src/math/generic/fmin.rs index 697f72004..5cc33e904 100644 --- a/libm/src/math/generic/fmin.rs +++ b/libm/src/math/generic/fmin.rs @@ -1,13 +1,72 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2008 `minNum`. This has been superseded by IEEE 754-2019 `minimumNumber`. +//! +//! Per the spec, returns the canonicalized result of: +//! - `x` if `x < y` +//! - `y` if `y < x` +//! - The other number if one is NaN +//! - Otherwise, either `x` or `y`, canonicalized +//! - -0.0 and +0.0 may be disregarded (unlike newer operations) +//! +//! Excluded from our implementation is sNaN handling. +//! +//! More on the differences: [link]. +//! +//! 
[link]: https://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf + use super::super::Float; pub fn fmin(x: F, y: F) -> F { - // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the - // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it - // is either x or y, canonicalized (this means results might differ among implementations). - // When either x or y is a signalingNaN, then the result is according to 6.2. - // - // Since we do not support sNaN in Rust yet, we do not need to handle them. - // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by - // multiplying by 1.0. Should switch to the `canonicalize` when it works. - (if y.is_nan() || x < y { x } else { y }) * F::ONE + let res = if y.is_nan() || x < y { x } else { y }; + // Canonicalize + res * F::ONE +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Hexf, Int}; + + fn spec_test() { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ZERO), + (F::ONE, F::ZERO, F::ZERO), + (F::ZERO, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::ZERO, F::NEG_ONE), + (F::INFINITY, F::ZERO, F::ZERO), + (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), + (F::NAN, F::ZERO, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NAN, F::NAN), + ]; + + for (x, y, res) in cases { + let val = fmin(x, y); + assert_biteq!(val, res, "fmin({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(); + } + + #[test] + fn spec_tests_f32() { + spec_test::(); + } + + #[test] + fn spec_tests_f64() { + spec_test::(); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(); + } } diff --git a/libm/src/math/generic/fminimum.rs b/libm/src/math/generic/fminimum.rs new file mode 100644 index 000000000..f566d9631 --- /dev/null +++ b/libm/src/math/generic/fminimum.rs @@ -0,0 
+1,78 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2019 `minimum`. +//! +//! Per the spec, returns the canonicalized result of: +//! - `x` if `x < y` +//! - `y` if `y < x` +//! - qNaN if either operand is NaN +//! - Logic following +0.0 > -0.0 +//! +//! Excluded from our implementation is sNaN handling. + +use super::super::Float; + +pub fn fminimum(x: F, y: F) -> F { + let res = if x.is_nan() { + x + } else if y.is_nan() { + y + } else if x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) { + x + } else { + y + }; + + // Canonicalize + res * F::ONE +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Hexf, Int}; + + fn spec_test() { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ZERO), + (F::ONE, F::ZERO, F::ZERO), + (F::ZERO, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::ZERO, F::NEG_ONE), + (F::INFINITY, F::ZERO, F::ZERO), + (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), + (F::NAN, F::ZERO, F::NAN), + (F::ZERO, F::NAN, F::NAN), + (F::NAN, F::NAN, F::NAN), + (F::ZERO, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ZERO, F::ZERO, F::NEG_ZERO), + ]; + + for (x, y, res) in cases { + let val = fminimum(x, y); + assert_biteq!(val, res, "fminimum({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(); + } + + #[test] + fn spec_tests_f32() { + spec_test::(); + } + + #[test] + fn spec_tests_f64() { + spec_test::(); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(); + } +} diff --git a/libm/src/math/generic/fminimum_num.rs b/libm/src/math/generic/fminimum_num.rs new file mode 100644 index 000000000..e58a585c3 --- /dev/null +++ b/libm/src/math/generic/fminimum_num.rs @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2019 `minimumNumber`. +//! +//! Per the spec, returns: +//! - `x` if `x < y` +//! - `y` if `y < x` +//! - Non-NaN if one operand is NaN +//!
- Logic following +0.0 > -0.0 +//! - Either `x` or `y` if `x == y` and the signs are the same +//! - qNaN if both operands are NaN +//! +//! Excluded from our implementation is sNaN handling. + +use super::super::Float; + +pub fn fminimum_num(x: F, y: F) -> F { + let res = + if y.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) { + x + } else { + y + }; + + // Canonicalize + res * F::ONE +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Hexf, Int}; + + fn spec_test() { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ZERO), + (F::ONE, F::ZERO, F::ZERO), + (F::ZERO, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::ZERO, F::NEG_ONE), + (F::INFINITY, F::ZERO, F::ZERO), + (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), + (F::NAN, F::ZERO, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NAN, F::NAN), + (F::ZERO, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ZERO, F::ZERO, F::NEG_ZERO), + ]; + + for (x, y, res) in cases { + let val = fminimum_num(x, y); + assert_biteq!(val, res, "fminimum_num({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(); + } + + #[test] + fn spec_tests_f32() { + spec_test::(); + } + + #[test] + fn spec_tests_f64() { + spec_test::(); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(); + } +} diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index b34d3dfae..092f9317b 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -5,7 +5,11 @@ mod fdim; mod floor; mod fma; mod fmax; +mod fmaximum; +mod fmaximum_num; mod fmin; +mod fminimum; +mod fminimum_num; mod fmod; mod rint; mod round; @@ -20,7 +24,11 @@ pub use fdim::fdim; pub use floor::floor; pub use fma::{fma, fma_wide}; pub use fmax::fmax; +pub use fmaximum::fmaximum; +pub use fmaximum_num::fmaximum_num; pub use fmin::fmin; +pub use fminimum::fminimum; +pub use
fminimum_num::fminimum_num; pub use fmod::fmod; pub use rint::rint; pub use round::round; diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index ba0b933f1..4e75292a6 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -166,6 +166,8 @@ mod floorf; mod fma; mod fmaf; mod fmin_fmax; +mod fminimum_fmaximum; +mod fminimum_fmaximum_num; mod fmod; mod fmodf; mod frexp; @@ -271,6 +273,8 @@ pub use self::floorf::floorf; pub use self::fma::fma; pub use self::fmaf::fmaf; pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf}; +pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf}; +pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf}; pub use self::fmod::fmod; pub use self::fmodf::fmodf; pub use self::frexp::frexp; @@ -355,8 +359,9 @@ cfg_if! { pub use self::fabsf16::fabsf16; pub use self::fdimf16::fdimf16; pub use self::floorf16::floorf16; - pub use self::fmin_fmax::fmaxf16; - pub use self::fmin_fmax::fminf16; + pub use self::fmin_fmax::{fmaxf16, fminf16}; + pub use self::fminimum_fmaximum::{fmaximumf16, fminimumf16}; + pub use self::fminimum_fmaximum_num::{fmaximum_numf16, fminimum_numf16}; pub use self::fmodf16::fmodf16; pub use self::ldexpf16::ldexpf16; pub use self::rintf16::rintf16; @@ -393,8 +398,9 @@ cfg_if! 
{ pub use self::fdimf128::fdimf128; pub use self::floorf128::floorf128; pub use self::fmaf128::fmaf128; - pub use self::fmin_fmax::fmaxf128; - pub use self::fmin_fmax::fminf128; + pub use self::fmin_fmax::{fmaxf128, fminf128}; + pub use self::fminimum_fmaximum::{fmaximumf128, fminimumf128}; + pub use self::fminimum_fmaximum_num::{fmaximum_numf128, fminimum_numf128}; pub use self::fmodf128::fmodf128; pub use self::ldexpf128::ldexpf128; pub use self::rintf128::rintf128; From 2ef97dd89167665e0ec8c6f93fdbafbe313185e2 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 10 Feb 2025 21:42:26 +0000 Subject: [PATCH 1189/1459] Increase allowed offset from infinity for ynf Failed with called `Result::unwrap()` on an `Err` value: ynf Caused by: 0: input: (223, 116.89665) as hex: (, 0x1.d3962cp+6) as bits: (0x000000df, 0x42e9cb16) expected: -3.1836905e38 -0x1.df074cp+127 0xff6f83a6 actual: -inf -inf 0xff800000 1: mismatched infinities --- libm/crates/libm-test/src/precision.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 1d916e572..8b0892546 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -537,7 +537,7 @@ fn int_float_common( && !expected.is_infinite() && actual.is_infinite() && (expected.abs().to_bits().abs_diff(actual.abs().to_bits()) - < F2::Int::cast_from(1_000_000u32)) + < F2::Int::cast_from(10_000_000u32)) { return XFAIL_NOCHECK; } From dd57e186401d4c9a7355d77ccca1cfe391bc1c03 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 11 Feb 2025 02:17:06 +0000 Subject: [PATCH 1190/1459] Fix parsing of negative hex float literals in util --- libm/crates/util/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index a519713c0..710adbb17 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -274,7 +274,7 @@ 
fn parse(input: &[&str], idx: usize) -> T { let msg = || format!("invalid {} input '{s}'", type_name::()); - if s.starts_with("0x") { + if s.starts_with("0x") || s.starts_with("-0x") { return T::from_str_radix(s, 16).unwrap_or_else(|_| panic!("{}", msg())); } From 4152a26ea940b4f1734aeea3a953ab6317999232 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 11 Feb 2025 00:17:32 +0000 Subject: [PATCH 1191/1459] Add `roundeven{,f,f16,f128}` C23 specifies a new set of `roundeven` functions that round to the nearest integral, with ties to even. It does not raise any floating point exceptions. This behavior is similar to two other functions: 1. `rint`, which rounds to the nearest integer respecting rounding mode and possibly raising exceptions. 2. `nearbyint`, which is identical to `rint` except it may not raise exceptions. Technically `rint`, `nearbyint`, and `roundeven` all behave the same in Rust because we assume default floating point environment. The backends are allowed to lower to `roundeven`, however, so we should provide it in case the fallback is needed. Add the `roundeven` family here and convert `rint` to a function that takes a rounding mode. This currently has no effect. 
--- libm/crates/libm-macros/src/shared.rs | 104 +++++++++++++++-- libm/crates/libm-test/benches/icount.rs | 4 + libm/crates/libm-test/benches/random.rs | 4 + libm/crates/libm-test/src/domain.rs | 1 + libm/crates/libm-test/src/gen/case_list.rs | 39 ++++++- libm/crates/libm-test/src/mpfloat.rs | 8 ++ libm/crates/libm-test/src/precision.rs | 3 +- .../libm-test/tests/compare_built_musl.rs | 4 + libm/crates/util/src/main.rs | 4 + libm/etc/function-definitions.json | 34 ++++-- libm/etc/function-list.txt | 4 + libm/src/libm_helper.rs | 42 +++---- libm/src/math/generic/mod.rs | 2 +- libm/src/math/generic/rint.rs | 105 +++++++++++++----- libm/src/math/mod.rs | 14 +-- libm/src/math/rint.rs | 33 +++++- libm/src/math/rintf.rs | 14 --- libm/src/math/rintf128.rs | 5 - libm/src/math/rintf16.rs | 5 - libm/src/math/roundeven.rs | 35 ++++++ 20 files changed, 363 insertions(+), 101 deletions(-) delete mode 100644 libm/src/math/rintf.rs delete mode 100644 libm/src/math/rintf128.rs delete mode 100644 libm/src/math/rintf16.rs create mode 100644 libm/src/math/roundeven.rs diff --git a/libm/crates/libm-macros/src/shared.rs b/libm/crates/libm-macros/src/shared.rs index cb5a1d187..5e58220eb 100644 --- a/libm/crates/libm-macros/src/shared.rs +++ b/libm/crates/libm-macros/src/shared.rs @@ -9,7 +9,16 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F16, Signature { args: &[Ty::F16], returns: &[Ty::F16] }, None, - &["ceilf16", "fabsf16", "floorf16", "rintf16", "roundf16", "sqrtf16", "truncf16"], + &[ + "ceilf16", + "fabsf16", + "floorf16", + "rintf16", + "roundevenf16", + "roundf16", + "sqrtf16", + "truncf16", + ], ), ( // `fn(f32) -> f32` @@ -17,10 +26,43 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] Signature { args: &[Ty::F32], returns: &[Ty::F32] }, None, &[ - "acosf", "acoshf", "asinf", "asinhf", "atanf", "atanhf", "cbrtf", "ceilf", "cosf", - "coshf", "erfcf", "erff", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf", - 
"j0f", "j1f", "lgammaf", "log10f", "log1pf", "log2f", "logf", "rintf", "roundf", - "sinf", "sinhf", "sqrtf", "tanf", "tanhf", "tgammaf", "truncf", "y0f", "y1f", + "acosf", + "acoshf", + "asinf", + "asinhf", + "atanf", + "atanhf", + "cbrtf", + "ceilf", + "cosf", + "coshf", + "erfcf", + "erff", + "exp10f", + "exp2f", + "expf", + "expm1f", + "fabsf", + "floorf", + "j0f", + "j1f", + "lgammaf", + "log10f", + "log1pf", + "log2f", + "logf", + "rintf", + "roundevenf", + "roundf", + "sinf", + "sinhf", + "sqrtf", + "tanf", + "tanhf", + "tgammaf", + "truncf", + "y0f", + "y1f", ], ), ( @@ -29,10 +71,43 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] Signature { args: &[Ty::F64], returns: &[Ty::F64] }, None, &[ - "acos", "acosh", "asin", "asinh", "atan", "atanh", "cbrt", "ceil", "cos", "cosh", - "erf", "erfc", "exp", "exp10", "exp2", "expm1", "fabs", "floor", "j0", "j1", "lgamma", - "log", "log10", "log1p", "log2", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh", - "tgamma", "trunc", "y0", "y1", + "acos", + "acosh", + "asin", + "asinh", + "atan", + "atanh", + "cbrt", + "ceil", + "cos", + "cosh", + "erf", + "erfc", + "exp", + "exp10", + "exp2", + "expm1", + "fabs", + "floor", + "j0", + "j1", + "lgamma", + "log", + "log10", + "log1p", + "log2", + "rint", + "round", + "roundeven", + "sin", + "sinh", + "sqrt", + "tan", + "tanh", + "tgamma", + "trunc", + "y0", + "y1", ], ), ( @@ -40,7 +115,16 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F128, Signature { args: &[Ty::F128], returns: &[Ty::F128] }, None, - &["ceilf128", "fabsf128", "floorf128", "rintf128", "roundf128", "sqrtf128", "truncf128"], + &[ + "ceilf128", + "fabsf128", + "floorf128", + "rintf128", + "roundevenf128", + "roundf128", + "sqrtf128", + "truncf128", + ], ), ( // `(f16, f16) -> f16` diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs index e28f4973c..4a10ec383 100644 --- 
a/libm/crates/libm-test/benches/icount.rs +++ b/libm/crates/libm-test/benches/icount.rs @@ -274,6 +274,10 @@ main!( icount_bench_rintf16_group, icount_bench_rintf_group, icount_bench_round_group, + icount_bench_roundeven_group, + icount_bench_roundevenf128_group, + icount_bench_roundevenf16_group, + icount_bench_roundevenf_group, icount_bench_roundf128_group, icount_bench_roundf16_group, icount_bench_roundf_group, diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 6f6b05d95..17e4e0d55 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -154,6 +154,10 @@ libm_macros::for_each_function! { | ldexpf16 | rintf128 | rintf16 + | roundeven + | roundevenf + | roundevenf128 + | roundevenf16 | roundf128 | roundf16 | scalbnf128 diff --git a/libm/crates/libm-test/src/domain.rs b/libm/crates/libm-test/src/domain.rs index c662e95b4..41e948461 100644 --- a/libm/crates/libm-test/src/domain.rs +++ b/libm/crates/libm-test/src/domain.rs @@ -246,6 +246,7 @@ pub fn get_domain( BaseName::Remquo => &EitherPrim::UNBOUNDED2[..], BaseName::Rint => &EitherPrim::UNBOUNDED1[..], BaseName::Round => &EitherPrim::UNBOUNDED1[..], + BaseName::Roundeven => &EitherPrim::UNBOUNDED1[..], BaseName::Scalbn => &EitherPrim::UNBOUNDED_F_I[..], BaseName::Sin => &EitherPrim::TRIG[..], BaseName::Sincos => &EitherPrim::TRIG[..], diff --git a/libm/crates/libm-test/src/gen/case_list.rs b/libm/crates/libm-test/src/gen/case_list.rs index 49e731b88..8c7a735fa 100644 --- a/libm/crates/libm-test/src/gen/case_list.rs +++ b/libm/crates/libm-test/src/gen/case_list.rs @@ -6,6 +6,7 @@ //! //! This is useful for adding regression tests or expected failures. 
+use libm::hf64; #[cfg(f128_enabled)] use libm::hf128; @@ -574,7 +575,15 @@ fn remquof_cases() -> Vec> { } fn rint_cases() -> Vec> { - vec![] + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Failure on i586 + ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff994000p+38"))), + ], + ); + v } fn rintf_cases() -> Vec> { @@ -591,6 +600,11 @@ fn rintf16_cases() -> Vec> { vec![] } +#[cfg(f16_enabled)] +fn roundf16_cases() -> Vec> { + vec![] +} + fn round_cases() -> Vec> { vec![] } @@ -605,7 +619,28 @@ fn roundf128_cases() -> Vec> { } #[cfg(f16_enabled)] -fn roundf16_cases() -> Vec> { +fn roundevenf16_cases() -> Vec> { + vec![] +} + +fn roundeven_cases() -> Vec> { + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Failure on i586 + ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff994000p+38"))), + ], + ); + v +} + +fn roundevenf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn roundevenf128_cases() -> Vec> { vec![] } diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm/crates/libm-test/src/mpfloat.rs index 63cdebe4e..9b51dc605 100644 --- a/libm/crates/libm-test/src/mpfloat.rs +++ b/libm/crates/libm-test/src/mpfloat.rs @@ -184,6 +184,10 @@ libm_macros::for_each_function! { rintf128, rintf16, round, + roundeven, + roundevenf, + roundevenf128, + roundevenf16, roundf, roundf128, roundf16, @@ -253,6 +257,8 @@ impl_no_round! { rint => round_even_mut; // FIXME: respect rounding mode rintf => round_even_mut; // FIXME: respect rounding mode round => round_mut; + roundeven => round_even_mut; + roundevenf => round_even_mut; roundf => round_mut; trunc => trunc_mut; truncf => trunc_mut; @@ -265,6 +271,7 @@ impl_no_round! { floorf16 => floor_mut; rintf16 => round_even_mut; // FIXME: respect rounding mode roundf16 => round_mut; + roundevenf16 => round_even_mut; truncf16 => trunc_mut; } @@ -275,6 +282,7 @@ impl_no_round! 
{ floorf128 => floor_mut; rintf128 => round_even_mut; // FIXME: respect rounding mode roundf128 => round_mut; + roundevenf128 => round_even_mut; truncf128 => trunc_mut; } diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 8b0892546..8916b43ab 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -40,6 +40,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 { | Bn::Remquo | Bn::Rint | Bn::Round + | Bn::Roundeven | Bn::Scalbn | Bn::Sqrt | Bn::Trunc => 0, @@ -282,7 +283,7 @@ impl MaybeOverride<(f64,)> for SpecialCase { } if cfg!(x86_no_sse) - && ctx.base_name == BaseName::Rint + && (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven) && (expected - actual).abs() <= F::ONE && (expected - actual).abs() > F::ZERO { diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index ffd7f1f60..2b16b9aa0 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -126,6 +126,10 @@ libm_macros::for_each_function! 
{ ldexpf16, rintf128, rintf16, + roundeven, + roundevenf, + roundevenf128, + roundevenf16, roundf128, roundf16, scalbnf128, diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index 710adbb17..130ac4531 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -123,6 +123,10 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { | ldexpf16 | rintf128 | rintf16 + | roundeven + | roundevenf + | roundevenf128 + | roundevenf16 | roundf128 | roundf16 | scalbnf128 diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 008a47df2..a47aaad57 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -791,7 +791,6 @@ "sources": [ "src/math/arch/aarch64.rs", "src/math/arch/wasm32.rs", - "src/math/generic/rint.rs", "src/math/rint.rs" ], "type": "f64" @@ -800,22 +799,19 @@ "sources": [ "src/math/arch/aarch64.rs", "src/math/arch/wasm32.rs", - "src/math/generic/rint.rs", - "src/math/rintf.rs" + "src/math/rint.rs" ], "type": "f32" }, "rintf128": { "sources": [ - "src/math/generic/rint.rs", - "src/math/rintf128.rs" + "src/math/rint.rs" ], "type": "f128" }, "rintf16": { "sources": [ - "src/math/generic/rint.rs", - "src/math/rintf16.rs" + "src/math/rint.rs" ], "type": "f16" }, @@ -826,6 +822,30 @@ ], "type": "f64" }, + "roundeven": { + "sources": [ + "src/math/roundeven.rs" + ], + "type": "f64" + }, + "roundevenf": { + "sources": [ + "src/math/roundeven.rs" + ], + "type": "f32" + }, + "roundevenf128": { + "sources": [ + "src/math/roundeven.rs" + ], + "type": "f128" + }, + "roundevenf16": { + "sources": [ + "src/math/roundeven.rs" + ], + "type": "f16" + }, "roundf": { "sources": [ "src/math/generic/round.rs", diff --git a/libm/etc/function-list.txt b/libm/etc/function-list.txt index 90ca8f34e..1f226c8c0 100644 --- a/libm/etc/function-list.txt +++ b/libm/etc/function-list.txt @@ -125,6 +125,10 @@ rintf rintf128 rintf16 round +roundeven +roundevenf +roundevenf128 
+roundevenf16 roundf roundf128 roundf16 diff --git a/libm/src/libm_helper.rs b/libm/src/libm_helper.rs index 489dbc0d4..dfa1ff77b 100644 --- a/libm/src/libm_helper.rs +++ b/libm/src/libm_helper.rs @@ -94,6 +94,7 @@ libm_helper! { (fn remquo(x: f32, y: f32) -> (f32, i32); => remquof); (fn rint(x: f32) -> (f32); => rintf); (fn round(x: f32) -> (f32); => roundf); + (fn roundeven(x: f32) -> (f32); => roundevenf); (fn scalbn(x: f32, n: i32) -> (f32); => scalbnf); (fn sin(x: f32) -> (f32); => sinf); (fn sincos(x: f32) -> (f32, f32); => sincosf); @@ -167,6 +168,7 @@ libm_helper! { (fn remquo(x: f64, y: f64) -> (f64, i32); => remquo); (fn rint(x: f64) -> (f64); => rint); (fn round(x: f64) -> (f64); => round); + (fn roundevem(x: f64) -> (f64); => roundeven); (fn scalbn(x: f64, n: i32) -> (f64); => scalbn); (fn sin(x: f64) -> (f64); => sin); (fn sincos(x: f64) -> (f64, f64); => sincos); @@ -188,22 +190,23 @@ libm_helper! { f16, funcs: { // verify-sorted-start - (fn ceilf(x: f16) -> (f16); => ceilf16); + (fn ceil(x: f16) -> (f16); => ceilf16); (fn copysign(x: f16, y: f16) -> (f16); => copysignf16); (fn fabs(x: f16) -> (f16); => fabsf16); (fn fdim(x: f16, y: f16) -> (f16); => fdimf16); - (fn floorf(x: f16) -> (f16); => floorf16); - (fn fmaxf(x: f16, y: f16) -> (f16); => fmaxf16); - (fn fmaximum_numf16(x: f16, y: f16) -> (f16); => fmaximum_numf16); + (fn floor(x: f16) -> (f16); => floorf16); + (fn fmax(x: f16, y: f16) -> (f16); => fmaxf16); + (fn fmaximum_num(x: f16, y: f16) -> (f16); => fmaximum_numf16); (fn fmaximumf16(x: f16, y: f16) -> (f16); => fmaximumf16); - (fn fminf(x: f16, y: f16) -> (f16); => fminf16); - (fn fminimum_numf16(x: f16, y: f16) -> (f16); => fminimum_numf16); - (fn fminimumf16(x: f16, y: f16) -> (f16); => fminimumf16); - (fn fmodf(x: f16, y: f16) -> (f16); => fmodf16); - (fn ldexpf16(x: f16, n: i32) -> (f16); => ldexpf16); - (fn rintf(x: f16) -> (f16); => rintf16); - (fn roundf(x: f16) -> (f16); => roundf16); - (fn scalbnf16(x: f16, n: i32) -> (f16); => 
ldexpf16); + (fn fmin(x: f16, y: f16) -> (f16); => fminf16); + (fn fminimum(x: f16, y: f16) -> (f16); => fminimumf16); + (fn fminimum_num(x: f16, y: f16) -> (f16); => fminimum_numf16); + (fn fmod(x: f16, y: f16) -> (f16); => fmodf16); + (fn ldexp(x: f16, n: i32) -> (f16); => ldexpf16); + (fn rint(x: f16) -> (f16); => rintf16); + (fn round(x: f16) -> (f16); => roundf16); + (fn roundeven(x: f16) -> (f16); => roundevenf16); + (fn scalbn(x: f16, n: i32) -> (f16); => scalbnf16); (fn sqrtf(x: f16) -> (f16); => sqrtf16); (fn truncf(x: f16) -> (f16); => truncf16); // verify-sorted-end @@ -220,18 +223,19 @@ libm_helper! { (fn fabs(x: f128) -> (f128); => fabsf128); (fn fdim(x: f128, y: f128) -> (f128); => fdimf128); (fn floor(x: f128) -> (f128); => floorf128); - (fn fmaf128(x: f128, y: f128, z: f128) -> (f128); => fmaf128); + (fn fma(x: f128, y: f128, z: f128) -> (f128); => fmaf128); (fn fmax(x: f128, y: f128) -> (f128); => fmaxf128); - (fn fmaximum_numf128(x: f128, y: f128) -> (f128); => fmaximum_numf128); - (fn fmaximumf128(x: f128, y: f128) -> (f128); => fmaximumf128); + (fn fmaximum(x: f128, y: f128) -> (f128); => fmaximumf128); + (fn fmaximum_num(x: f128, y: f128) -> (f128); => fmaximum_numf128); (fn fmin(x: f128, y: f128) -> (f128); => fminf128); - (fn fminimum_numf128(x: f128, y: f128) -> (f128); => fminimum_numf128); - (fn fminimumf128(x: f128, y: f128) -> (f128); => fminimumf128); + (fn fminimum(x: f128, y: f128) -> (f128); => fminimumf128); + (fn fminimum_num(x: f128, y: f128) -> (f128); => fminimum_numf128); (fn fmod(x: f128, y: f128) -> (f128); => fmodf128); - (fn ldexpf128(x: f128, n: i32) -> (f128); => ldexpf128); + (fn ldexp(x: f128, n: i32) -> (f128); => ldexpf128); (fn rint(x: f128) -> (f128); => rintf128); (fn round(x: f128) -> (f128); => roundf128); - (fn scalbnf128(x: f128, n: i32) -> (f128); => ldexpf128); + (fn roundeven(x: f128) -> (f128); => roundevenf128); + (fn scalbn(x: f128, n: i32) -> (f128); => scalbnf128); (fn sqrt(x: f128) -> (f128); => 
sqrtf128); (fn trunc(x: f128) -> (f128); => truncf128); // verify-sorted-end diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index 092f9317b..f224eba73 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -30,7 +30,7 @@ pub use fmin::fmin; pub use fminimum::fminimum; pub use fminimum_num::fminimum_num; pub use fmod::fmod; -pub use rint::rint; +pub use rint::rint_round; pub use round::round; pub use scalbn::scalbn; pub use sqrt::sqrt; diff --git a/libm/src/math/generic/rint.rs b/libm/src/math/generic/rint.rs index 80ba1faac..04e8f332f 100644 --- a/libm/src/math/generic/rint.rs +++ b/libm/src/math/generic/rint.rs @@ -2,27 +2,31 @@ /* origin: musl src/math/rint.c */ use super::super::Float; +use super::super::support::{FpResult, Round}; -pub fn rint(x: F) -> F { +/// IEEE 754-2019 `roundToIntegralExact`, which respects rounding mode and raises inexact if +/// applicable. +pub fn rint_round(x: F, _round: Round) -> FpResult { let toint = F::ONE / F::EPSILON; let e = x.exp(); let positive = x.is_sign_positive(); // On i386 `force_eval!` must be used to force rounding via storage to memory. Otherwise, // the excess precission from x87 would cause an incorrect final result. - let use_force = cfg!(x86_no_sse) && F::BITS == 32 || F::BITS == 64; + let force = |x| { + if cfg!(x86_no_sse) && (F::BITS == 32 || F::BITS == 64) { force_eval!(x) } else { x } + }; - if e >= F::EXP_BIAS + F::SIG_BITS { + let res = if e >= F::EXP_BIAS + F::SIG_BITS { // No fractional part; exact result can be returned. x } else { - // Apply a net-zero adjustment that nudges `y` in the direction of the rounding mode. + // Apply a net-zero adjustment that nudges `y` in the direction of the rounding mode. For + // Rust this is always nearest, but ideally it would take `round` into account. 
let y = if positive { - let tmp = if use_force { force_eval!(x) } else { x } + toint; - (if use_force { force_eval!(tmp) } else { tmp } - toint) + force(force(x) + toint) - toint } else { - let tmp = if use_force { force_eval!(x) } else { x } - toint; - (if use_force { force_eval!(tmp) } else { tmp } + toint) + force(force(x) - toint) + toint }; if y == F::ZERO { @@ -31,42 +35,85 @@ pub fn rint(x: F) -> F { } else { y } - } + }; + + FpResult::ok(res) } #[cfg(test)] mod tests { use super::*; + use crate::support::{Hexf, Int, Status}; + + fn spec_test(cases: &[(F, F, Status)]) { + let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY]; + + for x in roundtrip { + let FpResult { val, status } = rint_round(x, Round::Nearest); + assert_biteq!(val, x, "rint_round({})", Hexf(x)); + assert_eq!(status, Status::OK, "{}", Hexf(x)); + } + + for &(x, res, res_stat) in cases { + let FpResult { val, status } = rint_round(x, Round::Nearest); + assert_biteq!(val, res, "rint_round({})", Hexf(x)); + assert_eq!(status, res_stat, "{}", Hexf(x)); + } + } #[test] - fn zeroes_f32() { - assert_biteq!(rint(0.0_f32), 0.0_f32); - assert_biteq!(rint(-0.0_f32), -0.0_f32); + #[cfg(f16_enabled)] + fn spec_tests_f16() { + let cases = []; + spec_test::(&cases); } #[test] - fn sanity_check_f32() { - assert_biteq!(rint(-1.0_f32), -1.0); - assert_biteq!(rint(2.8_f32), 3.0); - assert_biteq!(rint(-0.5_f32), -0.0); - assert_biteq!(rint(0.5_f32), 0.0); - assert_biteq!(rint(-1.5_f32), -2.0); - assert_biteq!(rint(1.5_f32), 2.0); + fn spec_tests_f32() { + let cases = [ + (0.1, 0.0, Status::OK), + (-0.1, -0.0, Status::OK), + (0.5, 0.0, Status::OK), + (-0.5, -0.0, Status::OK), + (0.9, 1.0, Status::OK), + (-0.9, -1.0, Status::OK), + (1.1, 1.0, Status::OK), + (-1.1, -1.0, Status::OK), + (1.5, 2.0, Status::OK), + (-1.5, -2.0, Status::OK), + (1.9, 2.0, Status::OK), + (-1.9, -2.0, Status::OK), + (2.8, 3.0, Status::OK), + (-2.8, -3.0, Status::OK), + ]; + spec_test::(&cases); } 
#[test] - fn zeroes_f64() { - assert_biteq!(rint(0.0_f64), 0.0_f64); - assert_biteq!(rint(-0.0_f64), -0.0_f64); + fn spec_tests_f64() { + let cases = [ + (0.1, 0.0, Status::OK), + (-0.1, -0.0, Status::OK), + (0.5, 0.0, Status::OK), + (-0.5, -0.0, Status::OK), + (0.9, 1.0, Status::OK), + (-0.9, -1.0, Status::OK), + (1.1, 1.0, Status::OK), + (-1.1, -1.0, Status::OK), + (1.5, 2.0, Status::OK), + (-1.5, -2.0, Status::OK), + (1.9, 2.0, Status::OK), + (-1.9, -2.0, Status::OK), + (2.8, 3.0, Status::OK), + (-2.8, -3.0, Status::OK), + ]; + spec_test::(&cases); } #[test] - fn sanity_check_f64() { - assert_biteq!(rint(-1.0_f64), -1.0); - assert_biteq!(rint(2.8_f64), 3.0); - assert_biteq!(rint(-0.5_f64), -0.0); - assert_biteq!(rint(0.5_f64), 0.0); - assert_biteq!(rint(-1.5_f64), -2.0); - assert_biteq!(rint(1.5_f64), 2.0); + #[cfg(f128_enabled)] + fn spec_tests_f128() { + let cases = []; + spec_test::(&cases); } } diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 4e75292a6..e58d79adc 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -207,8 +207,8 @@ mod remainderf; mod remquo; mod remquof; mod rint; -mod rintf; mod round; +mod roundeven; mod roundf; mod scalbn; mod scalbnf; @@ -313,9 +313,9 @@ pub use self::remainder::remainder; pub use self::remainderf::remainderf; pub use self::remquo::remquo; pub use self::remquof::remquof; -pub use self::rint::rint; -pub use self::rintf::rintf; +pub use self::rint::{rint, rintf}; pub use self::round::round; +pub use self::roundeven::{roundeven, roundevenf}; pub use self::roundf::roundf; pub use self::scalbn::scalbn; pub use self::scalbnf::scalbnf; @@ -346,7 +346,6 @@ cfg_if! { mod floorf16; mod fmodf16; mod ldexpf16; - mod rintf16; mod roundf16; mod scalbnf16; mod sqrtf16; @@ -364,7 +363,8 @@ cfg_if! 
{ pub use self::fminimum_fmaximum_num::{fmaximum_numf16, fminimum_numf16}; pub use self::fmodf16::fmodf16; pub use self::ldexpf16::ldexpf16; - pub use self::rintf16::rintf16; + pub use self::rint::rintf16; + pub use self::roundeven::roundevenf16; pub use self::roundf16::roundf16; pub use self::scalbnf16::scalbnf16; pub use self::sqrtf16::sqrtf16; @@ -384,7 +384,6 @@ cfg_if! { mod fmaf128; mod fmodf128; mod ldexpf128; - mod rintf128; mod roundf128; mod scalbnf128; mod sqrtf128; @@ -403,7 +402,8 @@ cfg_if! { pub use self::fminimum_fmaximum_num::{fmaximum_numf128, fminimum_numf128}; pub use self::fmodf128::fmodf128; pub use self::ldexpf128::ldexpf128; - pub use self::rintf128::rintf128; + pub use self::rint::rintf128; + pub use self::roundeven::roundevenf128; pub use self::roundf128::roundf128; pub use self::scalbnf128::scalbnf128; pub use self::sqrtf128::sqrtf128; diff --git a/libm/src/math/rint.rs b/libm/src/math/rint.rs index f409ec282..8a5cbeab4 100644 --- a/libm/src/math/rint.rs +++ b/libm/src/math/rint.rs @@ -1,3 +1,27 @@ +use super::support::Round; + +/// Round `x` to the nearest integer, breaking ties toward even. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn rintf16(x: f16) -> f16 { + super::generic::rint_round(x, Round::Nearest).val +} + +/// Round `x` to the nearest integer, breaking ties toward even. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn rintf(x: f32) -> f32 { + select_implementation! { + name: rintf, + use_arch: any( + all(target_arch = "wasm32", intrinsics_enabled), + all(target_arch = "aarch64", target_feature = "neon", target_endian = "little"), + ), + args: x, + } + + super::generic::rint_round(x, Round::Nearest).val +} + /// Round `x` to the nearest integer, breaking ties toward even. 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn rint(x: f64) -> f64 { @@ -10,5 +34,12 @@ pub fn rint(x: f64) -> f64 { args: x, } - super::generic::rint(x) + super::generic::rint_round(x, Round::Nearest).val +} + +/// Round `x` to the nearest integer, breaking ties toward even. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn rintf128(x: f128) -> f128 { + super::generic::rint_round(x, Round::Nearest).val } diff --git a/libm/src/math/rintf.rs b/libm/src/math/rintf.rs deleted file mode 100644 index 5e9f5f718..000000000 --- a/libm/src/math/rintf.rs +++ /dev/null @@ -1,14 +0,0 @@ -/// Round `x` to the nearest integer, breaking ties toward even. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn rintf(x: f32) -> f32 { - select_implementation! { - name: rintf, - use_arch: any( - all(target_arch = "wasm32", intrinsics_enabled), - all(target_arch = "aarch64", target_feature = "neon", target_endian = "little"), - ), - args: x, - } - - super::generic::rint(x) -} diff --git a/libm/src/math/rintf128.rs b/libm/src/math/rintf128.rs deleted file mode 100644 index 6b16fcd84..000000000 --- a/libm/src/math/rintf128.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Round `x` to the nearest integer, breaking ties toward even. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn rintf128(x: f128) -> f128 { - super::generic::rint(x) -} diff --git a/libm/src/math/rintf16.rs b/libm/src/math/rintf16.rs deleted file mode 100644 index 84d792561..000000000 --- a/libm/src/math/rintf16.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Round `x` to the nearest integer, breaking ties toward even. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn rintf16(x: f16) -> f16 { - super::generic::rint(x) -} diff --git a/libm/src/math/roundeven.rs b/libm/src/math/roundeven.rs new file mode 100644 index 000000000..ec1738285 --- /dev/null +++ b/libm/src/math/roundeven.rs @@ -0,0 +1,35 @@ +use super::support::{Float, Round}; + +/// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754 +/// `roundToIntegralTiesToEven`. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundevenf16(x: f16) -> f16 { + roundeven_impl(x) +} + +/// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754 +/// `roundToIntegralTiesToEven`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundevenf(x: f32) -> f32 { + roundeven_impl(x) +} + +/// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754 +/// `roundToIntegralTiesToEven`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundeven(x: f64) -> f64 { + roundeven_impl(x) +} + +/// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754 +/// `roundToIntegralTiesToEven`. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundevenf128(x: f128) -> f128 { + roundeven_impl(x) +} + +pub fn roundeven_impl(x: F) -> F { + super::generic::rint_round(x, Round::Nearest).val +} From 8ee516beb5ad9e2040c0ddd59ef4a329b8121875 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 11 Feb 2025 07:45:14 +0000 Subject: [PATCH 1192/1459] Check exact values for specified cases Inputs in `case_list` shouldn't hit xfails or increased ULP tolerance. Ensure that overrides are skipped when testing against MPFR or a specified value and that NaNs, if any, are checked bitwise. 
--- libm/crates/libm-test/src/gen/case_list.rs | 10 ++++++++-- libm/crates/libm-test/src/test_traits.rs | 14 +++++++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/libm/crates/libm-test/src/gen/case_list.rs b/libm/crates/libm-test/src/gen/case_list.rs index 8c7a735fa..7cb9897d8 100644 --- a/libm/crates/libm-test/src/gen/case_list.rs +++ b/libm/crates/libm-test/src/gen/case_list.rs @@ -579,8 +579,11 @@ fn rint_cases() -> Vec> { TestCase::append_pairs( &mut v, &[ - // Failure on i586 + // Known failure on i586 + #[cfg(not(x86_no_sse))] ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff994000p+38"))), + #[cfg(x86_no_sse)] + ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff998000p+38"))), ], ); v @@ -628,8 +631,11 @@ fn roundeven_cases() -> Vec> { TestCase::append_pairs( &mut v, &[ - // Failure on i586 + // Known failure on i586 + #[cfg(not(x86_no_sse))] ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff994000p+38"))), + #[cfg(x86_no_sse)] + ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff998000p+38"))), ], ); v diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index 1bd5bce16..bba1fca64 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -12,7 +12,9 @@ use anyhow::{Context, anyhow, bail, ensure}; use libm::support::Hexf; use crate::precision::CheckAction; -use crate::{CheckCtx, Float, Int, MaybeOverride, SpecialCase, TestResult}; +use crate::{ + CheckBasis, CheckCtx, Float, GeneratorKind, Int, MaybeOverride, SpecialCase, TestResult, +}; /// Trait for calling a function with a tuple as arguments. /// @@ -207,6 +209,8 @@ where SpecialCase: MaybeOverride, { let (result, xfail_msg) = match SpecialCase::check_int(input, actual, expected, ctx) { + // `require_biteq` forbids overrides. 
+ _ if ctx.gen_kind == GeneratorKind::List => (actual == expected, None), CheckAction::AssertSuccess => (actual == expected, None), CheckAction::AssertFailure(msg) => (actual != expected, Some(msg)), CheckAction::Custom(res) => return res, @@ -291,7 +295,12 @@ where let mut inner = || -> TestResult { let mut allowed_ulp = ctx.ulp; + // Forbid overrides if the items came from an explicit list, as long as we are checking + // against either MPFR or the result itself. + let require_biteq = ctx.gen_kind == GeneratorKind::List && ctx.basis != CheckBasis::Musl; + match SpecialCase::check_float(input, actual, expected, ctx) { + _ if require_biteq => (), CheckAction::AssertSuccess => (), CheckAction::AssertFailure(msg) => assert_failure_msg = Some(msg), CheckAction::Custom(res) => return res, @@ -301,6 +310,9 @@ where // Check when both are NaNs if actual.is_nan() && expected.is_nan() { + if require_biteq && ctx.basis == CheckBasis::None { + ensure!(actual.to_bits() == expected.to_bits(), "mismatched NaN bitpatterns"); + } // By default, NaNs have nothing special to check. return Ok(()); } else if actual.is_nan() || expected.is_nan() { From 37bbb09fdb76f5f717b1d2e38d3f4429aa2f8ee7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 11 Feb 2025 15:40:17 +0000 Subject: [PATCH 1193/1459] Rename `Float::exp` to `Float::ex` Our function to get the exponent conflicts with the inherent `exp` function for `e^x`. Rename `exp` to `ex` to avoid confusion and usage problems. 
--- libm/etc/function-definitions.json | 3 +-- libm/src/math/generic/fma.rs | 6 +++--- libm/src/math/generic/fmod.rs | 4 ++-- libm/src/math/generic/rint.rs | 2 +- libm/src/math/generic/sqrt.rs | 2 +- libm/src/math/support/float_traits.rs | 4 ++-- 6 files changed, 10 insertions(+), 11 deletions(-) diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index a47aaad57..63d9927ad 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -206,8 +206,7 @@ }, "exp": { "sources": [ - "src/math/exp.rs", - "src/math/support/float_traits.rs" + "src/math/exp.rs" ], "type": "f64" }, diff --git a/libm/src/math/generic/fma.rs b/libm/src/math/generic/fma.rs index 821aee090..cb1061cc3 100644 --- a/libm/src/math/generic/fma.rs +++ b/libm/src/math/generic/fma.rs @@ -249,7 +249,7 @@ where let xy: B = x.widen() * y.widen(); let mut result: B = xy + z.widen(); let mut ui: B::Int = result.to_bits(); - let re = result.exp(); + let re = result.ex(); let zb: B = z.widen(); let prec_diff = B::SIG_BITS - F::SIG_BITS; @@ -318,7 +318,7 @@ impl Norm { fn from_float(x: F) -> Self { let mut ix = x.to_bits(); - let mut e = x.exp() as i32; + let mut e = x.ex() as i32; let neg = x.is_sign_negative(); if e == 0 { // Normalize subnormals by multiplication @@ -326,7 +326,7 @@ impl Norm { let scale_f = F::from_parts(false, scale_i + F::EXP_BIAS, F::Int::ZERO); let scaled = x * scale_f; ix = scaled.to_bits(); - e = scaled.exp() as i32; + e = scaled.ex() as i32; e = if e == 0 { // If the exponent is still zero, the input was zero. Artifically set this value // such that the final `e` will exceed `ZERO_INF_NAN`. 
diff --git a/libm/src/math/generic/fmod.rs b/libm/src/math/generic/fmod.rs index ca1cda383..c74b593d5 100644 --- a/libm/src/math/generic/fmod.rs +++ b/libm/src/math/generic/fmod.rs @@ -9,8 +9,8 @@ pub fn fmod(x: F, y: F) -> F { let one = F::Int::ONE; let mut ix = x.to_bits(); let mut iy = y.to_bits(); - let mut ex = x.exp().signed(); - let mut ey = y.exp().signed(); + let mut ex = x.ex().signed(); + let mut ey = y.ex().signed(); let sx = ix & F::SIGN_MASK; if iy << 1 == zero || y.is_nan() || ex == F::EXP_SAT as i32 { diff --git a/libm/src/math/generic/rint.rs b/libm/src/math/generic/rint.rs index 04e8f332f..2f8b2b365 100644 --- a/libm/src/math/generic/rint.rs +++ b/libm/src/math/generic/rint.rs @@ -8,7 +8,7 @@ use super::super::support::{FpResult, Round}; /// applicable. pub fn rint_round(x: F, _round: Round) -> FpResult { let toint = F::ONE / F::EPSILON; - let e = x.exp(); + let e = x.ex(); let positive = x.is_sign_positive(); // On i386 `force_eval!` must be used to force rounding via storage to memory. Otherwise, diff --git a/libm/src/math/generic/sqrt.rs b/libm/src/math/generic/sqrt.rs index fdd612493..5918025bc 100644 --- a/libm/src/math/generic/sqrt.rs +++ b/libm/src/math/generic/sqrt.rs @@ -109,7 +109,7 @@ where ix = scaled.to_bits(); match top { Exp::Shifted(ref mut v) => { - *v = scaled.exp(); + *v = scaled.ex(); *v = (*v).wrapping_sub(F::SIG_BITS); } Exp::NoShift(()) => { diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 42ce31484..534ca9a07 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -128,13 +128,13 @@ pub trait Float: } /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero. - fn exp(self) -> u32 { + fn ex(self) -> u32 { u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_SAT } /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero. 
fn exp_unbiased(self) -> i32 { - self.exp().signed() - (Self::EXP_BIAS as i32) + self.ex().signed() - (Self::EXP_BIAS as i32) } /// Returns the significand with no implicit bit (or the "fractional" part) From 72d0f007fc4b9d473afd0e4884ce27758fc0ee57 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 12 Feb 2025 03:48:20 +0000 Subject: [PATCH 1194/1459] Add a way to print inputs on failure When there is a panic in an extensive test, tracing down where it came from can be difficult since no information is provided (messages are e.g. "attempted to subtract with overflow"). Resolve this by calling the functions within `panic::catch_unwind`, printing the input, and continuing. --- libm/crates/libm-test/src/op.rs | 6 ++++-- libm/crates/libm-test/src/test_traits.rs | 19 ++++++++++++++++++- .../libm-test/tests/compare_built_musl.rs | 2 +- libm/crates/libm-test/tests/multiprecision.rs | 2 +- libm/crates/libm-test/tests/standalone.rs | 2 +- .../crates/libm-test/tests/z_extensive/run.rs | 2 +- libm/crates/util/src/main.rs | 2 +- 7 files changed, 27 insertions(+), 8 deletions(-) diff --git a/libm/crates/libm-test/src/op.rs b/libm/crates/libm-test/src/op.rs index 239c9a3e1..47d72ae58 100644 --- a/libm/crates/libm-test/src/op.rs +++ b/libm/crates/libm-test/src/op.rs @@ -14,6 +14,7 @@ //! level. `Op` is also used as the name for generic parameters since it is terse. use std::fmt; +use std::panic::{RefUnwindSafe, UnwindSafe}; pub use shared::{ALL_OPERATIONS, FloatTy, MathOpInfo, Ty}; @@ -64,7 +65,7 @@ pub trait MathOp { type CRet; /// The signature of the Rust function as a `fn(...) -> ...` type. - type RustFn: Copy; + type RustFn: Copy + UnwindSafe; /// Arguments passed to the Rust library function as a tuple. /// @@ -72,7 +73,8 @@ pub trait MathOp { /// to the Rust function. type RustArgs: Copy + TupleCall - + TupleCall; + + TupleCall + + RefUnwindSafe; /// Type returned from the Rust function. 
type RustRet: CheckOutput; diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm/crates/libm-test/src/test_traits.rs index bba1fca64..c560dade8 100644 --- a/libm/crates/libm-test/src/test_traits.rs +++ b/libm/crates/libm-test/src/test_traits.rs @@ -6,7 +6,8 @@ //! - `CheckOutput`: implemented on anything that is an output type for validation against an //! expected value. -use std::fmt; +use std::panic::{RefUnwindSafe, UnwindSafe}; +use std::{fmt, panic}; use anyhow::{Context, anyhow, bail, ensure}; use libm::support::Hexf; @@ -23,6 +24,22 @@ use crate::{ pub trait TupleCall: fmt::Debug { type Output; fn call(self, f: Func) -> Self::Output; + + /// Intercept panics and print the input to stderr before continuing. + fn call_intercept_panics(self, f: Func) -> Self::Output + where + Self: RefUnwindSafe + Copy, + Func: UnwindSafe, + { + let res = panic::catch_unwind(|| self.call(f)); + match res { + Ok(v) => v, + Err(e) => { + eprintln!("panic with the following input: {self:?}"); + panic::resume_unwind(e) + } + } + } } /// A trait to implement on any output type so we can verify it in a generic way. 
diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 2b16b9aa0..897dfc26e 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -21,7 +21,7 @@ fn musl_runner( ) { for input in cases { let musl_res = input.call(musl_fn); - let crate_res = input.call(Op::ROUTINE); + let crate_res = input.call_intercept_panics(Op::ROUTINE); crate_res.validate(musl_res, input, ctx).unwrap(); } diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index fd1f11610..0ab4b64da 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -12,7 +12,7 @@ fn mp_runner(ctx: &CheckCtx, cases: impl Iterator( cases: impl Iterator, ) { for (input, expected) in cases { - let crate_res = input.call(Op::ROUTINE); + let crate_res = input.call_intercept_panics(Op::ROUTINE); crate_res.validate(expected, input, ctx).unwrap(); } } diff --git a/libm/crates/libm-test/tests/z_extensive/run.rs b/libm/crates/libm-test/tests/z_extensive/run.rs index a323c9110..786546a9d 100644 --- a/libm/crates/libm-test/tests/z_extensive/run.rs +++ b/libm/crates/libm-test/tests/z_extensive/run.rs @@ -113,7 +113,7 @@ where for input in input_vec { // Test the input. let mp_res = Op::run(mp_vals, input); - let crate_res = input.call(Op::ROUTINE); + let crate_res = input.call_intercept_panics(Op::ROUTINE); crate_res.validate(mp_res, input, ctx)?; let completed = completed.fetch_add(1, Ordering::Relaxed) + 1; diff --git a/libm/crates/util/src/main.rs b/libm/crates/util/src/main.rs index 130ac4531..ef70ec903 100644 --- a/libm/crates/util/src/main.rs +++ b/libm/crates/util/src/main.rs @@ -59,7 +59,7 @@ macro_rules! 
handle_call { let libm_fn: ::RustFn = libm::$fn_name; let output = match $basis { - "libm" => input.call(libm_fn), + "libm" => input.call_intercept_panics(libm_fn), #[cfg(feature = "build-musl")] "musl" => { let musl_fn: ::CFn = From f7904493b84734ad09a1643c38dea1d8e34f5cfd Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 12 Feb 2025 09:25:16 +0000 Subject: [PATCH 1195/1459] Scale test iteration count at a later point Currently the argument multiplier and large float multiplier happen before selecting count based on generator. However, this means that bivariate and trivariate functions don't get scaled at all (except for the special cased fma). Move this scaling to a later point. --- libm/crates/libm-test/src/run_cfg.rs | 37 ++++++++++++++++------------ 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/libm/crates/libm-test/src/run_cfg.rs b/libm/crates/libm-test/src/run_cfg.rs index 6b2689976..8e4fff53c 100644 --- a/libm/crates/libm-test/src/run_cfg.rs +++ b/libm/crates/libm-test/src/run_cfg.rs @@ -23,8 +23,8 @@ static EXTENSIVE_ITER_OVERRIDE: LazyLock> = LazyLock::new(|| { /// /// Contains the itentifier+generator combo to match on, plus the factor to reduce by. const EXTEMELY_SLOW_TESTS: &[(Identifier, GeneratorKind, u64)] = &[ - (Identifier::Fmodf128, GeneratorKind::QuickSpaced, 40), - (Identifier::Fmodf128, GeneratorKind::Extensive, 40), + (Identifier::Fmodf128, GeneratorKind::QuickSpaced, 50), + (Identifier::Fmodf128, GeneratorKind::Extensive, 50), ]; /// Maximum number of iterations to run for a single routine. @@ -200,15 +200,6 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { domain_iter_count = 100_000; } - // Larger float types get more iterations. - if t_env.large_float_ty { - domain_iter_count *= 4; - } - - // Functions with more arguments get more iterations. 
- let arg_multiplier = 1 << (t_env.input_count - 1); - domain_iter_count *= arg_multiplier; - // If we will be running tests against MPFR, we don't need to test as much against musl. // However, there are some platforms where we have to test against musl since MPFR can't be // built. @@ -228,6 +219,25 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { } }; + // Larger float types get more iterations. + if t_env.large_float_ty && ctx.gen_kind != GeneratorKind::Extensive { + if ctx.gen_kind == GeneratorKind::Extensive { + // Extensive already has a pretty high test count. + total_iterations *= 2; + } else { + total_iterations *= 4; + } + } + + // Functions with more arguments get more iterations. + let arg_multiplier = 1 << (t_env.input_count - 1); + total_iterations *= arg_multiplier; + + // FMA has a huge domain but is reasonably fast to run, so increase another 1.5x. + if ctx.base_name == BaseName::Fma { + total_iterations = 3 * total_iterations / 2; + } + // Some tests are significantly slower than others and need to be further reduced. if let Some((_id, _gen, scale)) = EXTEMELY_SLOW_TESTS .iter() @@ -239,11 +249,6 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { } } - // FMA has a huge domain but is reasonably fast to run, so increase iterations. - if ctx.base_name == BaseName::Fma { - total_iterations *= 4; - } - if cfg!(optimizations_enabled) { // Always run at least 10,000 tests. total_iterations = total_iterations.max(10_000); From f1996b3da9a438d8a6a089f6d4f29883f625b4c3 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 12 Feb 2025 10:12:24 +0000 Subject: [PATCH 1196/1459] fma refactor 1/3: remove math/fma.rs Done in stages so git tracks the moved file correctly. 
--- libm/src/math/fma.rs | 40 ---------------------------------------- 1 file changed, 40 deletions(-) delete mode 100644 libm/src/math/fma.rs diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs deleted file mode 100644 index 69cc3eb67..000000000 --- a/libm/src/math/fma.rs +++ /dev/null @@ -1,40 +0,0 @@ -/// Fused multiply add (f64) -/// -/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fma(x: f64, y: f64, z: f64) -> f64 { - return super::generic::fma(x, y, z); -} - -#[cfg(test)] -mod tests { - use super::*; - #[test] - fn fma_segfault() { - // These two inputs cause fma to segfault on release due to overflow: - assert_eq!( - fma( - -0.0000000000000002220446049250313, - -0.0000000000000002220446049250313, - -0.0000000000000002220446049250313 - ), - -0.00000000000000022204460492503126, - ); - - let result = fma(-0.992, -0.992, -0.992); - //force rounding to storage format on x87 to prevent superious errors. - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let result = force_eval!(result); - assert_eq!(result, -0.007936000000000007,); - } - - #[test] - fn fma_sbb() { - assert_eq!(fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), -3991680619069439e277); - } - - #[test] - fn fma_underflow() { - assert_eq!(fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), 0.0,); - } -} From c1ea5dc941116bafe7bb7325c03b9f91902299ee Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 12 Feb 2025 10:12:24 +0000 Subject: [PATCH 1197/1459] fma refactor 2/3: move math/generic/fma.rs to math/fma.rs Done in stages so git tracks the moved file correctly. 
--- libm/src/math/{generic => }/fma.rs | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename libm/src/math/{generic => }/fma.rs (100%) diff --git a/libm/src/math/generic/fma.rs b/libm/src/math/fma.rs similarity index 100% rename from libm/src/math/generic/fma.rs rename to libm/src/math/fma.rs From 7672bd0106358942c3ddd394ada32432d3705737 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 12 Feb 2025 09:55:04 +0000 Subject: [PATCH 1198/1459] fma refactor 3/3: combine `fma` public API with its implementation Similar to other recent changes, just put public API in the same file as its generic implementation. To keep things slightly cleaner, split the default implementation from the `_wide` implementation. Also introduces a stub `fmaf16`. --- libm/etc/function-definitions.json | 9 +- libm/src/math/fma.rs | 140 ++++++++++++----------------- libm/src/math/fma_wide.rs | 97 ++++++++++++++++++++ libm/src/math/fmaf.rs | 21 ----- libm/src/math/fmaf128.rs | 7 -- libm/src/math/generic/mod.rs | 2 - libm/src/math/mod.rs | 10 ++- 7 files changed, 161 insertions(+), 125 deletions(-) create mode 100644 libm/src/math/fma_wide.rs delete mode 100644 libm/src/math/fmaf.rs delete mode 100644 libm/src/math/fmaf128.rs diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 63d9927ad..a966852b1 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -343,22 +343,19 @@ }, "fma": { "sources": [ - "src/math/fma.rs", - "src/math/generic/fma.rs" + "src/math/fma.rs" ], "type": "f64" }, "fmaf": { "sources": [ - "src/math/fmaf.rs", - "src/math/generic/fma.rs" + "src/math/fma_wide.rs" ], "type": "f32" }, "fmaf128": { "sources": [ - "src/math/fmaf128.rs", - "src/math/generic/fma.rs" + "src/math/fma.rs" ], "type": "f128" }, diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index cb1061cc3..a54984c93 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -1,23 +1,28 @@ /* SPDX-License-Identifier: MIT */ -/* 
origin: musl src/math/{fma,fmaf}.c. Ported to generic Rust algorithm in 2025, TG. */ +/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */ use super::super::support::{DInt, FpResult, HInt, IntTy, Round, Status}; -use super::super::{CastFrom, CastInto, DFloat, Float, HFloat, Int, MinInt}; +use super::{CastFrom, CastInto, Float, Int, MinInt}; -/// Fused multiply-add that works when there is not a larger float size available. Currently this -/// is still specialized only for `f64`. Computes `(x * y) + z`. +/// Fused multiply add (f64) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fma(x: F, y: F, z: F) -> F -where - F: Float, - F: CastFrom, - F: CastFrom, - F::Int: HInt, - u32: CastInto, -{ +pub fn fma(x: f64, y: f64, z: f64) -> f64 { + fma_round(x, y, z, Round::Nearest).val +} + +/// Fused multiply add (f128) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 { fma_round(x, y, z, Round::Nearest).val } +/// Fused multiply-add that works when there is not a larger float size available. Computes +/// `(x * y) + z`. pub fn fma_round(x: F, y: F, z: F, _round: Round) -> FpResult where F: Float, @@ -222,79 +227,7 @@ where } // Use our exponent to scale the final value. - FpResult::new(super::scalbn(r, e), status) -} - -/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`, -/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding. 
-pub fn fma_wide(x: F, y: F, z: F) -> F -where - F: Float + HFloat, - B: Float + DFloat, - B::Int: CastInto, - i32: CastFrom, -{ - fma_wide_round(x, y, z, Round::Nearest).val -} - -pub fn fma_wide_round(x: F, y: F, z: F, round: Round) -> FpResult -where - F: Float + HFloat, - B: Float + DFloat, - B::Int: CastInto, - i32: CastFrom, -{ - let one = IntTy::::ONE; - - let xy: B = x.widen() * y.widen(); - let mut result: B = xy + z.widen(); - let mut ui: B::Int = result.to_bits(); - let re = result.ex(); - let zb: B = z.widen(); - - let prec_diff = B::SIG_BITS - F::SIG_BITS; - let excess_prec = ui & ((one << prec_diff) - one); - let halfway = one << (prec_diff - 1); - - // Common case: the larger precision is fine if... - // This is not a halfway case - if excess_prec != halfway - // Or the result is NaN - || re == B::EXP_SAT - // Or the result is exact - || (result - xy == zb && result - zb == xy) - // Or the mode is something other than round to nearest - || round != Round::Nearest - { - let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32; - let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32; - - let mut status = Status::OK; - - if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() { - // This branch is never hit; requires previous operations to set a status - status.set_inexact(false); - - result = xy + z.widen(); - if status.inexact() { - status.set_underflow(true); - } else { - status.set_inexact(true); - } - } - - return FpResult { val: result.narrow(), status }; - } - - let neg = ui >> (B::BITS - 1) != IntTy::::ZERO; - let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy }; - if neg == (err < B::ZERO) { - ui += one; - } else { - ui -= one; - } - - FpResult::ok(B::from_bits(ui).narrow()) + FpResult::new(super::generic::scalbn(r, e), status) } /// Representation of `F` that has handled subnormals. 
@@ -363,6 +296,7 @@ impl Norm { mod tests { use super::*; + /// Test the generic `fma_round` algorithm for a given float. fn spec_test() where F: Float, @@ -375,6 +309,8 @@ mod tests { let y = F::from_bits(F::Int::ONE); let z = F::ZERO; + let fma = |x, y, z| fma_round(x, y, z, Round::Nearest).val; + // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the // exact result" @@ -384,6 +320,11 @@ mod tests { assert_biteq!(fma(-x, -y, z), F::ZERO); } + #[test] + fn spec_test_f32() { + spec_test::(); + } + #[test] fn spec_test_f64() { spec_test::(); @@ -417,4 +358,33 @@ mod tests { fn spec_test_f128() { spec_test::(); } + + #[test] + fn fma_segfault() { + // These two inputs cause fma to segfault on release due to overflow: + assert_eq!( + fma( + -0.0000000000000002220446049250313, + -0.0000000000000002220446049250313, + -0.0000000000000002220446049250313 + ), + -0.00000000000000022204460492503126, + ); + + let result = fma(-0.992, -0.992, -0.992); + //force rounding to storage format on x87 to prevent superious errors. + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let result = force_eval!(result); + assert_eq!(result, -0.007936000000000007,); + } + + #[test] + fn fma_sbb() { + assert_eq!(fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), -3991680619069439e277); + } + + #[test] + fn fma_underflow() { + assert_eq!(fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), 0.0,); + } } diff --git a/libm/src/math/fma_wide.rs b/libm/src/math/fma_wide.rs new file mode 100644 index 000000000..a8c1a5488 --- /dev/null +++ b/libm/src/math/fma_wide.rs @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. 
*/ + +use super::super::support::{FpResult, IntTy, Round, Status}; +use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt}; + +// Placeholder so we can have `fmaf16` in the `Float` trait. +#[allow(unused)] +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 { + unimplemented!() +} + +/// Floating multiply add (f32) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { + fma_wide_round(x, y, z, Round::Nearest).val +} + +/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`, +/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding. +pub fn fma_wide_round(x: F, y: F, z: F, round: Round) -> FpResult +where + F: Float + HFloat, + B: Float + DFloat, + B::Int: CastInto, + i32: CastFrom, +{ + let one = IntTy::::ONE; + + let xy: B = x.widen() * y.widen(); + let mut result: B = xy + z.widen(); + let mut ui: B::Int = result.to_bits(); + let re = result.ex(); + let zb: B = z.widen(); + + let prec_diff = B::SIG_BITS - F::SIG_BITS; + let excess_prec = ui & ((one << prec_diff) - one); + let halfway = one << (prec_diff - 1); + + // Common case: the larger precision is fine if... 
+ // This is not a halfway case + if excess_prec != halfway + // Or the result is NaN + || re == B::EXP_SAT + // Or the result is exact + || (result - xy == zb && result - zb == xy) + // Or the mode is something other than round to nearest + || round != Round::Nearest + { + let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32; + let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32; + + let mut status = Status::OK; + + if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() { + // This branch is never hit; requires previous operations to set a status + status.set_inexact(false); + + result = xy + z.widen(); + if status.inexact() { + status.set_underflow(true); + } else { + status.set_inexact(true); + } + } + + return FpResult { val: result.narrow(), status }; + } + + let neg = ui >> (B::BITS - 1) != IntTy::::ZERO; + let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy }; + if neg == (err < B::ZERO) { + ui += one; + } else { + ui -= one; + } + + FpResult::ok(B::from_bits(ui).narrow()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn issue_263() { + let a = f32::from_bits(1266679807); + let b = f32::from_bits(1300234242); + let c = f32::from_bits(1115553792); + let expected = f32::from_bits(1501560833); + assert_eq!(fmaf(a, b, c), expected); + } +} diff --git a/libm/src/math/fmaf.rs b/libm/src/math/fmaf.rs deleted file mode 100644 index 40d7f40d6..000000000 --- a/libm/src/math/fmaf.rs +++ /dev/null @@ -1,21 +0,0 @@ -/// Floating multiply add (f32) -/// -/// Computes `(x*y)+z`, rounded as one ternary operation: -/// Computes the value (as if) to infinite precision and rounds once to the result format, -/// according to the rounding mode characterized by the value of FLT_ROUNDS. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { - super::generic::fma_wide(x, y, z) -} - -#[cfg(test)] -mod tests { - #[test] - fn issue_263() { - let a = f32::from_bits(1266679807); - let b = f32::from_bits(1300234242); - let c = f32::from_bits(1115553792); - let expected = f32::from_bits(1501560833); - assert_eq!(super::fmaf(a, b, c), expected); - } -} diff --git a/libm/src/math/fmaf128.rs b/libm/src/math/fmaf128.rs deleted file mode 100644 index 50f7360de..000000000 --- a/libm/src/math/fmaf128.rs +++ /dev/null @@ -1,7 +0,0 @@ -/// Fused multiply add (f128) -/// -/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 { - return super::generic::fma(x, y, z); -} diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index f224eba73..9be185f80 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -3,7 +3,6 @@ mod copysign; mod fabs; mod fdim; mod floor; -mod fma; mod fmax; mod fmaximum; mod fmaximum_num; @@ -22,7 +21,6 @@ pub use copysign::copysign; pub use fabs::fabs; pub use fdim::fdim; pub use floor::floor; -pub use fma::{fma, fma_wide}; pub use fmax::fmax; pub use fmaximum::fmaximum; pub use fmaximum_num::fmaximum_num; diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index e58d79adc..5fc8fa0b3 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -164,7 +164,7 @@ mod fdimf; mod floor; mod floorf; mod fma; -mod fmaf; +mod fma_wide; mod fmin_fmax; mod fminimum_fmaximum; mod fminimum_fmaximum_num; @@ -271,7 +271,7 @@ pub use self::fdimf::fdimf; pub use self::floor::floor; pub use self::floorf::floorf; pub use self::fma::fma; -pub use self::fmaf::fmaf; +pub use self::fma_wide::fmaf; pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf}; pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, 
fminimum, fminimumf}; pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf}; @@ -370,6 +370,9 @@ cfg_if! { pub use self::sqrtf16::sqrtf16; pub use self::truncf16::truncf16; // verify-sorted-end + + #[allow(unused_imports)] + pub(crate) use self::fma_wide::fmaf16; } } @@ -381,7 +384,6 @@ cfg_if! { mod fabsf128; mod fdimf128; mod floorf128; - mod fmaf128; mod fmodf128; mod ldexpf128; mod roundf128; @@ -396,7 +398,7 @@ cfg_if! { pub use self::fabsf128::fabsf128; pub use self::fdimf128::fdimf128; pub use self::floorf128::floorf128; - pub use self::fmaf128::fmaf128; + pub use self::fma::fmaf128; pub use self::fmin_fmax::{fmaxf128, fminf128}; pub use self::fminimum_fmaximum::{fmaximumf128, fminimumf128}; pub use self::fminimum_fmaximum_num::{fmaximum_numf128, fminimum_numf128}; From f1ea040cfbca1bbb7039f6f7dc17bc89690b7fa0 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 12 Feb 2025 10:16:48 +0000 Subject: [PATCH 1199/1459] Make `fma` a trait method on `Float` --- libm/crates/libm-test/src/f8_impl.rs | 4 ++++ libm/etc/function-definitions.json | 3 +-- libm/etc/update-api-list.py | 2 +- libm/src/math/cbrt.rs | 20 ++++---------------- libm/src/math/support/float_traits.rs | 26 ++++++++++++++++++++------ 5 files changed, 30 insertions(+), 25 deletions(-) diff --git a/libm/crates/libm-test/src/f8_impl.rs b/libm/crates/libm-test/src/f8_impl.rs index 56ea0b729..0683d8392 100644 --- a/libm/crates/libm-test/src/f8_impl.rs +++ b/libm/crates/libm-test/src/f8_impl.rs @@ -78,6 +78,10 @@ impl Float for f8 { libm::generic::copysign(self, other) } + fn fma(self, _y: Self, _z: Self) -> Self { + unimplemented!() + } + fn normalize(_significand: Self::Int) -> (i32, Self::Int) { unimplemented!() } diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index a966852b1..64a775ba9 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -130,8 +130,7 @@ "copysign": { "sources": [ 
"src/math/copysign.rs", - "src/math/generic/copysign.rs", - "src/math/support/float_traits.rs" + "src/math/generic/copysign.rs" ], "type": "f64" }, diff --git a/libm/etc/update-api-list.py b/libm/etc/update-api-list.py index c0b6e41d3..67d1b0508 100755 --- a/libm/etc/update-api-list.py +++ b/libm/etc/update-api-list.py @@ -24,7 +24,7 @@ DIRECTORIES = [".github", "ci", "crates", "etc", "src"] # These files do not trigger a retest. -IGNORED_SOURCES = ["src/libm_helper.rs"] +IGNORED_SOURCES = ["src/libm_helper.rs", "src/math/support/float_traits.rs"] IndexTy: TypeAlias = dict[str, dict[str, Any]] """Type of the `index` item in rustdoc's JSON output""" diff --git a/libm/src/math/cbrt.rs b/libm/src/math/cbrt.rs index 8560d37ab..9d3311cd6 100644 --- a/libm/src/math/cbrt.rs +++ b/libm/src/math/cbrt.rs @@ -103,11 +103,11 @@ pub fn cbrt_round(x: f64, round: Round) -> FpResult { * and rr an approximation of 1/zz. We now perform another iteration of * Newton-Raphson, this time with a linear approximation only. 
*/ y2 = y * y; - let mut y2l: f64 = fmaf64(y, y, -y2); + let mut y2l: f64 = y.fma(y, -y2); /* y2 + y2l = y^2 exactly */ let mut y3: f64 = y2 * y; - let mut y3l: f64 = fmaf64(y, y2, -y3) + y * y2l; + let mut y3l: f64 = y.fma(y2, -y3) + y * y2l; /* y3 + y3l approximates y^3 with about 106 bits of accuracy */ h = ((y3 - zz) + y3l) * rr; @@ -132,9 +132,9 @@ pub fn cbrt_round(x: f64, round: Round) -> FpResult { cold_path(); y2 = y1 * y1; - y2l = fmaf64(y1, y1, -y2); + y2l = y1.fma(y1, -y2); y3 = y2 * y1; - y3l = fmaf64(y1, y2, -y3) + y1 * y2l; + y3l = y1.fma(y2, -y3) + y1 * y2l; h = ((y3 - zz) + y3l) * rr; dy = h * (y1 * u0); y = y1 - dy; @@ -198,18 +198,6 @@ pub fn cbrt_round(x: f64, round: Round) -> FpResult { FpResult::ok(f64::from_bits(cvt3)) } -fn fmaf64(x: f64, y: f64, z: f64) -> f64 { - #[cfg(intrinsics_enabled)] - { - return unsafe { core::intrinsics::fmaf64(x, y, z) }; - } - - #[cfg(not(intrinsics_enabled))] - { - return super::fma(x, y, z); - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 534ca9a07..96c209c85 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -160,9 +160,11 @@ pub trait Float: fn abs(self) -> Self; /// Returns a number composed of the magnitude of self and the sign of sign. - #[allow(dead_code)] fn copysign(self, other: Self) -> Self; + /// Fused multiply add, rounding once. + fn fma(self, y: Self, z: Self) -> Self; + /// Returns (normalized exponent, normalized significand) #[allow(dead_code)] fn normalize(significand: Self::Int) -> (i32, Self::Int); @@ -184,7 +186,9 @@ macro_rules! float_impl { $sity:ident, $bits:expr, $significand_bits:expr, - $from_bits:path + $from_bits:path, + $fma_fn:ident, + $fma_intrinsic:ident ) => { impl Float for $ty { type Int = $ity; @@ -252,6 +256,16 @@ macro_rules! float_impl { } } } + fn fma(self, y: Self, z: Self) -> Self { + cfg_if! 
{ + // fma is not yet available in `core` + if #[cfg(intrinsics_enabled)] { + unsafe{ core::intrinsics::$fma_intrinsic(self, y, z) } + } else { + super::super::$fma_fn(self, y, z) + } + } + } fn normalize(significand: Self::Int) -> (i32, Self::Int) { let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int) @@ -261,11 +275,11 @@ macro_rules! float_impl { } #[cfg(f16_enabled)] -float_impl!(f16, u16, i16, 16, 10, f16::from_bits); -float_impl!(f32, u32, i32, 32, 23, f32_from_bits); -float_impl!(f64, u64, i64, 64, 52, f64_from_bits); +float_impl!(f16, u16, i16, 16, 10, f16::from_bits, fmaf16, fmaf16); +float_impl!(f32, u32, i32, 32, 23, f32_from_bits, fmaf, fmaf32); +float_impl!(f64, u64, i64, 64, 52, f64_from_bits, fma, fmaf64); #[cfg(f128_enabled)] -float_impl!(f128, u128, i128, 128, 112, f128::from_bits); +float_impl!(f128, u128, i128, 128, 112, f128::from_bits, fmaf128, fmaf128); /* FIXME(msrv): vendor some things that are not const stable at our MSRV */ From 782628a08538a01fccca53de2c8514501234e859 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 12 Feb 2025 21:06:15 +0000 Subject: [PATCH 1200/1459] Use `git ls-files` rather than manually globbing for tidy This avoids matching build directories, ignored files, and submodules. --- libm/etc/update-api-list.py | 54 ++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/libm/etc/update-api-list.py b/libm/etc/update-api-list.py index 67d1b0508..b4ce2c453 100755 --- a/libm/etc/update-api-list.py +++ b/libm/etc/update-api-list.py @@ -12,7 +12,7 @@ import subprocess as sp import sys from dataclasses import dataclass -from glob import glob, iglob +from glob import glob from pathlib import Path from typing import Any, Callable, TypeAlias @@ -20,9 +20,6 @@ ETC_DIR = SELF_PATH.parent ROOT_DIR = ETC_DIR.parent -# Loose approximation of what gets checked in to git, without needing `git ls-files`. 
-DIRECTORIES = [".github", "ci", "crates", "etc", "src"] - # These files do not trigger a retest. IGNORED_SOURCES = ["src/libm_helper.rs", "src/math/support/float_traits.rs"] @@ -190,30 +187,31 @@ def tidy_lists(self) -> None: """In each file, check annotations indicating blocks of code should be sorted or should include all public API. """ - for dirname in DIRECTORIES: - dir = ROOT_DIR.joinpath(dirname) - for fname in iglob("**", root_dir=dir, recursive=True): - fpath = dir.joinpath(fname) - if fpath.is_dir() or fpath == SELF_PATH: - continue - - lines = fpath.read_text().splitlines() - - validate_delimited_block( - fpath, - lines, - "verify-sorted-start", - "verify-sorted-end", - ensure_sorted, - ) - - validate_delimited_block( - fpath, - lines, - "verify-apilist-start", - "verify-apilist-end", - lambda p, n, lines: self.ensure_contains_api(p, n, lines), - ) + + flist = sp.check_output(["git", "ls-files"], cwd=ROOT_DIR, text=True) + + for path in flist.splitlines(): + fpath = ROOT_DIR.joinpath(path) + if fpath.is_dir() or fpath == SELF_PATH: + continue + + lines = fpath.read_text().splitlines() + + validate_delimited_block( + fpath, + lines, + "verify-sorted-start", + "verify-sorted-end", + ensure_sorted, + ) + + validate_delimited_block( + fpath, + lines, + "verify-apilist-start", + "verify-apilist-end", + lambda p, n, lines: self.ensure_contains_api(p, n, lines), + ) def ensure_contains_api(self, fpath: Path, line_num: int, lines: list[str]): """Given a list of strings, ensure that each public function we have is named From 108eabffcbe93ddcb74ac6eb261cc0c172c8f522 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 18 Feb 2025 15:39:18 -0500 Subject: [PATCH 1201/1459] ci: Update actions/cache to v4 Github has deprecated v2 so this needs to be bumped. 
--- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fee5c45ea..a801f2722 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -117,7 +117,7 @@ jobs: with: key: ${{ matrix.target }} - name: Cache Docker layers - uses: actions/cache@v2 + uses: actions/cache@v4 if: matrix.os == 'ubuntu-latest' with: path: /tmp/.buildx-cache From 88e83b96ad09f3cf9e2d1b4543a7d43f9c5a77c0 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 18 Feb 2025 15:48:46 -0500 Subject: [PATCH 1202/1459] ci: Pin the nightly toolchain for i686-pc-windows-gnu Pin i686-pc-windows-gnu to nightly-2025-02-07 until [1] is resolved. [1]: https://github.com/rust-lang/rust/issues/136795 --- .github/workflows/main.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a801f2722..1575730fe 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -98,7 +98,8 @@ jobs: test_verbatim: 1 - target: i686-pc-windows-gnu os: windows-latest - rust: nightly-i686-gnu + # FIXME: pinned due to https://github.com/rust-lang/rust/issues/136795 + rust: nightly-2025-02-07-i686-gnu - target: x86_64-pc-windows-gnu os: windows-latest rust: nightly-x86_64-gnu From b2bcfc838e2a4b72fa62b333e3eb91f250aa4539 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 19 Feb 2025 15:26:07 -0500 Subject: [PATCH 1203/1459] ci: Pin the nightly toolchain for aarch64-unknown-linux-gnu Pin aarch64-unknown-linux-gnu to nightly-2025-02-07 until [1] is resolved. 
[1]: https://github.com/llvm/llvm-project/issues/127804 --- .github/workflows/main.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1575730fe..c179a3391 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -20,7 +20,8 @@ jobs: rust: nightly - target: aarch64-unknown-linux-gnu os: ubuntu-latest - rust: nightly + # FIXME: pinned due to https://github.com/llvm/llvm-project/issues/127804 + rust: nightly-2025-02-07 - target: aarch64-pc-windows-msvc os: windows-latest rust: nightly From 72d0cde3f1f7a0415bed59063f5267f54dc8bb35 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 18 Feb 2025 14:29:37 +0100 Subject: [PATCH 1204/1459] remove win64_128bit_abi_hack --- src/float/conv.rs | 4 --- src/int/sdiv.rs | 4 +-- src/int/udiv.rs | 3 -- src/macros.rs | 71 ----------------------------------------------- 4 files changed, 2 insertions(+), 80 deletions(-) diff --git a/src/float/conv.rs b/src/float/conv.rs index 4f52ac712..42a526bd5 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -403,7 +403,6 @@ intrinsics! { float_to_unsigned_int(f) } - #[win64_128bit_abi_hack] pub extern "C" fn __fixunssfti(f: f32) -> u128 { float_to_unsigned_int(f) } @@ -418,7 +417,6 @@ intrinsics! { float_to_unsigned_int(f) } - #[win64_128bit_abi_hack] pub extern "C" fn __fixunsdfti(f: f64) -> u128 { float_to_unsigned_int(f) } @@ -454,7 +452,6 @@ intrinsics! { float_to_signed_int(f) } - #[win64_128bit_abi_hack] pub extern "C" fn __fixsfti(f: f32) -> i128 { float_to_signed_int(f) } @@ -469,7 +466,6 @@ intrinsics! 
{ float_to_signed_int(f) } - #[win64_128bit_abi_hack] pub extern "C" fn __fixdfti(f: f64) -> i128 { float_to_signed_int(f) } diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index 9d316c76e..9630c7d7d 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -165,5 +165,5 @@ sdivmod!( i128, maybe_use_optimized_c_shim ); -sdiv!(__udivti3, __divti3, u128, i128, win64_128bit_abi_hack); -smod!(__umodti3, __modti3, u128, i128, win64_128bit_abi_hack); +sdiv!(__udivti3, __divti3, u128, i128,); +smod!(__umodti3, __modti3, u128, i128,); diff --git a/src/int/udiv.rs b/src/int/udiv.rs index c891eede4..1fa761212 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -58,7 +58,6 @@ intrinsics! { // the existence of `u128_div_rem` to get 32-bit SPARC to compile, see `u128_divide_sparc` docs. #[avr_skip] - #[win64_128bit_abi_hack] /// Returns `n / d` pub extern "C" fn __udivti3(n: u128, d: u128) -> u128 { #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { @@ -70,7 +69,6 @@ intrinsics! { } #[avr_skip] - #[win64_128bit_abi_hack] /// Returns `n % d` pub extern "C" fn __umodti3(n: u128, d: u128) -> u128 { #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { @@ -84,7 +82,6 @@ intrinsics! { } #[avr_skip] - #[win64_128bit_abi_hack] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmodti4(n: u128, d: u128, rem: Option<&mut u128>) -> u128 { #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { diff --git a/src/macros.rs b/src/macros.rs index f51e49e98..88b54e82c 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -60,9 +60,6 @@ macro_rules! public_test_dep { /// the specified ABI everywhere else. /// * `unadjusted_on_win64` - like `aapcs_on_arm` this switches to the /// `"unadjusted"` abi on Win64 and the specified abi elsewhere. 
-/// * `win64_128bit_abi_hack` - this attribute is used for 128-bit integer -/// intrinsics where the ABI is slightly tweaked on Windows platforms, but -/// it's a normal ABI elsewhere for returning a 128 bit integer. /// * `arm_aeabi_alias` - handles the "aliasing" of various intrinsics on ARM /// their otherwise typical names to other prefixed ones. /// * `ppc_alias` - changes the name of the symbol on PowerPC platforms without @@ -231,51 +228,6 @@ macro_rules! intrinsics { intrinsics!($($rest)*); ); - // Some intrinsics on win64 which return a 128-bit integer have an.. unusual - // calling convention. That's managed here with this "abi hack" which alters - // the generated symbol's ABI. - // - // This will still define a function in this crate with the given name and - // signature, but the actual symbol for the intrinsic may have a slightly - // different ABI on win64. - ( - #[win64_128bit_abi_hack] - $(#[$($attr:tt)*])* - pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { - $($body:tt)* - } - - $($rest:tt)* - ) => ( - #[cfg(all(any(windows, target_os = "uefi"), target_arch = "x86_64"))] - $(#[$($attr)*])* - pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { - $($body)* - } - - #[cfg(all(any(windows, target_os = "uefi"), target_arch = "x86_64", not(feature = "mangled-names")))] - mod $name { - #[no_mangle] - #[cfg_attr(not(all(windows, target_env = "gnu")), linkage = "weak")] - extern $abi fn $name( $($argname: $ty),* ) - -> $crate::macros::win64_128bit_abi_hack::U64x2 - { - let e: $($ret)? = super::$name($($argname),*); - $crate::macros::win64_128bit_abi_hack::U64x2::from(e) - } - } - - #[cfg(not(all(any(windows, target_os = "uefi"), target_arch = "x86_64")))] - intrinsics! { - $(#[$($attr)*])* - pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { - $($body)* - } - } - - intrinsics!($($rest)*); - ); - // `arm_aeabi_alias` would conflict with `f16_apple_{arg,ret}_abi` not handled here. 
Avoid macro ambiguity by combining in a // single `#[]`. ( @@ -576,26 +528,3 @@ macro_rules! intrinsics { intrinsics!($($rest)*); ); } - -// Hack for LLVM expectations for ABI on windows. This is used by the -// `#[win64_128bit_abi_hack]` attribute recognized above -#[cfg(all(any(windows, target_os = "uefi"), target_pointer_width = "64"))] -pub mod win64_128bit_abi_hack { - #[repr(simd)] - pub struct U64x2([u64; 2]); - - impl From for U64x2 { - fn from(i: i128) -> U64x2 { - use crate::int::DInt; - let j = i as u128; - U64x2([j.lo(), j.hi()]) - } - } - - impl From for U64x2 { - fn from(i: u128) -> U64x2 { - use crate::int::DInt; - U64x2([i.lo(), i.hi()]) - } - } -} From ddd1a09e993c5cbd352988f47c4e1b1b1a73f44a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 19 Feb 2025 20:45:07 +0000 Subject: [PATCH 1205/1459] chore: release v0.1.147 --- CHANGELOG.md | 6 ++++++ Cargo.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d6962dc5b..a3890c6ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.147](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.146...compiler_builtins-v0.1.147) - 2025-02-19 + +### Other + +- remove win64_128bit_abi_hack + ## [0.1.146](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.145...compiler_builtins-v0.1.146) - 2025-02-06 ### Other diff --git a/Cargo.toml b/Cargo.toml index c93ca563c..ff1a10fc6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.146" +version = "0.1.147" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 
f322090515efa18f63d861f9a87a27f497dd23c7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 23 Feb 2025 08:21:22 +0000 Subject: [PATCH 1206/1459] Revert "ci: Pin the nightly toolchain for i686-pc-windows-gnu" Since [1], the issue should be resolved so the workaround can be dropped. This reverts commit 88e83b96ad09f3cf9e2d1b4543a7d43f9c5a77c0. [1]: https://github.com/rust-lang/compiler-builtins/pull/759 --- .github/workflows/main.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c179a3391..7336efc42 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -99,8 +99,7 @@ jobs: test_verbatim: 1 - target: i686-pc-windows-gnu os: windows-latest - # FIXME: pinned due to https://github.com/rust-lang/rust/issues/136795 - rust: nightly-2025-02-07-i686-gnu + rust: nightly-i686-gnu - target: x86_64-pc-windows-gnu os: windows-latest rust: nightly-x86_64-gnu From 36a9f9844debf3b5f7813f22a14967af823fba41 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 24 Feb 2025 01:23:37 +0000 Subject: [PATCH 1207/1459] ci: Pin the nightly toolchain for aarch64 jobs Pin aarch64-unknown-linux-gnu and aarch64-apple-darwin to nightly-2025-02-07 until [1] makes it to a Rust nightly. 
[1]: https://github.com/llvm/llvm-project/issues/127804 --- libm/.github/workflows/main.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index f066f4a8c..e86f936f7 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -22,8 +22,12 @@ jobs: include: - target: aarch64-apple-darwin os: macos-15 + # FIXME: pinned due to https://github.com/llvm/llvm-project/issues/127804 + channel: nightly-2025-02-07 - target: aarch64-unknown-linux-gnu os: ubuntu-24.04-arm + # FIXME: pinned due to https://github.com/llvm/llvm-project/issues/127804 + channel: nightly-2025-02-07 - target: aarch64-pc-windows-msvc os: windows-2025 build_only: 1 # Can't run on x86 hosts From 514d2f0fe7caf5b4ca2b65f136e3ee3117a67f2b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 24 Feb 2025 00:01:04 +0000 Subject: [PATCH 1208/1459] Make the compiler-builtins test more accurately mirror compiler-builtins In `compiler-builtins`, `libm` is contained within a `math` module. The smoke test in this repo has a slightly different layout so some things were passing that shouldn't be. Change module layouts in `compiler-builtins-smoke-test` to match `compiler-builtins` and update a few instances of broken paths. 
--- .../compiler-builtins-smoke-test/src/lib.rs | 185 +----------------- .../compiler-builtins-smoke-test/src/math.rs | 182 +++++++++++++++++ libm/src/math/fma.rs | 2 +- libm/src/math/fma_wide.rs | 2 +- 4 files changed, 186 insertions(+), 185 deletions(-) create mode 100644 libm/crates/compiler-builtins-smoke-test/src/math.rs diff --git a/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/libm/crates/compiler-builtins-smoke-test/src/lib.rs index 77a4666a1..f9e6e75a8 100644 --- a/libm/crates/compiler-builtins-smoke-test/src/lib.rs +++ b/libm/crates/compiler-builtins-smoke-test/src/lib.rs @@ -10,187 +10,6 @@ #![allow(internal_features)] #![no_std] -#[allow(dead_code)] -#[allow(clippy::all)] // We don't get `libm`'s list of `allow`s, so just ignore Clippy. -#[path = "../../../src/math/mod.rs"] -pub mod libm; - -use core::ffi::c_int; - +mod math; // Required for macro paths. -use libm::support; - -/// Mark functions `#[no_mangle]` and with the C ABI. -macro_rules! no_mangle { - ($( $name:ident( $($tt:tt)+ ) -> $ret:ty; )+) => { - $( no_mangle!(@inner $name( $($tt)+ ) -> $ret); )+ - }; - - // Handle simple functions with single return types - (@inner $name:ident( $($arg:ident: $aty:ty),+ ) -> $ret:ty) => { - #[no_mangle] - extern "C" fn $name($($arg: $aty),+) -> $ret { - libm::$name($($arg),+) - } - }; - - - // Functions with `&mut` return values need to be handled differently, use `|` to - // separate inputs vs. outputs. - ( - @inner $name:ident( $($arg:ident: $aty:ty),+ | $($rarg:ident: $rty:ty),+) -> $ret:ty - ) => { - #[no_mangle] - extern "C" fn $name($($arg: $aty,)+ $($rarg: $rty),+) -> $ret { - let ret; - (ret, $(*$rarg),+) = libm::$name($($arg),+); - ret - } - }; -} - -no_mangle! 
{ - frexp(x: f64 | y: &mut c_int) -> f64; - frexpf(x: f32 | y: &mut c_int) -> f32; - acos(x: f64) -> f64; - acosf(x: f32) -> f32; - acosh(x: f64) -> f64; - acoshf(x: f32) -> f32; - asin(x: f64) -> f64; - asinf(x: f32) -> f32; - asinh(x: f64) -> f64; - asinhf(x: f32) -> f32; - atan(x: f64) -> f64; - atan2(x: f64, y: f64) -> f64; - atan2f(x: f32, y: f32) -> f32; - atanf(x: f32) -> f32; - atanh(x: f64) -> f64; - atanhf(x: f32) -> f32; - cbrt(x: f64) -> f64; - cbrtf(x: f32) -> f32; - ceil(x: f64) -> f64; - ceilf(x: f32) -> f32; - ceilf128(x: f128) -> f128; - ceilf16(x: f16) -> f16; - copysign(x: f64, y: f64) -> f64; - copysignf(x: f32, y: f32) -> f32; - copysignf128(x: f128, y: f128) -> f128; - copysignf16(x: f16, y: f16) -> f16; - cos(x: f64) -> f64; - cosf(x: f32) -> f32; - cosh(x: f64) -> f64; - coshf(x: f32) -> f32; - erf(x: f64) -> f64; - erfc(x: f64) -> f64; - erfcf(x: f32) -> f32; - erff(x: f32) -> f32; - exp(x: f64) -> f64; - exp10(x: f64) -> f64; - exp10f(x: f32) -> f32; - exp2(x: f64) -> f64; - exp2f(x: f32) -> f32; - expf(x: f32) -> f32; - expm1(x: f64) -> f64; - expm1f(x: f32) -> f32; - fabs(x: f64) -> f64; - fabsf(x: f32) -> f32; - fabsf128(x: f128) -> f128; - fabsf16(x: f16) -> f16; - fdim(x: f64, y: f64) -> f64; - fdimf(x: f32, y: f32) -> f32; - fdimf128(x: f128, y: f128) -> f128; - fdimf16(x: f16, y: f16) -> f16; - floor(x: f64) -> f64; - floorf(x: f32) -> f32; - floorf128(x: f128) -> f128; - floorf16(x: f16) -> f16; - fma(x: f64, y: f64, z: f64) -> f64; - fmaf(x: f32, y: f32, z: f32) -> f32; - fmax(x: f64, y: f64) -> f64; - fmaxf(x: f32, y: f32) -> f32; - fmin(x: f64, y: f64) -> f64; - fminf(x: f32, y: f32) -> f32; - fmod(x: f64, y: f64) -> f64; - fmodf(x: f32, y: f32) -> f32; - hypot(x: f64, y: f64) -> f64; - hypotf(x: f32, y: f32) -> f32; - ilogb(x: f64) -> c_int; - ilogbf(x: f32) -> c_int; - j0(x: f64) -> f64; - j0f(x: f32) -> f32; - j1(x: f64) -> f64; - j1f(x: f32) -> f32; - jn(x: c_int, y: f64) -> f64; - jnf(x: c_int, y: f32) -> f32; - ldexp(x: 
f64, y: c_int) -> f64; - ldexpf(x: f32, y: c_int) -> f32; - lgamma(x: f64) -> f64; - lgamma_r(x: f64 | r: &mut c_int) -> f64; - lgammaf(x: f32) -> f32; - lgammaf_r(x: f32 | r: &mut c_int) -> f32; - log(x: f64) -> f64; - log10(x: f64) -> f64; - log10f(x: f32) -> f32; - log1p(x: f64) -> f64; - log1pf(x: f32) -> f32; - log2(x: f64) -> f64; - log2f(x: f32) -> f32; - logf(x: f32) -> f32; - modf(x: f64 | r: &mut f64) -> f64; - modff(x: f32 | r: &mut f32) -> f32; - nextafter(x: f64, y: f64) -> f64; - nextafterf(x: f32, y: f32) -> f32; - pow(x: f64, y: f64) -> f64; - powf(x: f32, y: f32) -> f32; - remainder(x: f64, y: f64) -> f64; - remainderf(x: f32, y: f32) -> f32; - remquo(x: f64, y: f64 | q: &mut c_int) -> f64; - remquof(x: f32, y: f32 | q: &mut c_int) -> f32; - rint(x: f64) -> f64; - rintf(x: f32) -> f32; - rintf128(x: f128) -> f128; - rintf16(x: f16) -> f16; - round(x: f64) -> f64; - roundf(x: f32) -> f32; - scalbn(x: f64, y: c_int) -> f64; - scalbnf(x: f32, y: c_int) -> f32; - sin(x: f64) -> f64; - sinf(x: f32) -> f32; - sinh(x: f64) -> f64; - sinhf(x: f32) -> f32; - sqrt(x: f64) -> f64; - sqrtf(x: f32) -> f32; - tan(x: f64) -> f64; - tanf(x: f32) -> f32; - tanh(x: f64) -> f64; - tanhf(x: f32) -> f32; - tgamma(x: f64) -> f64; - tgammaf(x: f32) -> f32; - trunc(x: f64) -> f64; - truncf(x: f32) -> f32; - truncf128(x: f128) -> f128; - truncf16(x: f16) -> f16; - y0(x: f64) -> f64; - y0f(x: f32) -> f32; - y1(x: f64) -> f64; - y1f(x: f32) -> f32; - yn(x: c_int, y: f64) -> f64; - ynf(x: c_int, y: f32) -> f32; -} - -/* sincos has no direct return type, not worth handling in the macro */ - -#[no_mangle] -extern "C" fn sincos(x: f64, s: &mut f64, c: &mut f64) { - (*s, *c) = libm::sincos(x); -} - -#[no_mangle] -extern "C" fn sincosf(x: f32, s: &mut f32, c: &mut f32) { - (*s, *c) = libm::sincosf(x); -} - -#[panic_handler] -fn panic(_info: &core::panic::PanicInfo) -> ! 
{ - loop {} -} +use math::libm::support; diff --git a/libm/crates/compiler-builtins-smoke-test/src/math.rs b/libm/crates/compiler-builtins-smoke-test/src/math.rs new file mode 100644 index 000000000..7e0146998 --- /dev/null +++ b/libm/crates/compiler-builtins-smoke-test/src/math.rs @@ -0,0 +1,182 @@ +use core::ffi::c_int; + +#[allow(dead_code)] +#[allow(clippy::all)] // We don't get `libm`'s list of `allow`s, so just ignore Clippy. +#[allow(unused_imports)] +#[path = "../../../src/math/mod.rs"] +pub mod libm; + +/// Mark functions `#[no_mangle]` and with the C ABI. +macro_rules! no_mangle { + ($( $name:ident( $($tt:tt)+ ) -> $ret:ty; )+) => { + $( no_mangle!(@inner $name( $($tt)+ ) -> $ret); )+ + }; + + // Handle simple functions with single return types + (@inner $name:ident( $($arg:ident: $aty:ty),+ ) -> $ret:ty) => { + #[no_mangle] + extern "C" fn $name($($arg: $aty),+) -> $ret { + libm::$name($($arg),+) + } + }; + + + // Functions with `&mut` return values need to be handled differently, use `|` to + // separate inputs vs. outputs. + ( + @inner $name:ident( $($arg:ident: $aty:ty),+ | $($rarg:ident: $rty:ty),+) -> $ret:ty + ) => { + #[no_mangle] + extern "C" fn $name($($arg: $aty,)+ $($rarg: $rty),+) -> $ret { + let ret; + (ret, $(*$rarg),+) = libm::$name($($arg),+); + ret + } + }; +} + +no_mangle! 
{ + frexp(x: f64 | y: &mut c_int) -> f64; + frexpf(x: f32 | y: &mut c_int) -> f32; + acos(x: f64) -> f64; + acosf(x: f32) -> f32; + acosh(x: f64) -> f64; + acoshf(x: f32) -> f32; + asin(x: f64) -> f64; + asinf(x: f32) -> f32; + asinh(x: f64) -> f64; + asinhf(x: f32) -> f32; + atan(x: f64) -> f64; + atan2(x: f64, y: f64) -> f64; + atan2f(x: f32, y: f32) -> f32; + atanf(x: f32) -> f32; + atanh(x: f64) -> f64; + atanhf(x: f32) -> f32; + cbrt(x: f64) -> f64; + cbrtf(x: f32) -> f32; + ceil(x: f64) -> f64; + ceilf(x: f32) -> f32; + ceilf128(x: f128) -> f128; + ceilf16(x: f16) -> f16; + copysign(x: f64, y: f64) -> f64; + copysignf(x: f32, y: f32) -> f32; + copysignf128(x: f128, y: f128) -> f128; + copysignf16(x: f16, y: f16) -> f16; + cos(x: f64) -> f64; + cosf(x: f32) -> f32; + cosh(x: f64) -> f64; + coshf(x: f32) -> f32; + erf(x: f64) -> f64; + erfc(x: f64) -> f64; + erfcf(x: f32) -> f32; + erff(x: f32) -> f32; + exp(x: f64) -> f64; + exp10(x: f64) -> f64; + exp10f(x: f32) -> f32; + exp2(x: f64) -> f64; + exp2f(x: f32) -> f32; + expf(x: f32) -> f32; + expm1(x: f64) -> f64; + expm1f(x: f32) -> f32; + fabs(x: f64) -> f64; + fabsf(x: f32) -> f32; + fabsf128(x: f128) -> f128; + fabsf16(x: f16) -> f16; + fdim(x: f64, y: f64) -> f64; + fdimf(x: f32, y: f32) -> f32; + fdimf128(x: f128, y: f128) -> f128; + fdimf16(x: f16, y: f16) -> f16; + floor(x: f64) -> f64; + floorf(x: f32) -> f32; + floorf128(x: f128) -> f128; + floorf16(x: f16) -> f16; + fma(x: f64, y: f64, z: f64) -> f64; + fmaf(x: f32, y: f32, z: f32) -> f32; + fmax(x: f64, y: f64) -> f64; + fmaxf(x: f32, y: f32) -> f32; + fmin(x: f64, y: f64) -> f64; + fminf(x: f32, y: f32) -> f32; + fmod(x: f64, y: f64) -> f64; + fmodf(x: f32, y: f32) -> f32; + hypot(x: f64, y: f64) -> f64; + hypotf(x: f32, y: f32) -> f32; + ilogb(x: f64) -> c_int; + ilogbf(x: f32) -> c_int; + j0(x: f64) -> f64; + j0f(x: f32) -> f32; + j1(x: f64) -> f64; + j1f(x: f32) -> f32; + jn(x: c_int, y: f64) -> f64; + jnf(x: c_int, y: f32) -> f32; + ldexp(x: 
f64, y: c_int) -> f64; + ldexpf(x: f32, y: c_int) -> f32; + lgamma(x: f64) -> f64; + lgamma_r(x: f64 | r: &mut c_int) -> f64; + lgammaf(x: f32) -> f32; + lgammaf_r(x: f32 | r: &mut c_int) -> f32; + log(x: f64) -> f64; + log10(x: f64) -> f64; + log10f(x: f32) -> f32; + log1p(x: f64) -> f64; + log1pf(x: f32) -> f32; + log2(x: f64) -> f64; + log2f(x: f32) -> f32; + logf(x: f32) -> f32; + modf(x: f64 | r: &mut f64) -> f64; + modff(x: f32 | r: &mut f32) -> f32; + nextafter(x: f64, y: f64) -> f64; + nextafterf(x: f32, y: f32) -> f32; + pow(x: f64, y: f64) -> f64; + powf(x: f32, y: f32) -> f32; + remainder(x: f64, y: f64) -> f64; + remainderf(x: f32, y: f32) -> f32; + remquo(x: f64, y: f64 | q: &mut c_int) -> f64; + remquof(x: f32, y: f32 | q: &mut c_int) -> f32; + rint(x: f64) -> f64; + rintf(x: f32) -> f32; + rintf128(x: f128) -> f128; + rintf16(x: f16) -> f16; + round(x: f64) -> f64; + roundf(x: f32) -> f32; + scalbn(x: f64, y: c_int) -> f64; + scalbnf(x: f32, y: c_int) -> f32; + sin(x: f64) -> f64; + sinf(x: f32) -> f32; + sinh(x: f64) -> f64; + sinhf(x: f32) -> f32; + sqrt(x: f64) -> f64; + sqrtf(x: f32) -> f32; + tan(x: f64) -> f64; + tanf(x: f32) -> f32; + tanh(x: f64) -> f64; + tanhf(x: f32) -> f32; + tgamma(x: f64) -> f64; + tgammaf(x: f32) -> f32; + trunc(x: f64) -> f64; + truncf(x: f32) -> f32; + truncf128(x: f128) -> f128; + truncf16(x: f16) -> f16; + y0(x: f64) -> f64; + y0f(x: f32) -> f32; + y1(x: f64) -> f64; + y1f(x: f32) -> f32; + yn(x: c_int, y: f64) -> f64; + ynf(x: c_int, y: f32) -> f32; +} + +/* sincos has no direct return type, not worth handling in the macro */ + +#[no_mangle] +extern "C" fn sincos(x: f64, s: &mut f64, c: &mut f64) { + (*s, *c) = libm::sincos(x); +} + +#[no_mangle] +extern "C" fn sincosf(x: f32, s: &mut f32, c: &mut f32) { + (*s, *c) = libm::sincosf(x); +} + +#[panic_handler] +fn panic(_info: &core::panic::PanicInfo) -> ! 
{ + loop {} +} diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index a54984c93..049f573cc 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: MIT */ /* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */ -use super::super::support::{DInt, FpResult, HInt, IntTy, Round, Status}; +use super::support::{DInt, FpResult, HInt, IntTy, Round, Status}; use super::{CastFrom, CastInto, Float, Int, MinInt}; /// Fused multiply add (f64) diff --git a/libm/src/math/fma_wide.rs b/libm/src/math/fma_wide.rs index a8c1a5488..d0cf33baf 100644 --- a/libm/src/math/fma_wide.rs +++ b/libm/src/math/fma_wide.rs @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: MIT */ /* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. */ -use super::super::support::{FpResult, IntTy, Round, Status}; +use super::support::{FpResult, IntTy, Round, Status}; use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt}; // Placeholder so we can have `fmaf16` in the `Float` trait. From ad9a84a9f15b2a476311a07c33e50745851b41de Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 15 Jan 2025 05:27:14 +0000 Subject: [PATCH 1209/1459] Eliminate the use of `public_test_dep!` for a third time Replace `public_test_dep!` by placing optionally public items into new modules, then controlling what is exported with the `public-test-deps` feature. This is nicer for automatic formatting and diagnostics. This is a reland of 2e2a9255 ("Eliminate the use of `public_test_dep!`"), which was reverted in 47e50fd2 ('Revert "Eliminate the use of..."') due to a bug exposed at [1], reapplied in d4abaf4efa because the issue should have been fixed in [2], then reverted again in f6eef07f53 because [2] did not actually fix the issue. [3] has landed in rust-lang/rust since then, which should resolve the last problem remaining after [2]. So, apply this change for what is hopefully the final time. 
[1]: https://github.com/rust-lang/rust/pull/128691 [2]: https://github.com/rust-lang/rust/pull/135278 [3]: https://github.com/rust-lang/rust/pull/135501 --- src/float/mod.rs | 195 +---------- src/float/traits.rs | 189 +++++++++++ src/int/leading_zeros.rs | 227 ++++++------- src/int/mod.rs | 424 +----------------------- src/int/specialized_div_rem/delegate.rs | 4 +- src/int/trailing_zeros.rs | 69 ++-- src/int/traits.rs | 411 +++++++++++++++++++++++ src/macros.rs | 16 - 8 files changed, 763 insertions(+), 772 deletions(-) create mode 100644 src/float/traits.rs create mode 100644 src/int/traits.rs diff --git a/src/float/mod.rs b/src/float/mod.rs index 6ee55950e..41b308626 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -1,7 +1,3 @@ -use core::ops; - -use crate::int::{DInt, Int, MinInt}; - pub mod add; pub mod cmp; pub mod conv; @@ -10,192 +6,11 @@ pub mod extend; pub mod mul; pub mod pow; pub mod sub; +pub(crate) mod traits; pub mod trunc; -/// Wrapper to extract the integer type half of the float's size -pub(crate) type HalfRep = <::Int as DInt>::H; - -public_test_dep! { -/// Trait for some basic operations on floats -#[allow(dead_code)] -pub(crate) trait Float: - Copy - + core::fmt::Debug - + PartialEq - + PartialOrd - + ops::AddAssign - + ops::MulAssign - + ops::Add - + ops::Sub - + ops::Div - + ops::Rem -{ - /// A uint of the same width as the float - type Int: Int; - - /// A int of the same width as the float - type SignedInt: Int + MinInt; - - /// An int capable of containing the exponent bits plus a sign bit. This is signed. - type ExpInt: Int; - - const ZERO: Self; - const ONE: Self; - - /// The bitwidth of the float type. - const BITS: u32; - - /// The bitwidth of the significand. - const SIG_BITS: u32; - - /// The bitwidth of the exponent. - const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1; - - /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite - /// representation. 
- /// - /// This is in the rightmost position, use `EXP_MASK` for the shifted value. - const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1; - - /// The exponent bias value. - const EXP_BIAS: u32 = Self::EXP_SAT >> 1; - - /// A mask for the sign bit. - const SIGN_MASK: Self::Int; - - /// A mask for the significand. - const SIG_MASK: Self::Int; - - /// The implicit bit of the float format. - const IMPLICIT_BIT: Self::Int; - - /// A mask for the exponent. - const EXP_MASK: Self::Int; - - /// Returns `self` transmuted to `Self::Int` - fn to_bits(self) -> Self::Int; - - /// Returns `self` transmuted to `Self::SignedInt` - fn to_bits_signed(self) -> Self::SignedInt; - - /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be - /// represented in multiple different ways. This method returns `true` if two NaNs are - /// compared. - fn eq_repr(self, rhs: Self) -> bool; - - /// Returns true if the sign is negative - fn is_sign_negative(self) -> bool; - - /// Returns the exponent, not adjusting for bias. - fn exp(self) -> Self::ExpInt; - - /// Returns the significand with no implicit bit (or the "fractional" part) - fn frac(self) -> Self::Int; - - /// Returns the significand with implicit bit - fn imp_frac(self) -> Self::Int; - - /// Returns a `Self::Int` transmuted back to `Self` - fn from_bits(a: Self::Int) -> Self; - - /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. - fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self; - - fn abs(self) -> Self { - let abs_mask = !Self::SIGN_MASK ; - Self::from_bits(self.to_bits() & abs_mask) - } - - /// Returns (normalized exponent, normalized significand) - fn normalize(significand: Self::Int) -> (i32, Self::Int); - - /// Returns if `self` is subnormal - fn is_subnormal(self) -> bool; -} -} - -macro_rules! 
float_impl { - ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { - impl Float for $ty { - type Int = $ity; - type SignedInt = $sity; - type ExpInt = $expty; - - const ZERO: Self = 0.0; - const ONE: Self = 1.0; - - const BITS: u32 = $bits; - const SIG_BITS: u32 = $significand_bits; - - const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); - const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1; - const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS; - const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK); - - fn to_bits(self) -> Self::Int { - self.to_bits() - } - fn to_bits_signed(self) -> Self::SignedInt { - self.to_bits() as Self::SignedInt - } - fn eq_repr(self, rhs: Self) -> bool { - #[cfg(feature = "mangled-names")] - fn is_nan(x: $ty) -> bool { - // When using mangled-names, the "real" compiler-builtins might not have the - // necessary builtin (__unordtf2) to test whether `f128` is NaN. - // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin - // x is NaN if all the bits of the exponent are set and the significand is non-0 - x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0 - } - #[cfg(not(feature = "mangled-names"))] - fn is_nan(x: $ty) -> bool { - x.is_nan() - } - if is_nan(self) && is_nan(rhs) { - true - } else { - self.to_bits() == rhs.to_bits() - } - } - fn is_sign_negative(self) -> bool { - self.is_sign_negative() - } - fn exp(self) -> Self::ExpInt { - ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt - } - fn frac(self) -> Self::Int { - self.to_bits() & Self::SIG_MASK - } - fn imp_frac(self) -> Self::Int { - self.frac() | Self::IMPLICIT_BIT - } - fn from_bits(a: Self::Int) -> Self { - Self::from_bits(a) - } - fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self { - Self::from_bits( - ((negative as Self::Int) << (Self::BITS - 1)) - | ((exponent << Self::SIG_BITS) & Self::EXP_MASK) - | (significand & 
Self::SIG_MASK), - ) - } - fn normalize(significand: Self::Int) -> (i32, Self::Int) { - let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); - ( - 1i32.wrapping_sub(shift as i32), - significand << shift as Self::Int, - ) - } - fn is_subnormal(self) -> bool { - (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO - } - } - }; -} +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use traits::{Float, HalfRep}; -#[cfg(f16_enabled)] -float_impl!(f16, u16, i16, i8, 16, 10); -float_impl!(f32, u32, i32, i16, 32, 23); -float_impl!(f64, u64, i64, i16, 64, 52); -#[cfg(f128_enabled)] -float_impl!(f128, u128, i128, i16, 128, 112); +#[cfg(feature = "public-test-deps")] +pub use traits::{Float, HalfRep}; diff --git a/src/float/traits.rs b/src/float/traits.rs new file mode 100644 index 000000000..8ccaa7bcb --- /dev/null +++ b/src/float/traits.rs @@ -0,0 +1,189 @@ +use core::ops; + +use crate::int::{DInt, Int, MinInt}; + +/// Wrapper to extract the integer type half of the float's size +pub type HalfRep = <::Int as DInt>::H; + +/// Trait for some basic operations on floats +#[allow(dead_code)] +pub trait Float: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::MulAssign + + ops::Add + + ops::Sub + + ops::Div + + ops::Rem +{ + /// A uint of the same width as the float + type Int: Int; + + /// A int of the same width as the float + type SignedInt: Int + MinInt; + + /// An int capable of containing the exponent bits plus a sign bit. This is signed. + type ExpInt: Int; + + const ZERO: Self; + const ONE: Self; + + /// The bitwidth of the float type. + const BITS: u32; + + /// The bitwidth of the significand. + const SIG_BITS: u32; + + /// The bitwidth of the exponent. + const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1; + + /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite + /// representation. + /// + /// This is in the rightmost position, use `EXP_MASK` for the shifted value. 
+ const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1; + + /// The exponent bias value. + const EXP_BIAS: u32 = Self::EXP_SAT >> 1; + + /// A mask for the sign bit. + const SIGN_MASK: Self::Int; + + /// A mask for the significand. + const SIG_MASK: Self::Int; + + /// The implicit bit of the float format. + const IMPLICIT_BIT: Self::Int; + + /// A mask for the exponent. + const EXP_MASK: Self::Int; + + /// Returns `self` transmuted to `Self::Int` + fn to_bits(self) -> Self::Int; + + /// Returns `self` transmuted to `Self::SignedInt` + fn to_bits_signed(self) -> Self::SignedInt; + + /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be + /// represented in multiple different ways. This method returns `true` if two NaNs are + /// compared. + fn eq_repr(self, rhs: Self) -> bool; + + /// Returns true if the sign is negative + fn is_sign_negative(self) -> bool; + + /// Returns the exponent, not adjusting for bias. + fn exp(self) -> Self::ExpInt; + + /// Returns the significand with no implicit bit (or the "fractional" part) + fn frac(self) -> Self::Int; + + /// Returns the significand with implicit bit + fn imp_frac(self) -> Self::Int; + + /// Returns a `Self::Int` transmuted back to `Self` + fn from_bits(a: Self::Int) -> Self; + + /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. + fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self; + + fn abs(self) -> Self { + let abs_mask = !Self::SIGN_MASK; + Self::from_bits(self.to_bits() & abs_mask) + } + + /// Returns (normalized exponent, normalized significand) + fn normalize(significand: Self::Int) -> (i32, Self::Int); + + /// Returns if `self` is subnormal + fn is_subnormal(self) -> bool; +} + +macro_rules! 
float_impl { + ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { + impl Float for $ty { + type Int = $ity; + type SignedInt = $sity; + type ExpInt = $expty; + + const ZERO: Self = 0.0; + const ONE: Self = 1.0; + + const BITS: u32 = $bits; + const SIG_BITS: u32 = $significand_bits; + + const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); + const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1; + const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS; + const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK); + + fn to_bits(self) -> Self::Int { + self.to_bits() + } + fn to_bits_signed(self) -> Self::SignedInt { + self.to_bits() as Self::SignedInt + } + fn eq_repr(self, rhs: Self) -> bool { + #[cfg(feature = "mangled-names")] + fn is_nan(x: $ty) -> bool { + // When using mangled-names, the "real" compiler-builtins might not have the + // necessary builtin (__unordtf2) to test whether `f128` is NaN. + // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin + // x is NaN if all the bits of the exponent are set and the significand is non-0 + x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0 + } + #[cfg(not(feature = "mangled-names"))] + fn is_nan(x: $ty) -> bool { + x.is_nan() + } + if is_nan(self) && is_nan(rhs) { + true + } else { + self.to_bits() == rhs.to_bits() + } + } + fn is_sign_negative(self) -> bool { + self.is_sign_negative() + } + fn exp(self) -> Self::ExpInt { + ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt + } + fn frac(self) -> Self::Int { + self.to_bits() & Self::SIG_MASK + } + fn imp_frac(self) -> Self::Int { + self.frac() | Self::IMPLICIT_BIT + } + fn from_bits(a: Self::Int) -> Self { + Self::from_bits(a) + } + fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self { + Self::from_bits( + ((negative as Self::Int) << (Self::BITS - 1)) + | ((exponent << Self::SIG_BITS) & Self::EXP_MASK) + | (significand & 
Self::SIG_MASK), + ) + } + fn normalize(significand: Self::Int) -> (i32, Self::Int) { + let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); + ( + 1i32.wrapping_sub(shift as i32), + significand << shift as Self::Int, + ) + } + fn is_subnormal(self) -> bool { + (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO + } + } + }; +} + +#[cfg(f16_enabled)] +float_impl!(f16, u16, i16, i8, 16, 10); +float_impl!(f32, u32, i32, i16, 32, 23); +float_impl!(f64, u64, i64, i16, 64, 52); +#[cfg(f128_enabled)] +float_impl!(f128, u128, i128, i16, 128, 112); diff --git a/src/int/leading_zeros.rs b/src/int/leading_zeros.rs index 1fee9fcf5..ba735aa74 100644 --- a/src/int/leading_zeros.rs +++ b/src/int/leading_zeros.rs @@ -3,135 +3,138 @@ // adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`. // Compilers will insert the check for zero in cases where it is needed. -use crate::int::{CastInto, Int}; +#[cfg(feature = "public-test-deps")] +pub use implementation::{leading_zeros_default, leading_zeros_riscv}; +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use implementation::{leading_zeros_default, leading_zeros_riscv}; -public_test_dep! { -/// Returns the number of leading binary zeros in `x`. -#[allow(dead_code)] -pub(crate) fn leading_zeros_default>(x: T) -> usize { - // The basic idea is to test if the higher bits of `x` are zero and bisect the number - // of leading zeros. It is possible for all branches of the bisection to use the same - // code path by conditionally shifting the higher parts down to let the next bisection - // step work on the higher or lower parts of `x`. Instead of starting with `z == 0` - // and adding to the number of zeros, it is slightly faster to start with - // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros, - // because it simplifies the final bisection step. 
- let mut x = x; - // the number of potential leading zeros - let mut z = T::BITS as usize; - // a temporary - let mut t: T; +mod implementation { + use crate::int::{CastInto, Int}; - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { - t = x >> 32; + /// Returns the number of leading binary zeros in `x`. + #[allow(dead_code)] + pub fn leading_zeros_default>(x: T) -> usize { + // The basic idea is to test if the higher bits of `x` are zero and bisect the number + // of leading zeros. It is possible for all branches of the bisection to use the same + // code path by conditionally shifting the higher parts down to let the next bisection + // step work on the higher or lower parts of `x`. Instead of starting with `z == 0` + // and adding to the number of zeros, it is slightly faster to start with + // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros, + // because it simplifies the final bisection step. + let mut x = x; + // the number of potential leading zeros + let mut z = T::BITS as usize; + // a temporary + let mut t: T; + + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + t = x >> 32; + if t != T::ZERO { + z -= 32; + x = t; + } + } + if T::BITS >= 32 { + t = x >> 16; + if t != T::ZERO { + z -= 16; + x = t; + } + } + const { assert!(T::BITS >= 16) }; + t = x >> 8; if t != T::ZERO { - z -= 32; + z -= 8; x = t; } - } - if T::BITS >= 32 { - t = x >> 16; + t = x >> 4; if t != T::ZERO { - z -= 16; + z -= 4; x = t; } - } - const { assert!(T::BITS >= 16) }; - t = x >> 8; - if t != T::ZERO { - z -= 8; - x = t; - } - t = x >> 4; - if t != T::ZERO { - z -= 4; - x = t; - } - t = x >> 2; - if t != T::ZERO { - z -= 2; - x = t; - } - // the last two bisections are combined into one conditional - t = x >> 1; - if t != T::ZERO { - z - 2 - } else { - z - x.cast() - } + t = x >> 2; + if t != T::ZERO { + z -= 2; + x = t; + } + // the last two bisections are combined into one conditional + t = x >> 1; + if t != T::ZERO { + z - 2 + } else { + 
z - x.cast() + } - // We could potentially save a few cycles by using the LUT trick from - // "https://embeddedgurus.com/state-space/2014/09/ - // fast-deterministic-and-portable-counting-leading-zeros/". - // However, 256 bytes for a LUT is too large for embedded use cases. We could remove - // the last 3 bisections and use this 16 byte LUT for the rest of the work: - //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]; - //z -= LUT[x] as usize; - //z - // However, it ends up generating about the same number of instructions. When benchmarked - // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO - // execution effects. Changing to using a LUT and branching is risky for smaller cores. -} -} + // We could potentially save a few cycles by using the LUT trick from + // "https://embeddedgurus.com/state-space/2014/09/ + // fast-deterministic-and-portable-counting-leading-zeros/". + // However, 256 bytes for a LUT is too large for embedded use cases. We could remove + // the last 3 bisections and use this 16 byte LUT for the rest of the work: + //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]; + //z -= LUT[x] as usize; + //z + // However, it ends up generating about the same number of instructions. When benchmarked + // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO + // execution effects. Changing to using a LUT and branching is risky for smaller cores. + } -// The above method does not compile well on RISC-V (because of the lack of predicated -// instructions), producing code with many branches or using an excessively long -// branchless solution. This method takes advantage of the set-if-less-than instruction on -// RISC-V that allows `(x >= power-of-two) as usize` to be branchless. 
+ // The above method does not compile well on RISC-V (because of the lack of predicated + // instructions), producing code with many branches or using an excessively long + // branchless solution. This method takes advantage of the set-if-less-than instruction on + // RISC-V that allows `(x >= power-of-two) as usize` to be branchless. -public_test_dep! { -/// Returns the number of leading binary zeros in `x`. -#[allow(dead_code)] -pub(crate) fn leading_zeros_riscv>(x: T) -> usize { - let mut x = x; - // the number of potential leading zeros - let mut z = T::BITS; - // a temporary - let mut t: u32; + /// Returns the number of leading binary zeros in `x`. + #[allow(dead_code)] + pub fn leading_zeros_riscv>(x: T) -> usize { + let mut x = x; + // the number of potential leading zeros + let mut z = T::BITS; + // a temporary + let mut t: u32; - // RISC-V does not have a set-if-greater-than-or-equal instruction and - // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is - // still the most optimal method. A conditional set can only be turned into a single - // immediate instruction if `x` is compared with an immediate `imm` (that can fit into - // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the - // right). If we try to save an instruction by using `x < imm` for each bisection, we - // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, - // but the immediate will never fit into 12 bits and never save an instruction. - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { - // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise - // `t` is set to 0. - t = ((x >= (T::ONE << 32)) as u32) << 5; - // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the - // next step to process. 
+ // RISC-V does not have a set-if-greater-than-or-equal instruction and + // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is + // still the most optimal method. A conditional set can only be turned into a single + // immediate instruction if `x` is compared with an immediate `imm` (that can fit into + // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the + // right). If we try to save an instruction by using `x < imm` for each bisection, we + // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, + // but the immediate will never fit into 12 bits and never save an instruction. + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise + // `t` is set to 0. + t = ((x >= (T::ONE << 32)) as u32) << 5; + // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the + // next step to process. + x >>= t; + // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential + // leading zeros + z -= t; + } + if T::BITS >= 32 { + t = ((x >= (T::ONE << 16)) as u32) << 4; + x >>= t; + z -= t; + } + const { assert!(T::BITS >= 16) }; + t = ((x >= (T::ONE << 8)) as u32) << 3; x >>= t; - // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential - // leading zeros z -= t; - } - if T::BITS >= 32 { - t = ((x >= (T::ONE << 16)) as u32) << 4; + t = ((x >= (T::ONE << 4)) as u32) << 2; + x >>= t; + z -= t; + t = ((x >= (T::ONE << 2)) as u32) << 1; x >>= t; z -= t; + t = (x >= (T::ONE << 1)) as u32; + x >>= t; + z -= t; + // All bits except the LSB are guaranteed to be zero for this final bisection step. + // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. 
+ z as usize - x.cast() } - const { assert!(T::BITS >= 16) }; - t = ((x >= (T::ONE << 8)) as u32) << 3; - x >>= t; - z -= t; - t = ((x >= (T::ONE << 4)) as u32) << 2; - x >>= t; - z -= t; - t = ((x >= (T::ONE << 2)) as u32) << 1; - x >>= t; - z -= t; - t = (x >= (T::ONE << 1)) as u32; - x >>= t; - z -= t; - // All bits except the LSB are guaranteed to be zero for this final bisection step. - // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. - z as usize - x.cast() -} } intrinsics! { diff --git a/src/int/mod.rs b/src/int/mod.rs index c0d5a6715..1f1be711b 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -1,5 +1,3 @@ -use core::ops; - mod specialized_div_rem; pub mod addsub; @@ -10,425 +8,13 @@ pub mod mul; pub mod sdiv; pub mod shift; pub mod trailing_zeros; +mod traits; pub mod udiv; pub use big::{i256, u256}; -public_test_dep! { -/// Minimal integer implementations needed on all integer types, including wide integers. -#[allow(dead_code)] -pub(crate) trait MinInt: Copy - + core::fmt::Debug - + ops::BitOr - + ops::Not - + ops::Shl -{ - - /// Type with the same width but other signedness - type OtherSign: MinInt; - /// Unsigned version of Self - type UnsignedInt: MinInt; - - /// If `Self` is a signed integer - const SIGNED: bool; - - /// The bitwidth of the int type - const BITS: u32; - - const ZERO: Self; - const ONE: Self; - const MIN: Self; - const MAX: Self; -} -} - -public_test_dep! { -/// Trait for some basic operations on integers -#[allow(dead_code)] -pub(crate) trait Int: MinInt - + PartialEq - + PartialOrd - + ops::AddAssign - + ops::SubAssign - + ops::BitAndAssign - + ops::BitOrAssign - + ops::BitXorAssign - + ops::ShlAssign - + ops::ShrAssign - + ops::Add - + ops::Sub - + ops::Mul - + ops::Div - + ops::Shr - + ops::BitXor - + ops::BitAnd -{ - /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing - /// in `testcrate`. 
For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, - /// 112,119,120,125,126,127]. - const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(::BITS); - - /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. - const FUZZ_NUM: usize = { - let log2 = (::BITS - 1).count_ones() as usize; - if log2 == 3 { - // case for u8 - 6 - } else { - // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate - // boundaries. - 8 + (4 * (log2 - 4)) - } - }; - - fn unsigned(self) -> Self::UnsignedInt; - fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; - fn unsigned_abs(self) -> Self::UnsignedInt; - - fn from_bool(b: bool) -> Self; - - /// Prevents the need for excessive conversions between signed and unsigned - fn logical_shr(self, other: u32) -> Self; - - /// Absolute difference between two integers. - fn abs_diff(self, other: Self) -> Self::UnsignedInt; - - // copied from primitive integers, but put in a trait - fn is_zero(self) -> bool; - fn wrapping_neg(self) -> Self; - fn wrapping_add(self, other: Self) -> Self; - fn wrapping_mul(self, other: Self) -> Self; - fn wrapping_sub(self, other: Self) -> Self; - fn wrapping_shl(self, other: u32) -> Self; - fn wrapping_shr(self, other: u32) -> Self; - fn rotate_left(self, other: u32) -> Self; - fn overflowing_add(self, other: Self) -> (Self, bool); - fn leading_zeros(self) -> u32; - fn ilog2(self) -> u32; -} -} - -pub(crate) const fn make_fuzz_lengths(bits: u32) -> [u8; 20] { - let mut v = [0u8; 20]; - v[0] = 0; - v[1] = 1; - v[2] = 2; // important for parity and the iX::MIN case when reversed - let mut i = 3; - - // No need for any more until the byte boundary, because there should be no algorithms - // that are sensitive to anything not next to byte boundaries after 2. We also scale - // in powers of two, which is important to prevent u128 corner tests from getting too - // big. 
- let mut l = 8; - loop { - if l >= ((bits / 2) as u8) { - break; - } - // get both sides of the byte boundary - v[i] = l - 1; - i += 1; - v[i] = l; - i += 1; - l *= 2; - } - - if bits != 8 { - // add the lower side of the middle boundary - v[i] = ((bits / 2) - 1) as u8; - i += 1; - } - - // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS - // boundary because of algorithms that split the high part up. We reverse the scaling - // as we go to Self::BITS. - let mid = i; - let mut j = 1; - loop { - v[i] = (bits as u8) - (v[mid - j]) - 1; - if j == mid { - break; - } - i += 1; - j += 1; - } - v -} - -macro_rules! int_impl_common { - ($ty:ty) => { - fn from_bool(b: bool) -> Self { - b as $ty - } - - fn logical_shr(self, other: u32) -> Self { - Self::from_unsigned(self.unsigned().wrapping_shr(other)) - } - - fn is_zero(self) -> bool { - self == Self::ZERO - } - - fn wrapping_neg(self) -> Self { - ::wrapping_neg(self) - } - - fn wrapping_add(self, other: Self) -> Self { - ::wrapping_add(self, other) - } - - fn wrapping_mul(self, other: Self) -> Self { - ::wrapping_mul(self, other) - } - fn wrapping_sub(self, other: Self) -> Self { - ::wrapping_sub(self, other) - } - - fn wrapping_shl(self, other: u32) -> Self { - ::wrapping_shl(self, other) - } - - fn wrapping_shr(self, other: u32) -> Self { - ::wrapping_shr(self, other) - } - - fn rotate_left(self, other: u32) -> Self { - ::rotate_left(self, other) - } - - fn overflowing_add(self, other: Self) -> (Self, bool) { - ::overflowing_add(self, other) - } - - fn leading_zeros(self) -> u32 { - ::leading_zeros(self) - } - - fn ilog2(self) -> u32 { - ::ilog2(self) - } - }; -} - -macro_rules! 
int_impl { - ($ity:ty, $uty:ty) => { - impl MinInt for $uty { - type OtherSign = $ity; - type UnsignedInt = $uty; - - const BITS: u32 = ::ZERO.count_zeros(); - const SIGNED: bool = Self::MIN != Self::ZERO; - - const ZERO: Self = 0; - const ONE: Self = 1; - const MIN: Self = ::MIN; - const MAX: Self = ::MAX; - } - - impl Int for $uty { - fn unsigned(self) -> $uty { - self - } - - // It makes writing macros easier if this is implemented for both signed and unsigned - #[allow(clippy::wrong_self_convention)] - fn from_unsigned(me: $uty) -> Self { - me - } - - fn unsigned_abs(self) -> Self { - self - } - - fn abs_diff(self, other: Self) -> Self { - self.abs_diff(other) - } - - int_impl_common!($uty); - } - - impl MinInt for $ity { - type OtherSign = $uty; - type UnsignedInt = $uty; - - const BITS: u32 = ::ZERO.count_zeros(); - const SIGNED: bool = Self::MIN != Self::ZERO; - - const ZERO: Self = 0; - const ONE: Self = 1; - const MIN: Self = ::MIN; - const MAX: Self = ::MAX; - } - - impl Int for $ity { - fn unsigned(self) -> $uty { - self as $uty - } - - fn from_unsigned(me: $uty) -> Self { - me as $ity - } - - fn unsigned_abs(self) -> Self::UnsignedInt { - self.unsigned_abs() - } - - fn abs_diff(self, other: Self) -> $uty { - self.abs_diff(other) - } - - int_impl_common!($ity); - } - }; -} - -int_impl!(isize, usize); -int_impl!(i8, u8); -int_impl!(i16, u16); -int_impl!(i32, u32); -int_impl!(i64, u64); -int_impl!(i128, u128); - -public_test_dep! { -/// Trait for integers twice the bit width of another integer. This is implemented for all -/// primitives except for `u8`, because there is not a smaller primitive. 
-pub(crate) trait DInt: MinInt { - /// Integer that is half the bit width of the integer this trait is implemented for - type H: HInt; - - /// Returns the low half of `self` - fn lo(self) -> Self::H; - /// Returns the high half of `self` - fn hi(self) -> Self::H; - /// Returns the low and high halves of `self` as a tuple - fn lo_hi(self) -> (Self::H, Self::H) { - (self.lo(), self.hi()) - } - /// Constructs an integer using lower and higher half parts - fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { - lo.zero_widen() | hi.widen_hi() - } -} -} - -public_test_dep! { -/// Trait for integers half the bit width of another integer. This is implemented for all -/// primitives except for `u128`, because it there is not a larger primitive. -pub(crate) trait HInt: Int { - /// Integer that is double the bit width of the integer this trait is implemented for - type D: DInt + MinInt; - - // NB: some of the below methods could have default implementations (e.g. `widen_hi`), but for - // unknown reasons this can cause infinite recursion when optimizations are disabled. See - // for context. - - /// Widens (using default extension) the integer to have double bit width - fn widen(self) -> Self::D; - /// Widens (zero extension only) the integer to have double bit width. This is needed to get - /// around problems with associated type bounds (such as `Int`) being unstable - fn zero_widen(self) -> Self::D; - /// Widens the integer to have double bit width and shifts the integer into the higher bits - fn widen_hi(self) -> Self::D; - /// Widening multiplication with zero widening. This cannot overflow. - fn zero_widen_mul(self, rhs: Self) -> Self::D; - /// Widening multiplication. This cannot overflow. - fn widen_mul(self, rhs: Self) -> Self::D; -} -} - -macro_rules! 
impl_d_int { - ($($X:ident $D:ident),*) => { - $( - impl DInt for $D { - type H = $X; - - fn lo(self) -> Self::H { - self as $X - } - fn hi(self) -> Self::H { - (self >> <$X as MinInt>::BITS) as $X - } - } - )* - }; -} - -macro_rules! impl_h_int { - ($($H:ident $uH:ident $X:ident),*) => { - $( - impl HInt for $H { - type D = $X; - - fn widen(self) -> Self::D { - self as $X - } - fn zero_widen(self) -> Self::D { - (self as $uH) as $X - } - fn zero_widen_mul(self, rhs: Self) -> Self::D { - self.zero_widen().wrapping_mul(rhs.zero_widen()) - } - fn widen_mul(self, rhs: Self) -> Self::D { - self.widen().wrapping_mul(rhs.widen()) - } - fn widen_hi(self) -> Self::D { - (self as $X) << ::BITS - } - } - )* - }; -} - -impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); -impl_h_int!( - u8 u8 u16, - u16 u16 u32, - u32 u32 u64, - u64 u64 u128, - i8 u8 i16, - i16 u16 i32, - i32 u32 i64, - i64 u64 i128 -); - -public_test_dep! { -/// Trait to express (possibly lossy) casting of integers -pub(crate) trait CastInto: Copy { - fn cast(self) -> T; -} - -pub(crate) trait CastFrom:Copy { - fn cast_from(value: T) -> Self; -} -} - -impl + Copy> CastFrom for T { - fn cast_from(value: U) -> Self { - value.cast() - } -} - -macro_rules! 
cast_into { - ($ty:ty) => { - cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); - }; - ($ty:ty; $($into:ty),*) => {$( - impl CastInto<$into> for $ty { - fn cast(self) -> $into { - self as $into - } - } - )*}; -} +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; -cast_into!(usize); -cast_into!(isize); -cast_into!(u8); -cast_into!(i8); -cast_into!(u16); -cast_into!(i16); -cast_into!(u32); -cast_into!(i32); -cast_into!(u64); -cast_into!(i64); -cast_into!(u128); -cast_into!(i128); +#[cfg(feature = "public-test-deps")] +pub use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; diff --git a/src/int/specialized_div_rem/delegate.rs b/src/int/specialized_div_rem/delegate.rs index 330c6e4f8..f5c6e5023 100644 --- a/src/int/specialized_div_rem/delegate.rs +++ b/src/int/specialized_div_rem/delegate.rs @@ -185,7 +185,6 @@ macro_rules! impl_delegate { }; } -public_test_dep! { /// Returns `n / d` and sets `*rem = n % d`. /// /// This specialization exists because: @@ -195,7 +194,7 @@ public_test_dep! { /// delegate algorithm strategy the only reasonably fast way to perform `u128` division. // used on SPARC #[allow(dead_code)] -pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { +pub fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { use super::*; let duo_lo = duo as u64; let duo_hi = (duo >> 64) as u64; @@ -316,4 +315,3 @@ pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { } } } -} diff --git a/src/int/trailing_zeros.rs b/src/int/trailing_zeros.rs index cea366b07..dbc0cce9f 100644 --- a/src/int/trailing_zeros.rs +++ b/src/int/trailing_zeros.rs @@ -1,44 +1,49 @@ -use crate::int::{CastInto, Int}; +#[cfg(feature = "public-test-deps")] +pub use implementation::trailing_zeros; +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use implementation::trailing_zeros; -public_test_dep! 
{ -/// Returns number of trailing binary zeros in `x`. -#[allow(dead_code)] -pub(crate) fn trailing_zeros + CastInto + CastInto>(x: T) -> usize { - let mut x = x; - let mut r: u32 = 0; - let mut t: u32; +mod implementation { + use crate::int::{CastInto, Int}; - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { - r += ((CastInto::::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0 - x >>= r; // remove 32 zero bits - } + /// Returns number of trailing binary zeros in `x`. + #[allow(dead_code)] + pub fn trailing_zeros + CastInto + CastInto>(x: T) -> usize { + let mut x = x; + let mut r: u32 = 0; + let mut t: u32; - if T::BITS >= 32 { - t = ((CastInto::::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0 - r += t; - x >>= t; // x = [0 - 0xFFFF] + higher garbage bits - } + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + r += ((CastInto::::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0 + x >>= r; // remove 32 zero bits + } - const { assert!(T::BITS >= 16) }; - t = ((CastInto::::cast(x) == 0) as u32) << 3; - x >>= t; // x = [0 - 0xFF] + higher garbage bits - r += t; + if T::BITS >= 32 { + t = ((CastInto::::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0 + r += t; + x >>= t; // x = [0 - 0xFFFF] + higher garbage bits + } - let mut x: u8 = x.cast(); + const { assert!(T::BITS >= 16) }; + t = ((CastInto::::cast(x) == 0) as u32) << 3; + x >>= t; // x = [0 - 0xFF] + higher garbage bits + r += t; - t = (((x & 0x0F) == 0) as u32) << 2; - x >>= t; // x = [0 - 0xF] + higher garbage bits - r += t; + let mut x: u8 = x.cast(); - t = (((x & 0x3) == 0) as u32) << 1; - x >>= t; // x = [0 - 0x3] + higher garbage bits - r += t; + t = (((x & 0x0F) == 0) as u32) << 2; + x >>= t; // x = [0 - 0xF] + higher garbage bits + r += t; - x &= 3; + t = (((x & 0x3) == 0) as u32) << 1; + x >>= t; // x = [0 - 0x3] + higher garbage bits + r += t; - r as usize + ((2 - (x >> 1) as usize) & (((x & 1) 
== 0) as usize).wrapping_neg()) -} + x &= 3; + + r as usize + ((2 - (x >> 1) as usize) & (((x & 1) == 0) as usize).wrapping_neg()) + } } intrinsics! { diff --git a/src/int/traits.rs b/src/int/traits.rs new file mode 100644 index 000000000..9b079e2aa --- /dev/null +++ b/src/int/traits.rs @@ -0,0 +1,411 @@ +use core::ops; + +/// Minimal integer implementations needed on all integer types, including wide integers. +#[allow(dead_code)] +pub trait MinInt: + Copy + + core::fmt::Debug + + ops::BitOr + + ops::Not + + ops::Shl +{ + /// Type with the same width but other signedness + type OtherSign: MinInt; + /// Unsigned version of Self + type UnsignedInt: MinInt; + + /// If `Self` is a signed integer + const SIGNED: bool; + + /// The bitwidth of the int type + const BITS: u32; + + const ZERO: Self; + const ONE: Self; + const MIN: Self; + const MAX: Self; +} + +/// Trait for some basic operations on integers +#[allow(dead_code)] +pub trait Int: + MinInt + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::SubAssign + + ops::BitAndAssign + + ops::BitOrAssign + + ops::BitXorAssign + + ops::ShlAssign + + ops::ShrAssign + + ops::Add + + ops::Sub + + ops::Mul + + ops::Div + + ops::Shr + + ops::BitXor + + ops::BitAnd +{ + /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing + /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, + /// 112,119,120,125,126,127]. + const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(::BITS); + + /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. + const FUZZ_NUM: usize = { + let log2 = (::BITS - 1).count_ones() as usize; + if log2 == 3 { + // case for u8 + 6 + } else { + // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate + // boundaries. 
+ 8 + (4 * (log2 - 4)) + } + }; + + fn unsigned(self) -> Self::UnsignedInt; + fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; + fn unsigned_abs(self) -> Self::UnsignedInt; + + fn from_bool(b: bool) -> Self; + + /// Prevents the need for excessive conversions between signed and unsigned + fn logical_shr(self, other: u32) -> Self; + + /// Absolute difference between two integers. + fn abs_diff(self, other: Self) -> Self::UnsignedInt; + + // copied from primitive integers, but put in a trait + fn is_zero(self) -> bool; + fn wrapping_neg(self) -> Self; + fn wrapping_add(self, other: Self) -> Self; + fn wrapping_mul(self, other: Self) -> Self; + fn wrapping_sub(self, other: Self) -> Self; + fn wrapping_shl(self, other: u32) -> Self; + fn wrapping_shr(self, other: u32) -> Self; + fn rotate_left(self, other: u32) -> Self; + fn overflowing_add(self, other: Self) -> (Self, bool); + fn leading_zeros(self) -> u32; + fn ilog2(self) -> u32; +} + +pub(crate) const fn make_fuzz_lengths(bits: u32) -> [u8; 20] { + let mut v = [0u8; 20]; + v[0] = 0; + v[1] = 1; + v[2] = 2; // important for parity and the iX::MIN case when reversed + let mut i = 3; + + // No need for any more until the byte boundary, because there should be no algorithms + // that are sensitive to anything not next to byte boundaries after 2. We also scale + // in powers of two, which is important to prevent u128 corner tests from getting too + // big. + let mut l = 8; + loop { + if l >= ((bits / 2) as u8) { + break; + } + // get both sides of the byte boundary + v[i] = l - 1; + i += 1; + v[i] = l; + i += 1; + l *= 2; + } + + if bits != 8 { + // add the lower side of the middle boundary + v[i] = ((bits / 2) - 1) as u8; + i += 1; + } + + // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS + // boundary because of algorithms that split the high part up. We reverse the scaling + // as we go to Self::BITS. 
+ let mid = i; + let mut j = 1; + loop { + v[i] = (bits as u8) - (v[mid - j]) - 1; + if j == mid { + break; + } + i += 1; + j += 1; + } + v +} + +macro_rules! int_impl_common { + ($ty:ty) => { + fn from_bool(b: bool) -> Self { + b as $ty + } + + fn logical_shr(self, other: u32) -> Self { + Self::from_unsigned(self.unsigned().wrapping_shr(other)) + } + + fn is_zero(self) -> bool { + self == Self::ZERO + } + + fn wrapping_neg(self) -> Self { + ::wrapping_neg(self) + } + + fn wrapping_add(self, other: Self) -> Self { + ::wrapping_add(self, other) + } + + fn wrapping_mul(self, other: Self) -> Self { + ::wrapping_mul(self, other) + } + fn wrapping_sub(self, other: Self) -> Self { + ::wrapping_sub(self, other) + } + + fn wrapping_shl(self, other: u32) -> Self { + ::wrapping_shl(self, other) + } + + fn wrapping_shr(self, other: u32) -> Self { + ::wrapping_shr(self, other) + } + + fn rotate_left(self, other: u32) -> Self { + ::rotate_left(self, other) + } + + fn overflowing_add(self, other: Self) -> (Self, bool) { + ::overflowing_add(self, other) + } + + fn leading_zeros(self) -> u32 { + ::leading_zeros(self) + } + + fn ilog2(self) -> u32 { + ::ilog2(self) + } + }; +} + +macro_rules! 
int_impl { + ($ity:ty, $uty:ty) => { + impl MinInt for $uty { + type OtherSign = $ity; + type UnsignedInt = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $uty { + fn unsigned(self) -> $uty { + self + } + + // It makes writing macros easier if this is implemented for both signed and unsigned + #[allow(clippy::wrong_self_convention)] + fn from_unsigned(me: $uty) -> Self { + me + } + + fn unsigned_abs(self) -> Self { + self + } + + fn abs_diff(self, other: Self) -> Self { + self.abs_diff(other) + } + + int_impl_common!($uty); + } + + impl MinInt for $ity { + type OtherSign = $uty; + type UnsignedInt = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $ity { + fn unsigned(self) -> $uty { + self as $uty + } + + fn from_unsigned(me: $uty) -> Self { + me as $ity + } + + fn unsigned_abs(self) -> Self::UnsignedInt { + self.unsigned_abs() + } + + fn abs_diff(self, other: Self) -> $uty { + self.abs_diff(other) + } + + int_impl_common!($ity); + } + }; +} + +int_impl!(isize, usize); +int_impl!(i8, u8); +int_impl!(i16, u16); +int_impl!(i32, u32); +int_impl!(i64, u64); +int_impl!(i128, u128); + +/// Trait for integers twice the bit width of another integer. This is implemented for all +/// primitives except for `u8`, because there is not a smaller primitive. 
+pub trait DInt: MinInt { + /// Integer that is half the bit width of the integer this trait is implemented for + type H: HInt; + + /// Returns the low half of `self` + fn lo(self) -> Self::H; + /// Returns the high half of `self` + fn hi(self) -> Self::H; + /// Returns the low and high halves of `self` as a tuple + fn lo_hi(self) -> (Self::H, Self::H) { + (self.lo(), self.hi()) + } + /// Constructs an integer using lower and higher half parts + fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { + lo.zero_widen() | hi.widen_hi() + } +} + +/// Trait for integers half the bit width of another integer. This is implemented for all +/// primitives except for `u128`, because it there is not a larger primitive. +pub trait HInt: Int { + /// Integer that is double the bit width of the integer this trait is implemented for + type D: DInt + MinInt; + + // NB: some of the below methods could have default implementations (e.g. `widen_hi`), but for + // unknown reasons this can cause infinite recursion when optimizations are disabled. See + // for context. + + /// Widens (using default extension) the integer to have double bit width + fn widen(self) -> Self::D; + /// Widens (zero extension only) the integer to have double bit width. This is needed to get + /// around problems with associated type bounds (such as `Int`) being unstable + fn zero_widen(self) -> Self::D; + /// Widens the integer to have double bit width and shifts the integer into the higher bits + fn widen_hi(self) -> Self::D; + /// Widening multiplication with zero widening. This cannot overflow. + fn zero_widen_mul(self, rhs: Self) -> Self::D; + /// Widening multiplication. This cannot overflow. + fn widen_mul(self, rhs: Self) -> Self::D; +} + +macro_rules! impl_d_int { + ($($X:ident $D:ident),*) => { + $( + impl DInt for $D { + type H = $X; + + fn lo(self) -> Self::H { + self as $X + } + fn hi(self) -> Self::H { + (self >> <$X as MinInt>::BITS) as $X + } + } + )* + }; +} + +macro_rules! 
impl_h_int { + ($($H:ident $uH:ident $X:ident),*) => { + $( + impl HInt for $H { + type D = $X; + + fn widen(self) -> Self::D { + self as $X + } + fn zero_widen(self) -> Self::D { + (self as $uH) as $X + } + fn zero_widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen().wrapping_mul(rhs.zero_widen()) + } + fn widen_mul(self, rhs: Self) -> Self::D { + self.widen().wrapping_mul(rhs.widen()) + } + fn widen_hi(self) -> Self::D { + (self as $X) << ::BITS + } + } + )* + }; +} + +impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); +impl_h_int!( + u8 u8 u16, + u16 u16 u32, + u32 u32 u64, + u64 u64 u128, + i8 u8 i16, + i16 u16 i32, + i32 u32 i64, + i64 u64 i128 +); + +/// Trait to express (possibly lossy) casting of integers +pub trait CastInto: Copy { + fn cast(self) -> T; +} + +pub trait CastFrom: Copy { + fn cast_from(value: T) -> Self; +} + +impl + Copy> CastFrom for T { + fn cast_from(value: U) -> Self { + value.cast() + } +} + +macro_rules! cast_into { + ($ty:ty) => { + cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); + }; + ($ty:ty; $($into:ty),*) => {$( + impl CastInto<$into> for $ty { + fn cast(self) -> $into { + self as $into + } + } + )*}; +} + +cast_into!(usize); +cast_into!(isize); +cast_into!(u8); +cast_into!(i8); +cast_into!(u16); +cast_into!(i16); +cast_into!(u32); +cast_into!(i32); +cast_into!(u64); +cast_into!(i64); +cast_into!(u128); +cast_into!(i128); diff --git a/src/macros.rs b/src/macros.rs index 88b54e82c..5d9f58197 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -1,21 +1,5 @@ //! Macros shared throughout the compiler-builtins implementation -/// Changes the visibility to `pub` if feature "public-test-deps" is set -#[cfg(not(feature = "public-test-deps"))] -macro_rules! 
public_test_dep { - ($(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*) => { - $(#[$($meta)*])* pub(crate) $ident $($tokens)* - }; -} - -/// Changes the visibility to `pub` if feature "public-test-deps" is set -#[cfg(feature = "public-test-deps")] -macro_rules! public_test_dep { - {$(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*} => { - $(#[$($meta)*])* pub $ident $($tokens)* - }; -} - /// The "main macro" used for defining intrinsics. /// /// The compiler-builtins library is super platform-specific with tons of crazy From 238b0550adfe94b97db827e65a6634434699f6ea Mon Sep 17 00:00:00 2001 From: Martin Nordholts Date: Tue, 18 Feb 2025 09:47:07 +0100 Subject: [PATCH 1210/1459] Enable `f16` for MIPS It seems as if `f16` works on MIPS now according to my testing on Rust master with LLVM 20, and I was asked to create PRs with my changes. I only tested on the flavour of `mipsel-unknown-linux-gnu` hardware that happens to be available to me, so I can't say anything about other MIPS hardware, but from a casual skimming of the LLVM code ([1], [2]) it seems like `f16` should work on all MIPS hardware. So enable it for all MIPS hardware. 
[1]: https://github.com/rust-lang/llvm-project/blob/rustc/20.1-2025-02-13/llvm/lib/Target/Mips/MipsISelLowering.h#L370 [2]: https://github.com/rust-lang/llvm-project/blob/rustc/20.1-2025-02-13/llvm/lib/CodeGen/TargetLoweringBase.cpp#L1367-L1388 --- configure.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/configure.rs b/configure.rs index fa3e302ea..ff52e88da 100644 --- a/configure.rs +++ b/configure.rs @@ -78,7 +78,6 @@ pub fn configure_f16_f128(target: &Target) { "csky" => false, "hexagon" => false, "loongarch64" => false, - "mips" | "mips64" | "mips32r6" | "mips64r6" => false, "powerpc" | "powerpc64" => false, "sparc" | "sparc64" => false, "wasm32" | "wasm64" => false, From c0b46288d6c1f2d3cb58571069b1f4e505d5fb36 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 24 Feb 2025 04:06:07 +0000 Subject: [PATCH 1211/1459] Resolve monomorphization errors in `compiler-builtins` `compiler-builtins` is not allowed to call anything from `core`; however, there are a couple of cases where we do so in `libm` for debug output. Gate relevant locations behind the `compiler-builtins` Cargo feature. --- libm/Cargo.toml | 7 +++++++ .../compiler-builtins-smoke-test/Cargo.toml | 1 + .../compiler-builtins-smoke-test/src/lib.rs | 2 ++ libm/src/math/support/hex_float.rs | 16 ++++++++++++++-- libm/src/math/support/int_traits.rs | 1 + 5 files changed, 25 insertions(+), 2 deletions(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index f24f4423c..eb133dada 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -61,6 +61,13 @@ exclude = [ [dev-dependencies] no-panic = "0.1.33" + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = [ + # compiler-builtins sets this feature, but we use it in `libm` + 'cfg(feature, values("compiler-builtins"))', +] } + # The default release profile is unchanged. 
# Release mode with debug assertions diff --git a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml index 24b33645e..6ef905ea7 100644 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -22,6 +22,7 @@ unexpected_cfgs = { level = "warn", check-cfg = [ "cfg(arch_enabled)", "cfg(assert_no_panic)", "cfg(intrinsics_enabled)", + 'cfg(feature, values("compiler-builtins"))', 'cfg(feature, values("force-soft-floats"))', 'cfg(feature, values("unstable"))', 'cfg(feature, values("unstable-intrinsics"))', diff --git a/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/libm/crates/compiler-builtins-smoke-test/src/lib.rs index f9e6e75a8..e70f6d9e0 100644 --- a/libm/crates/compiler-builtins-smoke-test/src/lib.rs +++ b/libm/crates/compiler-builtins-smoke-test/src/lib.rs @@ -4,7 +4,9 @@ //! Additionally, it provides a `#[no_mangle]` C API that can be easier to inspect than the //! default `.rlib`. +#![compiler_builtins] #![feature(core_intrinsics)] +#![feature(compiler_builtins)] #![feature(f16)] #![feature(f128)] #![allow(internal_features)] diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs index 99ad8bec3..2155d5c58 100644 --- a/libm/src/math/support/hex_float.rs +++ b/libm/src/math/support/hex_float.rs @@ -246,7 +246,13 @@ fn fmt_any_hex(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result { impl fmt::LowerHex for Hexf { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt_any_hex(&self.0, f) + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + unreachable!() + } else { + fmt_any_hex(&self.0, f) + } + } } } @@ -264,7 +270,13 @@ impl fmt::LowerHex for Hexf<(F, i32)> { impl fmt::LowerHex for Hexf { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::LowerHex::fmt(&self.0, f) + cfg_if! 
{ + if #[cfg(feature = "compiler-builtins")] { + unreachable!() + } else { + fmt::LowerHex::fmt(&self.0, f) + } + } } } diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index d34797764..f19c86835 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -418,6 +418,7 @@ macro_rules! cast_into_float { ($ty:ty; $($into:ty),*) => {$( impl CastInto<$into> for $ty { fn cast(self) -> $into { + #[cfg(not(feature = "compiler-builtins"))] debug_assert_eq!(self as $into as $ty, self, "inexact float cast"); self as $into } From 33fb12bd73779101993ec846e299d67850c8abd3 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 24 Feb 2025 04:55:38 +0000 Subject: [PATCH 1212/1459] Ignore unused variables when `compiler-builtins` is set --- libm/crates/compiler-builtins-smoke-test/Cargo.toml | 6 +++--- libm/src/math/support/hex_float.rs | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml index 6ef905ea7..38a511669 100644 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -11,18 +11,18 @@ test = false bench = false [features] -default = ["arch", "unstable-float"] +default = ["arch", "compiler-builtins", "unstable-float"] # Copied from `libm`'s root `Cargo.toml`' -unstable-float = [] arch = [] +compiler-builtins = [] +unstable-float = [] [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = [ "cfg(arch_enabled)", "cfg(assert_no_panic)", "cfg(intrinsics_enabled)", - 'cfg(feature, values("compiler-builtins"))', 'cfg(feature, values("force-soft-floats"))', 'cfg(feature, values("unstable"))', 'cfg(feature, values("unstable-intrinsics"))', diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs index 2155d5c58..0ecf61695 100644 --- a/libm/src/math/support/hex_float.rs +++ 
b/libm/src/math/support/hex_float.rs @@ -248,7 +248,8 @@ impl fmt::LowerHex for Hexf { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { cfg_if! { if #[cfg(feature = "compiler-builtins")] { - unreachable!() + let _ = f; + unimplemented!() } else { fmt_any_hex(&self.0, f) } @@ -272,7 +273,8 @@ impl fmt::LowerHex for Hexf { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { cfg_if! { if #[cfg(feature = "compiler-builtins")] { - unreachable!() + let _ = f; + unimplemented!() } else { fmt::LowerHex::fmt(&self.0, f) } From b60ad3e94e95a295c0f9dabcdcf0db849d6a7a6a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 24 Feb 2025 05:45:41 +0000 Subject: [PATCH 1213/1459] Configure out remaining formatting when `compiler-builtins` is set These are still causing errors in the compiler-builtins CI. --- libm/src/math/support/hex_float.rs | 42 +++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs index 0ecf61695..be7d7607f 100644 --- a/libm/src/math/support/hex_float.rs +++ b/libm/src/math/support/hex_float.rs @@ -211,6 +211,7 @@ const fn u128_ilog2(v: u128) -> u32 { pub struct Hexf(pub F); // Adapted from https://github.com/ericseppanen/hexfloat2/blob/a5c27932f0ff/src/format.rs +#[cfg(not(feature = "compiler-builtins"))] fn fmt_any_hex(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result { if x.is_sign_negative() { write!(f, "-")?; @@ -244,6 +245,11 @@ fn fmt_any_hex(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "0x{leading}{sig:0mwidth$x}p{exponent:+}") } +#[cfg(feature = "compiler-builtins")] +fn fmt_any_hex(_x: &F, _f: &mut fmt::Formatter<'_>) -> fmt::Result { + unimplemented!() +} + impl fmt::LowerHex for Hexf { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { cfg_if! 
{ @@ -259,13 +265,27 @@ impl fmt::LowerHex for Hexf { impl fmt::LowerHex for Hexf<(F, F)> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) + } + } } } impl fmt::LowerHex for Hexf<(F, i32)> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) + } + } } } @@ -287,7 +307,14 @@ where Hexf: fmt::LowerHex, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::LowerHex::fmt(self, f) + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + fmt::LowerHex::fmt(self, f) + } + } } } @@ -296,7 +323,14 @@ where Hexf: fmt::LowerHex, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::LowerHex::fmt(self, f) + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + fmt::LowerHex::fmt(self, f) + } + } } } From 519de42a11ca7a5b8871aabf3bec8c2638787bfb Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 24 Feb 2025 06:52:48 +0000 Subject: [PATCH 1214/1459] Gate another assertion behind `compiler-builtins` This is causing link errors on Windows. --- libm/src/math/support/int_traits.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index f19c86835..491adb1f2 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -394,6 +394,7 @@ macro_rules! cast_into { fn cast(self) -> $into { // All we can really do to enforce casting rules is check the rules when in // debug mode. 
+ #[cfg(not(feature = "compiler-builtins"))] debug_assert!(<$into>::try_from(self).is_ok(), "failed cast from {self}"); self as $into } From 0a3d5c0d671fb808334fe1b0c648971a136514ff Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 23 Feb 2025 23:43:26 +0000 Subject: [PATCH 1215/1459] Update the `libm` submodule This requires privately reexporting `libm`'s `support` module at crate root, where it is expected for macros. Once `libm` is made always available, the reexport can be simplified. This delta adds a lot of routines to `f16` and `f128`: * ceil * floor * fma (f128 only) * fmax * fmin * fmod * ldexp * rint * round * scalbn * sqrt Additionally, the following new API was added for all four float types: * fmaximum * fmaximum_num * fminimum * fminimum_num * roundeven There are also some significant performance improvements for `sqrt` and `sqrtf`, as well as precision improvements for `cbrt` (both `f32` and `f64` versions of this function are now always correctly rounded). --- libm | 2 +- src/lib.rs | 15 +++++++++++++++ src/math.rs | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/libm b/libm index 8e82616f1..69219c491 160000 --- a/libm +++ b/libm @@ -1 +1 @@ -Subproject commit 8e82616f154b06cf4ee9cdb82a4f56474a403d04 +Subproject commit 69219c491ee9f05761d2068fd6d4c7c0de6faa3a diff --git a/src/lib.rs b/src/lib.rs index ffcd3586c..533878137 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ #![cfg_attr(feature = "compiler-builtins", compiler_builtins)] +#![cfg_attr(all(target_family = "wasm"), feature(wasm_numeric_instr))] #![feature(abi_unadjusted)] #![feature(asm_experimental_arch)] #![feature(cfg_target_has_atomic)] @@ -58,6 +59,20 @@ pub mod int; all(target_family = "wasm", not(target_os = "unknown")) )))] pub mod math; + +// `libm` expects its `support` module to be available in the crate root. This config can be +// cleaned up once `libm` is made always available. 
+#[cfg(not(any( + all( + target_arch = "x86", + not(target_feature = "sse2"), + not(target_os = "uefi"), + ), + unix, + all(target_family = "wasm", not(target_os = "unknown")) +)))] +use math::libm::support; + pub mod mem; #[cfg(target_arch = "arm")] diff --git a/src/math.rs b/src/math.rs index 21670f243..fef5358e3 100644 --- a/src/math.rs +++ b/src/math.rs @@ -3,7 +3,7 @@ #[allow(unused_imports)] #[allow(clippy::all)] #[path = "../libm/src/math/mod.rs"] -mod libm; +pub(crate) mod libm; #[allow(unused_macros)] macro_rules! no_mangle { From 342ce4669e61ab0104e08c728dbeefcd48c6b57f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 24 Feb 2025 07:34:00 +0000 Subject: [PATCH 1216/1459] chore: release v0.1.148 --- CHANGELOG.md | 8 ++++++++ Cargo.toml | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a3890c6ce..c1e1e73da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.148](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.147...compiler_builtins-v0.1.148) - 2025-02-24 + +### Other + +- Update the `libm` submodule +- Enable `f16` for MIPS +- Eliminate the use of `public_test_dep!` for a third time + ## [0.1.147](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.146...compiler_builtins-v0.1.147) - 2025-02-19 ### Other diff --git a/Cargo.toml b/Cargo.toml index ff1a10fc6..684a2a0b6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.147" +version = "0.1.148" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 7b628128c5809e90160d5ed19be49dbe8328833a Mon Sep 17 00:00:00 2001 From: Trevor 
Gross Date: Thu, 13 Feb 2025 03:27:00 +0000 Subject: [PATCH 1217/1459] Make a subset of `libm` symbols weakly available on all platforms 018616e78b ("Always have math functions but with `weak` linking attribute if we can") made all math symbols available on platforms that support weak linkage. This caused some unexpected regressions, however, because our less accurate and sometimes slow routines were being selected over the system `libm`, which also tends to be weak [1]. Thus, 0fab77e8d7 ("Don't include `math` for `unix` and `wasi` targets") was applied to undo these changes on many platforms. Now that some improvements have been made to `libm`, add back a subset of these functions: * cbrt * ceil * copysign * fabs * fdim * floor * fma * fmax * fmaximum * fmin * fminimum * fmod * rint * round * roundeven * sqrt * trunc This list includes only functions that produce exact results (verified with exhaustive / extensive tests, and also required by IEEE in most cases), and for which benchmarks indicate performance similar to or better than Musl's soft float math routines [^1]. All except `cbrt` also have `f16` and `f128` implementations. Once more routines meet these criteria, we can move them from platform-specific availability to always available. Once this change makes it to rust-lang/rust, we will also be able to move the relevant functions from `std` to `core`. [^1]: We still rely on the backend to provide optimized assembly routines when available. 
[1]: https://github.com/rust-lang/rust/issues/128386 --- src/lib.rs | 29 +----- src/math.rs | 259 ++++++++++++++++++++++++++++++++++------------------ 2 files changed, 173 insertions(+), 115 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 533878137..6f5bd8598 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,40 +41,13 @@ mod macros; pub mod float; pub mod int; - -// Disable for any of the following: -// - x86 without sse2 due to ABI issues -// - -// - but exclude UEFI since it is a soft-float target -// - -// - All unix targets (linux, macos, freebsd, android, etc) -// - wasm with known target_os -#[cfg(not(any( - all( - target_arch = "x86", - not(target_feature = "sse2"), - not(target_os = "uefi"), - ), - unix, - all(target_family = "wasm", not(target_os = "unknown")) -)))] pub mod math; +pub mod mem; // `libm` expects its `support` module to be available in the crate root. This config can be // cleaned up once `libm` is made always available. -#[cfg(not(any( - all( - target_arch = "x86", - not(target_feature = "sse2"), - not(target_os = "uefi"), - ), - unix, - all(target_family = "wasm", not(target_os = "unknown")) -)))] use math::libm::support; -pub mod mem; - #[cfg(target_arch = "arm")] pub mod arm; diff --git a/src/math.rs b/src/math.rs index fef5358e3..ccd9c5421 100644 --- a/src/math.rs +++ b/src/math.rs @@ -5,110 +5,195 @@ #[path = "../libm/src/math/mod.rs"] pub(crate) mod libm; -#[allow(unused_macros)] -macro_rules! no_mangle { +macro_rules! libm_intrinsics { ($(fn $fun:ident($($iid:ident : $ity:ty),+) -> $oty:ty;)+) => { intrinsics! { $( pub extern "C" fn $fun($($iid: $ity),+) -> $oty { - self::libm::$fun($($iid),+) + $crate::math::libm::$fun($($iid),+) } )+ } } } -#[cfg(not(windows))] -no_mangle! 
{ - fn acos(x: f64) -> f64; - fn asin(x: f64) -> f64; - fn cbrt(x: f64) -> f64; - fn expm1(x: f64) -> f64; - fn hypot(x: f64, y: f64) -> f64; - fn tan(x: f64) -> f64; - fn cos(x: f64) -> f64; - fn expf(x: f32) -> f32; - fn log2(x: f64) -> f64; - fn log2f(x: f32) -> f32; - fn log10(x: f64) -> f64; - fn log10f(x: f32) -> f32; - fn log(x: f64) -> f64; - fn logf(x: f32) -> f32; - fn round(x: f64) -> f64; - fn roundf(x: f32) -> f32; - fn rint(x: f64) -> f64; - fn rintf(x: f32) -> f32; - fn sin(x: f64) -> f64; - fn pow(x: f64, y: f64) -> f64; - fn powf(x: f32, y: f32) -> f32; - fn acosf(n: f32) -> f32; - fn atan2f(a: f32, b: f32) -> f32; - fn atanf(n: f32) -> f32; - fn coshf(n: f32) -> f32; - fn expm1f(n: f32) -> f32; - fn fdim(a: f64, b: f64) -> f64; - fn fdimf(a: f32, b: f32) -> f32; - fn log1pf(n: f32) -> f32; - fn sinhf(n: f32) -> f32; - fn tanhf(n: f32) -> f32; - fn ldexp(f: f64, n: i32) -> f64; - fn ldexpf(f: f32, n: i32) -> f32; - fn tgamma(x: f64) -> f64; - fn tgammaf(x: f32) -> f32; - fn atan(x: f64) -> f64; - fn atan2(x: f64, y: f64) -> f64; - fn cosh(x: f64) -> f64; - fn log1p(x: f64) -> f64; - fn sinh(x: f64) -> f64; - fn tanh(x: f64) -> f64; - fn cosf(x: f32) -> f32; - fn exp(x: f64) -> f64; - fn sinf(x: f32) -> f32; - fn exp2(x: f64) -> f64; - fn exp2f(x: f32) -> f32; - fn fma(x: f64, y: f64, z: f64) -> f64; - fn fmaf(x: f32, y: f32, z: f32) -> f32; - fn asinf(n: f32) -> f32; - fn cbrtf(n: f32) -> f32; - fn hypotf(x: f32, y: f32) -> f32; - fn tanf(n: f32) -> f32; +/// This set of functions is well tested in `libm` and known to provide similar performance to +/// system `libm`, as well as the same or better accuracy. +pub mod full_availability { + #[cfg(f16_enabled)] + libm_intrinsics! 
{ + fn ceilf16(x: f16) -> f16; + fn copysignf16(x: f16, y: f16) -> f16; + fn fabsf16(x: f16) -> f16; + fn fdimf16(x: f16, y: f16) -> f16; + fn floorf16(x: f16) -> f16; + fn fmaxf16(x: f16, y: f16) -> f16; + fn fmaximumf16(x: f16, y: f16) -> f16; + fn fminf16(x: f16, y: f16) -> f16; + fn fminimumf16(x: f16, y: f16) -> f16; + fn fmodf16(x: f16, y: f16) -> f16; + fn rintf16(x: f16) -> f16; + fn roundevenf16(x: f16) -> f16; + fn roundf16(x: f16) -> f16; + fn sqrtf16(x: f16) -> f16; + fn truncf16(x: f16) -> f16; + } + + /* Weak linkage is unreliable on Windows and Apple, so we don't expose symbols that we know + * the system libc provides in order to avoid conflicts. */ - fn sqrtf(x: f32) -> f32; - fn sqrt(x: f64) -> f64; + #[cfg(all(not(windows), not(target_vendor = "apple")))] + libm_intrinsics! { + /* f32 */ + fn cbrtf(n: f32) -> f32; + fn ceilf(x: f32) -> f32; + fn copysignf(x: f32, y: f32) -> f32; + fn fabsf(x: f32) -> f32; + fn fdimf(a: f32, b: f32) -> f32; + fn floorf(x: f32) -> f32; + fn fmaf(x: f32, y: f32, z: f32) -> f32; + fn fmaxf(x: f32, y: f32) -> f32; + fn fminf(x: f32, y: f32) -> f32; + fn fmodf(x: f32, y: f32) -> f32; + fn rintf(x: f32) -> f32; + fn roundf(x: f32) -> f32; + fn sqrtf(x: f32) -> f32; + fn truncf(x: f32) -> f32; - fn ceil(x: f64) -> f64; - fn ceilf(x: f32) -> f32; - fn floor(x: f64) -> f64; - fn floorf(x: f32) -> f32; - fn trunc(x: f64) -> f64; - fn truncf(x: f32) -> f32; + /* f64 */ + fn cbrt(x: f64) -> f64; + fn ceil(x: f64) -> f64; + fn copysign(x: f64, y: f64) -> f64; + fn fabs(x: f64) -> f64; + fn fdim(a: f64, b: f64) -> f64; + fn floor(x: f64) -> f64; + fn fma(x: f64, y: f64, z: f64) -> f64; + fn fmax(x: f64, y: f64) -> f64; + fn fmin(x: f64, y: f64) -> f64; + fn fmod(x: f64, y: f64) -> f64; + fn rint(x: f64) -> f64; + fn round(x: f64) -> f64; + fn sqrt(x: f64) -> f64; + fn trunc(x: f64) -> f64; + } - fn fmin(x: f64, y: f64) -> f64; - fn fminf(x: f32, y: f32) -> f32; - fn fmax(x: f64, y: f64) -> f64; - fn fmaxf(x: f32, y: f32) -> 
f32; - // `f64 % f64` - fn fmod(x: f64, y: f64) -> f64; - // `f32 % f32` - fn fmodf(x: f32, y: f32) -> f32; + // Windows and MacOS do not yet expose roundeven and IEEE 754-2019 `maximum` / `minimum`, + // however, so we still provide a fallback. + libm_intrinsics! { + fn fmaximum(x: f64, y: f64) -> f64; + fn fmaximumf(x: f32, y: f32) -> f32; + fn fminimum(x: f64, y: f64) -> f64; + fn fminimumf(x: f32, y: f32) -> f32; + fn roundeven(x: f64) -> f64; + fn roundevenf(x: f32) -> f32; + } - fn erf(x: f64) -> f64; - fn erff(x: f32) -> f32; - fn erfc(x: f64) -> f64; - fn erfcf(x: f32) -> f32; + #[cfg(f128_enabled)] + libm_intrinsics! { + fn ceilf128(x: f128) -> f128; + fn copysignf128(x: f128, y: f128) -> f128; + fn fabsf128(x: f128) -> f128; + fn fdimf128(x: f128, y: f128) -> f128; + fn floorf128(x: f128) -> f128; + fn fmaf128(x: f128, y: f128, z: f128) -> f128; + fn fmaxf128(x: f128, y: f128) -> f128; + fn fmaximumf128(x: f128, y: f128) -> f128; + fn fminf128(x: f128, y: f128) -> f128; + fn fminimumf128(x: f128, y: f128) -> f128; + fn fmodf128(x: f128, y: f128) -> f128; + fn rintf128(x: f128) -> f128; + fn roundevenf128(x: f128) -> f128; + fn roundf128(x: f128) -> f128; + fn sqrtf128(x: f128) -> f128; + fn truncf128(x: f128) -> f128; + } } -// allow for windows (and other targets) -intrinsics! { - pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 { - let r = self::libm::lgamma_r(x); - *s = r.1; - r.0 +/// This group of functions has more performance or precision issues than system versions, or +/// are otherwise less well tested. Provide them only on platforms that have problems with the +/// system `libm`. +/// +/// As `libm` improves, more functions will be moved from this group to the first group. 
+/// +/// Do not supply for any of the following: +/// - x86 without sse2 due to ABI issues +/// - +/// - but exclude UEFI since it is a soft-float target +/// - +/// - All unix targets (linux, macos, freebsd, android, etc) +/// - wasm with known target_os +#[cfg(not(any( + all( + target_arch = "x86", + not(target_feature = "sse2"), + not(target_os = "uefi"), + ), + unix, + all(target_family = "wasm", not(target_os = "unknown")) +)))] +pub mod partial_availability { + #[cfg(not(windows))] + libm_intrinsics! { + fn acos(x: f64) -> f64; + fn acosf(n: f32) -> f32; + fn asin(x: f64) -> f64; + fn asinf(n: f32) -> f32; + fn atan(x: f64) -> f64; + fn atan2(x: f64, y: f64) -> f64; + fn atan2f(a: f32, b: f32) -> f32; + fn atanf(n: f32) -> f32; + fn cos(x: f64) -> f64; + fn cosf(x: f32) -> f32; + fn cosh(x: f64) -> f64; + fn coshf(n: f32) -> f32; + fn erf(x: f64) -> f64; + fn erfc(x: f64) -> f64; + fn erfcf(x: f32) -> f32; + fn erff(x: f32) -> f32; + fn exp(x: f64) -> f64; + fn exp2(x: f64) -> f64; + fn exp2f(x: f32) -> f32; + fn expf(x: f32) -> f32; + fn expm1(x: f64) -> f64; + fn expm1f(n: f32) -> f32; + fn hypot(x: f64, y: f64) -> f64; + fn hypotf(x: f32, y: f32) -> f32; + fn ldexp(f: f64, n: i32) -> f64; + fn ldexpf(f: f32, n: i32) -> f32; + fn log(x: f64) -> f64; + fn log10(x: f64) -> f64; + fn log10f(x: f32) -> f32; + fn log1p(x: f64) -> f64; + fn log1pf(n: f32) -> f32; + fn log2(x: f64) -> f64; + fn log2f(x: f32) -> f32; + fn logf(x: f32) -> f32; + fn pow(x: f64, y: f64) -> f64; + fn powf(x: f32, y: f32) -> f32; + fn sin(x: f64) -> f64; + fn sinf(x: f32) -> f32; + fn sinh(x: f64) -> f64; + fn sinhf(n: f32) -> f32; + fn tan(x: f64) -> f64; + fn tanf(n: f32) -> f32; + fn tanh(x: f64) -> f64; + fn tanhf(n: f32) -> f32; + fn tgamma(x: f64) -> f64; + fn tgammaf(x: f32) -> f32; } - pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 { - let r = self::libm::lgammaf_r(x); - *s = r.1; - r.0 + // allow for windows (and other targets) + intrinsics! 
{ + pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 { + let r = super::libm::lgamma_r(x); + *s = r.1; + r.0 + } + + pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 { + let r = super::libm::lgammaf_r(x); + *s = r.1; + r.0 + } } } From 453da663e447fe80001733417a89c45bb624d1f4 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 25 Feb 2025 18:55:26 +0000 Subject: [PATCH 1218/1459] chore: release v0.1.149 --- CHANGELOG.md | 6 ++++++ Cargo.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c1e1e73da..9b8aaaa08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.149](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.148...compiler_builtins-v0.1.149) - 2025-02-25 + +### Other + +- Make a subset of `libm` symbols weakly available on all platforms + ## [0.1.148](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.147...compiler_builtins-v0.1.148) - 2025-02-24 ### Other diff --git a/Cargo.toml b/Cargo.toml index 684a2a0b6..d1575d6df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.148" +version = "0.1.149" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 3c7672c91ec4614803b7bde1f2adbd7ff4178048 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 26 Feb 2025 07:40:35 +0000 Subject: [PATCH 1219/1459] Update LLVM downloads to 20.1-2025-02-13 This matches the version used by rust-lang/rust. 
--- .github/workflows/main.yml | 2 +- README.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7336efc42..fd1f6d532 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -4,7 +4,7 @@ on: [push, pull_request] env: RUSTDOCFLAGS: -Dwarnings RUSTFLAGS: -Dwarnings - RUST_LLVM_VERSION: 19.1-2024-09-17 + RUST_LLVM_VERSION: 20.1-2025-02-13 RUST_COMPILER_RT_ROOT: ./compiler-rt jobs: diff --git a/README.md b/README.md index a2b38cce0..d91d88a85 100644 --- a/README.md +++ b/README.md @@ -89,8 +89,8 @@ to test against, located in a directory called `compiler-rt`. This can be obtained with the following: ```sh -curl -L -o rustc-llvm-19.1.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/19.1-2024-09-17.tar.gz -tar xzf rustc-llvm-19.1.tar.gz --strip-components 1 llvm-project-rustc-19.1-2024-09-17/compiler-rt +curl -L -o rustc-llvm-20.1.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/20.1-2025-02-13.tar.gz +tar xzf rustc-llvm-20.1.tar.gz --strip-components 1 llvm-project-rustc-20.1-2025-02-13/compiler-rt ``` Local targets may also be tested with `./ci/run.sh [target]`. From 5cf417a9e92bb48e4e55756a645826fd167b9f3a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 1 Mar 2025 19:32:06 +0000 Subject: [PATCH 1220/1459] Disable `f16` on AArch64 without the `neon` feature There is an LLVM regression that breaks some `f16`-related code when `fp-armv8` is disabled [1]. Since Rust ties that feature to `neon`, disable `f16` if `neon` is not available. 
[1]: https://github.com/llvm/llvm-project/issues/129394 --- configure.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configure.rs b/configure.rs index ff52e88da..2bb0f8383 100644 --- a/configure.rs +++ b/configure.rs @@ -71,6 +71,8 @@ pub fn configure_f16_f128(target: &Target) { let f16_enabled = match target.arch.as_str() { // Unsupported "arm64ec" => false, + // Crash in LLVM20 + "aarch64" if !target.features.iter().any(|f| f == "neon") => false, // Selection failure "s390x" => false, // Infinite recursion From 8586a1a37e1db2bc8318b9ae2b8e7ec7995023df Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 1 Mar 2025 19:40:30 +0000 Subject: [PATCH 1221/1459] chore: release v0.1.150 --- CHANGELOG.md | 7 +++++++ Cargo.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b8aaaa08..087a912d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.150](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.149...compiler_builtins-v0.1.150) - 2025-03-01 + +### Other + +- Disable `f16` on AArch64 without the `neon` feature +- Update LLVM downloads to 20.1-2025-02-13 + ## [0.1.149](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.148...compiler_builtins-v0.1.149) - 2025-02-25 ### Other diff --git a/Cargo.toml b/Cargo.toml index d1575d6df..3bf9b8ddd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.149" +version = "0.1.150" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From f6a6911b7a71615c0ab2ef8875380ee14afb4311 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 1 Mar 2025 23:20:04 
-0500 Subject: [PATCH 1222/1459] Remove outdated information from the readme --- README.md | 48 +++++------------------------------------------- 1 file changed, 5 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index d91d88a85..d76bd289b 100644 --- a/README.md +++ b/README.md @@ -1,50 +1,12 @@ # `compiler-builtins` -> Porting `compiler-rt` intrinsics to Rust +This crate provides external symbols that the compiler expects to be available when +building Rust projects, typically software routines for basic operations that do not +have hardware support. It is largely a port of LLVM's [`compiler-rt`]. -See [rust-lang/rust#35437][0]. +It is distributed as part of Rust's sysroot. -[0]: https://github.com/rust-lang/rust/issues/35437 - -## When and how to use this crate? - -If you are working with a target that doesn't have binary releases of std -available via rustup (this probably means you are building the core crate -yourself) and need compiler-rt intrinsics (i.e. you are probably getting linker -errors when building an executable: `undefined reference to __aeabi_memcpy`), -you can use this crate to get those intrinsics and solve the linker errors. To -do that, add this crate somewhere in the dependency graph of the crate you are -building: - -```toml -# Cargo.toml -[dependencies] -compiler_builtins = { git = "https://github.com/rust-lang/compiler-builtins" } -``` - -```rust -extern crate compiler_builtins; - -// ... -``` - -If you still get an "undefined reference to $INTRINSIC" error after that change, -that means that we haven't ported `$INTRINSIC` to Rust yet! Please open [an -issue] with the name of the intrinsic and the LLVM triple (e.g. -thumbv7m-none-eabi) of the target you are using. That way we can prioritize -porting that particular intrinsic. 
- -If you've got a C compiler available for your target then while we implement -this intrinsic you can temporarily enable a fallback to the actual compiler-rt -implementation as well for unimplemented intrinsics: - -```toml -[dependencies.compiler_builtins] -git = "https://github.com/rust-lang/compiler-builtins" -features = ["c"] -``` - -[an issue]: https://github.com/rust-lang/compiler-builtins/issues +[`compiler-rt`]: https://github.com/llvm/llvm-project/tree/1b1dc505057322f4fa1110ef4f53c44347f52986/compiler-rt ## Contributing From 9ed21c4d56d2fe1abe6c5bc34eb43bd911d32419 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 4 Mar 2025 16:21:15 -0500 Subject: [PATCH 1223/1459] Add __extendhfdf2 and add __truncdfhf2 test LLVM doesn't seem to emit this intrinsic but it probably should, in some cases it lowers f16->f64 conversions as f16->f32->f64 with two libcalls. GCC provides this intrinsic so it is good to have anyway. Additionally, add a test for f64->f16 which was missing. [1]: https://rust.godbolt.org/z/xezM9PEnz --- src/float/extend.rs | 8 ++++++++ testcrate/Cargo.toml | 3 ++- testcrate/benches/float_extend.rs | 23 +++++++++++++++++++++++ testcrate/benches/float_trunc.rs | 2 +- testcrate/build.rs | 11 +++++++++++ testcrate/tests/conv.rs | 2 ++ 6 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/float/extend.rs b/src/float/extend.rs index a1a9b9720..f05e3a924 100644 --- a/src/float/extend.rs +++ b/src/float/extend.rs @@ -96,6 +96,14 @@ intrinsics! 
{ extend(a) } + #[avr_skip] + #[aapcs_on_arm] + #[apple_f16_arg_abi] + #[cfg(f16_enabled)] + pub extern "C" fn __extendhfdf2(a: f16) -> f64 { + extend(a) + } + #[avr_skip] #[aapcs_on_arm] #[ppc_alias = __extendhfkf2] diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index 21cec1701..91e2f668f 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -43,8 +43,9 @@ no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"] # Some platforms have some f128 functions but everything except integer conversions no-sys-f128-int-convert = [] no-sys-f16-f128-convert = [] +no-sys-f16-f64-convert = [] # Skip tests that rely on f16 symbols being available on the system -no-sys-f16 = [] +no-sys-f16 = ["no-sys-f16-f64-convert"] # Enable report generation without bringing in more dependencies by default benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] diff --git a/testcrate/benches/float_extend.rs b/testcrate/benches/float_extend.rs index a0cdaf48a..12f195984 100644 --- a/testcrate/benches/float_extend.rs +++ b/testcrate/benches/float_extend.rs @@ -28,6 +28,28 @@ float_bench! { ], } +#[cfg(f16_enabled)] +float_bench! { + name: extend_f16_f64, + sig: (a: f16) -> f64, + crate_fn: extend::__extendhfdf2, + sys_fn: __extendhfdf2, + sys_available: not(feature = "no-sys-f16-f64-convert"), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "fcvt {ret:d}, {a:h}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + #[cfg(all(f16_enabled, f128_enabled))] float_bench! 
{ name: extend_f16_f128, @@ -93,6 +115,7 @@ pub fn float_extend() { #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] { extend_f16_f32(&mut criterion); + extend_f16_f64(&mut criterion); #[cfg(f128_enabled)] extend_f16_f128(&mut criterion); diff --git a/testcrate/benches/float_trunc.rs b/testcrate/benches/float_trunc.rs index de9b5bf8c..cb6aee13a 100644 --- a/testcrate/benches/float_trunc.rs +++ b/testcrate/benches/float_trunc.rs @@ -33,7 +33,7 @@ float_bench! { sig: (a: f64) -> f16, crate_fn: trunc::__truncdfhf2, sys_fn: __truncdfhf2, - sys_available: not(feature = "no-sys-f16"), + sys_available: not(feature = "no-sys-f16-f64-convert"), asm: [ #[cfg(target_arch = "aarch64")] { let ret: f16; diff --git a/testcrate/build.rs b/testcrate/build.rs index 6205c7ac6..3e5f780ac 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -6,6 +6,7 @@ enum Feature { NoSysF128, NoSysF128IntConvert, NoSysF16, + NoSysF16F64Convert, NoSysF16F128Convert, } @@ -66,9 +67,15 @@ fn main() { || target.arch == "wasm64" { features.insert(Feature::NoSysF16); + features.insert(Feature::NoSysF16F64Convert); features.insert(Feature::NoSysF16F128Convert); } + // These platforms are missing either `__extendhfdf2` or `__truncdfhf2`. 
+ if target.vendor == "apple" || target.os == "windows" { + features.insert(Feature::NoSysF16F64Convert); + } + for feature in features { let (name, warning) = match feature { Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"), @@ -76,6 +83,10 @@ fn main() { "no-sys-f128-int-convert", "using apfloat fallback for f128 <-> int conversions", ), + Feature::NoSysF16F64Convert => ( + "no-sys-f16-f64-convert", + "using apfloat fallback for f16 <-> f64 conversions", + ), Feature::NoSysF16F128Convert => ( "no-sys-f16-f128-convert", "using apfloat fallback for f16 <-> f128 conversions", diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index a08748af7..7f33d27cc 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -311,6 +311,7 @@ mod extend { extend, f16 => f32, Half => Single, __extendhfsf2, not(feature = "no-sys-f16"); f16 => f32, Half => Single, __gnu_h2f_ieee, not(feature = "no-sys-f16"); + f16 => f64, Half => Double, __extendhfdf2, not(feature = "no-sys-f16-f64-convert"); f16 => f128, Half => Quad, __extendhftf2, not(feature = "no-sys-f16-f128-convert"); f32 => f128, Single => Quad, __extendsftf2, not(feature = "no-sys-f128"); f64 => f128, Double => Quad, __extenddftf2, not(feature = "no-sys-f128"); @@ -340,6 +341,7 @@ mod trunc { trunc, f32 => f16, Single => Half, __truncsfhf2, not(feature = "no-sys-f16"); f32 => f16, Single => Half, __gnu_f2h_ieee, not(feature = "no-sys-f16"); + f64 => f16, Double => Half, __truncdfhf2, not(feature = "no-sys-f16-f64-convert"); f128 => f16, Quad => Half, __trunctfhf2, not(feature = "no-sys-f16-f128-convert"); f128 => f32, Quad => Single, __trunctfsf2, not(feature = "no-sys-f128"); f128 => f64, Quad => Double, __trunctfdf2, not(feature = "no-sys-f128"); From 94a8f2eab7244f349fb82affe862d2816af997de Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 4 Mar 2025 17:46:50 -0500 Subject: [PATCH 1224/1459] Simplify test crate build features Since we have a handful of different 
float-related configuration in testcrate, track a list of which are implied by others rather than repeating the config. --- testcrate/benches/float_conv.rs | 2 +- testcrate/build.rs | 31 ++++++++++++++++++++++++------- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/testcrate/benches/float_conv.rs b/testcrate/benches/float_conv.rs index 0625a1ae5..7d8549b43 100644 --- a/testcrate/benches/float_conv.rs +++ b/testcrate/benches/float_conv.rs @@ -665,7 +665,7 @@ pub fn float_conv() { conv_f64_i64(&mut criterion); conv_f64_i128(&mut criterion); - #[cfg(all(f128_enabled))] + #[cfg(f128_enabled)] // FIXME: ppc64le has a sporadic overflow panic in the crate functions // #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] diff --git a/testcrate/build.rs b/testcrate/build.rs index 3e5f780ac..427fa799b 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -1,7 +1,11 @@ use std::collections::HashSet; +mod builtins_configure { + include!("../configure.rs"); +} + /// Features to enable -#[derive(Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] enum Feature { NoSysF128, NoSysF128IntConvert, @@ -10,8 +14,16 @@ enum Feature { NoSysF16F128Convert, } -mod builtins_configure { - include!("../configure.rs"); +impl Feature { + fn implies(self) -> &'static [Self] { + match self { + Self::NoSysF128 => [Self::NoSysF128IntConvert, Self::NoSysF16F128Convert].as_slice(), + Self::NoSysF128IntConvert => [].as_slice(), + Self::NoSysF16 => [Self::NoSysF16F64Convert, Self::NoSysF16F128Convert].as_slice(), + Self::NoSysF16F64Convert => [].as_slice(), + Self::NoSysF16F128Convert => [].as_slice(), + } + } } fn main() { @@ -40,8 +52,6 @@ fn main() { || target.arch == "powerpc64" { features.insert(Feature::NoSysF128); - features.insert(Feature::NoSysF128IntConvert); - features.insert(Feature::NoSysF16F128Convert); } if target.arch == "x86" { @@ -67,8 +77,6 @@ fn main() { || target.arch == "wasm64" { 
features.insert(Feature::NoSysF16); - features.insert(Feature::NoSysF16F64Convert); - features.insert(Feature::NoSysF16F128Convert); } // These platforms are missing either `__extendhfdf2` or `__truncdfhf2`. @@ -76,6 +84,15 @@ fn main() { features.insert(Feature::NoSysF16F64Convert); } + // Add implied features. Collection is required for borrows. + features.extend( + features + .iter() + .flat_map(|x| x.implies()) + .copied() + .collect::>(), + ); + for feature in features { let (name, warning) = match feature { Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"), From b7b93103fb9293c0c502dc1ae34e2ad5c871bc39 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 4 Mar 2025 17:36:27 -0500 Subject: [PATCH 1225/1459] Add a test config for __gnu_h2f_ieee and __gnu_f2h_ieee Some targets do not provide these symbols since they always use __extendhfsf and __truncsfhf. Add a configuration option for this. --- testcrate/Cargo.toml | 3 ++- testcrate/build.rs | 18 +++++++++++++++++- testcrate/tests/conv.rs | 4 ++-- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index 91e2f668f..e06864846 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -44,8 +44,9 @@ no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"] no-sys-f128-int-convert = [] no-sys-f16-f128-convert = [] no-sys-f16-f64-convert = [] +no-sys-f16-gnu-convert = [] # Skip tests that rely on f16 symbols being available on the system -no-sys-f16 = ["no-sys-f16-f64-convert"] +no-sys-f16 = ["no-sys-f16-f64-convert", "no-sys-f16-gnu-convert"] # Enable report generation without bringing in more dependencies by default benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] diff --git a/testcrate/build.rs b/testcrate/build.rs index 427fa799b..171c1d521 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -12,6 +12,7 @@ enum Feature { NoSysF16, NoSysF16F64Convert, NoSysF16F128Convert, + 
NoSysF16GnuConvert, } impl Feature { @@ -19,9 +20,15 @@ impl Feature { match self { Self::NoSysF128 => [Self::NoSysF128IntConvert, Self::NoSysF16F128Convert].as_slice(), Self::NoSysF128IntConvert => [].as_slice(), - Self::NoSysF16 => [Self::NoSysF16F64Convert, Self::NoSysF16F128Convert].as_slice(), + Self::NoSysF16 => [ + Self::NoSysF16F64Convert, + Self::NoSysF16F128Convert, + Feature::NoSysF16GnuConvert, + ] + .as_slice(), Self::NoSysF16F64Convert => [].as_slice(), Self::NoSysF16F128Convert => [].as_slice(), + Self::NoSysF16GnuConvert => [].as_slice(), } } } @@ -84,6 +91,11 @@ fn main() { features.insert(Feature::NoSysF16F64Convert); } + // These platforms do not have `__gnu_f2h_ieee` or `__gnu_h2f_ieee`. + if false { + features.insert(Feature::NoSysF16GnuConvert); + } + // Add implied features. Collection is required for borrows. features.extend( features @@ -108,6 +120,10 @@ fn main() { "no-sys-f16-f128-convert", "using apfloat fallback for f16 <-> f128 conversions", ), + Feature::NoSysF16GnuConvert => ( + "no-sys-f16-gnu-convert", + "using apfloat fallback for __gnu f16", + ), Feature::NoSysF16 => ("no-sys-f16", "using apfloat fallback for f16"), }; println!("cargo:warning={warning}"); diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 7f33d27cc..f94aaf174 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -310,7 +310,7 @@ mod extend { f_to_f! { extend, f16 => f32, Half => Single, __extendhfsf2, not(feature = "no-sys-f16"); - f16 => f32, Half => Single, __gnu_h2f_ieee, not(feature = "no-sys-f16"); + f16 => f32, Half => Single, __gnu_h2f_ieee, not(feature = "no-sys-f16-gnu-convert"); f16 => f64, Half => Double, __extendhfdf2, not(feature = "no-sys-f16-f64-convert"); f16 => f128, Half => Quad, __extendhftf2, not(feature = "no-sys-f16-f128-convert"); f32 => f128, Single => Quad, __extendsftf2, not(feature = "no-sys-f128"); @@ -340,7 +340,7 @@ mod trunc { f_to_f! 
{ trunc, f32 => f16, Single => Half, __truncsfhf2, not(feature = "no-sys-f16"); - f32 => f16, Single => Half, __gnu_f2h_ieee, not(feature = "no-sys-f16"); + f32 => f16, Single => Half, __gnu_f2h_ieee, not(feature = "no-sys-f16-gnu-convert"); f64 => f16, Double => Half, __truncdfhf2, not(feature = "no-sys-f16-f64-convert"); f128 => f16, Quad => Half, __trunctfhf2, not(feature = "no-sys-f16-f128-convert"); f128 => f32, Quad => Single, __trunctfsf2, not(feature = "no-sys-f128"); From 8daa56684557c3ca3b6a7c7bc23426e708142b09 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 4 Mar 2025 22:06:35 -0500 Subject: [PATCH 1226/1459] Revert "ci: Pin the nightly toolchain for aarch64-unknown-linux-gnu" The fix to this issue was synced in [1] so we should no longer need to keep aarch64 pinned. This reverts commit b2bcfc838e2a4b72fa62b333e3eb91f250aa4539. [1]: https://github.com/rust-lang/rust/pull/137661 --- .github/workflows/main.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fd1f6d532..1c367a2d0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -20,8 +20,7 @@ jobs: rust: nightly - target: aarch64-unknown-linux-gnu os: ubuntu-latest - # FIXME: pinned due to https://github.com/llvm/llvm-project/issues/127804 - rust: nightly-2025-02-07 + rust: nightly - target: aarch64-pc-windows-msvc os: windows-latest rust: nightly From 4690673e1f2338b3e76d60e49c863dea79f237fa Mon Sep 17 00:00:00 2001 From: hev Date: Wed, 5 Mar 2025 14:11:51 +0800 Subject: [PATCH 1227/1459] Enable `f16` for LoongArch (#770) [ the configured-out tests should be re-enabled once we have the symbols in nightly - Trevor ] --- configure.rs | 2 -- testcrate/build.rs | 1 + testcrate/tests/conv.rs | 12 ++++++++++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/configure.rs b/configure.rs index 2bb0f8383..931e96629 100644 --- a/configure.rs +++ b/configure.rs @@ -76,10 +76,8 @@ pub fn 
configure_f16_f128(target: &Target) { // Selection failure "s390x" => false, // Infinite recursion - // FIXME(llvm20): loongarch fixed by "csky" => false, "hexagon" => false, - "loongarch64" => false, "powerpc" | "powerpc64" => false, "sparc" | "sparc64" => false, "wasm32" | "wasm64" => false, diff --git a/testcrate/build.rs b/testcrate/build.rs index 171c1d521..868d8700d 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -75,6 +75,7 @@ fn main() { || target.arch == "powerpc" || target.arch == "powerpc64" || target.arch == "powerpc64le" + || target.arch == "loongarch64" || (target.arch == "x86" && !target.has_feature("sse")) || target.os == "windows" // Linking says "error: function signature mismatch: __extendhfsf2" and seems to diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index f94aaf174..d70663540 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -306,7 +306,11 @@ mod extend { } #[cfg(all(f16_enabled, f128_enabled))] - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[cfg(not(any( + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "loongarch64" + )))] f_to_f! { extend, f16 => f32, Half => Single, __extendhfsf2, not(feature = "no-sys-f16"); @@ -336,7 +340,11 @@ mod trunc { } #[cfg(all(f16_enabled, f128_enabled))] - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + #[cfg(not(any( + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "loongarch64" + )))] f_to_f! 
{ trunc, f32 => f16, Single => Half, __truncsfhf2, not(feature = "no-sys-f16"); From 5883d7177abf3a2324cfd5e7c2d28469787b1427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E5=AE=87=E9=80=B8?= Date: Fri, 28 Feb 2025 23:17:56 +0800 Subject: [PATCH 1228/1459] Add cygwin support Co-authored-by: Ookiineko --- build.rs | 14 ++++++++------ examples/intrinsics.rs | 6 +++--- src/macros.rs | 18 +++++++++--------- src/probestack.rs | 4 ++-- src/x86_64.rs | 6 +++++- 5 files changed, 27 insertions(+), 21 deletions(-) diff --git a/build.rs b/build.rs index 39cee311f..3c04423f2 100644 --- a/build.rs +++ b/build.rs @@ -575,7 +575,7 @@ mod c { ("__fe_raise_inexact", "fp_mode.c"), ]); - if target.os != "windows" { + if target.os != "windows" && target.os != "cygwin" { sources.extend(&[("__multc3", "multc3.c")]); } } @@ -608,13 +608,15 @@ mod c { sources.remove(&["__aeabi_cdcmp", "__aeabi_cfcmp"]); } - // Android uses emulated TLS so we need a runtime support function. - if target.os == "android" { + // Android and Cygwin uses emulated TLS so we need a runtime support function. + if target.os == "android" || target.os == "cygwin" { sources.extend(&[("__emutls_get_address", "emutls.c")]); + } - // Work around a bug in the NDK headers (fixed in - // https://r.android.com/2038949 which will be released in a future - // NDK version) by providing a definition of LONG_BIT. + // Work around a bug in the NDK headers (fixed in + // https://r.android.com/2038949 which will be released in a future + // NDK version) by providing a definition of LONG_BIT. 
+ if target.os == "android" { cfg.define("LONG_BIT", "(8 * sizeof(long))"); } diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 59a70e207..e90cfb33d 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -673,17 +673,17 @@ pub fn __aeabi_unwind_cpp_pr0() {} #[no_mangle] pub fn __aeabi_unwind_cpp_pr1() {} -#[cfg(not(windows))] +#[cfg(not(any(windows, target_os = "cygwin")))] #[allow(non_snake_case)] #[no_mangle] pub fn _Unwind_Resume() {} -#[cfg(not(windows))] +#[cfg(not(any(windows, target_os = "cygwin")))] #[lang = "eh_personality"] #[no_mangle] pub extern "C" fn eh_personality() {} -#[cfg(all(windows, target_env = "gnu"))] +#[cfg(any(all(windows, target_env = "gnu"), target_os = "cygwin"))] mod mingw_unwinding { #[no_mangle] pub fn rust_eh_personality() {} diff --git a/src/macros.rs b/src/macros.rs index 5d9f58197..b1b71379c 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -193,7 +193,7 @@ macro_rules! intrinsics { $($rest:tt)* ) => ( - #[cfg(all(any(windows, all(target_os = "uefi", target_arch = "x86_64")), target_pointer_width = "64"))] + #[cfg(all(any(windows, target_os = "cygwin", all(target_os = "uefi", target_arch = "x86_64")), target_pointer_width = "64"))] intrinsics! { $(#[$($attr)*])* pub extern "unadjusted" fn $name( $($argname: $ty),* ) $(-> $ret)? { @@ -201,7 +201,7 @@ macro_rules! intrinsics { } } - #[cfg(not(all(any(windows, all(target_os = "uefi", target_arch = "x86_64")), target_pointer_width = "64")))] + #[cfg(not(all(any(windows, target_os = "cygwin", all(target_os = "uefi", target_arch = "x86_64")), target_pointer_width = "64")))] intrinsics! { $(#[$($attr)*])* pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { @@ -257,7 +257,7 @@ macro_rules! 
intrinsics { #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"), not(feature = "mangled-names")))] mod $name { #[no_mangle] - #[cfg_attr(not(all(windows, target_env = "gnu")), linkage = "weak")] + #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] $(#[$($attr)*])* extern $abi fn $name( $($argname: u16),* ) $(-> $ret)? { super::$name($(f16::from_bits($argname)),*) @@ -293,7 +293,7 @@ macro_rules! intrinsics { #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"), not(feature = "mangled-names")))] mod $name { #[no_mangle] - #[cfg_attr(not(all(windows, target_env = "gnu")), linkage = "weak")] + #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] $(#[$($attr)*])* extern $abi fn $name( $($argname: $ty),* ) -> u16 { super::$name($($argname),*).to_bits() @@ -334,7 +334,7 @@ macro_rules! intrinsics { #[cfg(all(target_arch = "arm", not(feature = "mangled-names")))] mod $name { #[no_mangle] - #[cfg_attr(not(all(windows, target_env = "gnu")), linkage = "weak")] + #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] $(#[$($attr)*])* extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) @@ -344,7 +344,7 @@ macro_rules! intrinsics { #[cfg(all(target_arch = "arm", not(feature = "mangled-names")))] mod $alias { #[no_mangle] - #[cfg_attr(not(all(windows, target_env = "gnu")), linkage = "weak")] + #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] $(#[$($attr)*])* extern "aapcs" fn $alias( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) @@ -411,7 +411,7 @@ macro_rules! 
intrinsics { mod $name { $(#[$($attr)*])* #[no_mangle] - #[cfg_attr(not(all(windows, target_env = "gnu")), linkage = "weak")] + #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } @@ -436,7 +436,7 @@ macro_rules! intrinsics { #[naked] $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - #[cfg_attr(not(all(windows, target_env = "gnu")), linkage = "weak")] + #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } @@ -503,7 +503,7 @@ macro_rules! intrinsics { mod $name { $(#[$($attr)*])* #[no_mangle] - #[cfg_attr(not(all(windows, target_env = "gnu")), linkage = "weak")] + #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] $(unsafe $($empty)?)? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } diff --git a/src/probestack.rs b/src/probestack.rs index 0c30384db..5b6abd21a 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -42,8 +42,8 @@ //! be more than welcome to accept such a change! #![cfg(not(feature = "mangled-names"))] -// Windows already has builtins to do this. -#![cfg(not(windows))] +// Windows and Cygwin already has builtins to do this. +#![cfg(not(any(windows, target_os = "cygwin")))] // All these builtins require assembly #![cfg(not(feature = "no-asm"))] // We only define stack probing for these architectures today. diff --git a/src/x86_64.rs b/src/x86_64.rs index 9c91a4556..aae601f58 100644 --- a/src/x86_64.rs +++ b/src/x86_64.rs @@ -10,7 +10,11 @@ use core::intrinsics; intrinsics! 
{ #[naked] #[cfg(all( - any(all(windows, target_env = "gnu"), target_os = "uefi"), + any( + all(windows, target_env = "gnu"), + target_os = "cygwin", + target_os = "uefi" + ), not(feature = "no-asm") ))] pub unsafe extern "C" fn ___chkstk_ms() { From 7aa9d0be10f9d5378ad53a6c5f7ffd5c5e7d91f1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 5 Mar 2025 06:18:13 +0000 Subject: [PATCH 1229/1459] chore: release v0.1.151 --- CHANGELOG.md | 9 +++++++++ Cargo.toml | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 087a912d7..763b0e10e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.151](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.150...compiler_builtins-v0.1.151) - 2025-03-05 + +### Other + +- Add cygwin support +- Enable `f16` for LoongArch ([#770](https://github.com/rust-lang/compiler-builtins/pull/770)) +- Add __extendhfdf2 and add __truncdfhf2 test +- Remove outdated information from the readme + ## [0.1.150](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.149...compiler_builtins-v0.1.150) - 2025-03-01 ### Other diff --git a/Cargo.toml b/Cargo.toml index 3bf9b8ddd..baef22aa7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.150" +version = "0.1.151" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 56cfe9770bce5f598a18207bf9d23e30e2909f9f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 5 Mar 2025 01:36:52 -0500 Subject: [PATCH 1230/1459] Revert "Add a test config for __gnu_h2f_ieee and __gnu_f2h_ieee" This turned out to not be useful, so remove it. 
This reverts commit b7b93103fb9293c0c502dc1ae34e2ad5c871bc39. --- testcrate/Cargo.toml | 3 +-- testcrate/build.rs | 18 +----------------- testcrate/tests/conv.rs | 4 ++-- 3 files changed, 4 insertions(+), 21 deletions(-) diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index e06864846..91e2f668f 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -44,9 +44,8 @@ no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"] no-sys-f128-int-convert = [] no-sys-f16-f128-convert = [] no-sys-f16-f64-convert = [] -no-sys-f16-gnu-convert = [] # Skip tests that rely on f16 symbols being available on the system -no-sys-f16 = ["no-sys-f16-f64-convert", "no-sys-f16-gnu-convert"] +no-sys-f16 = ["no-sys-f16-f64-convert"] # Enable report generation without bringing in more dependencies by default benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] diff --git a/testcrate/build.rs b/testcrate/build.rs index 868d8700d..15e4e771c 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -12,7 +12,6 @@ enum Feature { NoSysF16, NoSysF16F64Convert, NoSysF16F128Convert, - NoSysF16GnuConvert, } impl Feature { @@ -20,15 +19,9 @@ impl Feature { match self { Self::NoSysF128 => [Self::NoSysF128IntConvert, Self::NoSysF16F128Convert].as_slice(), Self::NoSysF128IntConvert => [].as_slice(), - Self::NoSysF16 => [ - Self::NoSysF16F64Convert, - Self::NoSysF16F128Convert, - Feature::NoSysF16GnuConvert, - ] - .as_slice(), + Self::NoSysF16 => [Self::NoSysF16F64Convert, Self::NoSysF16F128Convert].as_slice(), Self::NoSysF16F64Convert => [].as_slice(), Self::NoSysF16F128Convert => [].as_slice(), - Self::NoSysF16GnuConvert => [].as_slice(), } } } @@ -92,11 +85,6 @@ fn main() { features.insert(Feature::NoSysF16F64Convert); } - // These platforms do not have `__gnu_f2h_ieee` or `__gnu_h2f_ieee`. - if false { - features.insert(Feature::NoSysF16GnuConvert); - } - // Add implied features. Collection is required for borrows. 
features.extend( features @@ -121,10 +109,6 @@ fn main() { "no-sys-f16-f128-convert", "using apfloat fallback for f16 <-> f128 conversions", ), - Feature::NoSysF16GnuConvert => ( - "no-sys-f16-gnu-convert", - "using apfloat fallback for __gnu f16", - ), Feature::NoSysF16 => ("no-sys-f16", "using apfloat fallback for f16"), }; println!("cargo:warning={warning}"); diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index d70663540..db1493cae 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -314,7 +314,7 @@ mod extend { f_to_f! { extend, f16 => f32, Half => Single, __extendhfsf2, not(feature = "no-sys-f16"); - f16 => f32, Half => Single, __gnu_h2f_ieee, not(feature = "no-sys-f16-gnu-convert"); + f16 => f32, Half => Single, __gnu_h2f_ieee, not(feature = "no-sys-f16"); f16 => f64, Half => Double, __extendhfdf2, not(feature = "no-sys-f16-f64-convert"); f16 => f128, Half => Quad, __extendhftf2, not(feature = "no-sys-f16-f128-convert"); f32 => f128, Single => Quad, __extendsftf2, not(feature = "no-sys-f128"); @@ -348,7 +348,7 @@ mod trunc { f_to_f! { trunc, f32 => f16, Single => Half, __truncsfhf2, not(feature = "no-sys-f16"); - f32 => f16, Single => Half, __gnu_f2h_ieee, not(feature = "no-sys-f16-gnu-convert"); + f32 => f16, Single => Half, __gnu_f2h_ieee, not(feature = "no-sys-f16"); f64 => f16, Double => Half, __truncdfhf2, not(feature = "no-sys-f16-f64-convert"); f128 => f16, Quad => Half, __trunctfhf2, not(feature = "no-sys-f16-f128-convert"); f128 => f32, Quad => Single, __trunctfsf2, not(feature = "no-sys-f128"); From 4de6d276afb9ac720bff171c1fa0f8e0f6de11e9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 6 Mar 2025 08:50:35 +0000 Subject: [PATCH 1231/1459] Migrate `testcrate` and `panic-handler` to edition 2024 Includes `extern` -> `unsafe extern` blocks and formatting updates. 
--- crates/panic-handler/Cargo.toml | 2 +- testcrate/Cargo.toml | 2 +- testcrate/benches/float_add.rs | 2 +- testcrate/benches/float_cmp.rs | 2 +- testcrate/benches/float_conv.rs | 2 +- testcrate/benches/float_div.rs | 2 +- testcrate/benches/float_extend.rs | 2 +- testcrate/benches/float_mul.rs | 2 +- testcrate/benches/float_pow.rs | 2 +- testcrate/benches/float_sub.rs | 2 +- testcrate/benches/float_trunc.rs | 2 +- testcrate/benches/mem.rs | 2 +- testcrate/src/bench.rs | 7 ++++++- testcrate/src/lib.rs | 2 +- testcrate/tests/big.rs | 2 +- 15 files changed, 20 insertions(+), 15 deletions(-) diff --git a/crates/panic-handler/Cargo.toml b/crates/panic-handler/Cargo.toml index 2ad858409..96b83eaa2 100644 --- a/crates/panic-handler/Cargo.toml +++ b/crates/panic-handler/Cargo.toml @@ -2,7 +2,7 @@ name = "panic-handler" version = "0.1.0" authors = ["Alex Crichton "] -edition = "2021" +edition = "2024" publish = false [dependencies] diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index 91e2f668f..71c461c57 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -2,7 +2,7 @@ name = "testcrate" version = "0.1.0" authors = ["Alex Crichton "] -edition = "2021" +edition = "2024" publish = false [lib] diff --git a/testcrate/benches/float_add.rs b/testcrate/benches/float_add.rs index 3311e7b5b..a578655f8 100644 --- a/testcrate/benches/float_add.rs +++ b/testcrate/benches/float_add.rs @@ -1,7 +1,7 @@ #![cfg_attr(f128_enabled, feature(f128))] use compiler_builtins::float::add; -use criterion::{criterion_main, Criterion}; +use criterion::{Criterion, criterion_main}; use testcrate::float_bench; float_bench! 
{ diff --git a/testcrate/benches/float_cmp.rs b/testcrate/benches/float_cmp.rs index 400c09b42..4c269e488 100644 --- a/testcrate/benches/float_cmp.rs +++ b/testcrate/benches/float_cmp.rs @@ -1,6 +1,6 @@ #![cfg_attr(f128_enabled, feature(f128))] -use criterion::{criterion_main, Criterion}; +use criterion::{Criterion, criterion_main}; use testcrate::float_bench; use compiler_builtins::float::cmp; diff --git a/testcrate/benches/float_conv.rs b/testcrate/benches/float_conv.rs index 7d8549b43..e3f2af863 100644 --- a/testcrate/benches/float_conv.rs +++ b/testcrate/benches/float_conv.rs @@ -2,7 +2,7 @@ #![cfg_attr(f128_enabled, feature(f128))] use compiler_builtins::float::conv; -use criterion::{criterion_main, Criterion}; +use criterion::{Criterion, criterion_main}; use testcrate::float_bench; /* unsigned int -> float */ diff --git a/testcrate/benches/float_div.rs b/testcrate/benches/float_div.rs index 6a039a82a..c42f3f386 100644 --- a/testcrate/benches/float_div.rs +++ b/testcrate/benches/float_div.rs @@ -1,7 +1,7 @@ #![cfg_attr(f128_enabled, feature(f128))] use compiler_builtins::float::div; -use criterion::{criterion_main, Criterion}; +use criterion::{Criterion, criterion_main}; use testcrate::float_bench; float_bench! 
{ diff --git a/testcrate/benches/float_extend.rs b/testcrate/benches/float_extend.rs index 12f195984..1e7fedefe 100644 --- a/testcrate/benches/float_extend.rs +++ b/testcrate/benches/float_extend.rs @@ -3,7 +3,7 @@ #![cfg_attr(f16_enabled, feature(f16))] use compiler_builtins::float::extend; -use criterion::{criterion_main, Criterion}; +use criterion::{Criterion, criterion_main}; use testcrate::float_bench; #[cfg(f16_enabled)] diff --git a/testcrate/benches/float_mul.rs b/testcrate/benches/float_mul.rs index 6e30b7866..0857a68a2 100644 --- a/testcrate/benches/float_mul.rs +++ b/testcrate/benches/float_mul.rs @@ -1,7 +1,7 @@ #![cfg_attr(f128_enabled, feature(f128))] use compiler_builtins::float::mul; -use criterion::{criterion_main, Criterion}; +use criterion::{Criterion, criterion_main}; use testcrate::float_bench; float_bench! { diff --git a/testcrate/benches/float_pow.rs b/testcrate/benches/float_pow.rs index 46da3f25c..e84fee51c 100644 --- a/testcrate/benches/float_pow.rs +++ b/testcrate/benches/float_pow.rs @@ -1,7 +1,7 @@ #![cfg_attr(f128_enabled, feature(f128))] use compiler_builtins::float::pow; -use criterion::{criterion_main, Criterion}; +use criterion::{Criterion, criterion_main}; use testcrate::float_bench; float_bench! { diff --git a/testcrate/benches/float_sub.rs b/testcrate/benches/float_sub.rs index cdb678eef..7a6c05ea5 100644 --- a/testcrate/benches/float_sub.rs +++ b/testcrate/benches/float_sub.rs @@ -1,7 +1,7 @@ #![cfg_attr(f128_enabled, feature(f128))] use compiler_builtins::float::sub; -use criterion::{criterion_main, Criterion}; +use criterion::{Criterion, criterion_main}; use testcrate::float_bench; float_bench! 
{ diff --git a/testcrate/benches/float_trunc.rs b/testcrate/benches/float_trunc.rs index cb6aee13a..4ceb62ab0 100644 --- a/testcrate/benches/float_trunc.rs +++ b/testcrate/benches/float_trunc.rs @@ -2,7 +2,7 @@ #![cfg_attr(f16_enabled, feature(f16))] use compiler_builtins::float::trunc; -use criterion::{criterion_main, Criterion}; +use criterion::{Criterion, criterion_main}; use testcrate::float_bench; #[cfg(f16_enabled)] diff --git a/testcrate/benches/mem.rs b/testcrate/benches/mem.rs index 98a040958..3f83926b6 100644 --- a/testcrate/benches/mem.rs +++ b/testcrate/benches/mem.rs @@ -1,7 +1,7 @@ #![feature(test)] extern crate test; -use test::{black_box, Bencher}; +use test::{Bencher, black_box}; extern crate compiler_builtins; use compiler_builtins::mem::{memcmp, memcpy, memmove, memset}; diff --git a/testcrate/src/bench.rs b/testcrate/src/bench.rs index f5da1f3ae..45a3a1ad4 100644 --- a/testcrate/src/bench.rs +++ b/testcrate/src/bench.rs @@ -89,6 +89,10 @@ pub fn skip_asm_checks(_test_name: &str) -> bool { /// Create a comparison of the system symbol, compiler_builtins, and optionally handwritten /// assembly. +/// +/// # Safety +/// +/// The signature must be correct and any assembly must be sound. #[macro_export] macro_rules! float_bench { ( @@ -120,8 +124,9 @@ macro_rules! float_bench { ] $(,)? ) => {paste::paste! 
{ + // SAFETY: macro invocation must use the correct signature #[cfg($sys_available)] - extern "C" { + unsafe extern "C" { /// Binding for the system function #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty; diff --git a/testcrate/src/lib.rs b/testcrate/src/lib.rs index 894c2782a..c61618755 100644 --- a/testcrate/src/lib.rs +++ b/testcrate/src/lib.rs @@ -22,8 +22,8 @@ extern crate alloc; use compiler_builtins::float::Float; use compiler_builtins::int::{Int, MinInt}; -use rand_xoshiro::rand_core::{RngCore, SeedableRng}; use rand_xoshiro::Xoshiro128StarStar; +use rand_xoshiro::rand_core::{RngCore, SeedableRng}; /// Sets the number of fuzz iterations run for most tests. In practice, the vast majority of bugs /// are caught by the edge case testers. Most of the remaining bugs triggered by more complex diff --git a/testcrate/tests/big.rs b/testcrate/tests/big.rs index 595f62256..d1ae88bd1 100644 --- a/testcrate/tests/big.rs +++ b/testcrate/tests/big.rs @@ -1,4 +1,4 @@ -use compiler_builtins::int::{i256, u256, HInt, MinInt}; +use compiler_builtins::int::{HInt, MinInt, i256, u256}; const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff; From 9ded1538f3c2d262159fbf4adc483723a5f162f4 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 6 Mar 2025 08:57:10 +0000 Subject: [PATCH 1232/1459] Use the v2 resolver in the workspace --- Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.toml b/Cargo.toml index baef22aa7..9d1448342 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -76,6 +76,7 @@ name = "intrinsics" required-features = ["compiler-builtins"] [workspace] +resolver = "2" members = ["testcrate"] [profile.release] From 7bec089672eb5cd83d7902edd59479527bc9d8d1 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 8 Mar 2025 00:20:34 +0000 Subject: [PATCH 1233/1459] Replace some uses of `sign` with `sig` It seems like "sign" was used as a shortened version of "significand", but that is easy to 
confuse with "sign". Update these to use "sig" like most other places. --- src/float/extend.rs | 22 +++++++++++----------- src/float/trunc.rs | 8 ++++---- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/float/extend.rs b/src/float/extend.rs index f05e3a924..ce00da31d 100644 --- a/src/float/extend.rs +++ b/src/float/extend.rs @@ -15,22 +15,22 @@ where let src_zero = F::Int::ZERO; let src_one = F::Int::ONE; let src_bits = F::BITS; - let src_sign_bits = F::SIG_BITS; + let src_sig_bits = F::SIG_BITS; let src_exp_bias = F::EXP_BIAS; let src_min_normal = F::IMPLICIT_BIT; let src_infinity = F::EXP_MASK; - let src_sign_mask = F::SIGN_MASK as F::Int; + let src_sign_mask = F::SIGN_MASK; let src_abs_mask = src_sign_mask - src_one; let src_qnan = F::SIG_MASK; let src_nan_code = src_qnan - src_one; let dst_bits = R::BITS; - let dst_sign_bits = R::SIG_BITS; + let dst_sig_bits = R::SIG_BITS; let dst_inf_exp = R::EXP_SAT; let dst_exp_bias = R::EXP_BIAS; let dst_min_normal = R::IMPLICIT_BIT; - let sign_bits_delta = dst_sign_bits - src_sign_bits; + let sig_bits_delta = dst_sig_bits - src_sig_bits; let exp_bias_delta = dst_exp_bias - src_exp_bias; let a_abs = a.to_bits() & src_abs_mask; let mut abs_result = R::Int::ZERO; @@ -41,8 +41,8 @@ where // exponent into the proper position and rebiasing the exponent. let abs_dst: R::Int = a_abs.cast(); let bias_dst: R::Int = exp_bias_delta.cast(); - abs_result = abs_dst.wrapping_shl(sign_bits_delta); - abs_result += bias_dst.wrapping_shl(dst_sign_bits); + abs_result = abs_dst.wrapping_shl(sig_bits_delta); + abs_result += bias_dst.wrapping_shl(dst_sig_bits); } else if a_abs >= src_infinity { // a is NaN or infinity. 
// Conjure the result by beginning with infinity, then setting the qNaN @@ -51,9 +51,9 @@ where let qnan_dst: R::Int = (a_abs & src_qnan).cast(); let nan_code_dst: R::Int = (a_abs & src_nan_code).cast(); let inf_exp_dst: R::Int = dst_inf_exp.cast(); - abs_result = inf_exp_dst.wrapping_shl(dst_sign_bits); - abs_result |= qnan_dst.wrapping_shl(sign_bits_delta); - abs_result |= nan_code_dst.wrapping_shl(sign_bits_delta); + abs_result = inf_exp_dst.wrapping_shl(dst_sig_bits); + abs_result |= qnan_dst.wrapping_shl(sig_bits_delta); + abs_result |= nan_code_dst.wrapping_shl(sig_bits_delta); } else if a_abs != src_zero { // a is denormal. // Renormalize the significand and clear the leading bit, then insert @@ -61,8 +61,8 @@ where let scale = a_abs.leading_zeros() - src_min_normal.leading_zeros(); let abs_dst: R::Int = a_abs.cast(); let bias_dst: R::Int = (exp_bias_delta - scale + 1).cast(); - abs_result = abs_dst.wrapping_shl(sign_bits_delta + scale); - abs_result = (abs_result ^ dst_min_normal) | (bias_dst.wrapping_shl(dst_sign_bits)); + abs_result = abs_dst.wrapping_shl(sig_bits_delta + scale); + abs_result = (abs_result ^ dst_min_normal) | (bias_dst.wrapping_shl(dst_sig_bits)); } let sign_result: R::Int = (a.to_bits() & src_sign_mask).cast(); diff --git a/src/float/trunc.rs b/src/float/trunc.rs index 3759aa7dc..928eba0c8 100644 --- a/src/float/trunc.rs +++ b/src/float/trunc.rs @@ -17,7 +17,7 @@ where let src_exp_bias = F::EXP_BIAS; let src_min_normal = F::IMPLICIT_BIT; - let src_significand_mask = F::SIG_MASK; + let src_sig_mask = F::SIG_MASK; let src_infinity = F::EXP_MASK; let src_sign_mask = F::SIGN_MASK; let src_abs_mask = src_sign_mask - src_one; @@ -40,7 +40,7 @@ where let dst_qnan = R::Int::ONE << (R::SIG_BITS - 1); let dst_nan_code = dst_qnan - dst_one; - let sign_bits_delta = F::SIG_BITS - R::SIG_BITS; + let sig_bits_delta = F::SIG_BITS - R::SIG_BITS; // Break a into a sign and representation of the absolute value. 
let a_abs = a.to_bits() & src_abs_mask; let sign = a.to_bits() & src_sign_mask; @@ -50,7 +50,7 @@ where // The exponent of a is within the range of normal numbers in the // destination format. We can convert by simply right-shifting with // rounding and adjusting the exponent. - abs_result = (a_abs >> sign_bits_delta).cast(); + abs_result = (a_abs >> sig_bits_delta).cast(); // Cast before shifting to prevent overflow. let bias_diff: R::Int = src_exp_bias.wrapping_sub(dst_exp_bias).cast(); let tmp = bias_diff << R::SIG_BITS; @@ -85,7 +85,7 @@ where let a_exp: u32 = (a_abs >> F::SIG_BITS).cast(); let shift = src_exp_bias - dst_exp_bias - a_exp + 1; - let significand = (a.to_bits() & src_significand_mask) | src_min_normal; + let significand = (a.to_bits() & src_sig_mask) | src_min_normal; // Right shift by the denormalization amount with sticky. if shift > F::SIG_BITS { From 98aab137ff3bd302d09782a49311464173d58b47 Mon Sep 17 00:00:00 2001 From: Jens Reidel Date: Mon, 17 Mar 2025 02:41:53 +0100 Subject: [PATCH 1234/1459] Revert "Disable some PPC64 tests which are failing due to an LLVM(?) bug" This reverts commit 265fdacab9b3c63b2c17a42fb17c51996c703ef8. 
Fixes: https://github.com/rust-lang/rust/issues/99853 Signed-off-by: Jens Reidel --- testcrate/tests/mem.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/testcrate/tests/mem.rs b/testcrate/tests/mem.rs index 5099d69ed..48ac95adc 100644 --- a/testcrate/tests/mem.rs +++ b/testcrate/tests/mem.rs @@ -230,8 +230,6 @@ fn memmove_backward_aligned() { } } -// PowerPC tests are failing: https://github.com/rust-lang/rust/issues/99853 -#[cfg(not(target_arch = "powerpc64"))] #[test] fn memset_backward_misaligned_nonaligned_start() { let mut arr = gen_arr::<32>(); @@ -244,8 +242,6 @@ fn memset_backward_misaligned_nonaligned_start() { } } -// PowerPC tests are failing: https://github.com/rust-lang/rust/issues/99853 -#[cfg(not(target_arch = "powerpc64"))] #[test] fn memset_backward_misaligned_aligned_start() { let mut arr = gen_arr::<32>(); @@ -258,8 +254,6 @@ fn memset_backward_misaligned_aligned_start() { } } -// PowerPC tests are failing: https://github.com/rust-lang/rust/issues/99853 -#[cfg(not(target_arch = "powerpc64"))] #[test] fn memset_backward_aligned() { let mut arr = gen_arr::<32>(); From 6e91b0346c3dc57f3a2a6b45338013455a2bcded Mon Sep 17 00:00:00 2001 From: Jens Reidel Date: Mon, 17 Mar 2025 21:59:56 +0100 Subject: [PATCH 1235/1459] Revert "Disable broken powerpc64 test due to https://github.com/rust-lang/rust/issues/88520" This reverts commit 55f6ecb6de9e2e10d9187b287b9e87b202d07d1e. 
Fixes: https://github.com/rust-lang/rust/issues/88520 Signed-off-by: Jens Reidel --- testcrate/tests/cmp.rs | 3 --- testcrate/tests/conv.rs | 2 -- 2 files changed, 5 deletions(-) diff --git a/testcrate/tests/cmp.rs b/testcrate/tests/cmp.rs index e3161f374..19d90c664 100644 --- a/testcrate/tests/cmp.rs +++ b/testcrate/tests/cmp.rs @@ -2,11 +2,8 @@ #![allow(unreachable_code)] #![cfg_attr(f128_enabled, feature(f128))] -#[cfg(not(target_arch = "powerpc64"))] use testcrate::*; -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] mod float_comparisons { use super::*; diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index db1493cae..381d3e155 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -141,8 +141,6 @@ mod i_to_f { } } -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] mod f_to_i { use super::*; From 04ec5de0c2e5387ffd6e7f859ce31df646a16730 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 12 Mar 2025 16:36:38 +0100 Subject: [PATCH 1236/1459] remove element_unordered_atomic intrinsics --- src/mem/mod.rs | 132 ------------------------------------------------- 1 file changed, 132 deletions(-) diff --git a/src/mem/mod.rs b/src/mem/mod.rs index f10439e2d..ec160039d 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -8,10 +8,6 @@ type c_int = i16; #[cfg(not(target_pointer_width = "16"))] type c_int = i32; -use core::intrinsics::{atomic_load_unordered, atomic_store_unordered, exact_div}; -use core::mem; -use core::ops::{BitOr, Shl}; - // memcpy/memmove/memset have optimized implementations on some architectures #[cfg_attr( all(not(feature = "no-asm"), target_arch = "x86_64"), @@ -60,131 +56,3 @@ intrinsics! 
{ impls::c_string_length(s) } } - -// `bytes` must be a multiple of `mem::size_of::()` -#[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))] -fn memcpy_element_unordered_atomic(dest: *mut T, src: *const T, bytes: usize) { - unsafe { - let n = exact_div(bytes, mem::size_of::()); - let mut i = 0; - while i < n { - atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); - i += 1; - } - } -} - -// `bytes` must be a multiple of `mem::size_of::()` -#[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))] -fn memmove_element_unordered_atomic(dest: *mut T, src: *const T, bytes: usize) { - unsafe { - let n = exact_div(bytes, mem::size_of::()); - if src < dest as *const T { - // copy from end - let mut i = n; - while i != 0 { - i -= 1; - atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); - } - } else { - // copy from beginning - let mut i = 0; - while i < n { - atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); - i += 1; - } - } - } -} - -// `T` must be a primitive integer type, and `bytes` must be a multiple of `mem::size_of::()` -#[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))] -fn memset_element_unordered_atomic(s: *mut T, c: u8, bytes: usize) -where - T: Copy + From + Shl + BitOr, -{ - unsafe { - let n = exact_div(bytes, mem::size_of::()); - - // Construct a value of type `T` consisting of repeated `c` - // bytes, to let us ensure we write each `T` atomically. - let mut x = T::from(c); - let mut i = 1; - while i < mem::size_of::() { - x = (x << 8) | T::from(c); - i += 1; - } - - // Write it to `s` - let mut i = 0; - while i < n { - atomic_store_unordered(s.add(i), x); - i += 1; - } - } -} - -intrinsics! 
{ - #[cfg(target_has_atomic_load_store = "8")] - pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () { - memcpy_element_unordered_atomic(dest, src, bytes); - } - #[cfg(target_has_atomic_load_store = "16")] - pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () { - memcpy_element_unordered_atomic(dest, src, bytes); - } - #[cfg(target_has_atomic_load_store = "32")] - pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () { - memcpy_element_unordered_atomic(dest, src, bytes); - } - #[cfg(target_has_atomic_load_store = "64")] - pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () { - memcpy_element_unordered_atomic(dest, src, bytes); - } - #[cfg(target_has_atomic_load_store = "128")] - pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () { - memcpy_element_unordered_atomic(dest, src, bytes); - } - - #[cfg(target_has_atomic_load_store = "8")] - pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () { - memmove_element_unordered_atomic(dest, src, bytes); - } - #[cfg(target_has_atomic_load_store = "16")] - pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () { - memmove_element_unordered_atomic(dest, src, bytes); - } - #[cfg(target_has_atomic_load_store = "32")] - pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () { - memmove_element_unordered_atomic(dest, src, bytes); - } - #[cfg(target_has_atomic_load_store = "64")] - pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () { - 
memmove_element_unordered_atomic(dest, src, bytes); - } - #[cfg(target_has_atomic_load_store = "128")] - pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () { - memmove_element_unordered_atomic(dest, src, bytes); - } - - #[cfg(target_has_atomic_load_store = "8")] - pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_1(s: *mut u8, c: u8, bytes: usize) -> () { - memset_element_unordered_atomic(s, c, bytes); - } - #[cfg(target_has_atomic_load_store = "16")] - pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_2(s: *mut u16, c: u8, bytes: usize) -> () { - memset_element_unordered_atomic(s, c, bytes); - } - #[cfg(target_has_atomic_load_store = "32")] - pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_4(s: *mut u32, c: u8, bytes: usize) -> () { - memset_element_unordered_atomic(s, c, bytes); - } - #[cfg(target_has_atomic_load_store = "64")] - pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_8(s: *mut u64, c: u8, bytes: usize) -> () { - memset_element_unordered_atomic(s, c, bytes); - } - #[cfg(target_has_atomic_load_store = "128")] - pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_16(s: *mut u128, c: u8, bytes: usize) -> () { - memset_element_unordered_atomic(s, c, bytes); - } -} From 942ab9fc37891a29ae1c1d65db623acb4f4714a1 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 18 Mar 2025 10:13:47 +0000 Subject: [PATCH 1237/1459] Move `examples/intrinsics.rs` to its own crate Currently there is an interesting situation with the way features get enabled; `testcrate` enables `mangled-names`, but the `intrinsics.rs` example requires this feature be disabled (otherwise the test fails with missing symbols, as expected). 
This is also the reason that `testcrate` is not a default workspace member, meaning `cargo test` doesn't actually run `testcrate`'s tests; making it a default member would mean that `compiler-builtins/mangled-names` gets enabled when `examples/intrinsics.rs` gets built, due to the way features get unified. Simplify the situation by moving the example to its own crate as `builtins-test-intrinsics`. This also means `testcrate` can become a default member so it is included in `cargo check` or `cargo test` when run at the workspace root. `testcrate` and `builtins-test-intrinsics` still can't be built at the same time since there isn't a straightforward way to have Cargo build `compiler-builtins` twice with different features. This is a side effect of us using non-additive features, but there isn't really a better option since enabling both mangled and unmangled names would render `builtins-test-intrinsics` useless. --- Cargo.toml | 17 +++++++++----- README.md | 4 ++-- build.rs | 17 ++------------ builtins-test-intrinsics/Cargo.toml | 11 ++++++++++ builtins-test-intrinsics/build.rs | 11 ++++++++++ .../src/main.rs | 0 ci/run.sh | 18 +++++++-------- configure.rs | 22 ++++++++++++++++++- testcrate/build.rs | 1 + 9 files changed, 69 insertions(+), 32 deletions(-) create mode 100644 builtins-test-intrinsics/Cargo.toml create mode 100644 builtins-test-intrinsics/build.rs rename examples/intrinsics.rs => builtins-test-intrinsics/src/main.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index 9d1448342..60de27758 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -71,13 +71,20 @@ rustc-dep-of-std = ['compiler-builtins', 'core'] # are not normally public but are required by the `testcrate` public-test-deps = [] -[[example]] -name = "intrinsics" -required-features = ["compiler-builtins"] - [workspace] resolver = "2" -members = ["testcrate"] +members = [ + # Note that builtins-test-intrinsics cannot be a default member because it + # needs the `mangled-names` feature disabled, while
`testcrate` needs it + # enabled. + "builtins-test-intrinsics", + "testcrate", +] + +default-members = [ + ".", + "testcrate", +] [profile.release] panic = 'abort' diff --git a/README.md b/README.md index d76bd289b..e5350d58c 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,8 @@ It is distributed as part of Rust's sysroot. [C implementation][2] to Rust. 4. Add a test to compare the behavior of the ported intrinsic(s) with their implementation on the testing host. -5. Add the intrinsic to `examples/intrinsics.rs` to verify it can be linked on - all targets. +5. Add the intrinsic to `builtins-test-intrinsics/src/main.rs` to verify it + can be linked on all targets. 6. Send a Pull Request (PR). 7. Once the PR passes our extensive testing infrastructure, we'll merge it! 8. Celebrate :tada: diff --git a/build.rs b/build.rs index 3c04423f2..369354a1d 100644 --- a/build.rs +++ b/build.rs @@ -2,7 +2,7 @@ use std::{collections::BTreeMap, env, path::PathBuf, sync::atomic::Ordering}; mod configure; -use configure::{configure_f16_f128, Target}; +use configure::{configure_aliases, configure_f16_f128, Target}; fn main() { println!("cargo::rerun-if-changed=build.rs"); @@ -13,6 +13,7 @@ fn main() { configure_check_cfg(); configure_f16_f128(&target); + configure_aliases(&target); configure_libm(&target); @@ -71,20 +72,6 @@ fn main() { } } - // To compile intrinsics.rs for thumb targets, where there is no libc - println!("cargo::rustc-check-cfg=cfg(thumb)"); - if llvm_target[0].starts_with("thumb") { - println!("cargo:rustc-cfg=thumb") - } - - // compiler-rt `cfg`s away some intrinsics for thumbv6m and thumbv8m.base because - // these targets do not have full Thumb-2 support but only original Thumb-1. - // We have to cfg our code accordingly. - println!("cargo::rustc-check-cfg=cfg(thumb_1)"); - if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" { - println!("cargo:rustc-cfg=thumb_1") - } - // Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures. 
This // includes the old androideabi. It is deprecated but it is available as a // rustc target (arm-linux-androideabi). diff --git a/builtins-test-intrinsics/Cargo.toml b/builtins-test-intrinsics/Cargo.toml new file mode 100644 index 000000000..8c7cca4bd --- /dev/null +++ b/builtins-test-intrinsics/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "builtins-test-intrinsics" +version = "0.1.0" +edition = "2021" + +[dependencies] +compiler_builtins = { path = "../", features = ["compiler-builtins"]} +panic-handler = { path = '../crates/panic-handler' } + +[features] +c = ["compiler_builtins/c"] diff --git a/builtins-test-intrinsics/build.rs b/builtins-test-intrinsics/build.rs new file mode 100644 index 000000000..a38c6c1ff --- /dev/null +++ b/builtins-test-intrinsics/build.rs @@ -0,0 +1,11 @@ +mod builtins_configure { + include!("../configure.rs"); +} + +fn main() { + println!("cargo::rerun-if-changed=../configure.rs"); + + let target = builtins_configure::Target::from_env(); + builtins_configure::configure_f16_f128(&target); + builtins_configure::configure_aliases(&target); +} diff --git a/examples/intrinsics.rs b/builtins-test-intrinsics/src/main.rs similarity index 100% rename from examples/intrinsics.rs rename to builtins-test-intrinsics/src/main.rs diff --git a/ci/run.sh b/ci/run.sh index 057cdb083..3625dde79 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -120,22 +120,22 @@ done rm -f "${rlib_paths[@]}" -build_intrinsics() { - cargo build --target "$target" -v --example intrinsics "$@" +build_intrinsics_test() { + cargo build --target "$target" -v --package builtins-test-intrinsics "$@" } -# Verify that we haven't drop any intrinsic/symbol -build_intrinsics -build_intrinsics --release -build_intrinsics --features c -build_intrinsics --features c --release +# Verify that we haven't dropped any intrinsics/symbols +build_intrinsics_test +build_intrinsics_test --release +build_intrinsics_test --features c +build_intrinsics_test --features c --release # Verify that there 
are no undefined symbols to `panic` within our # implementations CARGO_PROFILE_DEV_LTO=true \ - cargo build --target "$target" --example intrinsics + cargo build --target "$target" --package builtins-test-intrinsics CARGO_PROFILE_RELEASE_LTO=true \ - cargo build --target "$target" --example intrinsics --release + cargo build --target "$target" --package builtins-test-intrinsics --release # Ensure no references to any symbols from core update_rlib_paths diff --git a/configure.rs b/configure.rs index 931e96629..2f134e578 100644 --- a/configure.rs +++ b/configure.rs @@ -6,6 +6,7 @@ use std::env; #[allow(dead_code)] pub struct Target { pub triple: String, + pub triple_split: Vec, pub opt_level: String, pub cargo_features: Vec, pub os: String, @@ -19,6 +20,8 @@ pub struct Target { impl Target { pub fn from_env() -> Self { + let triple = env::var("TARGET").unwrap(); + let triple_split = triple.split('-').map(ToOwned::to_owned).collect(); let little_endian = match env::var("CARGO_CFG_TARGET_ENDIAN").unwrap().as_str() { "little" => true, "big" => false, @@ -30,7 +33,8 @@ impl Target { .collect(); Self { - triple: env::var("TARGET").unwrap(), + triple, + triple_split, os: env::var("CARGO_CFG_TARGET_OS").unwrap(), opt_level: env::var("OPT_LEVEL").unwrap(), cargo_features, @@ -56,6 +60,22 @@ impl Target { } } +pub fn configure_aliases(target: &Target) { + // To compile builtins-test-intrinsics for thumb targets, where there is no libc + println!("cargo::rustc-check-cfg=cfg(thumb)"); + if target.triple_split[0].starts_with("thumb") { + println!("cargo:rustc-cfg=thumb") + } + + // compiler-rt `cfg`s away some intrinsics for thumbv6m and thumbv8m.base because + // these targets do not have full Thumb-2 support but only original Thumb-1. + // We have to cfg our code accordingly. 
+ println!("cargo::rustc-check-cfg=cfg(thumb_1)"); + if target.triple_split[0] == "thumbv6m" || target.triple_split[0] == "thumbv8m.base" { + println!("cargo:rustc-cfg=thumb_1") + } +} + /// Configure whether or not `f16` and `f128` support should be enabled. pub fn configure_f16_f128(target: &Target) { // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means diff --git a/testcrate/build.rs b/testcrate/build.rs index 15e4e771c..566b985d2 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -115,5 +115,6 @@ fn main() { println!("cargo:rustc-cfg=feature=\"{name}\""); } + builtins_configure::configure_aliases(&target); builtins_configure::configure_f16_f128(&target); } From b833653714d676654fcc8c231d2763e59ebd72fb Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 18 Mar 2025 10:39:26 +0000 Subject: [PATCH 1238/1459] Mark `builtins-test-intrinsics` as `publish = false` --- builtins-test-intrinsics/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/builtins-test-intrinsics/Cargo.toml b/builtins-test-intrinsics/Cargo.toml index 8c7cca4bd..9b2e5bb7c 100644 --- a/builtins-test-intrinsics/Cargo.toml +++ b/builtins-test-intrinsics/Cargo.toml @@ -2,6 +2,7 @@ name = "builtins-test-intrinsics" version = "0.1.0" edition = "2021" +publish = false [dependencies] compiler_builtins = { path = "../", features = ["compiler-builtins"]} From 571ce5ff04a1eb54fc5743012ef0b8a2ffbe5f82 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 18 Mar 2025 11:14:04 +0000 Subject: [PATCH 1239/1459] Add a script for downloading compiler-rt Rather than needing to copy the version and URL from the CI workflow, put this into a script that can be directly run locally. 
--- .github/workflows/main.yml | 7 ++----- build.rs | 5 ++++- ci/download-compiler-rt.sh | 10 ++++++++++ 3 files changed, 16 insertions(+), 6 deletions(-) create mode 100755 ci/download-compiler-rt.sh diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1c367a2d0..34742e349 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -4,7 +4,6 @@ on: [push, pull_request] env: RUSTDOCFLAGS: -Dwarnings RUSTFLAGS: -Dwarnings - RUST_LLVM_VERSION: 20.1-2025-02-13 RUST_COMPILER_RT_ROOT: ./compiler-rt jobs: @@ -129,12 +128,10 @@ jobs: uses: actions/cache@v4 with: path: compiler-rt - key: ${{ runner.os }}-compiler-rt-${{ env.RUST_LLVM_VERSION }} + key: ${{ runner.os }}-compiler-rt-${{ hashFiles('ci/download-compiler-rt.sh') }} - name: Download compiler-rt reference sources if: steps.cache-compiler-rt.outputs.cache-hit != 'true' - run: | - curl -L -o code.tar.gz "https://github.com/rust-lang/llvm-project/archive/rustc/${RUST_LLVM_VERSION}.tar.gz" - tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-${RUST_LLVM_VERSION}/compiler-rt + run: ./ci/download-compiler-rt.sh shell: bash # Non-linux tests just use our raw script diff --git a/build.rs b/build.rs index 369354a1d..3003d51af 100644 --- a/build.rs +++ b/build.rs @@ -619,7 +619,10 @@ mod c { let root = match env::var_os("RUST_COMPILER_RT_ROOT") { Some(s) => PathBuf::from(s), None => { - panic!("RUST_COMPILER_RT_ROOT is not set. You may need to download compiler-rt.") + panic!( + "RUST_COMPILER_RT_ROOT is not set. You may need to run \ + `ci/download-compiler-rt.sh`." + ); } }; if !root.exists() { diff --git a/ci/download-compiler-rt.sh b/ci/download-compiler-rt.sh new file mode 100755 index 000000000..bf7f8c248 --- /dev/null +++ b/ci/download-compiler-rt.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# Download sources to build C versions of intrinsics. Once being run, +# `RUST_COMPILER_RT_ROOT` must be set. 
+ +set -eux + +rust_llvm_version=20.1-2025-02-13 + +curl -L -o code.tar.gz "https://github.com/rust-lang/llvm-project/archive/rustc/${rust_llvm_version}.tar.gz" +tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-${rust_llvm_version}/compiler-rt From 45007cc2c5cffdcc48cc40f8ce746e749f4a8307 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 18 Mar 2025 22:24:20 +0100 Subject: [PATCH 1240/1459] nightlies without clippy are not a thing any more --- .github/workflows/main.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 34742e349..50844a66c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -178,10 +178,9 @@ jobs: with: submodules: true # Unlike rustfmt, stable clippy does not work on code with nightly features. - # This acquires the most recent nightly with a clippy component. - name: Install nightly `clippy` run: | - rustup set profile minimal && rustup default "nightly-$(curl -s https://rust-lang.github.io/rustup-components-history/x86_64-unknown-linux-gnu/clippy)" && rustup component add clippy + rustup set profile minimal && rustup default nightly && rustup component add clippy - uses: Swatinem/rust-cache@v2 - run: cargo clippy -- -D clippy::all From 0bb8532e4abd497e334e2cc41bfe825339cb3815 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 18 Mar 2025 23:51:19 +0000 Subject: [PATCH 1241/1459] Upgrade all dependencies to the latest This is mostly done to get the latest version of `rand`, which includes some breaking changes. 
--- libm/Cargo.toml | 3 +-- libm/crates/libm-macros/Cargo.toml | 6 +++--- libm/crates/libm-test/Cargo.toml | 14 +++++--------- libm/crates/libm-test/src/gen/random.rs | 10 +++++----- libm/crates/libm-test/tests/u256.rs | 10 +++++----- libm/crates/musl-math-sys/Cargo.toml | 2 +- 6 files changed, 20 insertions(+), 25 deletions(-) diff --git a/libm/Cargo.toml b/libm/Cargo.toml index eb133dada..e0aeb07d5 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -59,8 +59,7 @@ exclude = [ ] [dev-dependencies] -no-panic = "0.1.33" - +no-panic = "0.1.35" [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = [ diff --git a/libm/crates/libm-macros/Cargo.toml b/libm/crates/libm-macros/Cargo.toml index f0de0e176..314f4ae37 100644 --- a/libm/crates/libm-macros/Cargo.toml +++ b/libm/crates/libm-macros/Cargo.toml @@ -9,9 +9,9 @@ proc-macro = true [dependencies] heck = "0.5.0" -proc-macro2 = "1.0.93" -quote = "1.0.38" -syn = { version = "2.0.96", features = ["full", "extra-traits", "visit-mut"] } +proc-macro2 = "1.0.94" +quote = "1.0.40" +syn = { version = "2.0.100", features = ["full", "extra-traits", "visit-mut"] } [lints.rust] # Values used during testing diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index dcbddb667..98da73cea 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -27,26 +27,22 @@ icount = ["dep:iai-callgrind"] short-benchmarks = [] [dependencies] -anyhow = "1.0.95" +anyhow = "1.0.97" # This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`. 
gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false } iai-callgrind = { version = "0.14.0", optional = true } -indicatif = { version = "0.17.9", default-features = false } +indicatif = { version = "0.17.11", default-features = false } libm = { path = "../..", features = ["unstable-public-internals"] } libm-macros = { path = "../libm-macros" } musl-math-sys = { path = "../musl-math-sys", optional = true } paste = "1.0.15" -rand = "0.8.5" -rand_chacha = "0.3.1" +rand = "0.9.0" +rand_chacha = "0.9.0" rayon = "1.10.0" rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "integer", "std"] } -[target.'cfg(target_family = "wasm")'.dependencies] -# Enable randomness on WASM -getrandom = { version = "0.2", features = ["js"] } - [build-dependencies] -rand = { version = "0.8.5", optional = true } +rand = { version = "0.9.0", optional = true } [dev-dependencies] criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] } diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/gen/random.rs index c2cd172d1..e8a7ee905 100644 --- a/libm/crates/libm-test/src/gen/random.rs +++ b/libm/crates/libm-test/src/gen/random.rs @@ -3,7 +3,7 @@ use std::ops::RangeInclusive; use std::sync::LazyLock; use libm::support::Float; -use rand::distributions::{Alphanumeric, Standard}; +use rand::distr::{Alphanumeric, StandardUniform}; use rand::prelude::Distribution; use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; @@ -16,7 +16,7 @@ pub(crate) const SEED_ENV: &str = "LIBM_SEED"; pub static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| { let s = env::var(SEED_ENV).unwrap_or_else(|_| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); (0..32).map(|_| rng.sample(Alphanumeric) as char).collect() }); @@ -33,19 +33,19 @@ pub trait RandomInput: Sized { /// Generate a sequence of deterministically random floats. 
fn random_floats(count: u64) -> impl Iterator where - Standard: Distribution, + StandardUniform: Distribution, { let mut rng = ChaCha8Rng::from_seed(*SEED); // Generate integers to get a full range of bitpatterns (including NaNs), then convert back // to the float type. - (0..count).map(move |_| F::from_bits(rng.gen::())) + (0..count).map(move |_| F::from_bits(rng.random::())) } /// Generate a sequence of deterministically random `i32`s within a specified range. fn random_ints(count: u64, range: RangeInclusive) -> impl Iterator { let mut rng = ChaCha8Rng::from_seed(*SEED); - (0..count).map(move |_| rng.gen_range::(range.clone())) + (0..count).map(move |_| rng.random_range::(range.clone())) } macro_rules! impl_random_input { diff --git a/libm/crates/libm-test/tests/u256.rs b/libm/crates/libm-test/tests/u256.rs index 4174820c0..460353424 100644 --- a/libm/crates/libm-test/tests/u256.rs +++ b/libm/crates/libm-test/tests/u256.rs @@ -25,8 +25,8 @@ fn hexu(v: u256) -> String { } fn random_u256(rng: &mut ChaCha8Rng) -> u256 { - let lo: u128 = rng.gen(); - let hi: u128 = rng.gen(); + let lo: u128 = rng.random(); + let hi: u128 = rng.random(); u256 { lo, hi } } @@ -121,7 +121,7 @@ fn mp_u256_shr() { for _ in 0..bigint_fuzz_iteration_count() { let x = random_u256(&mut rng); - let shift: u32 = rng.gen_range(0..255); + let shift: u32 = rng.random_range(0..255); assign_bigint(&mut bx, x); let actual = x >> shift; bx >>= shift; @@ -136,8 +136,8 @@ fn mp_u256_widen_mul() { let mut by = BigInt::new(); for _ in 0..bigint_fuzz_iteration_count() { - let x: u128 = rng.gen(); - let y: u128 = rng.gen(); + let x: u128 = rng.random(); + let y: u128 = rng.random(); bx.assign(x); by.assign(y); let actual = x.widen_mul(y); diff --git a/libm/crates/musl-math-sys/Cargo.toml b/libm/crates/musl-math-sys/Cargo.toml index cde78fd3c..34682b74c 100644 --- a/libm/crates/musl-math-sys/Cargo.toml +++ b/libm/crates/musl-math-sys/Cargo.toml @@ -10,4 +10,4 @@ publish = false libm = { path = "../../" } 
[build-dependencies] -cc = "1.2.10" +cc = "1.2.16" From cdf8cab99cfcb98982eaa230a2603f21c08b8ae2 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 19 Mar 2025 04:02:54 +0000 Subject: [PATCH 1242/1459] Temporarily disable the test call to `rust_begin_unwind` Since [1] this symbol is mangled, meaning it is not easy to call directly. A better fix will come in [2] but for now, just disable that portion of the test. [1]: https://github.com/rust-lang/rust/pull/127173 [2]: https://github.com/rust-lang/compiler-builtins/pull/802 --- builtins-test-intrinsics/src/main.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/builtins-test-intrinsics/src/main.rs b/builtins-test-intrinsics/src/main.rs index e90cfb33d..21d0a083c 100644 --- a/builtins-test-intrinsics/src/main.rs +++ b/builtins-test-intrinsics/src/main.rs @@ -626,13 +626,14 @@ fn run() { something_with_a_dtor(&|| assert_eq!(bb(1), 1)); - extern "C" { - fn rust_begin_unwind(x: usize); - } - - unsafe { - rust_begin_unwind(0); - } + // FIXME(#802): This should be re-enabled once a workaround is found. + // extern "C" { + // fn rust_begin_unwind(x: usize); + // } + + // unsafe { + // rust_begin_unwind(0); + // } } fn something_with_a_dtor(f: &dyn Fn()) { From 3806cd05c2635a53804b6a1006a25d3504421f5b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 19 Mar 2025 05:19:17 +0000 Subject: [PATCH 1243/1459] Switch repository layout to use a virtual manifest The current setup has the `Cargo.toml` for `compiler-builtins` at the repository root, which means all support crates and other files are located within the package root. This works for now but is not the cleanest setup since files that should or shouldn't be included in the package need to be configured in `Cargo.toml`. If we eventually merge `libm` development into this repository, it would be nice to make this separation more straightforward. 
Begin cleaning things up by moving the crate source to a new `compiler-builtins` directory and adding a virtual manifest. For now the `libm` submodule is also moved, but in the future it can likely move back to the top level (ideally `compiler-builtins/src` would contain a symlink to `libm/src/math`, but unfortunately it seems like Cargo does not like something about the submodule + symlink combination). --- .github/workflows/main.yml | 3 +- .github/workflows/publish.yml | 2 +- .gitmodules | 4 +- Cargo.toml | 80 +------------------ builtins-test-intrinsics/Cargo.toml | 2 +- builtins-test-intrinsics/build.rs | 2 +- ci/run-docker.sh | 2 +- ci/run.sh | 4 +- compiler-builtins/Cargo.toml | 72 +++++++++++++++++ build.rs => compiler-builtins/build.rs | 4 +- .../configure.rs | 0 libm => compiler-builtins/libm | 0 {src => compiler-builtins/src}/aarch64.rs | 0 .../src}/aarch64_linux.rs | 0 {src => compiler-builtins/src}/arm.rs | 0 {src => compiler-builtins/src}/arm_linux.rs | 0 {src => compiler-builtins/src}/float/add.rs | 0 {src => compiler-builtins/src}/float/cmp.rs | 0 {src => compiler-builtins/src}/float/conv.rs | 0 {src => compiler-builtins/src}/float/div.rs | 0 .../src}/float/extend.rs | 0 {src => compiler-builtins/src}/float/mod.rs | 0 {src => compiler-builtins/src}/float/mul.rs | 0 {src => compiler-builtins/src}/float/pow.rs | 0 {src => compiler-builtins/src}/float/sub.rs | 0 .../src}/float/traits.rs | 0 {src => compiler-builtins/src}/float/trunc.rs | 0 {src => compiler-builtins/src}/hexagon.rs | 0 .../src}/hexagon/dfaddsub.s | 0 .../src}/hexagon/dfdiv.s | 0 .../src}/hexagon/dffma.s | 0 .../src}/hexagon/dfminmax.s | 0 .../src}/hexagon/dfmul.s | 0 .../src}/hexagon/dfsqrt.s | 0 .../src}/hexagon/divdi3.s | 0 .../src}/hexagon/divsi3.s | 0 .../src}/hexagon/fastmath2_dlib_asm.s | 0 .../src}/hexagon/fastmath2_ldlib_asm.s | 0 .../src}/hexagon/func_macro.s | 0 .../src}/hexagon/memcpy_forward_vp4cp4n2.s | 0 .../src}/hexagon/memcpy_likely_aligned.s | 0 .../src}/hexagon/moddi3.s 
| 0 .../src}/hexagon/modsi3.s | 0 .../src}/hexagon/sfdiv_opt.s | 0 .../src}/hexagon/sfsqrt_opt.s | 0 .../src}/hexagon/udivdi3.s | 0 .../src}/hexagon/udivmoddi4.s | 0 .../src}/hexagon/udivmodsi4.s | 0 .../src}/hexagon/udivsi3.s | 0 .../src}/hexagon/umoddi3.s | 0 .../src}/hexagon/umodsi3.s | 0 {src => compiler-builtins/src}/int/addsub.rs | 0 {src => compiler-builtins/src}/int/big.rs | 0 {src => compiler-builtins/src}/int/bswap.rs | 0 .../src}/int/leading_zeros.rs | 0 {src => compiler-builtins/src}/int/mod.rs | 0 {src => compiler-builtins/src}/int/mul.rs | 0 {src => compiler-builtins/src}/int/sdiv.rs | 0 {src => compiler-builtins/src}/int/shift.rs | 0 .../int/specialized_div_rem/asymmetric.rs | 0 .../int/specialized_div_rem/binary_long.rs | 0 .../src}/int/specialized_div_rem/delegate.rs | 0 .../src}/int/specialized_div_rem/mod.rs | 0 .../int/specialized_div_rem/norm_shift.rs | 0 .../src}/int/specialized_div_rem/trifecta.rs | 0 .../src}/int/trailing_zeros.rs | 0 {src => compiler-builtins/src}/int/traits.rs | 0 {src => compiler-builtins/src}/int/udiv.rs | 0 {src => compiler-builtins/src}/lib.miri.rs | 0 {src => compiler-builtins/src}/lib.rs | 3 +- {src => compiler-builtins/src}/macros.rs | 0 {src => compiler-builtins/src}/math.rs | 0 {src => compiler-builtins/src}/mem/impls.rs | 0 {src => compiler-builtins/src}/mem/mod.rs | 0 {src => compiler-builtins/src}/mem/x86_64.rs | 0 {src => compiler-builtins/src}/probestack.rs | 0 {src => compiler-builtins/src}/riscv.rs | 0 {src => compiler-builtins/src}/x86.rs | 0 {src => compiler-builtins/src}/x86_64.rs | 0 crates/panic-handler/Cargo.toml | 4 + testcrate/Cargo.toml | 2 +- testcrate/build.rs | 2 +- 82 files changed, 95 insertions(+), 91 deletions(-) create mode 100644 compiler-builtins/Cargo.toml rename build.rs => compiler-builtins/build.rs (100%) rename configure.rs => compiler-builtins/configure.rs (100%) rename libm => compiler-builtins/libm (100%) rename {src => compiler-builtins/src}/aarch64.rs (100%) rename {src => 
compiler-builtins/src}/aarch64_linux.rs (100%) rename {src => compiler-builtins/src}/arm.rs (100%) rename {src => compiler-builtins/src}/arm_linux.rs (100%) rename {src => compiler-builtins/src}/float/add.rs (100%) rename {src => compiler-builtins/src}/float/cmp.rs (100%) rename {src => compiler-builtins/src}/float/conv.rs (100%) rename {src => compiler-builtins/src}/float/div.rs (100%) rename {src => compiler-builtins/src}/float/extend.rs (100%) rename {src => compiler-builtins/src}/float/mod.rs (100%) rename {src => compiler-builtins/src}/float/mul.rs (100%) rename {src => compiler-builtins/src}/float/pow.rs (100%) rename {src => compiler-builtins/src}/float/sub.rs (100%) rename {src => compiler-builtins/src}/float/traits.rs (100%) rename {src => compiler-builtins/src}/float/trunc.rs (100%) rename {src => compiler-builtins/src}/hexagon.rs (100%) rename {src => compiler-builtins/src}/hexagon/dfaddsub.s (100%) rename {src => compiler-builtins/src}/hexagon/dfdiv.s (100%) rename {src => compiler-builtins/src}/hexagon/dffma.s (100%) rename {src => compiler-builtins/src}/hexagon/dfminmax.s (100%) rename {src => compiler-builtins/src}/hexagon/dfmul.s (100%) rename {src => compiler-builtins/src}/hexagon/dfsqrt.s (100%) rename {src => compiler-builtins/src}/hexagon/divdi3.s (100%) rename {src => compiler-builtins/src}/hexagon/divsi3.s (100%) rename {src => compiler-builtins/src}/hexagon/fastmath2_dlib_asm.s (100%) rename {src => compiler-builtins/src}/hexagon/fastmath2_ldlib_asm.s (100%) rename {src => compiler-builtins/src}/hexagon/func_macro.s (100%) rename {src => compiler-builtins/src}/hexagon/memcpy_forward_vp4cp4n2.s (100%) rename {src => compiler-builtins/src}/hexagon/memcpy_likely_aligned.s (100%) rename {src => compiler-builtins/src}/hexagon/moddi3.s (100%) rename {src => compiler-builtins/src}/hexagon/modsi3.s (100%) rename {src => compiler-builtins/src}/hexagon/sfdiv_opt.s (100%) rename {src => compiler-builtins/src}/hexagon/sfsqrt_opt.s (100%) rename {src => 
compiler-builtins/src}/hexagon/udivdi3.s (100%) rename {src => compiler-builtins/src}/hexagon/udivmoddi4.s (100%) rename {src => compiler-builtins/src}/hexagon/udivmodsi4.s (100%) rename {src => compiler-builtins/src}/hexagon/udivsi3.s (100%) rename {src => compiler-builtins/src}/hexagon/umoddi3.s (100%) rename {src => compiler-builtins/src}/hexagon/umodsi3.s (100%) rename {src => compiler-builtins/src}/int/addsub.rs (100%) rename {src => compiler-builtins/src}/int/big.rs (100%) rename {src => compiler-builtins/src}/int/bswap.rs (100%) rename {src => compiler-builtins/src}/int/leading_zeros.rs (100%) rename {src => compiler-builtins/src}/int/mod.rs (100%) rename {src => compiler-builtins/src}/int/mul.rs (100%) rename {src => compiler-builtins/src}/int/sdiv.rs (100%) rename {src => compiler-builtins/src}/int/shift.rs (100%) rename {src => compiler-builtins/src}/int/specialized_div_rem/asymmetric.rs (100%) rename {src => compiler-builtins/src}/int/specialized_div_rem/binary_long.rs (100%) rename {src => compiler-builtins/src}/int/specialized_div_rem/delegate.rs (100%) rename {src => compiler-builtins/src}/int/specialized_div_rem/mod.rs (100%) rename {src => compiler-builtins/src}/int/specialized_div_rem/norm_shift.rs (100%) rename {src => compiler-builtins/src}/int/specialized_div_rem/trifecta.rs (100%) rename {src => compiler-builtins/src}/int/trailing_zeros.rs (100%) rename {src => compiler-builtins/src}/int/traits.rs (100%) rename {src => compiler-builtins/src}/int/udiv.rs (100%) rename {src => compiler-builtins/src}/lib.miri.rs (100%) rename {src => compiler-builtins/src}/lib.rs (96%) rename {src => compiler-builtins/src}/macros.rs (100%) rename {src => compiler-builtins/src}/math.rs (100%) rename {src => compiler-builtins/src}/mem/impls.rs (100%) rename {src => compiler-builtins/src}/mem/mod.rs (100%) rename {src => compiler-builtins/src}/mem/x86_64.rs (100%) rename {src => compiler-builtins/src}/probestack.rs (100%) rename {src => 
compiler-builtins/src}/riscv.rs (100%) rename {src => compiler-builtins/src}/x86.rs (100%) rename {src => compiler-builtins/src}/x86_64.rs (100%) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 50844a66c..c337c26a2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -4,7 +4,6 @@ on: [push, pull_request] env: RUSTDOCFLAGS: -Dwarnings RUSTFLAGS: -Dwarnings - RUST_COMPILER_RT_ROOT: ./compiler-rt jobs: test: @@ -133,6 +132,8 @@ jobs: if: steps.cache-compiler-rt.outputs.cache-hit != 'true' run: ./ci/download-compiler-rt.sh shell: bash + - run: echo "RUST_COMPILER_RT_ROOT=$(realpath ./compiler-rt)" >> "$GITHUB_ENV" + shell: bash # Non-linux tests just use our raw script - run: ./ci/run.sh ${{ matrix.target }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index d568f3757..7d6a8df03 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -22,7 +22,7 @@ jobs: - name: Install Rust (rustup) run: rustup update nightly --no-self-update && rustup default nightly - name: Publish `libm` as part of builtins, rather than its own crate - run: rm libm/Cargo.toml + run: rm compiler-builtins/libm/Cargo.toml - name: Run release-plz uses: MarcoIeni/release-plz-action@v0.5 env: diff --git a/.gitmodules b/.gitmodules index 726b1c5c6..a0b0d021d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ -[submodule "libm"] - path = libm +[submodule "compiler-builtins/libm"] + path = compiler-builtins/libm url = https://github.com/rust-lang/libm.git diff --git a/Cargo.toml b/Cargo.toml index 60de27758..db4c45dfa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,88 +1,16 @@ -[package] -authors = ["Jorge Aparicio "] -name = "compiler_builtins" -version = "0.1.151" -license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" -readme = "README.md" -repository = "https://github.com/rust-lang/compiler-builtins" -homepage = "https://github.com/rust-lang/compiler-builtins" 
-documentation = "https://docs.rs/compiler_builtins" -edition = "2021" -description = """ -Compiler intrinsics used by the Rust compiler. Also available for other targets -if necessary! -""" -include = [ - '/Cargo.toml', - '/build.rs', - '/configure.rs', - '/src/*', - '/examples/*', - '/LICENSE.txt', - '/README.md', - '/compiler-rt/*', - '/libm/src/math/*', -] -links = 'compiler-rt' - -[lib] -test = false - -[dependencies] -# For more information on this dependency see -# https://github.com/rust-lang/rust/tree/master/library/rustc-std-workspace-core -core = { version = "1.0.0", optional = true, package = 'rustc-std-workspace-core' } - -[build-dependencies] -cc = { optional = true, version = "1.0" } - -[dev-dependencies] -panic-handler = { path = 'crates/panic-handler' } - -[features] -default = ["compiler-builtins"] - -# Enable compilation of C code in compiler-rt, filling in some more optimized -# implementations and also filling in unimplemented intrinsics -c = ["cc"] - -# Workaround for the Cranelift codegen backend. Disables any implementations -# which use inline assembly and fall back to pure Rust versions (if avalible). -no-asm = [] - -# Workaround for codegen backends which haven't yet implemented `f16` and -# `f128` support. Disabled any intrinsics which use those types. -no-f16-f128 = [] - -# Flag this library as the unstable compiler-builtins lib -compiler-builtins = [] - -# Generate memory-related intrinsics like memcpy -mem = [] - -# Mangle all names so this can be linked in with other versions or other -# compiler-rt implementations. 
Also used for testing -mangled-names = [] - -# Only used in the compiler's build system -rustc-dep-of-std = ['compiler-builtins', 'core'] - -# This makes certain traits and function specializations public that -# are not normally public but are required by the `testcrate` -public-test-deps = [] - [workspace] -resolver = "2" +resolver = "3" members = [ - # Note that builtins-test-intrinsics cannot be a default member because it + # Note that builtins-test-intrinsics cannot be a default member because it # needs the `mangled-names` feature disabled, while `testcrate` needs it # enabled. "builtins-test-intrinsics", + "compiler-builtins", "testcrate", ] default-members = [ - ".", + "compiler-builtins", "testcrate", ] diff --git a/builtins-test-intrinsics/Cargo.toml b/builtins-test-intrinsics/Cargo.toml index 9b2e5bb7c..9dbd3c32f 100644 --- a/builtins-test-intrinsics/Cargo.toml +++ b/builtins-test-intrinsics/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" publish = false [dependencies] -compiler_builtins = { path = "../", features = ["compiler-builtins"]} +compiler_builtins = { path = "../compiler-builtins", features = ["compiler-builtins"]} panic-handler = { path = '../crates/panic-handler' } [features] diff --git a/builtins-test-intrinsics/build.rs b/builtins-test-intrinsics/build.rs index a38c6c1ff..89b126ff2 100644 --- a/builtins-test-intrinsics/build.rs +++ b/builtins-test-intrinsics/build.rs @@ -1,5 +1,5 @@ mod builtins_configure { - include!("../configure.rs"); + include!("../compiler-builtins/configure.rs"); } fn main() { diff --git a/ci/run-docker.sh b/ci/run-docker.sh index 215ad71a3..5e19cf4d0 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -38,7 +38,7 @@ run() { fi if [ -d compiler-rt ]; then - export RUST_COMPILER_RT_ROOT=./compiler-rt + export RUST_COMPILER_RT_ROOT="/checkout/compiler-rt" fi if [ "${GITHUB_ACTIONS:-}" = "true" ]; then diff --git a/ci/run.sh b/ci/run.sh index 3625dde79..9abbf25a7 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -23,7 +23,7 @@ 
fi if [ "${NO_STD:-}" = "1" ]; then echo "nothing to do for no_std" else - run="cargo test --manifest-path testcrate/Cargo.toml --no-fail-fast --target $target" + run="cargo test --package testcrate --no-fail-fast --target $target" $run $run --release $run --features c @@ -38,7 +38,7 @@ fi if [ "${TEST_VERBATIM:-}" = "1" ]; then verb_path=$(cmd.exe //C echo \\\\?\\%cd%\\testcrate\\target2) - cargo build --manifest-path testcrate/Cargo.toml \ + cargo build --package testcrate \ --target "$target" --target-dir "$verb_path" --features c fi diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml new file mode 100644 index 000000000..9797b5e65 --- /dev/null +++ b/compiler-builtins/Cargo.toml @@ -0,0 +1,72 @@ +[package] +authors = ["Jorge Aparicio "] +name = "compiler_builtins" +version = "0.1.151" +license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" +readme = "../README.md" +repository = "https://github.com/rust-lang/compiler-builtins" +homepage = "https://github.com/rust-lang/compiler-builtins" +documentation = "https://docs.rs/compiler_builtins" +edition = "2021" +description = """ +Compiler intrinsics used by the Rust compiler. Also available for other targets +if necessary! 
+""" +include = [ + '/Cargo.toml', + '/build.rs', + '/configure.rs', + '/src/*', + '../LICENSE.txt', + '../README.md', + '../compiler-rt/*', + 'libm/src/math/*', +] +links = 'compiler-rt' + +[lib] +test = false +bench = false + +[dependencies] +# For more information on this dependency see +# https://github.com/rust-lang/rust/tree/master/library/rustc-std-workspace-core +core = { version = "1.0.0", optional = true, package = 'rustc-std-workspace-core' } + +[build-dependencies] +cc = { optional = true, version = "1.0" } + +[dev-dependencies] +panic-handler = { path = '../crates/panic-handler' } + +[features] +default = ["compiler-builtins"] + +# Enable compilation of C code in compiler-rt, filling in some more optimized +# implementations and also filling in unimplemented intrinsics +c = ["cc"] + +# Workaround for the Cranelift codegen backend. Disables any implementations +# which use inline assembly and fall back to pure Rust versions (if avalible). +no-asm = [] + +# Workaround for codegen backends which haven't yet implemented `f16` and +# `f128` support. Disabled any intrinsics which use those types. +no-f16-f128 = [] + +# Flag this library as the unstable compiler-builtins lib +compiler-builtins = [] + +# Generate memory-related intrinsics like memcpy +mem = [] + +# Mangle all names so this can be linked in with other versions or other +# compiler-rt implementations. 
Also used for testing +mangled-names = [] + +# Only used in the compiler's build system +rustc-dep-of-std = ['compiler-builtins', 'core'] + +# This makes certain traits and function specializations public that +# are not normally public but are required by the `testcrate` +public-test-deps = [] diff --git a/build.rs b/compiler-builtins/build.rs similarity index 100% rename from build.rs rename to compiler-builtins/build.rs index 3003d51af..fdfb61b17 100644 --- a/build.rs +++ b/compiler-builtins/build.rs @@ -1,7 +1,7 @@ -use std::{collections::BTreeMap, env, path::PathBuf, sync::atomic::Ordering}; - mod configure; +use std::{collections::BTreeMap, env, path::PathBuf, sync::atomic::Ordering}; + use configure::{configure_aliases, configure_f16_f128, Target}; fn main() { diff --git a/configure.rs b/compiler-builtins/configure.rs similarity index 100% rename from configure.rs rename to compiler-builtins/configure.rs diff --git a/libm b/compiler-builtins/libm similarity index 100% rename from libm rename to compiler-builtins/libm diff --git a/src/aarch64.rs b/compiler-builtins/src/aarch64.rs similarity index 100% rename from src/aarch64.rs rename to compiler-builtins/src/aarch64.rs diff --git a/src/aarch64_linux.rs b/compiler-builtins/src/aarch64_linux.rs similarity index 100% rename from src/aarch64_linux.rs rename to compiler-builtins/src/aarch64_linux.rs diff --git a/src/arm.rs b/compiler-builtins/src/arm.rs similarity index 100% rename from src/arm.rs rename to compiler-builtins/src/arm.rs diff --git a/src/arm_linux.rs b/compiler-builtins/src/arm_linux.rs similarity index 100% rename from src/arm_linux.rs rename to compiler-builtins/src/arm_linux.rs diff --git a/src/float/add.rs b/compiler-builtins/src/float/add.rs similarity index 100% rename from src/float/add.rs rename to compiler-builtins/src/float/add.rs diff --git a/src/float/cmp.rs b/compiler-builtins/src/float/cmp.rs similarity index 100% rename from src/float/cmp.rs rename to 
compiler-builtins/src/float/cmp.rs diff --git a/src/float/conv.rs b/compiler-builtins/src/float/conv.rs similarity index 100% rename from src/float/conv.rs rename to compiler-builtins/src/float/conv.rs diff --git a/src/float/div.rs b/compiler-builtins/src/float/div.rs similarity index 100% rename from src/float/div.rs rename to compiler-builtins/src/float/div.rs diff --git a/src/float/extend.rs b/compiler-builtins/src/float/extend.rs similarity index 100% rename from src/float/extend.rs rename to compiler-builtins/src/float/extend.rs diff --git a/src/float/mod.rs b/compiler-builtins/src/float/mod.rs similarity index 100% rename from src/float/mod.rs rename to compiler-builtins/src/float/mod.rs diff --git a/src/float/mul.rs b/compiler-builtins/src/float/mul.rs similarity index 100% rename from src/float/mul.rs rename to compiler-builtins/src/float/mul.rs diff --git a/src/float/pow.rs b/compiler-builtins/src/float/pow.rs similarity index 100% rename from src/float/pow.rs rename to compiler-builtins/src/float/pow.rs diff --git a/src/float/sub.rs b/compiler-builtins/src/float/sub.rs similarity index 100% rename from src/float/sub.rs rename to compiler-builtins/src/float/sub.rs diff --git a/src/float/traits.rs b/compiler-builtins/src/float/traits.rs similarity index 100% rename from src/float/traits.rs rename to compiler-builtins/src/float/traits.rs diff --git a/src/float/trunc.rs b/compiler-builtins/src/float/trunc.rs similarity index 100% rename from src/float/trunc.rs rename to compiler-builtins/src/float/trunc.rs diff --git a/src/hexagon.rs b/compiler-builtins/src/hexagon.rs similarity index 100% rename from src/hexagon.rs rename to compiler-builtins/src/hexagon.rs diff --git a/src/hexagon/dfaddsub.s b/compiler-builtins/src/hexagon/dfaddsub.s similarity index 100% rename from src/hexagon/dfaddsub.s rename to compiler-builtins/src/hexagon/dfaddsub.s diff --git a/src/hexagon/dfdiv.s b/compiler-builtins/src/hexagon/dfdiv.s similarity index 100% rename from 
src/hexagon/dfdiv.s rename to compiler-builtins/src/hexagon/dfdiv.s diff --git a/src/hexagon/dffma.s b/compiler-builtins/src/hexagon/dffma.s similarity index 100% rename from src/hexagon/dffma.s rename to compiler-builtins/src/hexagon/dffma.s diff --git a/src/hexagon/dfminmax.s b/compiler-builtins/src/hexagon/dfminmax.s similarity index 100% rename from src/hexagon/dfminmax.s rename to compiler-builtins/src/hexagon/dfminmax.s diff --git a/src/hexagon/dfmul.s b/compiler-builtins/src/hexagon/dfmul.s similarity index 100% rename from src/hexagon/dfmul.s rename to compiler-builtins/src/hexagon/dfmul.s diff --git a/src/hexagon/dfsqrt.s b/compiler-builtins/src/hexagon/dfsqrt.s similarity index 100% rename from src/hexagon/dfsqrt.s rename to compiler-builtins/src/hexagon/dfsqrt.s diff --git a/src/hexagon/divdi3.s b/compiler-builtins/src/hexagon/divdi3.s similarity index 100% rename from src/hexagon/divdi3.s rename to compiler-builtins/src/hexagon/divdi3.s diff --git a/src/hexagon/divsi3.s b/compiler-builtins/src/hexagon/divsi3.s similarity index 100% rename from src/hexagon/divsi3.s rename to compiler-builtins/src/hexagon/divsi3.s diff --git a/src/hexagon/fastmath2_dlib_asm.s b/compiler-builtins/src/hexagon/fastmath2_dlib_asm.s similarity index 100% rename from src/hexagon/fastmath2_dlib_asm.s rename to compiler-builtins/src/hexagon/fastmath2_dlib_asm.s diff --git a/src/hexagon/fastmath2_ldlib_asm.s b/compiler-builtins/src/hexagon/fastmath2_ldlib_asm.s similarity index 100% rename from src/hexagon/fastmath2_ldlib_asm.s rename to compiler-builtins/src/hexagon/fastmath2_ldlib_asm.s diff --git a/src/hexagon/func_macro.s b/compiler-builtins/src/hexagon/func_macro.s similarity index 100% rename from src/hexagon/func_macro.s rename to compiler-builtins/src/hexagon/func_macro.s diff --git a/src/hexagon/memcpy_forward_vp4cp4n2.s b/compiler-builtins/src/hexagon/memcpy_forward_vp4cp4n2.s similarity index 100% rename from src/hexagon/memcpy_forward_vp4cp4n2.s rename to 
compiler-builtins/src/hexagon/memcpy_forward_vp4cp4n2.s diff --git a/src/hexagon/memcpy_likely_aligned.s b/compiler-builtins/src/hexagon/memcpy_likely_aligned.s similarity index 100% rename from src/hexagon/memcpy_likely_aligned.s rename to compiler-builtins/src/hexagon/memcpy_likely_aligned.s diff --git a/src/hexagon/moddi3.s b/compiler-builtins/src/hexagon/moddi3.s similarity index 100% rename from src/hexagon/moddi3.s rename to compiler-builtins/src/hexagon/moddi3.s diff --git a/src/hexagon/modsi3.s b/compiler-builtins/src/hexagon/modsi3.s similarity index 100% rename from src/hexagon/modsi3.s rename to compiler-builtins/src/hexagon/modsi3.s diff --git a/src/hexagon/sfdiv_opt.s b/compiler-builtins/src/hexagon/sfdiv_opt.s similarity index 100% rename from src/hexagon/sfdiv_opt.s rename to compiler-builtins/src/hexagon/sfdiv_opt.s diff --git a/src/hexagon/sfsqrt_opt.s b/compiler-builtins/src/hexagon/sfsqrt_opt.s similarity index 100% rename from src/hexagon/sfsqrt_opt.s rename to compiler-builtins/src/hexagon/sfsqrt_opt.s diff --git a/src/hexagon/udivdi3.s b/compiler-builtins/src/hexagon/udivdi3.s similarity index 100% rename from src/hexagon/udivdi3.s rename to compiler-builtins/src/hexagon/udivdi3.s diff --git a/src/hexagon/udivmoddi4.s b/compiler-builtins/src/hexagon/udivmoddi4.s similarity index 100% rename from src/hexagon/udivmoddi4.s rename to compiler-builtins/src/hexagon/udivmoddi4.s diff --git a/src/hexagon/udivmodsi4.s b/compiler-builtins/src/hexagon/udivmodsi4.s similarity index 100% rename from src/hexagon/udivmodsi4.s rename to compiler-builtins/src/hexagon/udivmodsi4.s diff --git a/src/hexagon/udivsi3.s b/compiler-builtins/src/hexagon/udivsi3.s similarity index 100% rename from src/hexagon/udivsi3.s rename to compiler-builtins/src/hexagon/udivsi3.s diff --git a/src/hexagon/umoddi3.s b/compiler-builtins/src/hexagon/umoddi3.s similarity index 100% rename from src/hexagon/umoddi3.s rename to compiler-builtins/src/hexagon/umoddi3.s diff --git 
a/src/hexagon/umodsi3.s b/compiler-builtins/src/hexagon/umodsi3.s similarity index 100% rename from src/hexagon/umodsi3.s rename to compiler-builtins/src/hexagon/umodsi3.s diff --git a/src/int/addsub.rs b/compiler-builtins/src/int/addsub.rs similarity index 100% rename from src/int/addsub.rs rename to compiler-builtins/src/int/addsub.rs diff --git a/src/int/big.rs b/compiler-builtins/src/int/big.rs similarity index 100% rename from src/int/big.rs rename to compiler-builtins/src/int/big.rs diff --git a/src/int/bswap.rs b/compiler-builtins/src/int/bswap.rs similarity index 100% rename from src/int/bswap.rs rename to compiler-builtins/src/int/bswap.rs diff --git a/src/int/leading_zeros.rs b/compiler-builtins/src/int/leading_zeros.rs similarity index 100% rename from src/int/leading_zeros.rs rename to compiler-builtins/src/int/leading_zeros.rs diff --git a/src/int/mod.rs b/compiler-builtins/src/int/mod.rs similarity index 100% rename from src/int/mod.rs rename to compiler-builtins/src/int/mod.rs diff --git a/src/int/mul.rs b/compiler-builtins/src/int/mul.rs similarity index 100% rename from src/int/mul.rs rename to compiler-builtins/src/int/mul.rs diff --git a/src/int/sdiv.rs b/compiler-builtins/src/int/sdiv.rs similarity index 100% rename from src/int/sdiv.rs rename to compiler-builtins/src/int/sdiv.rs diff --git a/src/int/shift.rs b/compiler-builtins/src/int/shift.rs similarity index 100% rename from src/int/shift.rs rename to compiler-builtins/src/int/shift.rs diff --git a/src/int/specialized_div_rem/asymmetric.rs b/compiler-builtins/src/int/specialized_div_rem/asymmetric.rs similarity index 100% rename from src/int/specialized_div_rem/asymmetric.rs rename to compiler-builtins/src/int/specialized_div_rem/asymmetric.rs diff --git a/src/int/specialized_div_rem/binary_long.rs b/compiler-builtins/src/int/specialized_div_rem/binary_long.rs similarity index 100% rename from src/int/specialized_div_rem/binary_long.rs rename to 
compiler-builtins/src/int/specialized_div_rem/binary_long.rs diff --git a/src/int/specialized_div_rem/delegate.rs b/compiler-builtins/src/int/specialized_div_rem/delegate.rs similarity index 100% rename from src/int/specialized_div_rem/delegate.rs rename to compiler-builtins/src/int/specialized_div_rem/delegate.rs diff --git a/src/int/specialized_div_rem/mod.rs b/compiler-builtins/src/int/specialized_div_rem/mod.rs similarity index 100% rename from src/int/specialized_div_rem/mod.rs rename to compiler-builtins/src/int/specialized_div_rem/mod.rs diff --git a/src/int/specialized_div_rem/norm_shift.rs b/compiler-builtins/src/int/specialized_div_rem/norm_shift.rs similarity index 100% rename from src/int/specialized_div_rem/norm_shift.rs rename to compiler-builtins/src/int/specialized_div_rem/norm_shift.rs diff --git a/src/int/specialized_div_rem/trifecta.rs b/compiler-builtins/src/int/specialized_div_rem/trifecta.rs similarity index 100% rename from src/int/specialized_div_rem/trifecta.rs rename to compiler-builtins/src/int/specialized_div_rem/trifecta.rs diff --git a/src/int/trailing_zeros.rs b/compiler-builtins/src/int/trailing_zeros.rs similarity index 100% rename from src/int/trailing_zeros.rs rename to compiler-builtins/src/int/trailing_zeros.rs diff --git a/src/int/traits.rs b/compiler-builtins/src/int/traits.rs similarity index 100% rename from src/int/traits.rs rename to compiler-builtins/src/int/traits.rs diff --git a/src/int/udiv.rs b/compiler-builtins/src/int/udiv.rs similarity index 100% rename from src/int/udiv.rs rename to compiler-builtins/src/int/udiv.rs diff --git a/src/lib.miri.rs b/compiler-builtins/src/lib.miri.rs similarity index 100% rename from src/lib.miri.rs rename to compiler-builtins/src/lib.miri.rs diff --git a/src/lib.rs b/compiler-builtins/src/lib.rs similarity index 96% rename from src/lib.rs rename to compiler-builtins/src/lib.rs index 6f5bd8598..16de96b4d 100644 --- a/src/lib.rs +++ b/compiler-builtins/src/lib.rs @@ -44,8 +44,7 @@ pub 
mod int; pub mod math; pub mod mem; -// `libm` expects its `support` module to be available in the crate root. This config can be -// cleaned up once `libm` is made always available. +// `libm` expects its `support` module to be available in the crate root. use math::libm::support; #[cfg(target_arch = "arm")] diff --git a/src/macros.rs b/compiler-builtins/src/macros.rs similarity index 100% rename from src/macros.rs rename to compiler-builtins/src/macros.rs diff --git a/src/math.rs b/compiler-builtins/src/math.rs similarity index 100% rename from src/math.rs rename to compiler-builtins/src/math.rs diff --git a/src/mem/impls.rs b/compiler-builtins/src/mem/impls.rs similarity index 100% rename from src/mem/impls.rs rename to compiler-builtins/src/mem/impls.rs diff --git a/src/mem/mod.rs b/compiler-builtins/src/mem/mod.rs similarity index 100% rename from src/mem/mod.rs rename to compiler-builtins/src/mem/mod.rs diff --git a/src/mem/x86_64.rs b/compiler-builtins/src/mem/x86_64.rs similarity index 100% rename from src/mem/x86_64.rs rename to compiler-builtins/src/mem/x86_64.rs diff --git a/src/probestack.rs b/compiler-builtins/src/probestack.rs similarity index 100% rename from src/probestack.rs rename to compiler-builtins/src/probestack.rs diff --git a/src/riscv.rs b/compiler-builtins/src/riscv.rs similarity index 100% rename from src/riscv.rs rename to compiler-builtins/src/riscv.rs diff --git a/src/x86.rs b/compiler-builtins/src/x86.rs similarity index 100% rename from src/x86.rs rename to compiler-builtins/src/x86.rs diff --git a/src/x86_64.rs b/compiler-builtins/src/x86_64.rs similarity index 100% rename from src/x86_64.rs rename to compiler-builtins/src/x86_64.rs diff --git a/crates/panic-handler/Cargo.toml b/crates/panic-handler/Cargo.toml index 96b83eaa2..a6764fc48 100644 --- a/crates/panic-handler/Cargo.toml +++ b/crates/panic-handler/Cargo.toml @@ -5,4 +5,8 @@ authors = ["Alex Crichton "] edition = "2024" publish = false +[lib] +test = false +bench = false + 
[dependencies] diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index 71c461c57..be60e694f 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -18,7 +18,7 @@ rand_xoshiro = "0.6" rustc_apfloat = "0.2.1" [dependencies.compiler_builtins] -path = ".." +path = "../compiler-builtins" default-features = false features = ["public-test-deps"] diff --git a/testcrate/build.rs b/testcrate/build.rs index 566b985d2..e8f4eb4dd 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -1,7 +1,7 @@ use std::collections::HashSet; mod builtins_configure { - include!("../configure.rs"); + include!("../compiler-builtins/configure.rs"); } /// Features to enable From 1f67000aa6708d3a95b2a40b2b42c7e6b10ba196 Mon Sep 17 00:00:00 2001 From: beetrees Date: Wed, 12 Mar 2025 18:02:51 +0000 Subject: [PATCH 1244/1459] Remove use of `atomic_load_unordered` and undefined behaviour from `arm_linux.rs` --- compiler-builtins/src/arm_linux.rs | 47 ++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/compiler-builtins/src/arm_linux.rs b/compiler-builtins/src/arm_linux.rs index 8f22eb628..aeb3ff3e5 100644 --- a/compiler-builtins/src/arm_linux.rs +++ b/compiler-builtins/src/arm_linux.rs @@ -1,5 +1,6 @@ -use core::intrinsics; +use core::arch; use core::mem; +use core::sync::atomic::{AtomicU32, Ordering}; // Kernel-provided user-mode helper functions: // https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt @@ -7,6 +8,7 @@ unsafe fn __kuser_cmpxchg(oldval: u32, newval: u32, ptr: *mut u32) -> bool { let f: extern "C" fn(u32, u32, *mut u32) -> u32 = mem::transmute(0xffff0fc0usize as *const ()); f(oldval, newval, ptr) == 0 } + unsafe fn __kuser_memory_barrier() { let f: extern "C" fn() = mem::transmute(0xffff0fa0usize as *const ()); f(); @@ -54,13 +56,52 @@ fn insert_aligned(aligned: u32, val: u32, shift: u32, mask: u32) -> u32 { (aligned & !(mask << shift)) | ((val & mask) << shift) } +/// Performs a relaxed atomic load of 4 bytes 
at `ptr`. Some of the bytes are allowed to be out of +/// bounds as long as `size_of::<T>()` bytes are in bounds. +/// +/// # Safety +/// +/// - `ptr` must be 4-aligned. +/// - `size_of::<T>()` must be at most 4. +/// - if `size_of::<T>() == 1`, `ptr` or `ptr` offset by 1, 2 or 3 bytes must be valid for a relaxed +/// atomic read of 1 byte. +/// - if `size_of::<T>() == 2`, `ptr` or `ptr` offset by 2 bytes must be valid for a relaxed atomic +/// read of 2 bytes. +/// - if `size_of::<T>() == 4`, `ptr` must be valid for a relaxed atomic read of 4 bytes. +unsafe fn atomic_load_aligned(ptr: *mut u32) -> u32 { + if mem::size_of::() == 4 { + // SAFETY: As `T` has a size of 4, the caller guarantees this is sound. + unsafe { AtomicU32::from_ptr(ptr).load(Ordering::Relaxed) } + } else { + // SAFETY: + // As all 4 bytes pointed to by `ptr` might not be dereferenceable due to being out of + // bounds when doing atomic operations on a `u8`/`i8`/`u16`/`i16`, inline ASM is used to + // avoid causing undefined behaviour. However, as `ptr` is 4-aligned and at least 1 byte of + // `ptr` is dereferenceable, the load won't cause a segfault as the page size is always + // larger than 4 bytes. + // The `ldr` instruction does not touch the stack or flags, or write to memory, so + // `nostack`, `preserves_flags` and `readonly` are sound. The caller guarantees that `ptr` is + // 4-aligned, as required by `ldr`. 
+ unsafe { + let res: u32; + arch::asm!( + "ldr {res}, [{ptr}]", + ptr = in(reg) ptr, + res = lateout(reg) res, + options(nostack, preserves_flags, readonly) + ); + res + } + } +} + // Generic atomic read-modify-write operation unsafe fn atomic_rmw u32, G: Fn(u32, u32) -> u32>(ptr: *mut T, f: F, g: G) -> u32 { let aligned_ptr = align_ptr(ptr); let (shift, mask) = get_shift_mask(ptr); loop { - let curval_aligned = intrinsics::atomic_load_unordered(aligned_ptr); + let curval_aligned = atomic_load_aligned::(aligned_ptr); let curval = extract_aligned(curval_aligned, shift, mask); let newval = f(curval); let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask); @@ -76,7 +117,7 @@ unsafe fn atomic_cmpxchg(ptr: *mut T, oldval: u32, newval: u32) -> u32 { let (shift, mask) = get_shift_mask(ptr); loop { - let curval_aligned = intrinsics::atomic_load_unordered(aligned_ptr); + let curval_aligned = atomic_load_aligned::(aligned_ptr); let curval = extract_aligned(curval_aligned, shift, mask); if curval != oldval { return curval; From 52d96c47681ef504a8ad7398efffe53214898aab Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 20 Mar 2025 02:02:43 +0000 Subject: [PATCH 1245/1459] chore: release v0.1.152 --- compiler-builtins/CHANGELOG.md | 15 +++++++++++++++ compiler-builtins/Cargo.toml | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 compiler-builtins/CHANGELOG.md diff --git a/compiler-builtins/CHANGELOG.md b/compiler-builtins/CHANGELOG.md new file mode 100644 index 000000000..987f87713 --- /dev/null +++ b/compiler-builtins/CHANGELOG.md @@ -0,0 +1,15 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [Unreleased] + +## [0.1.152](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.151...compiler_builtins-v0.1.152) - 2025-03-20 + +### Other + +- Remove use of `atomic_load_unordered` and undefined behaviour from `arm_linux.rs` +- Switch repository layout to use a virtual manifest diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index 9797b5e65..8de8bce6a 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.151" +version = "0.1.152" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "../README.md" repository = "https://github.com/rust-lang/compiler-builtins" From e49ff02ce147653497d18087814a2e0360cccf5a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 20 Mar 2025 11:19:09 +0000 Subject: [PATCH 1246/1459] Add benchmarks using `iai-callgrind` This crate [1] makes it reasonably easy to get instruction count performance metrics that are stable enough to run in CI, and has worked out well since integrating it with `libm`. Add new benchmarks for `mem` functions using `iai-callgrind`, modeling them off of the existing benchmarks. 
[1]: https://github.com/iai-callgrind/iai-callgrind --- testcrate/Cargo.toml | 14 + testcrate/benches/mem_icount.rs | 474 ++++++++++++++++++++++++++++++++ 2 files changed, 488 insertions(+) create mode 100644 testcrate/benches/mem_icount.rs diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index be60e694f..bda2b641d 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -16,6 +16,8 @@ doctest = false rand_xoshiro = "0.6" # To compare float builtins against rustc_apfloat = "0.2.1" +# Really a dev dependency, but dev dependencies can't be optional +iai-callgrind = { version = "0.14.0", optional = true } [dependencies.compiler_builtins] path = "../compiler-builtins" @@ -47,9 +49,16 @@ no-sys-f16-f64-convert = [] # Skip tests that rely on f16 symbols being available on the system no-sys-f16 = ["no-sys-f16-f64-convert"] +# Enable icount benchmarks (requires iai-callgrind and valgrind) +icount = ["dep:iai-callgrind"] + # Enable report generation without bringing in more dependencies by default benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] +# NOTE: benchmarks must be run with `--no-default-features` or with +# `-p testcrate`, otherwise the default `compiler-builtins` feature of the +# `compiler_builtins` crate gets activated, resulting in linker errors. + [[bench]] name = "float_add" harness = false @@ -85,3 +94,8 @@ harness = false [[bench]] name = "float_pow" harness = false + +[[bench]] +name = "mem_icount" +harness = false +required-features = ["icount"] diff --git a/testcrate/benches/mem_icount.rs b/testcrate/benches/mem_icount.rs new file mode 100644 index 000000000..be3d13dff --- /dev/null +++ b/testcrate/benches/mem_icount.rs @@ -0,0 +1,474 @@ +//! Benchmarks that use Callgrind (via `iai_callgrind`) to report instruction count metrics. This +//! is stable enough to be tested in CI. 
+ +use std::hint::black_box; +use std::{ops, slice}; + +use compiler_builtins::mem::{memcmp, memcpy, memmove, memset}; +use iai_callgrind::{library_benchmark, library_benchmark_group, main}; + +const PAGE_SIZE: usize = 0x1000; + +#[derive(Clone)] +#[repr(C, align(0x1000))] +struct Page([u8; PAGE_SIZE]); + +/// A buffer that is page-aligned by default, with an optional offset to create a +/// misalignment. +struct AlignedSlice { + buf: Box<[Page]>, + len: usize, + offset: usize, +} + +impl AlignedSlice { + /// Allocate a slice aligned to ALIGN with at least `len` items, with `offset` from + /// page alignment. + fn new_zeroed(len: usize, offset: usize) -> Self { + assert!(offset < PAGE_SIZE); + let total_len = len + offset; + let items = (total_len / PAGE_SIZE) + if total_len % PAGE_SIZE > 0 { 1 } else { 0 }; + let buf = vec![Page([0u8; PAGE_SIZE]); items].into_boxed_slice(); + AlignedSlice { buf, len, offset } + } +} + +impl ops::Deref for AlignedSlice { + type Target = [u8]; + fn deref(&self) -> &Self::Target { + unsafe { slice::from_raw_parts(self.buf.as_ptr().cast::().add(self.offset), self.len) } + } +} + +impl ops::DerefMut for AlignedSlice { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { + slice::from_raw_parts_mut( + self.buf.as_mut_ptr().cast::().add(self.offset), + self.len, + ) + } + } +} + +mod mcpy { + use super::*; + + struct Cfg { + len: usize, + s_off: usize, + d_off: usize, + } + + fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) { + let Cfg { len, s_off, d_off } = cfg; + println!("{len} bytes, {s_off} src offset, {d_off} dst offset"); + let mut src = AlignedSlice::new_zeroed(len, s_off); + let dst = AlignedSlice::new_zeroed(len, d_off); + src.fill(1); + (len, src, dst) + } + + #[library_benchmark] + #[benches::aligned( + args = [ + Cfg { len: 16, s_off: 0, d_off: 0 }, + Cfg { len: 16, s_off: 0, d_off: 0 }, + Cfg { len: 28, s_off: 0, d_off: 0 }, + Cfg { len: 32, s_off: 0, d_off: 0 }, + Cfg { len: 36, s_off: 0, d_off: 0 }, + Cfg 
{ len: 60, s_off: 0, d_off: 0 }, + Cfg { len: 64, s_off: 0, d_off: 0 }, + Cfg { len: 68, s_off: 0, d_off: 0 }, + Cfg { len: 128, s_off: 0, d_off: 0 }, + Cfg { len: 256, s_off: 0, d_off: 0 }, + Cfg { len: 512, s_off: 0, d_off: 0 }, + Cfg { len: 1024, s_off: 0, d_off: 0 }, + Cfg { len: 4096, s_off: 0, d_off: 0 }, + Cfg { len: 1048576, s_off: 0, d_off: 0 }, + ], + setup = setup, + )] + #[benches::offset( + args = [ + Cfg { len: 16, s_off: 65, d_off: 65 }, + Cfg { len: 28, s_off: 65, d_off: 65 }, + Cfg { len: 32, s_off: 65, d_off: 65 }, + Cfg { len: 36, s_off: 65, d_off: 65 }, + Cfg { len: 60, s_off: 65, d_off: 65 }, + Cfg { len: 64, s_off: 65, d_off: 65 }, + Cfg { len: 68, s_off: 65, d_off: 65 }, + Cfg { len: 128, s_off: 65, d_off: 65 }, + Cfg { len: 256, s_off: 65, d_off: 65 }, + Cfg { len: 512, s_off: 65, d_off: 65 }, + Cfg { len: 1024, s_off: 65, d_off: 65 }, + Cfg { len: 4096, s_off: 65, d_off: 65 }, + Cfg { len: 1048576, s_off: 65, d_off: 65 }, + ], + setup = setup, + )] + #[benches::misaligned( + args = [ + Cfg { len: 16, s_off: 65, d_off: 66 }, + Cfg { len: 28, s_off: 65, d_off: 66 }, + Cfg { len: 32, s_off: 65, d_off: 66 }, + Cfg { len: 36, s_off: 65, d_off: 66 }, + Cfg { len: 60, s_off: 65, d_off: 66 }, + Cfg { len: 64, s_off: 65, d_off: 66 }, + Cfg { len: 68, s_off: 65, d_off: 66 }, + Cfg { len: 128, s_off: 65, d_off: 66 }, + Cfg { len: 256, s_off: 65, d_off: 66 }, + Cfg { len: 512, s_off: 65, d_off: 66 }, + Cfg { len: 1024, s_off: 65, d_off: 66 }, + Cfg { len: 4096, s_off: 65, d_off: 66 }, + Cfg { len: 1048576, s_off: 65, d_off: 66 }, + ], + setup = setup, + )] + fn bench((len, mut dst, src): (usize, AlignedSlice, AlignedSlice)) { + unsafe { + black_box(memcpy( + black_box(dst.as_mut_ptr()), + black_box(src.as_ptr()), + black_box(len), + )); + } + } + + library_benchmark_group!(name = memcpy; benchmarks = bench); +} + +mod mset { + use super::*; + + struct Cfg { + len: usize, + offset: usize, + } + + fn setup(Cfg { len, offset }: Cfg) -> (usize, 
AlignedSlice) { + println!("{len} bytes, {offset} offset"); + (len, AlignedSlice::new_zeroed(len, offset)) + } + + #[library_benchmark] + #[benches::aligned( + args = [ + Cfg { len: 16, offset: 0 }, + Cfg { len: 32, offset: 0 }, + Cfg { len: 64, offset: 0 }, + Cfg { len: 512, offset: 0 }, + Cfg { len: 4096, offset: 0 }, + Cfg { len: 1048576, offset: 0 }, + ], + setup = setup, + )] + #[benches::offset( + args = [ + Cfg { len: 16, offset: 65 }, + Cfg { len: 32, offset: 65 }, + Cfg { len: 64, offset: 65 }, + Cfg { len: 512, offset: 65 }, + Cfg { len: 4096, offset: 65 }, + Cfg { len: 1048576, offset: 65 }, + ], + setup = setup, + )] + fn bench((len, mut dst): (usize, AlignedSlice)) { + unsafe { + black_box(memset( + black_box(dst.as_mut_ptr()), + black_box(27), + black_box(len), + )); + } + } + + library_benchmark_group!(name = memset; benchmarks = bench); +} + +mod mcmp { + use super::*; + + struct Cfg { + len: usize, + s_off: usize, + d_off: usize, + } + + fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) { + let Cfg { len, s_off, d_off } = cfg; + println!("{len} bytes, {s_off} src offset, {d_off} dst offset"); + let b1 = AlignedSlice::new_zeroed(len, s_off); + let mut b2 = AlignedSlice::new_zeroed(len, d_off); + b2[len - 1] = 1; + (len, b1, b2) + } + + #[library_benchmark] + #[benches::aligned( + args = [ + Cfg { len: 16, s_off: 0, d_off: 0 }, + Cfg { len: 32, s_off: 0, d_off: 0 }, + Cfg { len: 64, s_off: 0, d_off: 0 }, + Cfg { len: 512, s_off: 0, d_off: 0 }, + Cfg { len: 4096, s_off: 0, d_off: 0 }, + Cfg { len: 1048576, s_off: 0, d_off: 0 }, + ], + setup = setup + )] + #[benches::offset( + args = [ + Cfg { len: 16, s_off: 65, d_off: 65 }, + Cfg { len: 32, s_off: 65, d_off: 65 }, + Cfg { len: 64, s_off: 65, d_off: 65 }, + Cfg { len: 512, s_off: 65, d_off: 65 }, + Cfg { len: 4096, s_off: 65, d_off: 65 }, + Cfg { len: 1048576, s_off: 65, d_off: 65 }, + ], + setup = setup + )] + #[benches::misaligned( + args = [ + Cfg { len: 16, s_off: 65, d_off: 66 }, + Cfg { 
len: 32, s_off: 65, d_off: 66 }, + Cfg { len: 64, s_off: 65, d_off: 66 }, + Cfg { len: 512, s_off: 65, d_off: 66 }, + Cfg { len: 4096, s_off: 65, d_off: 66 }, + Cfg { len: 1048576, s_off: 65, d_off: 66 }, + ], + setup = setup + )] + fn bench((len, mut dst, src): (usize, AlignedSlice, AlignedSlice)) { + unsafe { + black_box(memcmp( + black_box(dst.as_mut_ptr()), + black_box(src.as_ptr()), + black_box(len), + )); + } + } + + library_benchmark_group!(name = memcmp; benchmarks = bench); +} + +mod mmove { + use super::*; + use Spread::{Large, Medium, Small}; + + struct Cfg { + len: usize, + spread: Spread, + off: usize, + } + + enum Spread { + /// `src` and `dst` are close. + Small, + /// `src` and `dst` are halfway offset in the buffer. + Medium, + /// `src` and `dst` only overlap by a single byte. + Large, + } + + fn calculate_spread(len: usize, spread: Spread) -> usize { + match spread { + Small => 1, + Medium => len / 2, + Large => len - 1, + } + } + + fn setup_forward(cfg: Cfg) -> (usize, usize, AlignedSlice) { + let Cfg { len, spread, off } = cfg; + let spread = calculate_spread(len, spread); + println!("{len} bytes, {spread} spread, {off} offset"); + assert!(spread < len, "otherwise this just tests memcpy"); + let mut buf = AlignedSlice::new_zeroed(len + spread, off); + let mut fill: usize = 0; + buf[..len].fill_with(|| { + fill += 1; + fill as u8 + }); + (len, spread, buf) + } + + fn setup_backward(cfg: Cfg) -> (usize, usize, AlignedSlice) { + let Cfg { len, spread, off } = cfg; + let spread = calculate_spread(len, spread); + println!("{len} bytes, {spread} spread, {off} offset"); + assert!(spread < len, "otherwise this just tests memcpy"); + let mut buf = AlignedSlice::new_zeroed(len + spread, off); + let mut fill: usize = 0; + buf[spread..].fill_with(|| { + fill += 1; + fill as u8 + }); + (len, spread, buf) + } + + #[library_benchmark] + #[benches::small_spread( + args = [ + Cfg { len: 16, spread: Small, off: 0 }, + Cfg { len: 32, spread: Small, off: 0 }, + 
Cfg { len: 64, spread: Small, off: 0 }, + Cfg { len: 512, spread: Small, off: 0 }, + Cfg { len: 4096, spread: Small, off: 0 }, + Cfg { len: 1048576, spread: Small, off: 0 }, + ], + setup = setup_forward + )] + #[benches::medium_spread( + args = [ + Cfg { len: 16, spread: Medium, off: 0 }, + Cfg { len: 32, spread: Medium, off: 0 }, + Cfg { len: 64, spread: Medium, off: 0 }, + Cfg { len: 512, spread: Medium, off: 0 }, + Cfg { len: 4096, spread: Medium, off: 0 }, + Cfg { len: 1048576, spread: Medium, off: 0 }, + ], + setup = setup_forward + )] + #[benches::large_spread( + args = [ + Cfg { len: 16, spread: Large, off: 0 }, + Cfg { len: 32, spread: Large, off: 0 }, + Cfg { len: 64, spread: Large, off: 0 }, + Cfg { len: 512, spread: Large, off: 0 }, + Cfg { len: 4096, spread: Large, off: 0 }, + Cfg { len: 1048576, spread: Large, off: 0 }, + ], + setup = setup_forward + )] + #[benches::small_spread_offset( + args = [ + Cfg { len: 16, spread: Small, off: 63 }, + Cfg { len: 32, spread: Small, off: 63 }, + Cfg { len: 64, spread: Small, off: 63 }, + Cfg { len: 512, spread: Small, off: 63 }, + Cfg { len: 4096, spread: Small, off: 63 }, + Cfg { len: 1048576, spread: Small, off: 63 }, + ], + setup = setup_forward + )] + #[benches::medium_spread_offset( + args = [ + Cfg { len: 16, spread: Medium, off: 63 }, + Cfg { len: 32, spread: Medium, off: 63 }, + Cfg { len: 64, spread: Medium, off: 63 }, + Cfg { len: 512, spread: Medium, off: 63 }, + Cfg { len: 4096, spread: Medium, off: 63 }, + Cfg { len: 1048576, spread: Medium, off: 63 }, + ], + setup = setup_forward + )] + #[benches::large_spread_offset( + args = [ + Cfg { len: 16, spread: Large, off: 63 }, + Cfg { len: 32, spread: Large, off: 63 }, + Cfg { len: 64, spread: Large, off: 63 }, + Cfg { len: 512, spread: Large, off: 63 }, + Cfg { len: 4096, spread: Large, off: 63 }, + Cfg { len: 1048576, spread: Large, off: 63 }, + ], + setup = setup_forward + )] + fn forward((len, spread, mut buf): (usize, usize, AlignedSlice)) { + // Test 
moving from the start of the buffer toward the end + unsafe { + black_box(memmove( + black_box(buf[spread..].as_mut_ptr()), + black_box(buf.as_ptr()), + black_box(len), + )); + } + } + + #[library_benchmark] + #[benches::small_spread( + args = [ + Cfg { len: 16, spread: Small, off: 0 }, + Cfg { len: 32, spread: Small, off: 0 }, + Cfg { len: 64, spread: Small, off: 0 }, + Cfg { len: 512, spread: Small, off: 0 }, + Cfg { len: 4096, spread: Small, off: 0 }, + Cfg { len: 1048576, spread: Small, off: 0 }, + ], + setup = setup_backward + )] + #[benches::middle( + args = [ + Cfg { len: 16, spread: Medium, off: 0 }, + Cfg { len: 32, spread: Medium, off: 0 }, + Cfg { len: 64, spread: Medium, off: 0 }, + Cfg { len: 512, spread: Medium, off: 0 }, + Cfg { len: 4096, spread: Medium, off: 0 }, + Cfg { len: 1048576, spread: Medium, off: 0 }, + ], + setup = setup_backward + )] + #[benches::large_spread( + args = [ + Cfg { len: 16, spread: Large, off: 0 }, + Cfg { len: 32, spread: Large, off: 0 }, + Cfg { len: 64, spread: Large, off: 0 }, + Cfg { len: 512, spread: Large, off: 0 }, + Cfg { len: 4096, spread: Large, off: 0 }, + Cfg { len: 1048576, spread: Large, off: 0 }, + ], + setup = setup_backward + )] + #[benches::small_spread_off( + args = [ + Cfg { len: 16, spread: Small, off: 63 }, + Cfg { len: 32, spread: Small, off: 63 }, + Cfg { len: 64, spread: Small, off: 63 }, + Cfg { len: 512, spread: Small, off: 63 }, + Cfg { len: 4096, spread: Small, off: 63 }, + Cfg { len: 1048576, spread: Small, off: 63 }, + ], + setup = setup_backward + )] + #[benches::middle_off( + args = [ + Cfg { len: 16, spread: Medium, off: 63 }, + Cfg { len: 32, spread: Medium, off: 63 }, + Cfg { len: 64, spread: Medium, off: 63 }, + Cfg { len: 512, spread: Medium, off: 63 }, + Cfg { len: 4096, spread: Medium, off: 63 }, + Cfg { len: 1048576, spread: Medium, off: 63 }, + ], + setup = setup_backward + )] + #[benches::large_spread_off( + args = [ + Cfg { len: 16, spread: Large, off: 63 }, + Cfg { len: 32, 
spread: Large, off: 63 }, + Cfg { len: 64, spread: Large, off: 63 }, + Cfg { len: 512, spread: Large, off: 63 }, + Cfg { len: 4096, spread: Large, off: 63 }, + Cfg { len: 1048576, spread: Large, off: 63 }, + ], + setup = setup_backward + )] + fn backward((len, spread, mut buf): (usize, usize, AlignedSlice)) { + // Test moving from the end of the buffer toward the start + unsafe { + black_box(memmove( + black_box(buf.as_mut_ptr()), + black_box(buf[spread..].as_ptr()), + black_box(len), + )); + } + } + + library_benchmark_group!(name = memmove; benchmarks = forward, backward); +} + +use mcmp::memcmp; +use mcpy::memcpy; +use mmove::memmove; +use mset::memset; + +main!(library_benchmark_groups = memcpy, memset, memcmp, memmove); From 974d7213fc793cc3a3c67b0ad4492c4956121bb1 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 22 Mar 2025 05:03:03 +0000 Subject: [PATCH 1247/1459] Clean up icount benchmarks * Delete some memcpy tests that were a bit excessive * Always use the same offset of 65 * Add a memmove test with aligned source and destination * Improve printing output and add more comments * Use a constant for 1 MiB so it shows up in the benchmark logs --- testcrate/benches/mem_icount.rs | 199 ++++++++++++++++++-------------- 1 file changed, 112 insertions(+), 87 deletions(-) diff --git a/testcrate/benches/mem_icount.rs b/testcrate/benches/mem_icount.rs index be3d13dff..63045f6e1 100644 --- a/testcrate/benches/mem_icount.rs +++ b/testcrate/benches/mem_icount.rs @@ -7,7 +7,9 @@ use std::{ops, slice}; use compiler_builtins::mem::{memcmp, memcpy, memmove, memset}; use iai_callgrind::{library_benchmark, library_benchmark_group, main}; -const PAGE_SIZE: usize = 0x1000; +const PAGE_SIZE: usize = 0x1000; // 4 kiB +const MAX_ALIGN: usize = 512; // assume we may use avx512 operations one day +const MEG1: usize = 1 << 20; // 1 MiB #[derive(Clone)] #[repr(C, align(0x1000))] @@ -62,7 +64,7 @@ mod mcpy { fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) { let Cfg { 
len, s_off, d_off } = cfg; - println!("{len} bytes, {s_off} src offset, {d_off} dst offset"); + println!("bytes: {len} bytes, src offset: {s_off}, dst offset: {d_off}"); let mut src = AlignedSlice::new_zeroed(len, s_off); let dst = AlignedSlice::new_zeroed(len, d_off); src.fill(1); @@ -71,57 +73,38 @@ mod mcpy { #[library_benchmark] #[benches::aligned( + // Both aligned args = [ Cfg { len: 16, s_off: 0, d_off: 0 }, - Cfg { len: 16, s_off: 0, d_off: 0 }, - Cfg { len: 28, s_off: 0, d_off: 0 }, Cfg { len: 32, s_off: 0, d_off: 0 }, - Cfg { len: 36, s_off: 0, d_off: 0 }, - Cfg { len: 60, s_off: 0, d_off: 0 }, Cfg { len: 64, s_off: 0, d_off: 0 }, - Cfg { len: 68, s_off: 0, d_off: 0 }, - Cfg { len: 128, s_off: 0, d_off: 0 }, - Cfg { len: 256, s_off: 0, d_off: 0 }, Cfg { len: 512, s_off: 0, d_off: 0 }, - Cfg { len: 1024, s_off: 0, d_off: 0 }, Cfg { len: 4096, s_off: 0, d_off: 0 }, - Cfg { len: 1048576, s_off: 0, d_off: 0 }, + Cfg { len: MEG1, s_off: 0, d_off: 0 }, ], setup = setup, )] #[benches::offset( + // Both at the same offset args = [ Cfg { len: 16, s_off: 65, d_off: 65 }, - Cfg { len: 28, s_off: 65, d_off: 65 }, Cfg { len: 32, s_off: 65, d_off: 65 }, - Cfg { len: 36, s_off: 65, d_off: 65 }, - Cfg { len: 60, s_off: 65, d_off: 65 }, Cfg { len: 64, s_off: 65, d_off: 65 }, - Cfg { len: 68, s_off: 65, d_off: 65 }, - Cfg { len: 128, s_off: 65, d_off: 65 }, - Cfg { len: 256, s_off: 65, d_off: 65 }, Cfg { len: 512, s_off: 65, d_off: 65 }, - Cfg { len: 1024, s_off: 65, d_off: 65 }, Cfg { len: 4096, s_off: 65, d_off: 65 }, - Cfg { len: 1048576, s_off: 65, d_off: 65 }, + Cfg { len: MEG1, s_off: 65, d_off: 65 }, ], setup = setup, )] #[benches::misaligned( + // `src` and `dst` both misaligned by different amounts args = [ Cfg { len: 16, s_off: 65, d_off: 66 }, - Cfg { len: 28, s_off: 65, d_off: 66 }, Cfg { len: 32, s_off: 65, d_off: 66 }, - Cfg { len: 36, s_off: 65, d_off: 66 }, - Cfg { len: 60, s_off: 65, d_off: 66 }, Cfg { len: 64, s_off: 65, d_off: 66 }, - Cfg { len: 68, 
s_off: 65, d_off: 66 }, - Cfg { len: 128, s_off: 65, d_off: 66 }, - Cfg { len: 256, s_off: 65, d_off: 66 }, Cfg { len: 512, s_off: 65, d_off: 66 }, - Cfg { len: 1024, s_off: 65, d_off: 66 }, Cfg { len: 4096, s_off: 65, d_off: 66 }, - Cfg { len: 1048576, s_off: 65, d_off: 66 }, + Cfg { len: MEG1, s_off: 65, d_off: 66 }, ], setup = setup, )] @@ -147,7 +130,7 @@ mod mset { } fn setup(Cfg { len, offset }: Cfg) -> (usize, AlignedSlice) { - println!("{len} bytes, {offset} offset"); + println!("bytes: {len}, offset: {offset}"); (len, AlignedSlice::new_zeroed(len, offset)) } @@ -159,7 +142,7 @@ mod mset { Cfg { len: 64, offset: 0 }, Cfg { len: 512, offset: 0 }, Cfg { len: 4096, offset: 0 }, - Cfg { len: 1048576, offset: 0 }, + Cfg { len: MEG1, offset: 0 }, ], setup = setup, )] @@ -170,7 +153,7 @@ mod mset { Cfg { len: 64, offset: 65 }, Cfg { len: 512, offset: 65 }, Cfg { len: 4096, offset: 65 }, - Cfg { len: 1048576, offset: 65 }, + Cfg { len: MEG1, offset: 65 }, ], setup = setup, )] @@ -198,7 +181,7 @@ mod mcmp { fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) { let Cfg { len, s_off, d_off } = cfg; - println!("{len} bytes, {s_off} src offset, {d_off} dst offset"); + println!("bytes: {len}, src offset: {s_off}, dst offset: {d_off}"); let b1 = AlignedSlice::new_zeroed(len, s_off); let mut b2 = AlignedSlice::new_zeroed(len, d_off); b2[len - 1] = 1; @@ -207,35 +190,38 @@ mod mcmp { #[library_benchmark] #[benches::aligned( + // Both aligned args = [ Cfg { len: 16, s_off: 0, d_off: 0 }, Cfg { len: 32, s_off: 0, d_off: 0 }, Cfg { len: 64, s_off: 0, d_off: 0 }, Cfg { len: 512, s_off: 0, d_off: 0 }, Cfg { len: 4096, s_off: 0, d_off: 0 }, - Cfg { len: 1048576, s_off: 0, d_off: 0 }, + Cfg { len: MEG1, s_off: 0, d_off: 0 }, ], setup = setup )] #[benches::offset( + // Both at the same offset args = [ Cfg { len: 16, s_off: 65, d_off: 65 }, Cfg { len: 32, s_off: 65, d_off: 65 }, Cfg { len: 64, s_off: 65, d_off: 65 }, Cfg { len: 512, s_off: 65, d_off: 65 }, Cfg { len: 4096, 
s_off: 65, d_off: 65 }, - Cfg { len: 1048576, s_off: 65, d_off: 65 }, + Cfg { len: MEG1, s_off: 65, d_off: 65 }, ], setup = setup )] #[benches::misaligned( + // `src` and `dst` both misaligned by different amounts args = [ Cfg { len: 16, s_off: 65, d_off: 66 }, Cfg { len: 32, s_off: 65, d_off: 66 }, Cfg { len: 64, s_off: 65, d_off: 66 }, Cfg { len: 512, s_off: 65, d_off: 66 }, Cfg { len: 4096, s_off: 65, d_off: 66 }, - Cfg { len: 1048576, s_off: 65, d_off: 66 }, + Cfg { len: MEG1, s_off: 65, d_off: 66 }, ], setup = setup )] @@ -254,7 +240,7 @@ mod mcmp { mod mmove { use super::*; - use Spread::{Large, Medium, Small}; + use Spread::{Aligned, Large, Medium, Small}; struct Cfg { len: usize, @@ -263,6 +249,8 @@ mod mmove { } enum Spread { + /// `src` and `dst` are close and have the same alignment (or offset). + Aligned, /// `src` and `dst` are close. Small, /// `src` and `dst` are halfway offset in the buffer. @@ -271,10 +259,16 @@ mod mmove { Large, } + // Note that small and large are fn calculate_spread(len: usize, spread: Spread) -> usize { match spread { + // Note that this test doesn't make sense for lengths less than len=128 + Aligned => { + assert!(len > MAX_ALIGN, "aligned memset would have no overlap"); + MAX_ALIGN + } Small => 1, - Medium => len / 2, + Medium => (len / 2) + 1, // add 1 so all are misaligned Large => len - 1, } } @@ -282,8 +276,8 @@ mod mmove { fn setup_forward(cfg: Cfg) -> (usize, usize, AlignedSlice) { let Cfg { len, spread, off } = cfg; let spread = calculate_spread(len, spread); - println!("{len} bytes, {spread} spread, {off} offset"); - assert!(spread < len, "otherwise this just tests memcpy"); + println!("bytes: {len}, spread: {spread}, offset: {off}, forward"); + assert!(spread < len, "memmove tests should have some overlap"); let mut buf = AlignedSlice::new_zeroed(len + spread, off); let mut fill: usize = 0; buf[..len].fill_with(|| { @@ -296,8 +290,8 @@ mod mmove { fn setup_backward(cfg: Cfg) -> (usize, usize, AlignedSlice) { let Cfg 
{ len, spread, off } = cfg; let spread = calculate_spread(len, spread); - println!("{len} bytes, {spread} spread, {off} offset"); - assert!(spread < len, "otherwise this just tests memcpy"); + println!("bytes: {len}, spread: {spread}, offset: {off}, backward"); + assert!(spread < len, "memmove tests should have some overlap"); let mut buf = AlignedSlice::new_zeroed(len + spread, off); let mut fill: usize = 0; buf[spread..].fill_with(|| { @@ -308,6 +302,14 @@ mod mmove { } #[library_benchmark] + #[benches::aligned( + args = [ + // Don't test small spreads since there is no overlap + Cfg { len: 4096, spread: Aligned, off: 0 }, + Cfg { len: MEG1, spread: Aligned, off: 0 }, + ], + setup = setup_forward + )] #[benches::small_spread( args = [ Cfg { len: 16, spread: Small, off: 0 }, @@ -315,7 +317,7 @@ mod mmove { Cfg { len: 64, spread: Small, off: 0 }, Cfg { len: 512, spread: Small, off: 0 }, Cfg { len: 4096, spread: Small, off: 0 }, - Cfg { len: 1048576, spread: Small, off: 0 }, + Cfg { len: MEG1, spread: Small, off: 0 }, ], setup = setup_forward )] @@ -326,7 +328,7 @@ mod mmove { Cfg { len: 64, spread: Medium, off: 0 }, Cfg { len: 512, spread: Medium, off: 0 }, Cfg { len: 4096, spread: Medium, off: 0 }, - Cfg { len: 1048576, spread: Medium, off: 0 }, + Cfg { len: MEG1, spread: Medium, off: 0 }, ], setup = setup_forward )] @@ -337,40 +339,47 @@ mod mmove { Cfg { len: 64, spread: Large, off: 0 }, Cfg { len: 512, spread: Large, off: 0 }, Cfg { len: 4096, spread: Large, off: 0 }, - Cfg { len: 1048576, spread: Large, off: 0 }, + Cfg { len: MEG1, spread: Large, off: 0 }, ], setup = setup_forward )] - #[benches::small_spread_offset( + #[benches::aligned_off( + args = [ + Cfg { len: 4096, spread: Aligned, off: 65 }, + Cfg { len: MEG1, spread: Aligned, off: 65 }, + ], + setup = setup_forward + )] + #[benches::small_spread_off( args = [ - Cfg { len: 16, spread: Small, off: 63 }, - Cfg { len: 32, spread: Small, off: 63 }, - Cfg { len: 64, spread: Small, off: 63 }, - Cfg { len: 
512, spread: Small, off: 63 }, - Cfg { len: 4096, spread: Small, off: 63 }, - Cfg { len: 1048576, spread: Small, off: 63 }, + Cfg { len: 16, spread: Small, off: 65 }, + Cfg { len: 32, spread: Small, off: 65 }, + Cfg { len: 64, spread: Small, off: 65 }, + Cfg { len: 512, spread: Small, off: 65 }, + Cfg { len: 4096, spread: Small, off: 65 }, + Cfg { len: MEG1, spread: Small, off: 65 }, ], setup = setup_forward )] - #[benches::medium_spread_offset( + #[benches::medium_spread_off( args = [ - Cfg { len: 16, spread: Medium, off: 63 }, - Cfg { len: 32, spread: Medium, off: 63 }, - Cfg { len: 64, spread: Medium, off: 63 }, - Cfg { len: 512, spread: Medium, off: 63 }, - Cfg { len: 4096, spread: Medium, off: 63 }, - Cfg { len: 1048576, spread: Medium, off: 63 }, + Cfg { len: 16, spread: Medium, off: 65 }, + Cfg { len: 32, spread: Medium, off: 65 }, + Cfg { len: 64, spread: Medium, off: 65 }, + Cfg { len: 512, spread: Medium, off: 65 }, + Cfg { len: 4096, spread: Medium, off: 65 }, + Cfg { len: MEG1, spread: Medium, off: 65 }, ], setup = setup_forward )] - #[benches::large_spread_offset( + #[benches::large_spread_off( args = [ - Cfg { len: 16, spread: Large, off: 63 }, - Cfg { len: 32, spread: Large, off: 63 }, - Cfg { len: 64, spread: Large, off: 63 }, - Cfg { len: 512, spread: Large, off: 63 }, - Cfg { len: 4096, spread: Large, off: 63 }, - Cfg { len: 1048576, spread: Large, off: 63 }, + Cfg { len: 16, spread: Large, off: 65 }, + Cfg { len: 32, spread: Large, off: 65 }, + Cfg { len: 64, spread: Large, off: 65 }, + Cfg { len: 512, spread: Large, off: 65 }, + Cfg { len: 4096, spread: Large, off: 65 }, + Cfg { len: MEG1, spread: Large, off: 65 }, ], setup = setup_forward )] @@ -386,6 +395,14 @@ mod mmove { } #[library_benchmark] + #[benches::aligned( + args = [ + // Don't test small spreads since there is no overlap + Cfg { len: 4096, spread: Aligned, off: 0 }, + Cfg { len: MEG1, spread: Aligned, off: 0 }, + ], + setup = setup_backward + )] #[benches::small_spread( args = [ 
Cfg { len: 16, spread: Small, off: 0 }, @@ -393,18 +410,18 @@ mod mmove { Cfg { len: 64, spread: Small, off: 0 }, Cfg { len: 512, spread: Small, off: 0 }, Cfg { len: 4096, spread: Small, off: 0 }, - Cfg { len: 1048576, spread: Small, off: 0 }, + Cfg { len: MEG1, spread: Small, off: 0 }, ], setup = setup_backward )] - #[benches::middle( + #[benches::medium_spread( args = [ Cfg { len: 16, spread: Medium, off: 0 }, Cfg { len: 32, spread: Medium, off: 0 }, Cfg { len: 64, spread: Medium, off: 0 }, Cfg { len: 512, spread: Medium, off: 0 }, Cfg { len: 4096, spread: Medium, off: 0 }, - Cfg { len: 1048576, spread: Medium, off: 0 }, + Cfg { len: MEG1, spread: Medium, off: 0 }, ], setup = setup_backward )] @@ -415,40 +432,48 @@ mod mmove { Cfg { len: 64, spread: Large, off: 0 }, Cfg { len: 512, spread: Large, off: 0 }, Cfg { len: 4096, spread: Large, off: 0 }, - Cfg { len: 1048576, spread: Large, off: 0 }, + Cfg { len: MEG1, spread: Large, off: 0 }, + ], + setup = setup_backward + )] + #[benches::aligned_off( + args = [ + // Don't test small spreads since there is no overlap + Cfg { len: 4096, spread: Aligned, off: 65 }, + Cfg { len: MEG1, spread: Aligned, off: 65 }, ], setup = setup_backward )] #[benches::small_spread_off( args = [ - Cfg { len: 16, spread: Small, off: 63 }, - Cfg { len: 32, spread: Small, off: 63 }, - Cfg { len: 64, spread: Small, off: 63 }, - Cfg { len: 512, spread: Small, off: 63 }, - Cfg { len: 4096, spread: Small, off: 63 }, - Cfg { len: 1048576, spread: Small, off: 63 }, + Cfg { len: 16, spread: Small, off: 65 }, + Cfg { len: 32, spread: Small, off: 65 }, + Cfg { len: 64, spread: Small, off: 65 }, + Cfg { len: 512, spread: Small, off: 65 }, + Cfg { len: 4096, spread: Small, off: 65 }, + Cfg { len: MEG1, spread: Small, off: 65 }, ], setup = setup_backward )] - #[benches::middle_off( + #[benches::medium_spread_off( args = [ - Cfg { len: 16, spread: Medium, off: 63 }, - Cfg { len: 32, spread: Medium, off: 63 }, - Cfg { len: 64, spread: Medium, off: 63 }, - 
Cfg { len: 512, spread: Medium, off: 63 }, - Cfg { len: 4096, spread: Medium, off: 63 }, - Cfg { len: 1048576, spread: Medium, off: 63 }, + Cfg { len: 16, spread: Medium, off: 65 }, + Cfg { len: 32, spread: Medium, off: 65 }, + Cfg { len: 64, spread: Medium, off: 65 }, + Cfg { len: 512, spread: Medium, off: 65 }, + Cfg { len: 4096, spread: Medium, off: 65 }, + Cfg { len: MEG1, spread: Medium, off: 65 }, ], setup = setup_backward )] #[benches::large_spread_off( args = [ - Cfg { len: 16, spread: Large, off: 63 }, - Cfg { len: 32, spread: Large, off: 63 }, - Cfg { len: 64, spread: Large, off: 63 }, - Cfg { len: 512, spread: Large, off: 63 }, - Cfg { len: 4096, spread: Large, off: 63 }, - Cfg { len: 1048576, spread: Large, off: 63 }, + Cfg { len: 16, spread: Large, off: 65 }, + Cfg { len: 32, spread: Large, off: 65 }, + Cfg { len: 64, spread: Large, off: 65 }, + Cfg { len: 512, spread: Large, off: 65 }, + Cfg { len: 4096, spread: Large, off: 65 }, + Cfg { len: MEG1, spread: Large, off: 65 }, ], setup = setup_backward )] From 4df7a8dee8464552359d3b3ec7611972465d3b93 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 22 Mar 2025 06:36:40 +0100 Subject: [PATCH 1248/1459] copy_misaligned_words: avoid out-of-bounds accesses (#799) * copy_misaligned_words: avoid out-of-bounds accesses * add test to make Miri able to detect OOB in memmove * run Miri on CI --- .github/workflows/main.yml | 16 +++ ci/miri.sh | 16 +++ compiler-builtins/src/mem/impls.rs | 160 +++++++++++++++++++++++------ testcrate/tests/mem.rs | 23 ++++- 4 files changed, 183 insertions(+), 32 deletions(-) create mode 100755 ci/miri.sh diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c337c26a2..003102d59 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -160,6 +160,21 @@ jobs: rm -rf /tmp/.buildx-cache mv /tmp/.buildx-cache-new /tmp/.buildx-cache + miri: + name: Miri + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - 
name: Install Rust (rustup) + run: rustup update nightly --no-self-update && rustup default nightly + shell: bash + - run: rustup component add miri + - run: cargo miri setup + - uses: Swatinem/rust-cache@v2 + - run: ./ci/miri.sh + rustfmt: name: Rustfmt runs-on: ubuntu-latest @@ -190,6 +205,7 @@ jobs: - test - rustfmt - clippy + - miri runs-on: ubuntu-latest # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency # failed" as success. So we have to do some contortions to ensure the job fails if any of its diff --git a/ci/miri.sh b/ci/miri.sh new file mode 100755 index 000000000..f9a1240a4 --- /dev/null +++ b/ci/miri.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -ex + +# We need Tree Borrows as some of our raw pointer patterns are not +# compatible with Stacked Borrows. +export MIRIFLAGS="-Zmiri-tree-borrows" + +# One target that sets `mem-unaligned` and one that does not, +# and a big-endian target. +TARGETS=(x86_64-unknown-linux-gnu + armv7-unknown-linux-gnueabihf + s390x-unknown-linux-gnu) +for TARGET in "${TARGETS[@]}"; do + # Only run the `mem` tests to avoid this taking too long. + cargo miri test --manifest-path testcrate/Cargo.toml --features no-asm --target $TARGET -- mem +done diff --git a/compiler-builtins/src/mem/impls.rs b/compiler-builtins/src/mem/impls.rs index c602a67db..dc12d6996 100644 --- a/compiler-builtins/src/mem/impls.rs +++ b/compiler-builtins/src/mem/impls.rs @@ -41,6 +41,72 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize { core::mem::transmute(x_read) } +/// Loads a `T`-sized chunk from `src` into `dst` at offset `offset`, if that does not exceed +/// `load_sz`. The offset pointers must both be `T`-aligned. Returns the new offset, advanced by the +/// chunk size if a load happened. 
+#[cfg(not(feature = "mem-unaligned"))] +#[inline(always)] +unsafe fn load_chunk_aligned<T: Copy>( + src: *const usize, + dst: *mut usize, + load_sz: usize, + offset: usize, +) -> usize { + let chunk_sz = core::mem::size_of::<T>(); + if (load_sz & chunk_sz) != 0 { + *dst.wrapping_byte_add(offset).cast::<T>() = *src.wrapping_byte_add(offset).cast::<T>(); + offset | chunk_sz + } else { + offset + } +} + +/// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize` +/// read with the out-of-bounds part filled with 0s. +/// `load_sz` must be strictly less than `WORD_SIZE`. +#[cfg(not(feature = "mem-unaligned"))] +#[inline(always)] +unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize { + debug_assert!(load_sz < WORD_SIZE); + // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8 + // (since `load_sz < WORD_SIZE`). + const { assert!(WORD_SIZE <= 8) }; + + let mut i = 0; + let mut out = 0usize; + // We load in decreasing order, so the pointers remain sufficiently aligned for the next step. + i = load_chunk_aligned::<u32>(src, &raw mut out, load_sz, i); + i = load_chunk_aligned::<u16>(src, &raw mut out, load_sz, i); + i = load_chunk_aligned::<u8>(src, &raw mut out, load_sz, i); + debug_assert!(i == load_sz); + out +} + +/// Load `load_sz` many bytes from `src.wrapping_byte_add(WORD_SIZE - load_sz)`. `src` must be +/// `usize`-aligned. The bytes are returned as the *last* bytes of the return value, i.e., this acts +/// as if we had done a `usize` read from `src`, with the out-of-bounds part filled with 0s. +/// `load_sz` must be strictly less than `WORD_SIZE`. +#[cfg(not(feature = "mem-unaligned"))] +#[inline(always)] +unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize { + debug_assert!(load_sz < WORD_SIZE); + // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8 + // (since `load_sz < WORD_SIZE`). 
+ const { assert!(WORD_SIZE <= 8) }; + + let mut i = 0; + let mut out = 0usize; + // Obtain pointers pointing to the beginning of the range we want to load. + let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz); + let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz); + // We load in increasing order, so by the time we reach `u16` things are 2-aligned etc. + i = load_chunk_aligned::<u8>(src_shifted, out_shifted, load_sz, i); + i = load_chunk_aligned::<u16>(src_shifted, out_shifted, load_sz, i); + i = load_chunk_aligned::<u32>(src_shifted, out_shifted, load_sz, i); + debug_assert!(i == load_sz); + out +} + #[inline(always)] pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) { #[inline(always)] @@ -66,40 +132,57 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) } } + /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0. + /// `src` *must not* be `usize`-aligned. #[cfg(not(feature = "mem-unaligned"))] #[inline(always)] unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + debug_assert!(n > 0 && n % WORD_SIZE == 0); + debug_assert!(src.addr() % WORD_SIZE != 0); + let mut dest_usize = dest as *mut usize; let dest_end = dest.wrapping_add(n) as *mut usize; // Calculate the misalignment offset and shift needed to reassemble value. + // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE. let offset = src as usize & WORD_MASK; let shift = offset * 8; // Realign src - let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize; - // This will read (but won't use) bytes out of bound. - // cfg needed because not all targets will have atomic loads that can be lowered - // (e.g. BPF, MSP430), or provided by an external library (e.g. 
RV32I) - #[cfg(target_has_atomic_load_store = "ptr")] - let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned); - #[cfg(not(target_has_atomic_load_store = "ptr"))] - let mut prev_word = core::ptr::read_volatile(src_aligned); + let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize; + let mut prev_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset); - while dest_usize < dest_end { + while dest_usize.wrapping_add(1) < dest_end { src_aligned = src_aligned.wrapping_add(1); let cur_word = *src_aligned; - #[cfg(target_endian = "little")] - let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift); - #[cfg(target_endian = "big")] - let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift); + let reassembled = if cfg!(target_endian = "little") { + prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift) + } else { + prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift) + }; prev_word = cur_word; - *dest_usize = resembled; + *dest_usize = reassembled; dest_usize = dest_usize.wrapping_add(1); } + + // There's one more element left to go, and we can't use the loop for that as on the `src` side, + // it is partially out-of-bounds. + src_aligned = src_aligned.wrapping_add(1); + let cur_word = load_aligned_partial(src_aligned, offset); + let reassembled = if cfg!(target_endian = "little") { + prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift) + } else { + prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift) + }; + // prev_word does not matter any more + + *dest_usize = reassembled; + // dest_usize does not matter any more } + /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0. + /// `src` *must not* be `usize`-aligned. 
#[cfg(feature = "mem-unaligned")] #[inline(always)] unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { @@ -164,40 +247,57 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { } } + /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0. + /// `src` *must not* be `usize`-aligned. #[cfg(not(feature = "mem-unaligned"))] #[inline(always)] unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + debug_assert!(n > 0 && n % WORD_SIZE == 0); + debug_assert!(src.addr() % WORD_SIZE != 0); + let mut dest_usize = dest as *mut usize; - let dest_start = dest.wrapping_sub(n) as *mut usize; + let dest_start = dest.wrapping_sub(n) as *mut usize; // we're moving towards the start // Calculate the misalignment offset and shift needed to reassemble value. + // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE. let offset = src as usize & WORD_MASK; let shift = offset * 8; - // Realign src_aligned - let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize; - // This will read (but won't use) bytes out of bound. - // cfg needed because not all targets will have atomic loads that can be lowered - // (e.g. BPF, MSP430), or provided by an external library (e.g. 
RV32I) - #[cfg(target_has_atomic_load_store = "ptr")] - let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned); - #[cfg(not(target_has_atomic_load_store = "ptr"))] - let mut prev_word = core::ptr::read_volatile(src_aligned); + // Realign src + let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize; + let mut prev_word = load_aligned_partial(src_aligned, offset); - while dest_start < dest_usize { + while dest_start.wrapping_add(1) < dest_usize { src_aligned = src_aligned.wrapping_sub(1); let cur_word = *src_aligned; - #[cfg(target_endian = "little")] - let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift; - #[cfg(target_endian = "big")] - let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift; + let reassembled = if cfg!(target_endian = "little") { + prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift + } else { + prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift + }; prev_word = cur_word; dest_usize = dest_usize.wrapping_sub(1); - *dest_usize = resembled; + *dest_usize = reassembled; } + + // There's one more element left to go, and we can't use the loop for that as on the `src` side, + // it is partially out-of-bounds. + src_aligned = src_aligned.wrapping_sub(1); + let cur_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset); + let reassembled = if cfg!(target_endian = "little") { + prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift + } else { + prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift + }; + // prev_word does not matter any more + + dest_usize = dest_usize.wrapping_sub(1); + *dest_usize = reassembled; } + /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0. + /// `src` *must not* be `usize`-aligned. 
#[cfg(feature = "mem-unaligned")] #[inline(always)] unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { diff --git a/testcrate/tests/mem.rs b/testcrate/tests/mem.rs index 48ac95adc..d838ef159 100644 --- a/testcrate/tests/mem.rs +++ b/testcrate/tests/mem.rs @@ -128,11 +128,13 @@ fn memcmp_eq() { #[test] fn memcmp_ne() { let arr1 @ arr2 = gen_arr::<256>(); - for i in 0..256 { + // Reduce iteration count in Miri as it is too slow otherwise. + let limit = if cfg!(miri) { 64 } else { 256 }; + for i in 0..limit { let mut diff_arr = arr1; diff_arr.0[i] = 127; let expect = diff_arr.0[i].cmp(&arr2.0[i]); - for k in i + 1..256 { + for k in i + 1..limit { let result = unsafe { memcmp(diff_arr.0.as_ptr(), arr2.0.as_ptr(), k) }; assert_eq!(expect, result.cmp(&0)); } @@ -230,6 +232,23 @@ fn memmove_backward_aligned() { } } +#[test] +fn memmove_misaligned_bounds() { + // The above tests have the downside that the addresses surrounding the range-to-copy are all + // still in-bounds, so Miri would not actually complain about OOB accesses. So we also test with + // an array that has just the right size. We test a few times to avoid it being accidentally + // aligned. + for _ in 0..8 { + let mut arr1 = [0u8; 17]; + let mut arr2 = [0u8; 17]; + unsafe { + // Copy both ways so we hit both the forward and backward cases. + memmove(arr1.as_mut_ptr(), arr2.as_mut_ptr(), 17); + memmove(arr2.as_mut_ptr(), arr1.as_mut_ptr(), 17); + } + } +} + #[test] fn memset_backward_misaligned_nonaligned_start() { let mut arr = gen_arr::<32>(); From f1b9055b0d09370a474ce5e9b9d78ac4758cedf8 Mon Sep 17 00:00:00 2001 From: Patryk Wychowaniec Date: Sun, 23 Mar 2025 08:44:22 +0100 Subject: [PATCH 1249/1459] avr: Skip No More! 
--- compiler-builtins/src/float/add.rs | 2 - compiler-builtins/src/float/cmp.rs | 81 +++++++++++---------------- compiler-builtins/src/float/div.rs | 3 - compiler-builtins/src/float/extend.rs | 7 --- compiler-builtins/src/float/mul.rs | 2 - compiler-builtins/src/float/pow.rs | 3 - compiler-builtins/src/float/sub.rs | 2 - compiler-builtins/src/float/trunc.rs | 7 --- compiler-builtins/src/int/bswap.rs | 3 - compiler-builtins/src/int/sdiv.rs | 42 +++++++++++++- compiler-builtins/src/int/shift.rs | 9 --- compiler-builtins/src/int/udiv.rs | 25 ++++++--- compiler-builtins/src/macros.rs | 29 ---------- testcrate/tests/misc.rs | 1 - 14 files changed, 91 insertions(+), 125 deletions(-) diff --git a/compiler-builtins/src/float/add.rs b/compiler-builtins/src/float/add.rs index ef04ddc16..0426c9cc4 100644 --- a/compiler-builtins/src/float/add.rs +++ b/compiler-builtins/src/float/add.rs @@ -189,14 +189,12 @@ where } intrinsics! { - #[avr_skip] #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_fadd] pub extern "C" fn __addsf3(a: f32, b: f32) -> f32 { add(a, b) } - #[avr_skip] #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_dadd] pub extern "C" fn __adddf3(a: f64, b: f64) -> f64 { diff --git a/compiler-builtins/src/float/cmp.rs b/compiler-builtins/src/float/cmp.rs index b9b4d0114..296952821 100644 --- a/compiler-builtins/src/float/cmp.rs +++ b/compiler-builtins/src/float/cmp.rs @@ -3,6 +3,14 @@ use crate::float::Float; use crate::int::MinInt; +// https://github.com/llvm/llvm-project/blob/1e6ba3cd2fe96be00b6ed6ba28b3d9f9271d784d/compiler-rt/lib/builtins/fp_compare_impl.inc#L22 +#[cfg(target_arch = "avr")] +pub type CmpResult = i8; + +// https://github.com/llvm/llvm-project/blob/1e6ba3cd2fe96be00b6ed6ba28b3d9f9271d784d/compiler-rt/lib/builtins/fp_compare_impl.inc#L25 +#[cfg(not(target_arch = "avr"))] +pub type CmpResult = i32; + #[derive(Clone, Copy)] enum Result { Less, @@ -12,7 +20,7 @@ enum Result { } impl Result { - fn to_le_abi(self) -> i32 { + fn to_le_abi(self) -> CmpResult { match 
self { Result::Less => -1, Result::Equal => 0, @@ -21,7 +29,7 @@ impl Result { } } - fn to_ge_abi(self) -> i32 { + fn to_ge_abi(self) -> CmpResult { match self { Result::Less => -1, Result::Equal => 0, @@ -99,120 +107,99 @@ fn unord(a: F, b: F) -> bool { } intrinsics! { - #[avr_skip] - pub extern "C" fn __lesf2(a: f32, b: f32) -> i32 { + pub extern "C" fn __lesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult { cmp(a, b).to_le_abi() } - #[avr_skip] - pub extern "C" fn __gesf2(a: f32, b: f32) -> i32 { + pub extern "C" fn __gesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult { cmp(a, b).to_ge_abi() } - #[avr_skip] #[arm_aeabi_alias = __aeabi_fcmpun] - pub extern "C" fn __unordsf2(a: f32, b: f32) -> i32 { - unord(a, b) as i32 + pub extern "C" fn __unordsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult { + unord(a, b) as crate::float::cmp::CmpResult } - #[avr_skip] - pub extern "C" fn __eqsf2(a: f32, b: f32) -> i32 { + pub extern "C" fn __eqsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult { cmp(a, b).to_le_abi() } - #[avr_skip] - pub extern "C" fn __ltsf2(a: f32, b: f32) -> i32 { + pub extern "C" fn __ltsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult { cmp(a, b).to_le_abi() } - #[avr_skip] - pub extern "C" fn __nesf2(a: f32, b: f32) -> i32 { + pub extern "C" fn __nesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult { cmp(a, b).to_le_abi() } - #[avr_skip] - pub extern "C" fn __gtsf2(a: f32, b: f32) -> i32 { + pub extern "C" fn __gtsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult { cmp(a, b).to_ge_abi() } - #[avr_skip] - pub extern "C" fn __ledf2(a: f64, b: f64) -> i32 { + pub extern "C" fn __ledf2(a: f64, b: f64) -> crate::float::cmp::CmpResult { cmp(a, b).to_le_abi() } - #[avr_skip] - pub extern "C" fn __gedf2(a: f64, b: f64) -> i32 { + pub extern "C" fn __gedf2(a: f64, b: f64) -> crate::float::cmp::CmpResult { cmp(a, b).to_ge_abi() } - #[avr_skip] #[arm_aeabi_alias = __aeabi_dcmpun] - pub extern "C" fn __unorddf2(a: f64, b: f64) -> i32 { - unord(a, b) 
as i32 + pub extern "C" fn __unorddf2(a: f64, b: f64) -> crate::float::cmp::CmpResult { + unord(a, b) as crate::float::cmp::CmpResult } - #[avr_skip] - pub extern "C" fn __eqdf2(a: f64, b: f64) -> i32 { + pub extern "C" fn __eqdf2(a: f64, b: f64) -> crate::float::cmp::CmpResult { cmp(a, b).to_le_abi() } - #[avr_skip] - pub extern "C" fn __ltdf2(a: f64, b: f64) -> i32 { + pub extern "C" fn __ltdf2(a: f64, b: f64) -> crate::float::cmp::CmpResult { cmp(a, b).to_le_abi() } - #[avr_skip] - pub extern "C" fn __nedf2(a: f64, b: f64) -> i32 { + pub extern "C" fn __nedf2(a: f64, b: f64) -> crate::float::cmp::CmpResult { cmp(a, b).to_le_abi() } - #[avr_skip] - pub extern "C" fn __gtdf2(a: f64, b: f64) -> i32 { + pub extern "C" fn __gtdf2(a: f64, b: f64) -> crate::float::cmp::CmpResult { cmp(a, b).to_ge_abi() } } #[cfg(f128_enabled)] intrinsics! { - #[avr_skip] #[ppc_alias = __lekf2] - pub extern "C" fn __letf2(a: f128, b: f128) -> i32 { + pub extern "C" fn __letf2(a: f128, b: f128) -> crate::float::cmp::CmpResult { cmp(a, b).to_le_abi() } - #[avr_skip] #[ppc_alias = __gekf2] - pub extern "C" fn __getf2(a: f128, b: f128) -> i32 { + pub extern "C" fn __getf2(a: f128, b: f128) -> crate::float::cmp::CmpResult { cmp(a, b).to_ge_abi() } - #[avr_skip] #[ppc_alias = __unordkf2] - pub extern "C" fn __unordtf2(a: f128, b: f128) -> i32 { - unord(a, b) as i32 + pub extern "C" fn __unordtf2(a: f128, b: f128) -> crate::float::cmp::CmpResult { + unord(a, b) as crate::float::cmp::CmpResult } - #[avr_skip] #[ppc_alias = __eqkf2] - pub extern "C" fn __eqtf2(a: f128, b: f128) -> i32 { + pub extern "C" fn __eqtf2(a: f128, b: f128) -> crate::float::cmp::CmpResult { cmp(a, b).to_le_abi() } - #[avr_skip] #[ppc_alias = __ltkf2] - pub extern "C" fn __lttf2(a: f128, b: f128) -> i32 { + pub extern "C" fn __lttf2(a: f128, b: f128) -> crate::float::cmp::CmpResult { cmp(a, b).to_le_abi() } - #[avr_skip] #[ppc_alias = __nekf2] - pub extern "C" fn __netf2(a: f128, b: f128) -> i32 { + pub extern "C" fn 
__netf2(a: f128, b: f128) -> crate::float::cmp::CmpResult { cmp(a, b).to_le_abi() } - #[avr_skip] #[ppc_alias = __gtkf2] - pub extern "C" fn __gttf2(a: f128, b: f128) -> i32 { + pub extern "C" fn __gttf2(a: f128, b: f128) -> crate::float::cmp::CmpResult { cmp(a, b).to_ge_abi() } } diff --git a/compiler-builtins/src/float/div.rs b/compiler-builtins/src/float/div.rs index 21c757dd6..929f29197 100644 --- a/compiler-builtins/src/float/div.rs +++ b/compiler-builtins/src/float/div.rs @@ -606,19 +606,16 @@ where } intrinsics! { - #[avr_skip] #[arm_aeabi_alias = __aeabi_fdiv] pub extern "C" fn __divsf3(a: f32, b: f32) -> f32 { div(a, b) } - #[avr_skip] #[arm_aeabi_alias = __aeabi_ddiv] pub extern "C" fn __divdf3(a: f64, b: f64) -> f64 { div(a, b) } - #[avr_skip] #[ppc_alias = __divkf3] #[cfg(f128_enabled)] pub extern "C" fn __divtf3(a: f128, b: f128) -> f128 { diff --git a/compiler-builtins/src/float/extend.rs b/compiler-builtins/src/float/extend.rs index ce00da31d..c4f1fe30e 100644 --- a/compiler-builtins/src/float/extend.rs +++ b/compiler-builtins/src/float/extend.rs @@ -70,7 +70,6 @@ where } intrinsics! { - #[avr_skip] #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_f2d] pub extern "C" fn __extendsfdf2(a: f32) -> f64 { @@ -79,7 +78,6 @@ intrinsics! { } intrinsics! { - #[avr_skip] #[aapcs_on_arm] #[apple_f16_arg_abi] #[arm_aeabi_alias = __aeabi_h2f] @@ -88,7 +86,6 @@ intrinsics! { extend(a) } - #[avr_skip] #[aapcs_on_arm] #[apple_f16_arg_abi] #[cfg(f16_enabled)] @@ -96,7 +93,6 @@ intrinsics! { extend(a) } - #[avr_skip] #[aapcs_on_arm] #[apple_f16_arg_abi] #[cfg(f16_enabled)] @@ -104,7 +100,6 @@ intrinsics! { extend(a) } - #[avr_skip] #[aapcs_on_arm] #[ppc_alias = __extendhfkf2] #[cfg(all(f16_enabled, f128_enabled))] @@ -112,7 +107,6 @@ intrinsics! { extend(a) } - #[avr_skip] #[aapcs_on_arm] #[ppc_alias = __extendsfkf2] #[cfg(f128_enabled)] @@ -120,7 +114,6 @@ intrinsics! 
{ extend(a) } - #[avr_skip] #[aapcs_on_arm] #[ppc_alias = __extenddfkf2] #[cfg(f128_enabled)] diff --git a/compiler-builtins/src/float/mul.rs b/compiler-builtins/src/float/mul.rs index 58636cb5e..7f1f19d9b 100644 --- a/compiler-builtins/src/float/mul.rs +++ b/compiler-builtins/src/float/mul.rs @@ -180,14 +180,12 @@ where } intrinsics! { - #[avr_skip] #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_fmul] pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 { mul(a, b) } - #[avr_skip] #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_dmul] pub extern "C" fn __muldf3(a: f64, b: f64) -> f64 { diff --git a/compiler-builtins/src/float/pow.rs b/compiler-builtins/src/float/pow.rs index dac768f7b..fe76060e0 100644 --- a/compiler-builtins/src/float/pow.rs +++ b/compiler-builtins/src/float/pow.rs @@ -26,17 +26,14 @@ fn pow(a: F, b: i32) -> F { } intrinsics! { - #[avr_skip] pub extern "C" fn __powisf2(a: f32, b: i32) -> f32 { pow(a, b) } - #[avr_skip] pub extern "C" fn __powidf2(a: f64, b: i32) -> f64 { pow(a, b) } - #[avr_skip] #[ppc_alias = __powikf2] #[cfg(f128_enabled)] // FIXME(f16_f128): MSVC cannot build these until `__divtf3` is available in nightly. diff --git a/compiler-builtins/src/float/sub.rs b/compiler-builtins/src/float/sub.rs index 175b3a165..a0fd9dff9 100644 --- a/compiler-builtins/src/float/sub.rs +++ b/compiler-builtins/src/float/sub.rs @@ -1,13 +1,11 @@ use crate::float::Float; intrinsics! 
{ - #[avr_skip] #[arm_aeabi_alias = __aeabi_fsub] pub extern "C" fn __subsf3(a: f32, b: f32) -> f32 { crate::float::add::__addsf3(a, f32::from_bits(b.to_bits() ^ f32::SIGN_MASK)) } - #[avr_skip] #[arm_aeabi_alias = __aeabi_dsub] pub extern "C" fn __subdf3(a: f64, b: f64) -> f64 { crate::float::add::__adddf3(a, f64::from_bits(b.to_bits() ^ f64::SIGN_MASK)) diff --git a/compiler-builtins/src/float/trunc.rs b/compiler-builtins/src/float/trunc.rs index 928eba0c8..ca8a0f368 100644 --- a/compiler-builtins/src/float/trunc.rs +++ b/compiler-builtins/src/float/trunc.rs @@ -115,7 +115,6 @@ where } intrinsics! { - #[avr_skip] #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_d2f] pub extern "C" fn __truncdfsf2(a: f64) -> f32 { @@ -124,7 +123,6 @@ intrinsics! { } intrinsics! { - #[avr_skip] #[aapcs_on_arm] #[apple_f16_ret_abi] #[arm_aeabi_alias = __aeabi_f2h] @@ -133,7 +131,6 @@ intrinsics! { trunc(a) } - #[avr_skip] #[aapcs_on_arm] #[apple_f16_ret_abi] #[cfg(f16_enabled)] @@ -141,7 +138,6 @@ intrinsics! { trunc(a) } - #[avr_skip] #[aapcs_on_arm] #[apple_f16_ret_abi] #[arm_aeabi_alias = __aeabi_d2h] @@ -150,7 +146,6 @@ intrinsics! { trunc(a) } - #[avr_skip] #[aapcs_on_arm] #[ppc_alias = __trunckfhf2] #[cfg(all(f16_enabled, f128_enabled))] @@ -158,7 +153,6 @@ intrinsics! { trunc(a) } - #[avr_skip] #[aapcs_on_arm] #[ppc_alias = __trunckfsf2] #[cfg(f128_enabled)] @@ -166,7 +160,6 @@ intrinsics! { trunc(a) } - #[avr_skip] #[aapcs_on_arm] #[ppc_alias = __trunckfdf2] #[cfg(f128_enabled)] diff --git a/compiler-builtins/src/int/bswap.rs b/compiler-builtins/src/int/bswap.rs index 9df80204d..3ede08882 100644 --- a/compiler-builtins/src/int/bswap.rs +++ b/compiler-builtins/src/int/bswap.rs @@ -1,20 +1,17 @@ intrinsics! 
{ #[maybe_use_optimized_c_shim] - #[avr_skip] /// Swaps bytes in 32-bit number pub extern "C" fn __bswapsi2(x: u32) -> u32 { x.swap_bytes() } #[maybe_use_optimized_c_shim] - #[avr_skip] /// Swaps bytes in 64-bit number pub extern "C" fn __bswapdi2(x: u64) -> u64 { x.swap_bytes() } #[maybe_use_optimized_c_shim] - #[avr_skip] /// Swaps bytes in 128-bit number pub extern "C" fn __bswapti2(x: u128) -> u128 { x.swap_bytes() diff --git a/compiler-builtins/src/int/sdiv.rs b/compiler-builtins/src/int/sdiv.rs index 9630c7d7d..6a9029de7 100644 --- a/compiler-builtins/src/int/sdiv.rs +++ b/compiler-builtins/src/int/sdiv.rs @@ -9,7 +9,6 @@ macro_rules! sdivmod { $($attr:tt),* // attributes ) => { intrinsics! { - #[avr_skip] $( #[$attr] )* @@ -19,15 +18,18 @@ macro_rules! sdivmod { let b_neg = b < 0; let mut a = a; let mut b = b; + if a_neg { a = a.wrapping_neg(); } if b_neg { b = b.wrapping_neg(); } + let mut r = *rem as $uX; let t = $unsigned_fn(a as $uX, b as $uX, Some(&mut r)) as $iX; let mut r = r as $iX; + if a_neg { r = r.wrapping_neg(); } @@ -51,7 +53,6 @@ macro_rules! sdiv { $($attr:tt),* // attributes ) => { intrinsics! { - #[avr_skip] $( #[$attr] )* @@ -87,7 +88,6 @@ macro_rules! smod { $($attr:tt),* // attributes ) => { intrinsics! { - #[avr_skip] $( #[$attr] )* @@ -114,6 +114,7 @@ macro_rules! smod { } } +#[cfg(not(target_arch = "avr"))] sdivmod!( __udivmodsi4, __divmodsi4, @@ -121,6 +122,41 @@ sdivmod!( i32, maybe_use_optimized_c_shim ); + +#[cfg(target_arch = "avr")] +intrinsics! { + /// Returns `a / b` and `a % b` packed together. + /// + /// Ideally we'd use `-> (u32, u32)` or some kind of a packed struct, but + /// both force a stack allocation, while our result has to be in R18:R26. 
+ pub extern "C" fn __divmodsi4(a: i32, b: i32) -> u64 { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + + let tr = __udivmodsi4(a as u32, b as u32); + let mut t = tr as u32 as i32; + let mut r = (tr >> 32) as u32 as i32; + + if a_neg { + r = r.wrapping_neg(); + } + if a_neg != b_neg { + t = t.wrapping_neg(); + } + + ((r as u32 as u64) << 32) | (t as u32 as u64) + } +} + // The `#[arm_aeabi_alias = __aeabi_idiv]` attribute cannot be made to work with `intrinsics!` in macros intrinsics! { #[maybe_use_optimized_c_shim] diff --git a/compiler-builtins/src/int/shift.rs b/compiler-builtins/src/int/shift.rs index 317272988..a85c1b33d 100644 --- a/compiler-builtins/src/int/shift.rs +++ b/compiler-builtins/src/int/shift.rs @@ -69,56 +69,47 @@ impl Lshr for u64 {} impl Lshr for u128 {} intrinsics! { - #[avr_skip] #[maybe_use_optimized_c_shim] pub extern "C" fn __ashlsi3(a: u32, b: u32) -> u32 { a.ashl(b) } - #[avr_skip] #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_llsl] pub extern "C" fn __ashldi3(a: u64, b: core::ffi::c_uint) -> u64 { a.ashl(b as u32) } - #[avr_skip] pub extern "C" fn __ashlti3(a: u128, b: u32) -> u128 { a.ashl(b) } - #[avr_skip] #[maybe_use_optimized_c_shim] pub extern "C" fn __ashrsi3(a: i32, b: u32) -> i32 { a.ashr(b) } - #[avr_skip] #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_lasr] pub extern "C" fn __ashrdi3(a: i64, b: core::ffi::c_uint) -> i64 { a.ashr(b as u32) } - #[avr_skip] pub extern "C" fn __ashrti3(a: i128, b: u32) -> i128 { a.ashr(b) } - #[avr_skip] #[maybe_use_optimized_c_shim] pub extern "C" fn __lshrsi3(a: u32, b: u32) -> u32 { a.lshr(b) } - #[avr_skip] #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_llsr] pub extern "C" fn __lshrdi3(a: u64, b: core::ffi::c_uint) -> u64 { a.lshr(b as u32) } - #[avr_skip] pub extern "C" fn __lshrti3(a: u128, b: u32) -> u128 { a.lshr(b) } diff --git 
a/compiler-builtins/src/int/udiv.rs b/compiler-builtins/src/int/udiv.rs index 1fa761212..a5c16040a 100644 --- a/compiler-builtins/src/int/udiv.rs +++ b/compiler-builtins/src/int/udiv.rs @@ -17,8 +17,10 @@ intrinsics! { pub extern "C" fn __umodsi3(n: u32, d: u32) -> u32 { u32_div_rem(n, d).1 } +} - #[avr_skip] +#[cfg(not(target_arch = "avr"))] +intrinsics! { #[maybe_use_optimized_c_shim] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 { @@ -28,22 +30,34 @@ intrinsics! { } quo_rem.0 } +} + +#[cfg(target_arch = "avr")] +intrinsics! { + /// Returns `n / d` and `n % d` packed together. + /// + /// Ideally we'd use `-> (u32, u32)` or some kind of a packed struct, but + /// both force a stack allocation, while our result has to be in R18:R26. + pub extern "C" fn __udivmodsi4(n: u32, d: u32) -> u64 { + let (div, rem) = u32_div_rem(n, d); - #[avr_skip] + ((rem as u64) << 32) | (div as u64) + } +} + +intrinsics! { #[maybe_use_optimized_c_shim] /// Returns `n / d` pub extern "C" fn __udivdi3(n: u64, d: u64) -> u64 { u64_div_rem(n, d).0 } - #[avr_skip] #[maybe_use_optimized_c_shim] /// Returns `n % d` pub extern "C" fn __umoddi3(n: u64, d: u64) -> u64 { u64_div_rem(n, d).1 } - #[avr_skip] #[maybe_use_optimized_c_shim] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmoddi4(n: u64, d: u64, rem: Option<&mut u64>) -> u64 { @@ -57,7 +71,6 @@ intrinsics! { // Note: we use block configuration and not `if cfg!(...)`, because we need to entirely disable // the existence of `u128_div_rem` to get 32-bit SPARC to compile, see `u128_divide_sparc` docs. - #[avr_skip] /// Returns `n / d` pub extern "C" fn __udivti3(n: u128, d: u128) -> u128 { #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { @@ -68,7 +81,6 @@ intrinsics! 
{ } } - #[avr_skip] /// Returns `n % d` pub extern "C" fn __umodti3(n: u128, d: u128) -> u128 { #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { @@ -81,7 +93,6 @@ intrinsics! { } } - #[avr_skip] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmodti4(n: u128, d: u128, rem: Option<&mut u128>) -> u128 { #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { diff --git a/compiler-builtins/src/macros.rs b/compiler-builtins/src/macros.rs index b1b71379c..0397e4551 100644 --- a/compiler-builtins/src/macros.rs +++ b/compiler-builtins/src/macros.rs @@ -445,35 +445,6 @@ macro_rules! intrinsics { intrinsics!($($rest)*); ); - // For some intrinsics, AVR uses a custom calling convention¹ that does not - // match our definitions here. Ideally we would just use hand-written naked - // functions, but that's quite a lot of code to port² - so for the time - // being we are just ignoring the problematic functions, letting avr-gcc - // (which is required to compile to AVR anyway) link them from libgcc. - // - // ¹ https://gcc.gnu.org/wiki/avr-gcc (see "Exceptions to the Calling - // Convention") - // ² https://github.com/gcc-mirror/gcc/blob/31048012db98f5ec9c2ba537bfd850374bdd771f/libgcc/config/avr/lib1funcs.S - ( - #[avr_skip] - $(#[$($attr:tt)*])* - pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { - $($body:tt)* - } - - $($rest:tt)* - ) => ( - #[cfg(not(target_arch = "avr"))] - intrinsics! { - $(#[$($attr)*])* - pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { - $($body)* - } - } - - intrinsics!($($rest)*); - ); - // This is the final catch-all rule. At this point we generate an // intrinsic with a conditional `#[no_mangle]` directive to avoid // interfering with duplicate symbols and whatnot during testing. 
diff --git a/testcrate/tests/misc.rs b/testcrate/tests/misc.rs index f5ac2ab7d..edbd3684d 100644 --- a/testcrate/tests/misc.rs +++ b/testcrate/tests/misc.rs @@ -175,7 +175,6 @@ fn trailing_zeros() { } #[test] -#[cfg(not(target_arch = "avr"))] fn bswap() { use compiler_builtins::int::bswap::{__bswapdi2, __bswapsi2}; fuzz(N, |x: u32| { From 1901c415975eae741b7b072e789e74c6e422c22a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 2 Apr 2025 19:19:43 +0000 Subject: [PATCH 1250/1459] Revert "Disable `f16` on AArch64 without the `neon` feature" The LLVM issue [1] was resolved and the fix was synced to rust-lang/rust in [2]. This reverts commit 5cf417a9e92bb48e4e55756a645826fd167b9f3a. [1]: https://github.com/llvm/llvm-project/issues/129394 [2]: https://github.com/rust-lang/rust/pull/138695 --- compiler-builtins/configure.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/compiler-builtins/configure.rs b/compiler-builtins/configure.rs index 2f134e578..4be0b3ca2 100644 --- a/compiler-builtins/configure.rs +++ b/compiler-builtins/configure.rs @@ -91,8 +91,6 @@ pub fn configure_f16_f128(target: &Target) { let f16_enabled = match target.arch.as_str() { // Unsupported "arm64ec" => false, - // Crash in LLVM20 - "aarch64" if !target.features.iter().any(|f| f == "neon") => false, // Selection failure "s390x" => false, // Infinite recursion From c8878529b046526579137981dd33ed7bb94cee83 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 9 Apr 2025 02:40:43 +0000 Subject: [PATCH 1251/1459] Remove a mention of `force-soft-float` in `build.rs` `libm` no longer uses this directly in `cfg`, it is only for setting other configuration in the `libm` `build.rs`. Clean up this configuration in `compiler-builtins` since it is unused. 
--- compiler-builtins/build.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/compiler-builtins/build.rs b/compiler-builtins/build.rs index fdfb61b17..d627121f3 100644 --- a/compiler-builtins/build.rs +++ b/compiler-builtins/build.rs @@ -101,9 +101,7 @@ fn configure_libm(target: &Target) { println!("cargo:rustc-cfg=intrinsics_enabled"); // The arch module may contain assembly. - if cfg!(feature = "no-asm") { - println!("cargo:rustc-cfg=feature=\"force-soft-floats\""); - } else { + if !cfg!(feature = "no-asm") { println!("cargo:rustc-cfg=arch_enabled"); } From 93f514fd723717826ba8a4290539f9975d5bc223 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 9 Apr 2025 01:56:14 +0000 Subject: [PATCH 1252/1459] Replace calls to `core::arch` intrinsics with assembly Some backends may replace calls to `core::arch` with multiple calls to `sqrt` [1], which becomes recursive. Help mitigate this by replacing the call with assembly. Results in the same assembly as the current implementation when built with optimizations. [1]: https://github.com/rust-lang/compiler-builtins/issues/649 --- libm/src/math/arch/i686.rs | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/libm/src/math/arch/i686.rs b/libm/src/math/arch/i686.rs index ad54d8b61..3e1d19bfa 100644 --- a/libm/src/math/arch/i686.rs +++ b/libm/src/math/arch/i686.rs @@ -1,22 +1,27 @@ //! Architecture-specific support for x86-32 and x86-64 with SSE2 -#[cfg(target_arch = "x86")] -use core::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use core::arch::x86_64::*; - -pub fn sqrtf(x: f32) -> f32 { +pub fn sqrtf(mut x: f32) -> f32 { + // SAFETY: `sqrtss` is part of `sse2`, which this module is gated behind. It has no memory + // access or side effects. 
unsafe { - let m = _mm_set_ss(x); - let m_sqrt = _mm_sqrt_ss(m); - _mm_cvtss_f32(m_sqrt) - } + core::arch::asm!( + "sqrtss {x}, {x}", + x = inout(xmm_reg) x, + options(nostack, nomem, pure), + ) + }; + x } -pub fn sqrt(x: f64) -> f64 { +pub fn sqrt(mut x: f64) -> f64 { + // SAFETY: `sqrtsd` is part of `sse2`, which this module is gated behind. It has no memory + // access or side effects. unsafe { - let m = _mm_set_sd(x); - let m_sqrt = _mm_sqrt_pd(m); - _mm_cvtsd_f64(m_sqrt) - } + core::arch::asm!( + "sqrtsd {x}, {x}", + x = inout(xmm_reg) x, + options(nostack, nomem, pure), + ) + }; + x } From 05ad9005b06d93fcb2ad8c20ddd11cc38cc003b3 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 9 Apr 2025 02:22:15 +0000 Subject: [PATCH 1253/1459] Resolve small errors identified by recent clippy --- libm/crates/libm-test/src/precision.rs | 1 + libm/src/math/support/env.rs | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/crates/libm-test/src/precision.rs b/libm/crates/libm-test/src/precision.rs index 8916b43ab..f5fb5f670 100644 --- a/libm/crates/libm-test/src/precision.rs +++ b/libm/crates/libm-test/src/precision.rs @@ -13,6 +13,7 @@ use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult}; pub struct SpecialCase; /// ULP allowed to differ from the results returned by a test basis. +#[allow(clippy::single_match)] pub fn default_ulp(ctx: &CheckCtx) -> u32 { // ULP compared to the infinite (MPFR) result. let mut ulp = match ctx.base_name { diff --git a/libm/src/math/support/env.rs b/libm/src/math/support/env.rs index 7244381da..c05890d98 100644 --- a/libm/src/math/support/env.rs +++ b/libm/src/math/support/env.rs @@ -70,7 +70,6 @@ impl Status { /// The default result for division is +/-inf based on operand sign. For `logB`, the default /// result is -inf. 
/// `x / y` when `x != 0.0` and `y == 0.0`, - #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] pub const DIVIDE_BY_ZERO: Self = Self(1 << 2); From 56fdec7bef57666591071e32009d076cd81aba88 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 23 Jan 2025 01:46:24 +0000 Subject: [PATCH 1254/1459] Add assembly version of simple operations on aarch64 Replace `core::arch` versions of the following with handwritten assembly, which avoids recursion issues (cg_gcc using `rint` as a fallback) as well as problems with `aarch64be`. * `rint` * `rintf` Additionally, add assembly versions of the following: * `fma` * `fmaf` * `sqrt` * `sqrtf` If the `fp16` target feature is available, which implies `neon`, also include the following: * `rintf16` * `sqrtf16` `sqrt` is added to match the implementation for `x86`. `fma` is included since it is used by many other routines. There are a handful of other operations that have assembly implementations. They are omitted here because we should have basic float math routines available in `core` in the near future, which will allow us to defer to LLVM for assembly lowering rather than implementing these ourselves. 
--- libm/etc/function-definitions.json | 6 ++ libm/src/math/arch/aarch64.rs | 126 ++++++++++++++++++++++++----- libm/src/math/arch/mod.rs | 21 ++++- libm/src/math/fma.rs | 6 ++ libm/src/math/fma_wide.rs | 6 ++ libm/src/math/rint.rs | 10 ++- libm/src/math/sqrt.rs | 1 + libm/src/math/sqrtf.rs | 1 + libm/src/math/sqrtf16.rs | 6 ++ 9 files changed, 155 insertions(+), 28 deletions(-) diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 64a775ba9..bca58402f 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -342,12 +342,14 @@ }, "fma": { "sources": [ + "src/math/arch/aarch64.rs", "src/math/fma.rs" ], "type": "f64" }, "fmaf": { "sources": [ + "src/math/arch/aarch64.rs", "src/math/fma_wide.rs" ], "type": "f32" @@ -806,6 +808,7 @@ }, "rintf16": { "sources": [ + "src/math/arch/aarch64.rs", "src/math/rint.rs" ], "type": "f16" @@ -928,6 +931,7 @@ }, "sqrt": { "sources": [ + "src/math/arch/aarch64.rs", "src/math/arch/i686.rs", "src/math/arch/wasm32.rs", "src/math/generic/sqrt.rs", @@ -937,6 +941,7 @@ }, "sqrtf": { "sources": [ + "src/math/arch/aarch64.rs", "src/math/arch/i686.rs", "src/math/arch/wasm32.rs", "src/math/generic/sqrt.rs", @@ -953,6 +958,7 @@ }, "sqrtf16": { "sources": [ + "src/math/arch/aarch64.rs", "src/math/generic/sqrt.rs", "src/math/sqrtf16.rs" ], diff --git a/libm/src/math/arch/aarch64.rs b/libm/src/math/arch/aarch64.rs index 374ec11bf..020bb731c 100644 --- a/libm/src/math/arch/aarch64.rs +++ b/libm/src/math/arch/aarch64.rs @@ -1,33 +1,115 @@ -use core::arch::aarch64::{ - float32x2_t, float64x1_t, vdup_n_f32, vdup_n_f64, vget_lane_f32, vget_lane_f64, vrndn_f32, - vrndn_f64, -}; +//! Architecture-specific support for aarch64 with neon. -pub fn rint(x: f64) -> f64 { - // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module. 
- let x_vec: float64x1_t = unsafe { vdup_n_f64(x) }; +use core::arch::asm; - // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module. - let result_vec: float64x1_t = unsafe { vrndn_f64(x_vec) }; +pub fn fma(mut x: f64, y: f64, z: f64) -> f64 { + // SAFETY: `fmadd` is available with neon and has no side effects. + unsafe { + asm!( + "fmadd {x:d}, {x:d}, {y:d}, {z:d}", + x = inout(vreg) x, + y = in(vreg) y, + z = in(vreg) z, + options(nomem, nostack, pure) + ); + } + x +} - // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module. - let result: f64 = unsafe { vget_lane_f64::<0>(result_vec) }; +pub fn fmaf(mut x: f32, y: f32, z: f32) -> f32 { + // SAFETY: `fmadd` is available with neon and has no side effects. + unsafe { + asm!( + "fmadd {x:s}, {x:s}, {y:s}, {z:s}", + x = inout(vreg) x, + y = in(vreg) y, + z = in(vreg) z, + options(nomem, nostack, pure) + ); + } + x +} - result +pub fn rint(mut x: f64) -> f64 { + // SAFETY: `frintn` is available with neon and has no side effects. + // + // `frintn` is always round-to-nearest which does not match the C specification, but Rust does + // not support rounding modes. + unsafe { + asm!( + "frintn {x:d}, {x:d}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x } -pub fn rintf(x: f32) -> f32 { - // There's a scalar form of this instruction (FRINTN) but core::arch doesn't expose it, so we - // have to use the vector form and drop the other lanes afterwards. +pub fn rintf(mut x: f32) -> f32 { + // SAFETY: `frintn` is available with neon and has no side effects. + // + // `frintn` is always round-to-nearest which does not match the C specification, but Rust does + // not support rounding modes. + unsafe { + asm!( + "frintn {x:s}, {x:s}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} - // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module. 
- let x_vec: float32x2_t = unsafe { vdup_n_f32(x) }; +#[cfg(all(f16_enabled, target_feature = "fp16"))] +pub fn rintf16(mut x: f16) -> f16 { + // SAFETY: `frintn` is available for `f16` with `fp16` (implies `neon`) and has no side effects. + // + // `frintn` is always round-to-nearest which does not match the C specification, but Rust does + // not support rounding modes. + unsafe { + asm!( + "frintn {x:h}, {x:h}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} - // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module. - let result_vec: float32x2_t = unsafe { vrndn_f32(x_vec) }; +pub fn sqrt(mut x: f64) -> f64 { + // SAFETY: `fsqrt` is available with neon and has no side effects. + unsafe { + asm!( + "fsqrt {x:d}, {x:d}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} - // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module. - let result: f32 = unsafe { vget_lane_f32::<0>(result_vec) }; +pub fn sqrtf(mut x: f32) -> f32 { + // SAFETY: `fsqrt` is available with neon and has no side effects. + unsafe { + asm!( + "fsqrt {x:s}, {x:s}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} - result +#[cfg(all(f16_enabled, target_feature = "fp16"))] +pub fn sqrtf16(mut x: f16) -> f16 { + // SAFETY: `fsqrt` is available for `f16` with `fp16` (implies `neon`) and has no + // side effects. + unsafe { + asm!( + "fsqrt {x:h}, {x:h}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x } diff --git a/libm/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs index 091d7650a..d9f2aad66 100644 --- a/libm/src/math/arch/mod.rs +++ b/libm/src/math/arch/mod.rs @@ -18,12 +18,25 @@ cfg_if! { mod i686; pub use i686::{sqrt, sqrtf}; } else if #[cfg(all( - target_arch = "aarch64", // TODO: also arm64ec? 
- target_feature = "neon", - target_endian = "little", // see https://github.com/rust-lang/stdarch/issues/1484 + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_feature = "neon" ))] { mod aarch64; - pub use aarch64::{rint, rintf}; + + pub use aarch64::{ + fma, + fmaf, + rint, + rintf, + sqrt, + sqrtf, + }; + + #[cfg(all(f16_enabled, target_feature = "fp16"))] + pub use aarch64::{ + rintf16, + sqrtf16, + }; } } diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index 049f573cc..789b0836a 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -9,6 +9,12 @@ use super::{CastFrom, CastInto, Float, Int, MinInt}; /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fma(x: f64, y: f64, z: f64) -> f64 { + select_implementation! { + name: fma, + use_arch: all(target_arch = "aarch64", target_feature = "neon"), + args: x, y, z, + } + fma_round(x, y, z, Round::Nearest).val } diff --git a/libm/src/math/fma_wide.rs b/libm/src/math/fma_wide.rs index d0cf33baf..8e908a14f 100644 --- a/libm/src/math/fma_wide.rs +++ b/libm/src/math/fma_wide.rs @@ -17,6 +17,12 @@ pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 { /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { + select_implementation! { + name: fmaf, + use_arch: all(target_arch = "aarch64", target_feature = "neon"), + args: x, y, z, + } + fma_wide_round(x, y, z, Round::Nearest).val } diff --git a/libm/src/math/rint.rs b/libm/src/math/rint.rs index 8a5cbeab4..e1c32c943 100644 --- a/libm/src/math/rint.rs +++ b/libm/src/math/rint.rs @@ -4,6 +4,12 @@ use super::support::Round; #[cfg(f16_enabled)] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn rintf16(x: f16) -> f16 { + select_implementation! 
{ + name: rintf16, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + super::generic::rint_round(x, Round::Nearest).val } @@ -13,8 +19,8 @@ pub fn rintf(x: f32) -> f32 { select_implementation! { name: rintf, use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), all(target_arch = "wasm32", intrinsics_enabled), - all(target_arch = "aarch64", target_feature = "neon", target_endian = "little"), ), args: x, } @@ -28,8 +34,8 @@ pub fn rint(x: f64) -> f64 { select_implementation! { name: rint, use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), all(target_arch = "wasm32", intrinsics_enabled), - all(target_arch = "aarch64", target_feature = "neon", target_endian = "little"), ), args: x, } diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 0e1d0cd2c..2bfc42bcf 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -4,6 +4,7 @@ pub fn sqrt(x: f64) -> f64 { select_implementation! { name: sqrt, use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), all(target_arch = "wasm32", intrinsics_enabled), target_feature = "sse2" ), diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs index 2e69a4b66..c28a705e3 100644 --- a/libm/src/math/sqrtf.rs +++ b/libm/src/math/sqrtf.rs @@ -4,6 +4,7 @@ pub fn sqrtf(x: f32) -> f32 { select_implementation! { name: sqrtf, use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), all(target_arch = "wasm32", intrinsics_enabled), target_feature = "sse2" ), diff --git a/libm/src/math/sqrtf16.rs b/libm/src/math/sqrtf16.rs index 549bf902c..7bedb7f8b 100644 --- a/libm/src/math/sqrtf16.rs +++ b/libm/src/math/sqrtf16.rs @@ -1,5 +1,11 @@ /// The square root of `x` (f16). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrtf16(x: f16) -> f16 { + select_implementation! 
{ + name: sqrtf16, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + return super::generic::sqrt(x); } From cd6f4f693a43ad1727c4d02ab5ed3278761738dc Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 9 Apr 2025 17:44:37 +0000 Subject: [PATCH 1255/1459] Update the `libm` submodule Includes [1] and [2], which should resolve problems cg_gcc has using scalar math operations as a fallback for vector operations. [1]: https://github.com/rust-lang/libm/pull/459 [2]: https://github.com/rust-lang/libm/pull/534 --- compiler-builtins/libm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-builtins/libm b/compiler-builtins/libm index 69219c491..96d140032 160000 --- a/compiler-builtins/libm +++ b/compiler-builtins/libm @@ -1 +1 @@ -Subproject commit 69219c491ee9f05761d2068fd6d4c7c0de6faa3a +Subproject commit 96d1400326f47381858f8149451a2b2fd8de2ea4 From 9978a8b06b7c1b53a6c503a2bfe7aea9ba6ca98b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 9 Apr 2025 17:52:23 +0000 Subject: [PATCH 1256/1459] chore: release v0.1.153 --- compiler-builtins/CHANGELOG.md | 9 +++++++++ compiler-builtins/Cargo.toml | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/compiler-builtins/CHANGELOG.md b/compiler-builtins/CHANGELOG.md index 987f87713..517ba7859 100644 --- a/compiler-builtins/CHANGELOG.md +++ b/compiler-builtins/CHANGELOG.md @@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.153](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.152...compiler_builtins-v0.1.153) - 2025-04-09 + +### Other + +- Remove a mention of `force-soft-float` in `build.rs` +- Revert "Disable `f16` on AArch64 without the `neon` feature" +- Skip No More! 
+- avoid out-of-bounds accesses ([#799](https://github.com/rust-lang/compiler-builtins/pull/799)) + ## [0.1.152](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.151...compiler_builtins-v0.1.152) - 2025-03-20 ### Other diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index 8de8bce6a..c0c40b5e1 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.152" +version = "0.1.153" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "../README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 62b685ca29ef242760c6e1c2b96f554b42d02ae0 Mon Sep 17 00:00:00 2001 From: quaternic <57393910+quaternic@users.noreply.github.com> Date: Tue, 15 Apr 2025 03:46:12 +0300 Subject: [PATCH 1257/1459] Implement rounding for the hex float parsing and prepare to improve error handling Parsing errors are now bubbled up part of the way, but that needs some more work. Rounding should be correct, and the `Status` returned by `parse_any` should have the correct bits set. These are used for the current (unchanged) behavior of the surface level functions like `hf64`: panic on invalid inputs, or values that aren't exactly representable. 
--- libm/crates/libm-test/src/f8_impl.rs | 5 +- libm/src/math/support/env.rs | 16 +- libm/src/math/support/hex_float.rs | 495 +++++++++++++++++++++------ 3 files changed, 405 insertions(+), 111 deletions(-) diff --git a/libm/crates/libm-test/src/f8_impl.rs b/libm/crates/libm-test/src/f8_impl.rs index 0683d8392..6772e092c 100644 --- a/libm/crates/libm-test/src/f8_impl.rs +++ b/libm/crates/libm-test/src/f8_impl.rs @@ -3,8 +3,6 @@ use std::cmp::{self, Ordering}; use std::{fmt, ops}; -use libm::support::hex_float::parse_any; - use crate::Float; /// Sometimes verifying float logic is easiest when all values can quickly be checked exhaustively @@ -499,5 +497,6 @@ impl fmt::LowerHex for f8 { } pub const fn hf8(s: &str) -> f8 { - f8(parse_any(s, 8, 3) as u8) + let Ok(bits) = libm::support::hex_float::parse_hex_exact(s, 8, 3) else { panic!() }; + f8(bits as u8) } diff --git a/libm/src/math/support/env.rs b/libm/src/math/support/env.rs index c05890d98..796309372 100644 --- a/libm/src/math/support/env.rs +++ b/libm/src/math/support/env.rs @@ -46,7 +46,7 @@ pub enum Round { } /// IEEE 754 exception status flags. -#[derive(Clone, Copy, Debug, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct Status(u8); impl Status { @@ -90,16 +90,22 @@ impl Status { /// True if `UNDERFLOW` is set. #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] - pub fn underflow(self) -> bool { + pub const fn underflow(self) -> bool { self.0 & Self::UNDERFLOW.0 != 0 } + /// True if `OVERFLOW` is set. + #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] + pub const fn overflow(self) -> bool { + self.0 & Self::OVERFLOW.0 != 0 + } + pub fn set_underflow(&mut self, val: bool) { self.set_flag(val, Self::UNDERFLOW); } /// True if `INEXACT` is set. 
- pub fn inexact(self) -> bool { + pub const fn inexact(self) -> bool { self.0 & Self::INEXACT.0 != 0 } @@ -114,4 +120,8 @@ impl Status { self.0 &= !mask.0; } } + + pub(crate) const fn with(self, rhs: Self) -> Self { + Self(self.0 | rhs.0) + } } diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs index be7d7607f..819e2f56e 100644 --- a/libm/src/math/support/hex_float.rs +++ b/libm/src/math/support/hex_float.rs @@ -2,149 +2,260 @@ use core::fmt; -use super::{Float, f32_from_bits, f64_from_bits}; +use super::{Float, Round, Status, f32_from_bits, f64_from_bits}; /// Construct a 16-bit float from hex float representation (C-style) #[cfg(f16_enabled)] pub const fn hf16(s: &str) -> f16 { - f16::from_bits(parse_any(s, 16, 10) as u16) + match parse_hex_exact(s, 16, 10) { + Ok(bits) => f16::from_bits(bits as u16), + Err(HexFloatParseError(s)) => panic!("{}", s), + } } /// Construct a 32-bit float from hex float representation (C-style) #[allow(unused)] pub const fn hf32(s: &str) -> f32 { - f32_from_bits(parse_any(s, 32, 23) as u32) + match parse_hex_exact(s, 32, 23) { + Ok(bits) => f32_from_bits(bits as u32), + Err(HexFloatParseError(s)) => panic!("{}", s), + } } /// Construct a 64-bit float from hex float representation (C-style) pub const fn hf64(s: &str) -> f64 { - f64_from_bits(parse_any(s, 64, 52) as u64) + match parse_hex_exact(s, 64, 52) { + Ok(bits) => f64_from_bits(bits as u64), + Err(HexFloatParseError(s)) => panic!("{}", s), + } } /// Construct a 128-bit float from hex float representation (C-style) #[cfg(f128_enabled)] pub const fn hf128(s: &str) -> f128 { - f128::from_bits(parse_any(s, 128, 112)) + match parse_hex_exact(s, 128, 112) { + Ok(bits) => f128::from_bits(bits), + Err(HexFloatParseError(s)) => panic!("{}", s), + } +} +#[derive(Copy, Clone, Debug)] +pub struct HexFloatParseError(&'static str); + +/// Parses any float to its bitwise representation, returning an error if it cannot be represented exactly +pub const fn 
parse_hex_exact( + s: &str, + bits: u32, + sig_bits: u32, +) -> Result { + match parse_any(s, bits, sig_bits, Round::Nearest) { + Err(e) => Err(e), + Ok((bits, Status::OK)) => Ok(bits), + Ok((_, status)) if status.overflow() => Err(HexFloatParseError("the value is too huge")), + Ok((_, status)) if status.underflow() => Err(HexFloatParseError("the value is too tiny")), + Ok((_, status)) if status.inexact() => Err(HexFloatParseError("the value is too precise")), + Ok(_) => unreachable!(), + } } /// Parse any float from hex to its bitwise representation. -/// -/// `nan_repr` is passed rather than constructed so the platform-specific NaN is returned. -pub const fn parse_any(s: &str, bits: u32, sig_bits: u32) -> u128 { +pub const fn parse_any( + s: &str, + bits: u32, + sig_bits: u32, + round: Round, +) -> Result<(u128, Status), HexFloatParseError> { + let mut b = s.as_bytes(); + + if sig_bits > 119 || bits > 128 || bits < sig_bits + 3 || bits > sig_bits + 30 { + return Err(HexFloatParseError("unsupported target float configuration")); + } + + let neg = matches!(b, [b'-', ..]); + if let &[b'-' | b'+', ref rest @ ..] = b { + b = rest; + } + + let sign_bit = 1 << (bits - 1); + let quiet_bit = 1 << (sig_bits - 1); + let nan = sign_bit - quiet_bit; + let inf = nan - quiet_bit; + + let (mut x, status) = match *b { + [b'i' | b'I', b'n' | b'N', b'f' | b'F'] => (inf, Status::OK), + [b'n' | b'N', b'a' | b'A', b'n' | b'N'] => (nan, Status::OK), + [b'0', b'x' | b'X', ref rest @ ..] 
=> { + let round = match (neg, round) { + // parse("-x", Round::Positive) == -parse("x", Round::Negative) + (true, Round::Positive) => Round::Negative, + (true, Round::Negative) => Round::Positive, + // rounding toward nearest or zero are symmetric + (true, Round::Nearest | Round::Zero) | (false, _) => round, + }; + match parse_finite(rest, bits, sig_bits, round) { + Err(e) => return Err(e), + Ok(res) => res, + } + } + _ => return Err(HexFloatParseError("no hex indicator")), + }; + + if neg { + x ^= sign_bit; + } + + Ok((x, status)) +} + +const fn parse_finite( + b: &[u8], + bits: u32, + sig_bits: u32, + rounding_mode: Round, +) -> Result<(u128, Status), HexFloatParseError> { let exp_bits: u32 = bits - sig_bits - 1; let max_msb: i32 = (1 << (exp_bits - 1)) - 1; // The exponent of one ULP in the subnormals let min_lsb: i32 = 1 - max_msb - sig_bits as i32; - let exp_mask = ((1 << exp_bits) - 1) << sig_bits; + let (mut sig, mut exp) = match parse_hex(b) { + Err(e) => return Err(e), + Ok(Parsed { sig: 0, .. }) => return Ok((0, Status::OK)), + Ok(Parsed { sig, exp }) => (sig, exp), + }; + + let mut round_bits = u128_ilog2(sig) as i32 - sig_bits as i32; + + // Round at least up to min_lsb + if exp < min_lsb - round_bits { + round_bits = min_lsb - exp; + } + + let mut status = Status::OK; - let (neg, mut sig, exp) = match parse_hex(s.as_bytes()) { - Parsed::Finite { neg, sig: 0, .. 
} => return (neg as u128) << (bits - 1), - Parsed::Finite { neg, sig, exp } => (neg, sig, exp), - Parsed::Infinite { neg } => return ((neg as u128) << (bits - 1)) | exp_mask, - Parsed::Nan { neg } => { - return ((neg as u128) << (bits - 1)) | exp_mask | (1 << (sig_bits - 1)); + exp += round_bits; + + if round_bits > 0 { + // first, prepare for rounding exactly two bits + if round_bits == 1 { + sig <<= 1; + } else if round_bits > 2 { + sig = shr_odd_rounding(sig, (round_bits - 2) as u32); } - }; - // exponents of the least and most significant bits in the value - let lsb = sig.trailing_zeros() as i32; - let msb = u128_ilog2(sig) as i32; - let sig_bits = sig_bits as i32; + if sig & 0b11 != 0 { + status = Status::INEXACT; + } - assert!(msb - lsb <= sig_bits, "the value is too precise"); - assert!(msb + exp <= max_msb, "the value is too huge"); - assert!(lsb + exp >= min_lsb, "the value is too tiny"); + sig = shr2_round(sig, rounding_mode); + } else if round_bits < 0 { + sig <<= -round_bits; + } // The parsed value is X = sig * 2^exp // Expressed as a multiple U of the smallest subnormal value: // X = U * 2^min_lsb, so U = sig * 2^(exp-min_lsb) - let mut uexp = exp - min_lsb; + let uexp = (exp - min_lsb) as u128; + let uexp = uexp << sig_bits; - let shift = if uexp + msb >= sig_bits { - // normal, shift msb to position sig_bits - sig_bits - msb - } else { - // subnormal, shift so that uexp becomes 0 - uexp + // Note that it is possible for the exponent bits to equal 2 here + // if the value rounded up, but that means the mantissa is all zeroes + // so the value is still correct + debug_assert!(sig <= 2 << sig_bits); + + let inf = ((1 << exp_bits) - 1) << sig_bits; + + let bits = match sig.checked_add(uexp) { + Some(bits) if bits < inf => { + // inexact subnormal or zero? 
+ if status.inexact() && bits < (1 << sig_bits) { + status = status.with(Status::UNDERFLOW); + } + bits + } + _ => { + // overflow to infinity + status = status.with(Status::OVERFLOW).with(Status::INEXACT); + match rounding_mode { + Round::Positive | Round::Nearest => inf, + Round::Negative | Round::Zero => inf - 1, + } + } }; + Ok((bits, status)) +} - if shift >= 0 { - sig <<= shift; +/// Shift right, rounding all inexact divisions to the nearest odd number +/// E.g. (0 >> 4) -> 0, (1..=31 >> 4) -> 1, (32 >> 4) -> 2, ... +/// +/// Useful for reducing a number before rounding the last two bits, since +/// the result of the final rounding is preserved for all rounding modes. +const fn shr_odd_rounding(x: u128, k: u32) -> u128 { + if k < 128 { + let inexact = x.trailing_zeros() < k; + (x >> k) | (inexact as u128) } else { - sig >>= -shift; + (x != 0) as u128 } - uexp -= shift; - - // the most significant bit is like having 1 in the exponent bits - // add any leftover exponent to that - assert!(uexp >= 0 && uexp < (1 << exp_bits) - 2); - sig += (uexp as u128) << sig_bits; +} - // finally, set the sign bit if necessary - sig | ((neg as u128) << (bits - 1)) +/// Divide by 4, rounding with the given mode +const fn shr2_round(mut x: u128, round: Round) -> u128 { + let t = (x as u32) & 0b111; + x >>= 2; + match round { + // Look-up-table on the last three bits for when to round up + Round::Nearest => x + ((0b11001000_u8 >> t) & 1) as u128, + + Round::Negative => x, + Round::Zero => x, + Round::Positive => x + (t & 0b11 != 0) as u128, + } } -/// A parsed floating point number. -enum Parsed { - /// Absolute value sig * 2^e - Finite { - neg: bool, - sig: u128, - exp: i32, - }, - Infinite { - neg: bool, - }, - Nan { - neg: bool, - }, +/// A parsed finite and unsigned floating point number. 
+struct Parsed { + /// Absolute value sig * 2^exp + sig: u128, + exp: i32, } /// Parse a hexadecimal float x -const fn parse_hex(mut b: &[u8]) -> Parsed { - let mut neg = false; +const fn parse_hex(mut b: &[u8]) -> Result { let mut sig: u128 = 0; let mut exp: i32 = 0; - if let &[c @ (b'-' | b'+'), ref rest @ ..] = b { - b = rest; - neg = c == b'-'; - } - - match *b { - [b'i' | b'I', b'n' | b'N', b'f' | b'F'] => return Parsed::Infinite { neg }, - [b'n' | b'N', b'a' | b'A', b'n' | b'N'] => return Parsed::Nan { neg }, - _ => (), - } - - if let &[b'0', b'x' | b'X', ref rest @ ..] = b { - b = rest; - } else { - panic!("no hex indicator"); - } - let mut seen_point = false; let mut some_digits = false; + let mut inexact = false; while let &[c, ref rest @ ..] = b { b = rest; match c { b'.' => { - assert!(!seen_point); + if seen_point { + return Err(HexFloatParseError("unexpected '.' parsing fractional digits")); + } seen_point = true; continue; } b'p' | b'P' => break, c => { - let digit = hex_digit(c); + let digit = match hex_digit(c) { + Some(d) => d, + None => return Err(HexFloatParseError("expected hexadecimal digit")), + }; some_digits = true; - let of; - (sig, of) = sig.overflowing_mul(16); - assert!(!of, "too many digits"); - sig |= digit as u128; - // up until the fractional point, the value grows + + if (sig >> 124) == 0 { + sig <<= 4; + sig |= digit as u128; + } else { + // FIXME: it is technically possible for exp to overflow if parsing a string with >500M digits + exp += 4; + inexact |= digit != 0; + } + // Up until the fractional point, the value grows // with more digits, but after it the exponent is // compensated to match. if seen_point { @@ -153,49 +264,79 @@ const fn parse_hex(mut b: &[u8]) -> Parsed { } } } - assert!(some_digits, "at least one digit is required"); + // If we've set inexact, the exact value has more than 125 + // significant bits, and lies somewhere between sig and sig + 1. 
+ // Because we'll round off at least two of the trailing bits, + // setting the last bit gives correct rounding for inexact values. + sig |= inexact as u128; + + if !some_digits { + return Err(HexFloatParseError("at least one digit is required")); + }; + some_digits = false; - let mut negate_exp = false; - if let &[c @ (b'-' | b'+'), ref rest @ ..] = b { + let negate_exp = matches!(b, [b'-', ..]); + if let &[b'-' | b'+', ref rest @ ..] = b { b = rest; - negate_exp = c == b'-'; } - let mut pexp: i32 = 0; + let mut pexp: u32 = 0; while let &[c, ref rest @ ..] = b { b = rest; - let digit = dec_digit(c); + let digit = match dec_digit(c) { + Some(d) => d, + None => return Err(HexFloatParseError("expected decimal digit")), + }; some_digits = true; - let of; - (pexp, of) = pexp.overflowing_mul(10); - assert!(!of, "too many exponent digits"); - pexp += digit as i32; + pexp = pexp.saturating_mul(10); + pexp += digit as u32; } - assert!(some_digits, "at least one exponent digit is required"); + if !some_digits { + return Err(HexFloatParseError("at least one exponent digit is required")); + }; + + { + let e; + if negate_exp { + e = (exp as i64) - (pexp as i64); + } else { + e = (exp as i64) + (pexp as i64); + }; + + exp = if e < i32::MIN as i64 { + i32::MIN + } else if e > i32::MAX as i64 { + i32::MAX + } else { + e as i32 + }; + } + /* FIXME(msrv): once MSRV >= 1.66, replace the above workaround block with: if negate_exp { - exp -= pexp; + exp = exp.saturating_sub_unsigned(pexp); } else { - exp += pexp; - } + exp = exp.saturating_add_unsigned(pexp); + }; + */ - Parsed::Finite { neg, sig, exp } + Ok(Parsed { sig, exp }) } -const fn dec_digit(c: u8) -> u8 { +const fn dec_digit(c: u8) -> Option { match c { - b'0'..=b'9' => c - b'0', - _ => panic!("bad char"), + b'0'..=b'9' => Some(c - b'0'), + _ => None, } } -const fn hex_digit(c: u8) -> u8 { +const fn hex_digit(c: u8) -> Option { match c { - b'0'..=b'9' => c - b'0', - b'a'..=b'f' => c - b'a' + 10, - b'A'..=b'F' => c - b'A' + 
10, - _ => panic!("bad char"), + b'0'..=b'9' => Some(c - b'0'), + b'a'..=b'f' => Some(c - b'a' + 10), + b'A'..=b'F' => Some(c - b'A' + 10), + _ => None, } } @@ -341,6 +482,61 @@ mod parse_tests { use super::*; + #[cfg(f16_enabled)] + fn rounding_properties(s: &str) -> Result<(), HexFloatParseError> { + let (xd, s0) = parse_any(s, 16, 10, Round::Negative)?; + let (xu, s1) = parse_any(s, 16, 10, Round::Positive)?; + let (xz, s2) = parse_any(s, 16, 10, Round::Zero)?; + let (xn, s3) = parse_any(s, 16, 10, Round::Nearest)?; + + // FIXME: A value between the least normal and largest subnormal + // could have underflow status depend on rounding mode. + + if let Status::OK = s0 { + // an exact result is the same for all rounding modes + assert_eq!(s0, s1); + assert_eq!(s0, s2); + assert_eq!(s0, s3); + + assert_eq!(xd, xu); + assert_eq!(xd, xz); + assert_eq!(xd, xn); + } else { + assert!([s0, s1, s2, s3].into_iter().all(Status::inexact)); + + let xd = f16::from_bits(xd as u16); + let xu = f16::from_bits(xu as u16); + let xz = f16::from_bits(xz as u16); + let xn = f16::from_bits(xn as u16); + + assert_biteq!(xd.next_up(), xu, "s={s}, xd={xd:?}, xu={xu:?}"); + + let signs = [xd, xu, xz, xn].map(f16::is_sign_negative); + + if signs == [true; 4] { + assert_biteq!(xz, xu); + } else { + assert_eq!(signs, [false; 4]); + assert_biteq!(xz, xd); + } + + if xn.to_bits() != xd.to_bits() { + assert_biteq!(xn, xu); + } + } + Ok(()) + } + #[test] + #[cfg(f16_enabled)] + fn test_rounding() { + let n = 1_i32 << 14; + for i in -n..n { + let u = i.rotate_right(11) as u32; + let s = format!("{}", Hexf(f32::from_bits(u))); + assert!(rounding_properties(&s).is_ok()); + } + } + #[test] fn test_parse_any() { for k in -149..=127 { @@ -397,6 +593,48 @@ mod parse_tests { } } + // FIXME: this test is causing failures that are likely UB on various platforms + #[cfg(all(target_arch = "x86_64", target_os = "linux"))] + #[test] + #[cfg(f128_enabled)] + fn rounding() { + let pi = std::f128::consts::PI; + 
let s = format!("{}", Hexf(pi)); + + for k in 0..=111 { + let (bits, status) = parse_any(&s, 128 - k, 112 - k, Round::Nearest).unwrap(); + let scale = (1u128 << (112 - k - 1)) as f128; + let expected = (pi * scale).round_ties_even() / scale; + assert_eq!(bits << k, expected.to_bits(), "k = {k}, s = {s}"); + assert_eq!(expected != pi, status.inexact()); + } + } + #[test] + fn rounding_extreme_underflow() { + for k in 1..1000 { + let s = format!("0x1p{}", -149 - k); + let Ok((bits, status)) = parse_any(&s, 32, 23, Round::Nearest) else { unreachable!() }; + assert_eq!(bits, 0, "{s} should round to zero, got bits={bits}"); + assert!(status.underflow(), "should indicate underflow when parsing {s}"); + assert!(status.inexact(), "should indicate inexact when parsing {s}"); + } + } + #[test] + fn long_tail() { + for k in 1..1000 { + let s = format!("0x1.{}p0", "0".repeat(k)); + let Ok(bits) = parse_hex_exact(&s, 32, 23) else { panic!("parsing {s} failed") }; + assert_eq!(f32::from_bits(bits as u32), 1.0); + + let s = format!("0x1.{}1p0", "0".repeat(k)); + let Ok((bits, status)) = parse_any(&s, 32, 23, Round::Nearest) else { unreachable!() }; + if status.inexact() { + assert!(1.0 == f32::from_bits(bits as u32)); + } else { + assert!(1.0 < f32::from_bits(bits as u32)); + } + } + } // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to // hide them from the AST. #[cfg(f16_enabled)] @@ -434,6 +672,7 @@ mod parse_tests { ]; for (s, exp) in checks { println!("parsing {s}"); + assert!(rounding_properties(s).is_ok()); let act = hf16(s).to_bits(); assert_eq!( act, exp, @@ -749,7 +988,13 @@ mod tests_panicking { #[test] #[should_panic(expected = "the value is too precise")] fn test_f128_extra_precision() { - // One bit more than the above. + // Just below the maximum finite. 
+ hf128("0x1.fffffffffffffffffffffffffffe8p+16383"); + } + #[test] + #[should_panic(expected = "the value is too huge")] + fn test_f128_extra_precision_overflow() { + // One bit more than the above. Should overflow. hf128("0x1.ffffffffffffffffffffffffffff8p+16383"); } @@ -822,6 +1067,46 @@ mod print_tests { } } + #[test] + #[cfg(f16_enabled)] + fn test_f16_to_f32() { + use std::format; + // Exhaustively check that these are equivalent for all `f16`: + // - `f16 -> f32` + // - `f16 -> str -> f32` + // - `f16 -> f32 -> str -> f32` + // - `f16 -> f32 -> str -> f16 -> f32` + for x in 0..=u16::MAX { + let f16 = f16::from_bits(x); + let s16 = format!("{}", Hexf(f16)); + let f32 = f16 as f32; + let s32 = format!("{}", Hexf(f32)); + + let a = hf32(&s16); + let b = hf32(&s32); + let c = hf16(&s32); + + if f32.is_nan() && a.is_nan() && b.is_nan() && c.is_nan() { + continue; + } + + assert_eq!( + f32.to_bits(), + a.to_bits(), + "{f16:?} : f16 formatted as {s16} which parsed as {a:?} : f16" + ); + assert_eq!( + f32.to_bits(), + b.to_bits(), + "{f32:?} : f32 formatted as {s32} which parsed as {b:?} : f32" + ); + assert_eq!( + f32.to_bits(), + (c as f32).to_bits(), + "{f32:?} : f32 formatted as {s32} which parsed as {c:?} : f16" + ); + } + } #[test] fn spot_checks() { assert_eq!(Hexf(f32::MAX).to_string(), "0x1.fffffep+127"); From 5b162502b8330de81d904925505bec12809cf468 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sun, 13 Apr 2025 19:07:27 +0200 Subject: [PATCH 1258/1459] turn #[naked] into an unsafe attribute --- compiler-builtins/src/aarch64.rs | 2 +- compiler-builtins/src/aarch64_linux.rs | 8 ++++---- compiler-builtins/src/arm.rs | 8 ++++---- compiler-builtins/src/macros.rs | 4 ++-- compiler-builtins/src/x86.rs | 4 ++-- compiler-builtins/src/x86_64.rs | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/compiler-builtins/src/aarch64.rs b/compiler-builtins/src/aarch64.rs index cce485c46..80392187c 100644 --- a/compiler-builtins/src/aarch64.rs +++ 
b/compiler-builtins/src/aarch64.rs @@ -3,7 +3,7 @@ use core::intrinsics; intrinsics! { - #[naked] + #[unsafe(naked)] #[cfg(all(target_os = "uefi", not(feature = "no-asm")))] pub unsafe extern "C" fn __chkstk() { core::arch::naked_asm!( diff --git a/compiler-builtins/src/aarch64_linux.rs b/compiler-builtins/src/aarch64_linux.rs index caac3e602..5515dbfc4 100644 --- a/compiler-builtins/src/aarch64_linux.rs +++ b/compiler-builtins/src/aarch64_linux.rs @@ -131,7 +131,7 @@ macro_rules! compare_and_swap { ($ordering:ident, $bytes:tt, $name:ident) => { intrinsics! { #[maybe_use_optimized_c_shim] - #[naked] + #[unsafe(naked)] pub unsafe extern "C" fn $name ( expected: int_ty!($bytes), desired: int_ty!($bytes), ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { @@ -161,7 +161,7 @@ macro_rules! compare_and_swap_i128 { ($ordering:ident, $name:ident) => { intrinsics! { #[maybe_use_optimized_c_shim] - #[naked] + #[unsafe(naked)] pub unsafe extern "C" fn $name ( expected: i128, desired: i128, ptr: *mut i128 ) -> i128 { @@ -190,7 +190,7 @@ macro_rules! swap { ($ordering:ident, $bytes:tt, $name:ident) => { intrinsics! { #[maybe_use_optimized_c_shim] - #[naked] + #[unsafe(naked)] pub unsafe extern "C" fn $name ( left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { @@ -215,7 +215,7 @@ macro_rules! fetch_op { ($ordering:ident, $bytes:tt, $name:ident, $op:literal) => { intrinsics! { #[maybe_use_optimized_c_shim] - #[naked] + #[unsafe(naked)] pub unsafe extern "C" fn $name ( val: int_ty!($bytes), ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { diff --git a/compiler-builtins/src/arm.rs b/compiler-builtins/src/arm.rs index 9e6608397..878bb8c43 100644 --- a/compiler-builtins/src/arm.rs +++ b/compiler-builtins/src/arm.rs @@ -20,7 +20,7 @@ macro_rules! bl { intrinsics! { // NOTE This function and the ones below are implemented using assembly because they are using a // custom calling convention which can't be implemented using a normal Rust function. 
- #[naked] + #[unsafe(naked)] #[cfg(not(target_env = "msvc"))] pub unsafe extern "C" fn __aeabi_uidivmod() { core::arch::naked_asm!( @@ -34,7 +34,7 @@ intrinsics! { ); } - #[naked] + #[unsafe(naked)] pub unsafe extern "C" fn __aeabi_uldivmod() { core::arch::naked_asm!( "push {{r4, lr}}", @@ -49,7 +49,7 @@ intrinsics! { ); } - #[naked] + #[unsafe(naked)] pub unsafe extern "C" fn __aeabi_idivmod() { core::arch::naked_asm!( "push {{r0, r1, r4, lr}}", @@ -61,7 +61,7 @@ intrinsics! { ); } - #[naked] + #[unsafe(naked)] pub unsafe extern "C" fn __aeabi_ldivmod() { core::arch::naked_asm!( "push {{r4, lr}}", diff --git a/compiler-builtins/src/macros.rs b/compiler-builtins/src/macros.rs index 0397e4551..13c8ecee3 100644 --- a/compiler-builtins/src/macros.rs +++ b/compiler-builtins/src/macros.rs @@ -423,7 +423,7 @@ macro_rules! intrinsics { // Naked functions are special: we can't generate wrappers for them since // they use a custom calling convention. ( - #[naked] + #[unsafe(naked)] $(#[$($attr:tt)*])* pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* @@ -433,7 +433,7 @@ macro_rules! intrinsics { ) => ( // `#[naked]` definitions are referenced by other places, so we can't use `cfg` like the others pub mod $name { - #[naked] + #[unsafe(naked)] $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] diff --git a/compiler-builtins/src/x86.rs b/compiler-builtins/src/x86.rs index ad04d2108..01152d9c7 100644 --- a/compiler-builtins/src/x86.rs +++ b/compiler-builtins/src/x86.rs @@ -8,7 +8,7 @@ use core::intrinsics; // NOTE These functions are never mangled as they are not tested against compiler-rt intrinsics! { - #[naked] + #[unsafe(naked)] #[cfg(all( any(all(windows, target_env = "gnu"), target_os = "uefi"), not(feature = "no-asm") @@ -20,7 +20,7 @@ intrinsics! 
{ ); } - #[naked] + #[unsafe(naked)] #[cfg(all( any(all(windows, target_env = "gnu"), target_os = "uefi"), not(feature = "no-asm") diff --git a/compiler-builtins/src/x86_64.rs b/compiler-builtins/src/x86_64.rs index aae601f58..af67e66e2 100644 --- a/compiler-builtins/src/x86_64.rs +++ b/compiler-builtins/src/x86_64.rs @@ -8,7 +8,7 @@ use core::intrinsics; // NOTE These functions are never mangled as they are not tested against compiler-rt intrinsics! { - #[naked] + #[unsafe(naked)] #[cfg(all( any( all(windows, target_env = "gnu"), From 309618b024547f09215d06d0a0bf9f9a536021ab Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 16 Apr 2025 18:23:30 +0000 Subject: [PATCH 1259/1459] chore: release v0.1.154 --- compiler-builtins/CHANGELOG.md | 6 ++++++ compiler-builtins/Cargo.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/compiler-builtins/CHANGELOG.md b/compiler-builtins/CHANGELOG.md index 517ba7859..1dbccf38e 100644 --- a/compiler-builtins/CHANGELOG.md +++ b/compiler-builtins/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.154](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.153...compiler_builtins-v0.1.154) - 2025-04-16 + +### Other + +- turn #[naked] into an unsafe attribute + ## [0.1.153](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.152...compiler_builtins-v0.1.153) - 2025-04-09 ### Other diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index c0c40b5e1..bd17885a1 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.153" +version = "0.1.154" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "../README.md" repository = 
"https://github.com/rust-lang/compiler-builtins" From 34b709237d268eacf26eb0eab0e18eefac81aaf2 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 15 Apr 2025 04:20:17 +0000 Subject: [PATCH 1260/1459] fmod: Add regression tests for subnormal issue From discussion at [1] our loop count calculation is incorrect, causing an issue with subnormal numbers. Add test cases for known failures. [1]: https://github.com/rust-lang/libm/pull/469#discussion_r2012473920 --- libm/crates/libm-test/src/gen/case_list.rs | 26 ++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/libm/crates/libm-test/src/gen/case_list.rs b/libm/crates/libm-test/src/gen/case_list.rs index 7cb9897d8..e3628d51c 100644 --- a/libm/crates/libm-test/src/gen/case_list.rs +++ b/libm/crates/libm-test/src/gen/case_list.rs @@ -403,11 +403,33 @@ fn fminimum_numf128_cases() -> Vec> { } fn fmod_cases() -> Vec> { - vec![] + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Previous failure with incorrect loop iteration + // + ((2.1, 3.123e-320), Some(2.0696e-320)), + ((2.1, 2.253547e-318), Some(1.772535e-318)), + ], + ); + v } fn fmodf_cases() -> Vec> { - vec![] + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Previous failure with incorrect loop iteration + // + ((2.1, 8.858e-42), Some(8.085e-42)), + ((2.1, 6.39164e-40), Some(6.1636e-40)), + ((5.5, 6.39164e-40), Some(4.77036e-40)), + ((-151.189, 6.39164e-40), Some(-5.64734e-40)), + ], + ); + v } #[cfg(f128_enabled)] From 8da2464423e13575e67a67e74eb5de5e1881c8e6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 16 Apr 2025 18:43:25 +0000 Subject: [PATCH 1261/1459] fmod: Correct the normalization of subnormals Discussed at [1], there was an off-by-one mistake when converting from the loop routine to using `leading_zeros` for normalization. 
Currently, using `EXP_BITS` has the effect that `ix` after the branch has its MSB _one bit to the left_ of the implicit bit's position, whereas a shift by `EXP_BITS + 1` ensures that the MSB is exactly at the implicit bit's position, matching what is done for normals (where the implicit bit is set to be explicit). This doesn't seem to have any effect in our implementation since the failing test cases from [1] appear to still have correct results. Since the result of using `EXP_BITS + 1` is more consistent with what is done for normals, apply this here. [1]: https://github.com/rust-lang/libm/pull/469#discussion_r2012473920 --- libm/src/math/generic/fmod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libm/src/math/generic/fmod.rs b/libm/src/math/generic/fmod.rs index c74b593d5..cd23350ea 100644 --- a/libm/src/math/generic/fmod.rs +++ b/libm/src/math/generic/fmod.rs @@ -26,7 +26,7 @@ pub fn fmod(x: F, y: F) -> F { /* normalize x and y */ if ex == 0 { - let i = ix << F::EXP_BITS; + let i = ix << (F::EXP_BITS + 1); ex -= i.leading_zeros() as i32; ix <<= -ex + 1; } else { @@ -35,7 +35,7 @@ pub fn fmod(x: F, y: F) -> F { } if ey == 0 { - let i = iy << F::EXP_BITS; + let i = iy << (F::EXP_BITS + 1); ey -= i.leading_zeros() as i32; iy <<= -ey + 1; } else { From 614ab5eb83c1fd7ef9663f361d8efdd9cadf0e78 Mon Sep 17 00:00:00 2001 From: Paul Sbarra Date: Sat, 12 Apr 2025 16:14:24 -0500 Subject: [PATCH 1262/1459] avr: __udivmod(h|q)i4 --- compiler-builtins/src/int/udiv.rs | 86 +++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/compiler-builtins/src/int/udiv.rs b/compiler-builtins/src/int/udiv.rs index a5c16040a..f18537b00 100644 --- a/compiler-builtins/src/int/udiv.rs +++ b/compiler-builtins/src/int/udiv.rs @@ -43,6 +43,92 @@ intrinsics! { ((rem as u64) << 32) | (div as u64) } + + #[unsafe(naked)] + pub unsafe extern "C" fn __udivmodqi4() { + // compute unsigned 8-bit `n / d` and `n % d`. 
+ // + // Note: GCC implements a [non-standard calling convention](https://gcc.gnu.org/wiki/avr-gcc#Exceptions_to_the_Calling_Convention) for this function. + // Inputs: + // R24: dividend + // R22: divisor + // Outputs: + // R24: quotient (dividend / divisor) + // R25: remainder (dividend % divisor) + // Clobbers: + // R23: loop counter + core::arch::naked_asm!( + // This assembly routine implements the [long division](https://en.wikipedia.org/wiki/Division_algorithm#Long_division) algorithm. + // Bits shift out of the dividend and into the quotient, so R24 is used for both. + "clr R25", // remainder = 0 + + "ldi R23, 8", // for each bit + "1:", + "lsl R24", // shift the dividend MSb + "rol R25", // into the remainder LSb + + "cp R25, R22", // if remainder >= divisor + "brlo 2f", + "sub R25, R22", // remainder -= divisor + "sbr R24, 1", // quotient |= 1 + "2:", + + "dec R23", // end loop + "brne 1b", + "ret", + ); + } + + #[unsafe(naked)] + pub unsafe extern "C" fn __udivmodhi4() { + // compute unsigned 16-bit `n / d` and `n % d`. + // + // Note: GCC implements a [non-standard calling convention](https://gcc.gnu.org/wiki/avr-gcc#Exceptions_to_the_Calling_Convention) for this function. + // Inputs: + // R24: dividend [low] + // R25: dividend [high] + // R22: divisor [low] + // R23: divisor [high] + // Outputs: + // R22: quotient [low] (dividend / divisor) + // R23: quotient [high] + // R24: remainder [low] (dividend % divisor) + // R25: remainder [high] + // Clobbers: + // R21: loop counter + // R26: divisor [low] + // R27: divisor [high] + core::arch::naked_asm!( + // This assembly routine implements the [long division](https://en.wikipedia.org/wiki/Division_algorithm#Long_division) algorithm. + // Bits shift out of the dividend and into the quotient, so R24+R25 are used for both. 
+ "mov R26, R22", // move divisor to make room for quotient + "mov R27, R23", + "mov R22, R24", // move dividend to output location (becomes quotient) + "mov R23, R25", + "clr R24", // remainder = 0 + "clr R25", + + "ldi R21, 16", // for each bit + "1:", + "lsl R22", // shift the dividend MSb + "rol R23", + "rol R24", // into the remainder LSb + "rol R25", + + "cp R24, R26", // if remainder >= divisor + "cpc R25, R27", + "brlo 2f", + "sub R24, R26", // remainder -= divisor + "sbc R25, R27", + "sbr R22, 1", // quotient |= 1 + "2:", + + "dec R21", // end loop + "brne 1b", + "ret", + ); + } + } intrinsics! { From 9a5841295ad726c217eccd9652592d6bda813184 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 17 Apr 2025 08:19:50 +0000 Subject: [PATCH 1263/1459] Replace the `bl!` macro with `asm_sym` `bl!` is being used to add a leading underscore on Apple targets. `asm_sym` has been around since 2022 and handles platform-specific symbol names automatically, so make use of this instead. I have verified that `armv7s-apple-ios` still builds correctly. --- compiler-builtins/src/arm.rs | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/compiler-builtins/src/arm.rs b/compiler-builtins/src/arm.rs index 878bb8c43..7859b5120 100644 --- a/compiler-builtins/src/arm.rs +++ b/compiler-builtins/src/arm.rs @@ -1,20 +1,16 @@ #![cfg(not(feature = "no-asm"))] -#![allow(unused_imports)] -use core::intrinsics; - -// Apple symbols have a leading underscore. -#[cfg(target_vendor = "apple")] -macro_rules! bl { - ($func:literal) => { - concat!("bl _", $func) - }; +// Interfaces used by naked trampolines. +extern "C" { + fn __udivmodsi4(a: u32, b: u32, rem: *mut u32) -> u32; + fn __udivmoddi4(a: u64, b: u64, rem: *mut u64) -> u64; + fn __divmoddi4(a: i64, b: i64, rem: *mut i64) -> i64; } -#[cfg(not(target_vendor = "apple"))] -macro_rules! 
bl { - ($func:literal) => { - concat!("bl ", $func) - }; + +extern "aapcs" { + // AAPCS is not always the correct ABI for these intrinsics, but we only use this to + // forward another `__aeabi_` call so it doesn't matter. + fn __aeabi_idiv(a: i32, b: i32) -> i32; } intrinsics! { @@ -27,10 +23,11 @@ intrinsics! { "push {{lr}}", "sub sp, sp, #4", "mov r2, sp", - bl!("__udivmodsi4"), + "bl {trampoline}", "ldr r1, [sp]", "add sp, sp, #4", "pop {{pc}}", + trampoline = sym crate::arm::__udivmodsi4 ); } @@ -41,11 +38,12 @@ intrinsics! { "sub sp, sp, #16", "add r4, sp, #8", "str r4, [sp]", - bl!("__udivmoddi4"), + "bl {trampoline}", "ldr r2, [sp, #8]", "ldr r3, [sp, #12]", "add sp, sp, #16", "pop {{r4, pc}}", + trampoline = sym crate::arm::__udivmoddi4 ); } @@ -53,11 +51,12 @@ intrinsics! { pub unsafe extern "C" fn __aeabi_idivmod() { core::arch::naked_asm!( "push {{r0, r1, r4, lr}}", - bl!("__aeabi_idiv"), + "bl {trampoline}", "pop {{r1, r2}}", "muls r2, r2, r0", "subs r1, r1, r2", "pop {{r4, pc}}", + trampoline = sym crate::arm::__aeabi_idiv, ); } @@ -68,11 +67,12 @@ intrinsics! 
{ "sub sp, sp, #16", "add r4, sp, #8", "str r4, [sp]", - bl!("__divmoddi4"), + "bl {trampoline}", "ldr r2, [sp, #8]", "ldr r3, [sp, #12]", "add sp, sp, #16", "pop {{r4, pc}}", + trampoline = sym crate::arm::__divmoddi4, ); } From 7db4d8be4fbc91ca12c13064c69bd43c3f68c3f3 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 17 Apr 2025 12:27:35 +0200 Subject: [PATCH 1264/1459] use `#[cfg(bootstrap)]` for rustc sync --- compiler-builtins/Cargo.toml | 3 +++ compiler-builtins/src/macros.rs | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index bd17885a1..f5bbf4cd4 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -70,3 +70,6 @@ rustc-dep-of-std = ['compiler-builtins', 'core'] # This makes certain traits and function specializations public that # are not normally public but are required by the `testcrate` public-test-deps = [] + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(bootstrap)'] } diff --git a/compiler-builtins/src/macros.rs b/compiler-builtins/src/macros.rs index 13c8ecee3..b83414ce2 100644 --- a/compiler-builtins/src/macros.rs +++ b/compiler-builtins/src/macros.rs @@ -433,6 +433,17 @@ macro_rules! intrinsics { ) => ( // `#[naked]` definitions are referenced by other places, so we can't use `cfg` like the others pub mod $name { + // FIXME: when bootstrap supports `#[unsafe(naked)]` this duplication can be removed + #[cfg(bootstrap)] + #[naked] + $(#[$($attr)*])* + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? 
{ + $($body)* + } + + #[cfg(not(bootstrap))] #[unsafe(naked)] $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] From bffb4abd9fed8d344ea4faff229b994a04cef157 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 17 Apr 2025 18:57:23 +0000 Subject: [PATCH 1265/1459] chore: release v0.1.155 --- compiler-builtins/CHANGELOG.md | 8 ++++++++ compiler-builtins/Cargo.toml | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/compiler-builtins/CHANGELOG.md b/compiler-builtins/CHANGELOG.md index 1dbccf38e..9916f2986 100644 --- a/compiler-builtins/CHANGELOG.md +++ b/compiler-builtins/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.155](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.154...compiler_builtins-v0.1.155) - 2025-04-17 + +### Other + +- use `#[cfg(bootstrap)]` for rustc sync +- Replace the `bl!` macro with `asm_sym` +- __udivmod(h|q)i4 + ## [0.1.154](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.153...compiler_builtins-v0.1.154) - 2025-04-16 ### Other diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index f5bbf4cd4..eb5b2b9cc 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.154" +version = "0.1.155" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "../README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 47799f8888544628fe4b7f2052d8d23726d001ed Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Apr 2025 00:18:12 +0000 Subject: [PATCH 1266/1459] Add `NEG_NAN` to `Float` Introduce a constant representing NaN with a negative sign bit for use with testing. 
There isn't really any guarantee that `F::NAN` is positive but in practice it always is, which is good enough for testing purposes. --- libm/crates/libm-test/src/f8_impl.rs | 1 + libm/src/math/support/float_traits.rs | 25 +++++++++++++++++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/libm/crates/libm-test/src/f8_impl.rs b/libm/crates/libm-test/src/f8_impl.rs index 6772e092c..ddb7bf90e 100644 --- a/libm/crates/libm-test/src/f8_impl.rs +++ b/libm/crates/libm-test/src/f8_impl.rs @@ -30,6 +30,7 @@ impl Float for f8 { const INFINITY: Self = Self(0b0_1111_000); const NEG_INFINITY: Self = Self(0b1_1111_000); const NAN: Self = Self(0b0_1111_100); + const NEG_NAN: Self = Self(0b1_1111_100); const MIN_POSITIVE_NORMAL: Self = Self(1 << Self::SIG_BITS); // FIXME: incorrect values const EPSILON: Self = Self::ZERO; diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 96c209c85..fac104832 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -34,6 +34,7 @@ pub trait Float: const INFINITY: Self; const NEG_INFINITY: Self; const NAN: Self; + const NEG_NAN: Self; const MAX: Self; const MIN: Self; const EPSILON: Self; @@ -187,6 +188,7 @@ macro_rules! float_impl { $bits:expr, $significand_bits:expr, $from_bits:path, + $to_bits:path, $fma_fn:ident, $fma_intrinsic:ident ) => { @@ -201,6 +203,9 @@ macro_rules! float_impl { const INFINITY: Self = Self::INFINITY; const NEG_INFINITY: Self = Self::NEG_INFINITY; const NAN: Self = Self::NAN; + // NAN isn't guaranteed to be positive but it usually is. We only use this for + // tests. + const NEG_NAN: Self = $from_bits($to_bits(Self::NAN) | Self::SIGN_MASK); const MAX: Self = -Self::MIN; // Sign bit set, saturated mantissa, saturated exponent with last bit zeroed const MIN: Self = $from_bits(Self::Int::MAX & !(1 << Self::SIG_BITS)); @@ -275,11 +280,11 @@ macro_rules! 
float_impl { } #[cfg(f16_enabled)] -float_impl!(f16, u16, i16, 16, 10, f16::from_bits, fmaf16, fmaf16); -float_impl!(f32, u32, i32, 32, 23, f32_from_bits, fmaf, fmaf32); -float_impl!(f64, u64, i64, 64, 52, f64_from_bits, fma, fmaf64); +float_impl!(f16, u16, i16, 16, 10, f16::from_bits, f16::to_bits, fmaf16, fmaf16); +float_impl!(f32, u32, i32, 32, 23, f32_from_bits, f32_to_bits, fmaf, fmaf32); +float_impl!(f64, u64, i64, 64, 52, f64_from_bits, f64_to_bits, fma, fmaf64); #[cfg(f128_enabled)] -float_impl!(f128, u128, i128, 128, 112, f128::from_bits, fmaf128, fmaf128); +float_impl!(f128, u128, i128, 128, 112, f128::from_bits, f128::to_bits, fmaf128, fmaf128); /* FIXME(msrv): vendor some things that are not const stable at our MSRV */ @@ -289,12 +294,24 @@ pub const fn f32_from_bits(bits: u32) -> f32 { unsafe { mem::transmute::(bits) } } +/// `f32::to_bits` +pub const fn f32_to_bits(x: f32) -> u32 { + // SAFETY: POD cast with no preconditions + unsafe { mem::transmute::(x) } +} + /// `f64::from_bits` pub const fn f64_from_bits(bits: u64) -> f64 { // SAFETY: POD cast with no preconditions unsafe { mem::transmute::(bits) } } +/// `f64::to_bits` +pub const fn f64_to_bits(x: f64) -> u64 { + // SAFETY: POD cast with no preconditions + unsafe { mem::transmute::(x) } +} + /// Trait for floats twice the bit width of another integer. pub trait DFloat: Float { /// Float that is half the bit width of the floatthis trait is implemented for. 
From 221a0a0cb512c7f5beda5f4fce85879fa2110a90 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Apr 2025 00:22:38 +0000 Subject: [PATCH 1267/1459] Make `assert_biteq!` not rely on having `Int` in scope --- libm/src/math/generic/fmax.rs | 2 +- libm/src/math/generic/fmaximum.rs | 2 +- libm/src/math/generic/fmaximum_num.rs | 2 +- libm/src/math/generic/fmin.rs | 2 +- libm/src/math/generic/fminimum.rs | 2 +- libm/src/math/generic/fminimum_num.rs | 2 +- libm/src/math/generic/rint.rs | 2 +- libm/src/math/generic/scalbn.rs | 1 - libm/src/math/support/macros.rs | 3 ++- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/libm/src/math/generic/fmax.rs b/libm/src/math/generic/fmax.rs index 32613a46b..039ffce9f 100644 --- a/libm/src/math/generic/fmax.rs +++ b/libm/src/math/generic/fmax.rs @@ -26,7 +26,7 @@ pub fn fmax(x: F, y: F) -> F { #[cfg(test)] mod tests { use super::*; - use crate::support::{Hexf, Int}; + use crate::support::Hexf; fn spec_test() { let cases = [ diff --git a/libm/src/math/generic/fmaximum.rs b/libm/src/math/generic/fmaximum.rs index 5f653ce94..b0fde88e8 100644 --- a/libm/src/math/generic/fmaximum.rs +++ b/libm/src/math/generic/fmaximum.rs @@ -29,7 +29,7 @@ pub fn fmaximum(x: F, y: F) -> F { #[cfg(test)] mod tests { use super::*; - use crate::support::{Hexf, Int}; + use crate::support::Hexf; fn spec_test() { let cases = [ diff --git a/libm/src/math/generic/fmaximum_num.rs b/libm/src/math/generic/fmaximum_num.rs index 224660123..68b03109d 100644 --- a/libm/src/math/generic/fmaximum_num.rs +++ b/libm/src/math/generic/fmaximum_num.rs @@ -28,7 +28,7 @@ pub fn fmaximum_num(x: F, y: F) -> F { #[cfg(test)] mod tests { use super::*; - use crate::support::{Hexf, Int}; + use crate::support::Hexf; fn spec_test() { let cases = [ diff --git a/libm/src/math/generic/fmin.rs b/libm/src/math/generic/fmin.rs index 5cc33e904..2aa7f6af7 100644 --- a/libm/src/math/generic/fmin.rs +++ b/libm/src/math/generic/fmin.rs @@ -25,7 +25,7 @@ pub fn fmin(x: F, y: F) -> 
F { #[cfg(test)] mod tests { use super::*; - use crate::support::{Hexf, Int}; + use crate::support::Hexf; fn spec_test() { let cases = [ diff --git a/libm/src/math/generic/fminimum.rs b/libm/src/math/generic/fminimum.rs index f566d9631..e01c88646 100644 --- a/libm/src/math/generic/fminimum.rs +++ b/libm/src/math/generic/fminimum.rs @@ -29,7 +29,7 @@ pub fn fminimum(x: F, y: F) -> F { #[cfg(test)] mod tests { use super::*; - use crate::support::{Hexf, Int}; + use crate::support::Hexf; fn spec_test() { let cases = [ diff --git a/libm/src/math/generic/fminimum_num.rs b/libm/src/math/generic/fminimum_num.rs index e58a585c3..3e97b893b 100644 --- a/libm/src/math/generic/fminimum_num.rs +++ b/libm/src/math/generic/fminimum_num.rs @@ -28,7 +28,7 @@ pub fn fminimum_num(x: F, y: F) -> F { #[cfg(test)] mod tests { use super::*; - use crate::support::{Hexf, Int}; + use crate::support::Hexf; fn spec_test() { let cases = [ diff --git a/libm/src/math/generic/rint.rs b/libm/src/math/generic/rint.rs index 2f8b2b365..45d2f3138 100644 --- a/libm/src/math/generic/rint.rs +++ b/libm/src/math/generic/rint.rs @@ -43,7 +43,7 @@ pub fn rint_round(x: F, _round: Round) -> FpResult { #[cfg(test)] mod tests { use super::*; - use crate::support::{Hexf, Int, Status}; + use crate::support::{Hexf, Status}; fn spec_test(cases: &[(F, F, Status)]) { let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY]; diff --git a/libm/src/math/generic/scalbn.rs b/libm/src/math/generic/scalbn.rs index 5ba7f2ab2..aaa243933 100644 --- a/libm/src/math/generic/scalbn.rs +++ b/libm/src/math/generic/scalbn.rs @@ -121,7 +121,6 @@ where #[cfg(test)] mod tests { - use super::super::super::Int; use super::*; // Tests against N3220 diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs index c80e77511..0b72db0e4 100644 --- a/libm/src/math/support/macros.rs +++ b/libm/src/math/support/macros.rs @@ -137,9 +137,10 @@ macro_rules! hf128 { #[cfg(test)] macro_rules! 
assert_biteq { ($left:expr, $right:expr, $($tt:tt)*) => {{ + use $crate::support::Int; let l = $left; let r = $right; - let bits = (l.to_bits() - l.to_bits()).leading_zeros(); // hack to get the width from the value + let bits = Int::leading_zeros(l.to_bits() - l.to_bits()); // hack to get the width from the value assert!( l.biteq(r), "{}\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})", From 0a2dc5d98ccc6e23f4b93828801fa9c6770d4359 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Apr 2025 00:22:21 +0000 Subject: [PATCH 1268/1459] Combine the source files for more generic implementations Splitting into different source files by float size doesn't have any benefit when the only content is a small function that forwards to the generic implementation. Combine the source files for all width versions of: * ceil * copysign * fabs * fdim * floor * fmaximum * fmaximum_num * fminimum * fminimum_num * ldexp * scalbn * sqrt * trunc fmod is excluded to avoid conflicts with an open PR. As part of this change move unit tests out of the generic module, instead testing the type-specific functions (e.g. `ceilf16` rather than `ceil::<f16>()`). This ensures that unit tests are validating whatever we expose, such as arch-specific implementations via `select_implementation!`, which would otherwise be skipped. (They are still covered by integration tests).
--- libm/etc/function-definitions.json | 60 +++++++------- libm/src/math/acosf.rs | 2 +- libm/src/math/asinf.rs | 4 +- libm/src/math/ceil.rs | 32 ++++++++ libm/src/math/ceilf.rs | 13 ---- libm/src/math/ceilf128.rs | 7 -- libm/src/math/ceilf16.rs | 7 -- libm/src/math/copysign.rs | 80 +++++++++++++++++++ libm/src/math/copysignf.rs | 8 -- libm/src/math/copysignf128.rs | 8 -- libm/src/math/copysignf16.rs | 8 -- libm/src/math/fabs.rs | 103 ++++++++++++++++++++++--- libm/src/math/fabsf.rs | 39 ---------- libm/src/math/fabsf128.rs | 31 -------- libm/src/math/fabsf16.rs | 31 -------- libm/src/math/fdim.rs | 41 ++++++++++ libm/src/math/fdimf.rs | 12 --- libm/src/math/fdimf128.rs | 12 --- libm/src/math/fdimf16.rs | 12 --- libm/src/math/floor.rs | 32 ++++++++ libm/src/math/floorf.rs | 13 ---- libm/src/math/floorf128.rs | 7 -- libm/src/math/floorf16.rs | 7 -- libm/src/math/fmin_fmax.rs | 92 ++++++++++++++++++++++ libm/src/math/fminimum_fmaximum.rs | 96 +++++++++++++++++++++++ libm/src/math/fminimum_fmaximum_num.rs | 96 +++++++++++++++++++++++ libm/src/math/generic/fmax.rs | 49 ------------ libm/src/math/generic/fmaximum.rs | 51 ------------ libm/src/math/generic/fmaximum_num.rs | 51 ------------ libm/src/math/generic/fmin.rs | 49 ------------ libm/src/math/generic/fminimum.rs | 51 ------------ libm/src/math/generic/fminimum_num.rs | 51 ------------ libm/src/math/generic/scalbn.rs | 65 ---------------- libm/src/math/ldexp.rs | 17 ++++ libm/src/math/ldexpf.rs | 4 - libm/src/math/ldexpf128.rs | 4 - libm/src/math/ldexpf16.rs | 4 - libm/src/math/mod.rs | 100 +++++++----------------- libm/src/math/round.rs | 20 +++++ libm/src/math/roundf.rs | 5 -- libm/src/math/roundf128.rs | 5 -- libm/src/math/roundf16.rs | 5 -- libm/src/math/scalbn.rs | 83 ++++++++++++++++++++ libm/src/math/scalbnf.rs | 4 - libm/src/math/scalbnf128.rs | 4 - libm/src/math/scalbnf16.rs | 4 - libm/src/math/sqrt.rs | 36 +++++++++ libm/src/math/sqrtf.rs | 15 ---- libm/src/math/sqrtf128.rs | 5 -- 
libm/src/math/sqrtf16.rs | 11 --- libm/src/math/trunc.rs | 40 ++++++++++ libm/src/math/truncf.rs | 23 ------ libm/src/math/truncf128.rs | 7 -- libm/src/math/truncf16.rs | 7 -- 54 files changed, 819 insertions(+), 804 deletions(-) delete mode 100644 libm/src/math/ceilf.rs delete mode 100644 libm/src/math/ceilf128.rs delete mode 100644 libm/src/math/ceilf16.rs delete mode 100644 libm/src/math/copysignf.rs delete mode 100644 libm/src/math/copysignf128.rs delete mode 100644 libm/src/math/copysignf16.rs delete mode 100644 libm/src/math/fabsf.rs delete mode 100644 libm/src/math/fabsf128.rs delete mode 100644 libm/src/math/fabsf16.rs delete mode 100644 libm/src/math/fdimf.rs delete mode 100644 libm/src/math/fdimf128.rs delete mode 100644 libm/src/math/fdimf16.rs delete mode 100644 libm/src/math/floorf.rs delete mode 100644 libm/src/math/floorf128.rs delete mode 100644 libm/src/math/floorf16.rs delete mode 100644 libm/src/math/ldexpf.rs delete mode 100644 libm/src/math/ldexpf128.rs delete mode 100644 libm/src/math/ldexpf16.rs delete mode 100644 libm/src/math/roundf.rs delete mode 100644 libm/src/math/roundf128.rs delete mode 100644 libm/src/math/roundf16.rs delete mode 100644 libm/src/math/scalbnf.rs delete mode 100644 libm/src/math/scalbnf128.rs delete mode 100644 libm/src/math/scalbnf16.rs delete mode 100644 libm/src/math/sqrtf.rs delete mode 100644 libm/src/math/sqrtf128.rs delete mode 100644 libm/src/math/sqrtf16.rs delete mode 100644 libm/src/math/truncf.rs delete mode 100644 libm/src/math/truncf128.rs delete mode 100644 libm/src/math/truncf16.rs diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index bca58402f..801e74b22 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -108,21 +108,21 @@ "ceilf": { "sources": [ "src/math/arch/wasm32.rs", - "src/math/ceilf.rs", + "src/math/ceil.rs", "src/math/generic/ceil.rs" ], "type": "f32" }, "ceilf128": { "sources": [ - "src/math/ceilf128.rs", + 
"src/math/ceil.rs", "src/math/generic/ceil.rs" ], "type": "f128" }, "ceilf16": { "sources": [ - "src/math/ceilf16.rs", + "src/math/ceil.rs", "src/math/generic/ceil.rs" ], "type": "f16" @@ -136,21 +136,21 @@ }, "copysignf": { "sources": [ - "src/math/copysignf.rs", + "src/math/copysign.rs", "src/math/generic/copysign.rs" ], "type": "f32" }, "copysignf128": { "sources": [ - "src/math/copysignf128.rs", + "src/math/copysign.rs", "src/math/generic/copysign.rs" ], "type": "f128" }, "copysignf16": { "sources": [ - "src/math/copysignf16.rs", + "src/math/copysign.rs", "src/math/generic/copysign.rs" ], "type": "f16" @@ -262,21 +262,21 @@ "fabsf": { "sources": [ "src/math/arch/wasm32.rs", - "src/math/fabsf.rs", + "src/math/fabs.rs", "src/math/generic/fabs.rs" ], "type": "f32" }, "fabsf128": { "sources": [ - "src/math/fabsf128.rs", + "src/math/fabs.rs", "src/math/generic/fabs.rs" ], "type": "f128" }, "fabsf16": { "sources": [ - "src/math/fabsf16.rs", + "src/math/fabs.rs", "src/math/generic/fabs.rs" ], "type": "f16" @@ -290,21 +290,21 @@ }, "fdimf": { "sources": [ - "src/math/fdimf.rs", + "src/math/fdim.rs", "src/math/generic/fdim.rs" ], "type": "f32" }, "fdimf128": { "sources": [ - "src/math/fdimf128.rs", + "src/math/fdim.rs", "src/math/generic/fdim.rs" ], "type": "f128" }, "fdimf16": { "sources": [ - "src/math/fdimf16.rs", + "src/math/fdim.rs", "src/math/generic/fdim.rs" ], "type": "f16" @@ -321,21 +321,21 @@ "floorf": { "sources": [ "src/math/arch/wasm32.rs", - "src/math/floorf.rs", + "src/math/floor.rs", "src/math/generic/floor.rs" ], "type": "f32" }, "floorf128": { "sources": [ - "src/math/floorf128.rs", + "src/math/floor.rs", "src/math/generic/floor.rs" ], "type": "f128" }, "floorf16": { "sources": [ - "src/math/floorf16.rs", + "src/math/floor.rs", "src/math/generic/floor.rs" ], "type": "f16" @@ -636,19 +636,19 @@ }, "ldexpf": { "sources": [ - "src/math/ldexpf.rs" + "src/math/ldexp.rs" ], "type": "f32" }, "ldexpf128": { "sources": [ - "src/math/ldexpf128.rs" + 
"src/math/ldexp.rs" ], "type": "f128" }, "ldexpf16": { "sources": [ - "src/math/ldexpf16.rs" + "src/math/ldexp.rs" ], "type": "f16" }, @@ -847,21 +847,21 @@ "roundf": { "sources": [ "src/math/generic/round.rs", - "src/math/roundf.rs" + "src/math/round.rs" ], "type": "f32" }, "roundf128": { "sources": [ "src/math/generic/round.rs", - "src/math/roundf128.rs" + "src/math/round.rs" ], "type": "f128" }, "roundf16": { "sources": [ "src/math/generic/round.rs", - "src/math/roundf16.rs" + "src/math/round.rs" ], "type": "f16" }, @@ -875,21 +875,21 @@ "scalbnf": { "sources": [ "src/math/generic/scalbn.rs", - "src/math/scalbnf.rs" + "src/math/scalbn.rs" ], "type": "f32" }, "scalbnf128": { "sources": [ "src/math/generic/scalbn.rs", - "src/math/scalbnf128.rs" + "src/math/scalbn.rs" ], "type": "f128" }, "scalbnf16": { "sources": [ "src/math/generic/scalbn.rs", - "src/math/scalbnf16.rs" + "src/math/scalbn.rs" ], "type": "f16" }, @@ -945,14 +945,14 @@ "src/math/arch/i686.rs", "src/math/arch/wasm32.rs", "src/math/generic/sqrt.rs", - "src/math/sqrtf.rs" + "src/math/sqrt.rs" ], "type": "f32" }, "sqrtf128": { "sources": [ "src/math/generic/sqrt.rs", - "src/math/sqrtf128.rs" + "src/math/sqrt.rs" ], "type": "f128" }, @@ -960,7 +960,7 @@ "sources": [ "src/math/arch/aarch64.rs", "src/math/generic/sqrt.rs", - "src/math/sqrtf16.rs" + "src/math/sqrt.rs" ], "type": "f16" }, @@ -1012,21 +1012,21 @@ "sources": [ "src/math/arch/wasm32.rs", "src/math/generic/trunc.rs", - "src/math/truncf.rs" + "src/math/trunc.rs" ], "type": "f32" }, "truncf128": { "sources": [ "src/math/generic/trunc.rs", - "src/math/truncf128.rs" + "src/math/trunc.rs" ], "type": "f128" }, "truncf16": { "sources": [ "src/math/generic/trunc.rs", - "src/math/truncf16.rs" + "src/math/trunc.rs" ], "type": "f16" }, diff --git a/libm/src/math/acosf.rs b/libm/src/math/acosf.rs index 1a60479e3..dd88eea5b 100644 --- a/libm/src/math/acosf.rs +++ b/libm/src/math/acosf.rs @@ -13,7 +13,7 @@ * 
==================================================== */ -use super::sqrtf::sqrtf; +use super::sqrt::sqrtf; const PIO2_HI: f32 = 1.5707962513e+00; /* 0x3fc90fda */ const PIO2_LO: f32 = 7.5497894159e-08; /* 0x33a22168 */ diff --git a/libm/src/math/asinf.rs b/libm/src/math/asinf.rs index 0ea49c076..ed6855567 100644 --- a/libm/src/math/asinf.rs +++ b/libm/src/math/asinf.rs @@ -13,8 +13,8 @@ * ==================================================== */ -use super::fabsf::fabsf; use super::sqrt::sqrt; +use super::support::Float; const PIO2: f64 = 1.570796326794896558e+00; @@ -61,7 +61,7 @@ pub fn asinf(mut x: f32) -> f32 { } /* 1 > |x| >= 0.5 */ - let z = (1. - fabsf(x)) * 0.5; + let z = (1. - Float::abs(x)) * 0.5; let s = sqrt(z as f64); x = (PIO2 - 2. * (s + s * (r(z) as f64))) as f32; if (hx >> 31) != 0 { -x } else { x } diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index 535f434ac..4e1035457 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -1,3 +1,26 @@ +/// Ceil (f16) +/// +/// Finds the nearest integer greater than or equal to `x`. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ceilf16(x: f16) -> f16 { + super::generic::ceil(x) +} + +/// Ceil (f32) +/// +/// Finds the nearest integer greater than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ceilf(x: f32) -> f32 { + select_implementation! { + name: ceilf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::ceil(x) +} + /// Ceil (f64) /// /// Finds the nearest integer greater than or equal to `x`. @@ -12,3 +35,12 @@ pub fn ceil(x: f64) -> f64 { super::generic::ceil(x) } + +/// Ceil (f128) +/// +/// Finds the nearest integer greater than or equal to `x`. 
+#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ceilf128(x: f128) -> f128 { + super::generic::ceil(x) +} diff --git a/libm/src/math/ceilf.rs b/libm/src/math/ceilf.rs deleted file mode 100644 index 66d44189c..000000000 --- a/libm/src/math/ceilf.rs +++ /dev/null @@ -1,13 +0,0 @@ -/// Ceil (f32) -/// -/// Finds the nearest integer greater than or equal to `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn ceilf(x: f32) -> f32 { - select_implementation! { - name: ceilf, - use_arch: all(target_arch = "wasm32", intrinsics_enabled), - args: x, - } - - super::generic::ceil(x) -} diff --git a/libm/src/math/ceilf128.rs b/libm/src/math/ceilf128.rs deleted file mode 100644 index 89980858e..000000000 --- a/libm/src/math/ceilf128.rs +++ /dev/null @@ -1,7 +0,0 @@ -/// Ceil (f128) -/// -/// Finds the nearest integer greater than or equal to `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn ceilf128(x: f128) -> f128 { - super::generic::ceil(x) -} diff --git a/libm/src/math/ceilf16.rs b/libm/src/math/ceilf16.rs deleted file mode 100644 index 2af67eff0..000000000 --- a/libm/src/math/ceilf16.rs +++ /dev/null @@ -1,7 +0,0 @@ -/// Ceil (f16) -/// -/// Finds the nearest integer greater than or equal to `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn ceilf16(x: f16) -> f16 { - super::generic::ceil(x) -} diff --git a/libm/src/math/copysign.rs b/libm/src/math/copysign.rs index 552bf3975..d2a86e7fd 100644 --- a/libm/src/math/copysign.rs +++ b/libm/src/math/copysign.rs @@ -1,3 +1,22 @@ +/// Sign of Y, magnitude of X (f16) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. 
+#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf16(x: f16, y: f16) -> f16 { + super::generic::copysign(x, y) +} + +/// Sign of Y, magnitude of X (f32) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf(x: f32, y: f32) -> f32 { + super::generic::copysign(x, y) +} + /// Sign of Y, magnitude of X (f64) /// /// Constructs a number with the magnitude (absolute value) of its @@ -6,3 +25,64 @@ pub fn copysign(x: f64, y: f64) -> f64 { super::generic::copysign(x, y) } + +/// Sign of Y, magnitude of X (f128) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf128(x: f128, y: f128) -> f128 { + super::generic::copysign(x, y) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::Float; + + fn spec_test(f: impl Fn(F, F) -> F) { + assert_biteq!(f(F::ZERO, F::ZERO), F::ZERO); + assert_biteq!(f(F::NEG_ZERO, F::ZERO), F::ZERO); + assert_biteq!(f(F::ZERO, F::NEG_ZERO), F::NEG_ZERO); + assert_biteq!(f(F::NEG_ZERO, F::NEG_ZERO), F::NEG_ZERO); + + assert_biteq!(f(F::ONE, F::ONE), F::ONE); + assert_biteq!(f(F::NEG_ONE, F::ONE), F::ONE); + assert_biteq!(f(F::ONE, F::NEG_ONE), F::NEG_ONE); + assert_biteq!(f(F::NEG_ONE, F::NEG_ONE), F::NEG_ONE); + + assert_biteq!(f(F::INFINITY, F::INFINITY), F::INFINITY); + assert_biteq!(f(F::NEG_INFINITY, F::INFINITY), F::INFINITY); + assert_biteq!(f(F::INFINITY, F::NEG_INFINITY), F::NEG_INFINITY); + assert_biteq!(f(F::NEG_INFINITY, F::NEG_INFINITY), F::NEG_INFINITY); + + // Not required but we expect it + assert_biteq!(f(F::NAN, F::NAN), F::NAN); + assert_biteq!(f(F::NEG_NAN, F::NAN), F::NAN); + assert_biteq!(f(F::NAN, F::NEG_NAN), 
F::NEG_NAN); + assert_biteq!(f(F::NEG_NAN, F::NEG_NAN), F::NEG_NAN); + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(copysignf16); + } + + #[test] + fn spec_tests_f32() { + spec_test::(copysignf); + } + + #[test] + fn spec_tests_f64() { + spec_test::(copysign); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(copysignf128); + } +} diff --git a/libm/src/math/copysignf.rs b/libm/src/math/copysignf.rs deleted file mode 100644 index 8b9bed4c0..000000000 --- a/libm/src/math/copysignf.rs +++ /dev/null @@ -1,8 +0,0 @@ -/// Sign of Y, magnitude of X (f32) -/// -/// Constructs a number with the magnitude (absolute value) of its -/// first argument, `x`, and the sign of its second argument, `y`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn copysignf(x: f32, y: f32) -> f32 { - super::generic::copysign(x, y) -} diff --git a/libm/src/math/copysignf128.rs b/libm/src/math/copysignf128.rs deleted file mode 100644 index 7bd81d42b..000000000 --- a/libm/src/math/copysignf128.rs +++ /dev/null @@ -1,8 +0,0 @@ -/// Sign of Y, magnitude of X (f128) -/// -/// Constructs a number with the magnitude (absolute value) of its -/// first argument, `x`, and the sign of its second argument, `y`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn copysignf128(x: f128, y: f128) -> f128 { - super::generic::copysign(x, y) -} diff --git a/libm/src/math/copysignf16.rs b/libm/src/math/copysignf16.rs deleted file mode 100644 index 820658686..000000000 --- a/libm/src/math/copysignf16.rs +++ /dev/null @@ -1,8 +0,0 @@ -/// Sign of Y, magnitude of X (f16) -/// -/// Constructs a number with the magnitude (absolute value) of its -/// first argument, `x`, and the sign of its second argument, `y`. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn copysignf16(x: f16, y: f16) -> f16 { - super::generic::copysign(x, y) -} diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs index 22867fab0..0050a309f 100644 --- a/libm/src/math/fabs.rs +++ b/libm/src/math/fabs.rs @@ -1,3 +1,28 @@ +/// Absolute value (magnitude) (f16) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf16(x: f16) -> f16 { + super::generic::fabs(x) +} + +/// Absolute value (magnitude) (f32) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf(x: f32) -> f32 { + select_implementation! { + name: fabsf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::fabs(x) +} + /// Absolute value (magnitude) (f64) /// /// Calculates the absolute value (magnitude) of the argument `x`, @@ -13,25 +38,79 @@ pub fn fabs(x: f64) -> f64 { super::generic::fabs(x) } +/// Absolute value (magnitude) (f128) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. 
+#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf128(x: f128) -> f128 { + super::generic::fabs(x) +} + #[cfg(test)] mod tests { use super::*; + use crate::support::Float; + + /// Based on https://en.cppreference.com/w/cpp/numeric/math/fabs + fn spec_test(f: impl Fn(F) -> F) { + assert_biteq!(f(F::ZERO), F::ZERO); + assert_biteq!(f(F::NEG_ZERO), F::ZERO); + assert_biteq!(f(F::INFINITY), F::INFINITY); + assert_biteq!(f(F::NEG_INFINITY), F::INFINITY); + assert!(f(F::NAN).is_nan()); + + // Not spec rewquired but we expect it + assert!(f(F::NAN).is_sign_positive()); + assert!(f(F::from_bits(F::NAN.to_bits() | F::SIGN_MASK)).is_sign_positive()); + } + + #[test] + #[cfg(f16_enabled)] + fn sanity_check_f16() { + assert_eq!(fabsf16(-1.0f16), 1.0); + assert_eq!(fabsf16(2.8f16), 2.8); + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(fabsf16); + } + + #[test] + fn sanity_check_f32() { + assert_eq!(fabsf(-1.0f32), 1.0); + assert_eq!(fabsf(2.8f32), 2.8); + } + + #[test] + fn spec_tests_f32() { + spec_test::(fabsf); + } + + #[test] + fn sanity_check_f64() { + assert_eq!(fabs(-1.0f64), 1.0); + assert_eq!(fabs(2.8f64), 2.8); + } + + #[test] + fn spec_tests_f64() { + spec_test::(fabs); + } #[test] - fn sanity_check() { - assert_eq!(fabs(-1.0), 1.0); - assert_eq!(fabs(2.8), 2.8); + #[cfg(f128_enabled)] + fn sanity_check_f128() { + assert_eq!(fabsf128(-1.0f128), 1.0); + assert_eq!(fabsf128(2.8f128), 2.8); } - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs #[test] - fn spec_tests() { - assert!(fabs(f64::NAN).is_nan()); - for f in [0.0, -0.0].iter().copied() { - assert_eq!(fabs(f), 0.0); - } - for f in [f64::INFINITY, f64::NEG_INFINITY].iter().copied() { - assert_eq!(fabs(f), f64::INFINITY); - } + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(fabsf128); } } diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs deleted file mode 100644 index e5820a26c..000000000 --- 
a/libm/src/math/fabsf.rs +++ /dev/null @@ -1,39 +0,0 @@ -/// Absolute value (magnitude) (f32) -/// -/// Calculates the absolute value (magnitude) of the argument `x`, -/// by direct manipulation of the bit representation of `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fabsf(x: f32) -> f32 { - select_implementation! { - name: fabsf, - use_arch: all(target_arch = "wasm32", intrinsics_enabled), - args: x, - } - - super::generic::fabs(x) -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(fabsf(-1.0), 1.0); - assert_eq!(fabsf(2.8), 2.8); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs - #[test] - fn spec_tests() { - assert!(fabsf(f32::NAN).is_nan()); - for f in [0.0, -0.0].iter().copied() { - assert_eq!(fabsf(f), 0.0); - } - for f in [f32::INFINITY, f32::NEG_INFINITY].iter().copied() { - assert_eq!(fabsf(f), f32::INFINITY); - } - } -} diff --git a/libm/src/math/fabsf128.rs b/libm/src/math/fabsf128.rs deleted file mode 100644 index 46429ca49..000000000 --- a/libm/src/math/fabsf128.rs +++ /dev/null @@ -1,31 +0,0 @@ -/// Absolute value (magnitude) (f128) -/// -/// Calculates the absolute value (magnitude) of the argument `x`, -/// by direct manipulation of the bit representation of `x`. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fabsf128(x: f128) -> f128 { - super::generic::fabs(x) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(fabsf128(-1.0), 1.0); - assert_eq!(fabsf128(2.8), 2.8); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs - #[test] - fn spec_tests() { - assert!(fabsf128(f128::NAN).is_nan()); - for f in [0.0, -0.0].iter().copied() { - assert_eq!(fabsf128(f), 0.0); - } - for f in [f128::INFINITY, f128::NEG_INFINITY].iter().copied() { - assert_eq!(fabsf128(f), f128::INFINITY); - } - } -} diff --git a/libm/src/math/fabsf16.rs b/libm/src/math/fabsf16.rs deleted file mode 100644 index eee42ac6a..000000000 --- a/libm/src/math/fabsf16.rs +++ /dev/null @@ -1,31 +0,0 @@ -/// Absolute value (magnitude) (f16) -/// -/// Calculates the absolute value (magnitude) of the argument `x`, -/// by direct manipulation of the bit representation of `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fabsf16(x: f16) -> f16 { - super::generic::fabs(x) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(fabsf16(-1.0), 1.0); - assert_eq!(fabsf16(2.8), 2.8); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs - #[test] - fn spec_tests() { - assert!(fabsf16(f16::NAN).is_nan()); - for f in [0.0, -0.0].iter().copied() { - assert_eq!(fabsf16(f), 0.0); - } - for f in [f16::INFINITY, f16::NEG_INFINITY].iter().copied() { - assert_eq!(fabsf16(f), f16::INFINITY); - } - } -} diff --git a/libm/src/math/fdim.rs b/libm/src/math/fdim.rs index 10ffa2881..082c5478b 100644 --- a/libm/src/math/fdim.rs +++ b/libm/src/math/fdim.rs @@ -1,3 +1,30 @@ +/// Positive difference (f16) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. 
+#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf16(x: f16, y: f16) -> f16 { + super::generic::fdim(x, y) +} + +/// Positive difference (f32) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf(x: f32, y: f32) -> f32 { + super::generic::fdim(x, y) +} + /// Positive difference (f64) /// /// Determines the positive difference between arguments, returning: @@ -10,3 +37,17 @@ pub fn fdim(x: f64, y: f64) -> f64 { super::generic::fdim(x, y) } + +/// Positive difference (f128) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf128(x: f128, y: f128) -> f128 { + super::generic::fdim(x, y) +} diff --git a/libm/src/math/fdimf.rs b/libm/src/math/fdimf.rs deleted file mode 100644 index 367ef517c..000000000 --- a/libm/src/math/fdimf.rs +++ /dev/null @@ -1,12 +0,0 @@ -/// Positive difference (f32) -/// -/// Determines the positive difference between arguments, returning: -/// * x - y if x > y, or -/// * +0 if x <= y, or -/// * NAN if either argument is NAN. -/// -/// A range error may occur. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fdimf(x: f32, y: f32) -> f32 { - super::generic::fdim(x, y) -} diff --git a/libm/src/math/fdimf128.rs b/libm/src/math/fdimf128.rs deleted file mode 100644 index 6f3d1d0ff..000000000 --- a/libm/src/math/fdimf128.rs +++ /dev/null @@ -1,12 +0,0 @@ -/// Positive difference (f128) -/// -/// Determines the positive difference between arguments, returning: -/// * x - y if x > y, or -/// * +0 if x <= y, or -/// * NAN if either argument is NAN. -/// -/// A range error may occur. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fdimf128(x: f128, y: f128) -> f128 { - super::generic::fdim(x, y) -} diff --git a/libm/src/math/fdimf16.rs b/libm/src/math/fdimf16.rs deleted file mode 100644 index 37bd68858..000000000 --- a/libm/src/math/fdimf16.rs +++ /dev/null @@ -1,12 +0,0 @@ -/// Positive difference (f16) -/// -/// Determines the positive difference between arguments, returning: -/// * x - y if x > y, or -/// * +0 if x <= y, or -/// * NAN if either argument is NAN. -/// -/// A range error may occur. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fdimf16(x: f16, y: f16) -> f16 { - super::generic::fdim(x, y) -} diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index b4f02abc4..3c5eab101 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -1,3 +1,12 @@ +/// Floor (f16) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf16(x: f16) -> f16 { + return super::generic::floor(x); +} + /// Floor (f64) /// /// Finds the nearest integer less than or equal to `x`. @@ -12,3 +21,26 @@ pub fn floor(x: f64) -> f64 { return super::generic::floor(x); } + +/// Floor (f32) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf(x: f32) -> f32 { + select_implementation! 
{ + name: floorf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + return super::generic::floor(x); +} + +/// Floor (f128) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf128(x: f128) -> f128 { + return super::generic::floor(x); +} diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs deleted file mode 100644 index 16957b7f3..000000000 --- a/libm/src/math/floorf.rs +++ /dev/null @@ -1,13 +0,0 @@ -/// Floor (f32) -/// -/// Finds the nearest integer less than or equal to `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn floorf(x: f32) -> f32 { - select_implementation! { - name: floorf, - use_arch: all(target_arch = "wasm32", intrinsics_enabled), - args: x, - } - - return super::generic::floor(x); -} diff --git a/libm/src/math/floorf128.rs b/libm/src/math/floorf128.rs deleted file mode 100644 index 9a9fe4151..000000000 --- a/libm/src/math/floorf128.rs +++ /dev/null @@ -1,7 +0,0 @@ -/// Floor (f128) -/// -/// Finds the nearest integer less than or equal to `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn floorf128(x: f128) -> f128 { - return super::generic::floor(x); -} diff --git a/libm/src/math/floorf16.rs b/libm/src/math/floorf16.rs deleted file mode 100644 index f9b868e04..000000000 --- a/libm/src/math/floorf16.rs +++ /dev/null @@ -1,7 +0,0 @@ -/// Floor (f16) -/// -/// Finds the nearest integer less than or equal to `x`. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn floorf16(x: f16) -> f16 { - return super::generic::floor(x); -} diff --git a/libm/src/math/fmin_fmax.rs b/libm/src/math/fmin_fmax.rs index 4f9136dbb..2947b783e 100644 --- a/libm/src/math/fmin_fmax.rs +++ b/libm/src/math/fmin_fmax.rs @@ -73,3 +73,95 @@ pub fn fmax(x: f64, y: f64) -> f64 { pub fn fmaxf128(x: f128, y: f128) -> f128 { super::generic::fmax(x, y) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Float, Hexf}; + + fn fmin_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ZERO), + (F::ONE, F::ZERO, F::ZERO), + (F::ZERO, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::ZERO, F::NEG_ONE), + (F::INFINITY, F::ZERO, F::ZERO), + (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), + (F::NAN, F::ZERO, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NAN, F::NAN), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fmin({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fmin_spec_tests_f16() { + fmin_spec_test::(fminf16); + } + + #[test] + fn fmin_spec_tests_f32() { + fmin_spec_test::(fminf); + } + + #[test] + fn fmin_spec_tests_f64() { + fmin_spec_test::(fmin); + } + + #[test] + #[cfg(f128_enabled)] + fn fmin_spec_tests_f128() { + fmin_spec_test::(fminf128); + } + + fn fmax_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ONE), + (F::ONE, F::ZERO, F::ONE), + (F::ZERO, F::NEG_ONE, F::ZERO), + (F::NEG_ONE, F::ZERO, F::ZERO), + (F::INFINITY, F::ZERO, F::INFINITY), + (F::NEG_INFINITY, F::ZERO, F::ZERO), + (F::NAN, F::ZERO, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NAN, F::NAN), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fmax({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn 
fmax_spec_tests_f16() { + fmax_spec_test::(fmaxf16); + } + + #[test] + fn fmax_spec_tests_f32() { + fmax_spec_test::(fmaxf); + } + + #[test] + fn fmax_spec_tests_f64() { + fmax_spec_test::(fmax); + } + + #[test] + #[cfg(f128_enabled)] + fn fmax_spec_tests_f128() { + fmax_spec_test::(fmaxf128); + } +} diff --git a/libm/src/math/fminimum_fmaximum.rs b/libm/src/math/fminimum_fmaximum.rs index fd3c5ed10..b7999e273 100644 --- a/libm/src/math/fminimum_fmaximum.rs +++ b/libm/src/math/fminimum_fmaximum.rs @@ -65,3 +65,99 @@ pub fn fmaximum(x: f64, y: f64) -> f64 { pub fn fmaximumf128(x: f128, y: f128) -> f128 { super::generic::fmaximum(x, y) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Float, Hexf}; + + fn fminimum_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ZERO), + (F::ONE, F::ZERO, F::ZERO), + (F::ZERO, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::ZERO, F::NEG_ONE), + (F::INFINITY, F::ZERO, F::ZERO), + (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), + (F::NAN, F::ZERO, F::NAN), + (F::ZERO, F::NAN, F::NAN), + (F::NAN, F::NAN, F::NAN), + (F::ZERO, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ZERO, F::ZERO, F::NEG_ZERO), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fminimum({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fminimum_spec_tests_f16() { + fminimum_spec_test::(fminimumf16); + } + + #[test] + fn fminimum_spec_tests_f32() { + fminimum_spec_test::(fminimumf); + } + + #[test] + fn fminimum_spec_tests_f64() { + fminimum_spec_test::(fminimum); + } + + #[test] + #[cfg(f128_enabled)] + fn fminimum_spec_tests_f128() { + fminimum_spec_test::(fminimumf128); + } + + fn fmaximum_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ONE), + (F::ONE, F::ZERO, F::ONE), + (F::ZERO, F::NEG_ONE, F::ZERO), + (F::NEG_ONE, F::ZERO, F::ZERO), 
+ (F::INFINITY, F::ZERO, F::INFINITY), + (F::NEG_INFINITY, F::ZERO, F::ZERO), + (F::NAN, F::ZERO, F::NAN), + (F::ZERO, F::NAN, F::NAN), + (F::NAN, F::NAN, F::NAN), + (F::ZERO, F::NEG_ZERO, F::ZERO), + (F::NEG_ZERO, F::ZERO, F::ZERO), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fmaximum({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fmaximum_spec_tests_f16() { + fmaximum_spec_test::(fmaximumf16); + } + + #[test] + fn fmaximum_spec_tests_f32() { + fmaximum_spec_test::(fmaximumf); + } + + #[test] + fn fmaximum_spec_tests_f64() { + fmaximum_spec_test::(fmaximum); + } + + #[test] + #[cfg(f128_enabled)] + fn fmaximum_spec_tests_f128() { + fmaximum_spec_test::(fmaximumf128); + } +} diff --git a/libm/src/math/fminimum_fmaximum_num.rs b/libm/src/math/fminimum_fmaximum_num.rs index 640ddfc9b..180d21f72 100644 --- a/libm/src/math/fminimum_fmaximum_num.rs +++ b/libm/src/math/fminimum_fmaximum_num.rs @@ -65,3 +65,99 @@ pub fn fmaximum_num(x: f64, y: f64) -> f64 { pub fn fmaximum_numf128(x: f128, y: f128) -> f128 { super::generic::fmaximum_num(x, y) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Float, Hexf}; + + fn fminimum_num_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ZERO), + (F::ONE, F::ZERO, F::ZERO), + (F::ZERO, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::ZERO, F::NEG_ONE), + (F::INFINITY, F::ZERO, F::ZERO), + (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), + (F::NAN, F::ZERO, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NAN, F::NAN), + (F::ZERO, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ZERO, F::ZERO, F::NEG_ZERO), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fminimum_num({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fminimum_num_spec_tests_f16() { + fminimum_num_spec_test::(fminimum_numf16); + } + + #[test] + fn 
fminimum_num_spec_tests_f32() { + fminimum_num_spec_test::(fminimum_numf); + } + + #[test] + fn fminimum_num_spec_tests_f64() { + fminimum_num_spec_test::(fminimum_num); + } + + #[test] + #[cfg(f128_enabled)] + fn fminimum_num_spec_tests_f128() { + fminimum_num_spec_test::(fminimum_numf128); + } + + fn fmaximum_num_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ONE), + (F::ONE, F::ZERO, F::ONE), + (F::ZERO, F::NEG_ONE, F::ZERO), + (F::NEG_ONE, F::ZERO, F::ZERO), + (F::INFINITY, F::ZERO, F::INFINITY), + (F::NEG_INFINITY, F::ZERO, F::ZERO), + (F::NAN, F::ZERO, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NAN, F::NAN), + (F::ZERO, F::NEG_ZERO, F::ZERO), + (F::NEG_ZERO, F::ZERO, F::ZERO), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fmaximum_num({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fmaximum_num_spec_tests_f16() { + fmaximum_num_spec_test::(fmaximum_numf16); + } + + #[test] + fn fmaximum_num_spec_tests_f32() { + fmaximum_num_spec_test::(fmaximum_numf); + } + + #[test] + fn fmaximum_num_spec_tests_f64() { + fmaximum_num_spec_test::(fmaximum_num); + } + + #[test] + #[cfg(f128_enabled)] + fn fmaximum_num_spec_tests_f128() { + fmaximum_num_spec_test::(fmaximum_numf128); + } +} diff --git a/libm/src/math/generic/fmax.rs b/libm/src/math/generic/fmax.rs index 039ffce9f..29a031100 100644 --- a/libm/src/math/generic/fmax.rs +++ b/libm/src/math/generic/fmax.rs @@ -22,52 +22,3 @@ pub fn fmax(x: F, y: F) -> F { // Canonicalize res * F::ONE } - -#[cfg(test)] -mod tests { - use super::*; - use crate::support::Hexf; - - fn spec_test() { - let cases = [ - (F::ZERO, F::ZERO, F::ZERO), - (F::ONE, F::ONE, F::ONE), - (F::ZERO, F::ONE, F::ONE), - (F::ONE, F::ZERO, F::ONE), - (F::ZERO, F::NEG_ONE, F::ZERO), - (F::NEG_ONE, F::ZERO, F::ZERO), - (F::INFINITY, F::ZERO, F::INFINITY), - (F::NEG_INFINITY, F::ZERO, F::ZERO), - 
(F::NAN, F::ZERO, F::ZERO), - (F::ZERO, F::NAN, F::ZERO), - (F::NAN, F::NAN, F::NAN), - ]; - - for (x, y, res) in cases { - let val = fmax(x, y); - assert_biteq!(val, res, "fmax({}, {})", Hexf(x), Hexf(y)); - } - } - - #[test] - #[cfg(f16_enabled)] - fn spec_tests_f16() { - spec_test::(); - } - - #[test] - fn spec_tests_f32() { - spec_test::(); - } - - #[test] - fn spec_tests_f64() { - spec_test::(); - } - - #[test] - #[cfg(f128_enabled)] - fn spec_tests_f128() { - spec_test::(); - } -} diff --git a/libm/src/math/generic/fmaximum.rs b/libm/src/math/generic/fmaximum.rs index b0fde88e8..9e8d1739f 100644 --- a/libm/src/math/generic/fmaximum.rs +++ b/libm/src/math/generic/fmaximum.rs @@ -25,54 +25,3 @@ pub fn fmaximum(x: F, y: F) -> F { // Canonicalize res * F::ONE } - -#[cfg(test)] -mod tests { - use super::*; - use crate::support::Hexf; - - fn spec_test() { - let cases = [ - (F::ZERO, F::ZERO, F::ZERO), - (F::ONE, F::ONE, F::ONE), - (F::ZERO, F::ONE, F::ONE), - (F::ONE, F::ZERO, F::ONE), - (F::ZERO, F::NEG_ONE, F::ZERO), - (F::NEG_ONE, F::ZERO, F::ZERO), - (F::INFINITY, F::ZERO, F::INFINITY), - (F::NEG_INFINITY, F::ZERO, F::ZERO), - (F::NAN, F::ZERO, F::NAN), - (F::ZERO, F::NAN, F::NAN), - (F::NAN, F::NAN, F::NAN), - (F::ZERO, F::NEG_ZERO, F::ZERO), - (F::NEG_ZERO, F::ZERO, F::ZERO), - ]; - - for (x, y, res) in cases { - let val = fmaximum(x, y); - assert_biteq!(val, res, "fmaximum({}, {})", Hexf(x), Hexf(y)); - } - } - - #[test] - #[cfg(f16_enabled)] - fn spec_tests_f16() { - spec_test::(); - } - - #[test] - fn spec_tests_f32() { - spec_test::(); - } - - #[test] - fn spec_tests_f64() { - spec_test::(); - } - - #[test] - #[cfg(f128_enabled)] - fn spec_tests_f128() { - spec_test::(); - } -} diff --git a/libm/src/math/generic/fmaximum_num.rs b/libm/src/math/generic/fmaximum_num.rs index 68b03109d..756ef5d9f 100644 --- a/libm/src/math/generic/fmaximum_num.rs +++ b/libm/src/math/generic/fmaximum_num.rs @@ -24,54 +24,3 @@ pub fn fmaximum_num(x: F, y: F) -> F { // 
Canonicalize res * F::ONE } - -#[cfg(test)] -mod tests { - use super::*; - use crate::support::Hexf; - - fn spec_test() { - let cases = [ - (F::ZERO, F::ZERO, F::ZERO), - (F::ONE, F::ONE, F::ONE), - (F::ZERO, F::ONE, F::ONE), - (F::ONE, F::ZERO, F::ONE), - (F::ZERO, F::NEG_ONE, F::ZERO), - (F::NEG_ONE, F::ZERO, F::ZERO), - (F::INFINITY, F::ZERO, F::INFINITY), - (F::NEG_INFINITY, F::ZERO, F::ZERO), - (F::NAN, F::ZERO, F::ZERO), - (F::ZERO, F::NAN, F::ZERO), - (F::NAN, F::NAN, F::NAN), - (F::ZERO, F::NEG_ZERO, F::ZERO), - (F::NEG_ZERO, F::ZERO, F::ZERO), - ]; - - for (x, y, res) in cases { - let val = fmaximum_num(x, y); - assert_biteq!(val, res, "fmaximum_num({}, {})", Hexf(x), Hexf(y)); - } - } - - #[test] - #[cfg(f16_enabled)] - fn spec_tests_f16() { - spec_test::(); - } - - #[test] - fn spec_tests_f32() { - spec_test::(); - } - - #[test] - fn spec_tests_f64() { - spec_test::(); - } - - #[test] - #[cfg(f128_enabled)] - fn spec_tests_f128() { - spec_test::(); - } -} diff --git a/libm/src/math/generic/fmin.rs b/libm/src/math/generic/fmin.rs index 2aa7f6af7..69fbf85a1 100644 --- a/libm/src/math/generic/fmin.rs +++ b/libm/src/math/generic/fmin.rs @@ -21,52 +21,3 @@ pub fn fmin(x: F, y: F) -> F { // Canonicalize res * F::ONE } - -#[cfg(test)] -mod tests { - use super::*; - use crate::support::Hexf; - - fn spec_test() { - let cases = [ - (F::ZERO, F::ZERO, F::ZERO), - (F::ONE, F::ONE, F::ONE), - (F::ZERO, F::ONE, F::ZERO), - (F::ONE, F::ZERO, F::ZERO), - (F::ZERO, F::NEG_ONE, F::NEG_ONE), - (F::NEG_ONE, F::ZERO, F::NEG_ONE), - (F::INFINITY, F::ZERO, F::ZERO), - (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), - (F::NAN, F::ZERO, F::ZERO), - (F::ZERO, F::NAN, F::ZERO), - (F::NAN, F::NAN, F::NAN), - ]; - - for (x, y, res) in cases { - let val = fmin(x, y); - assert_biteq!(val, res, "fmin({}, {})", Hexf(x), Hexf(y)); - } - } - - #[test] - #[cfg(f16_enabled)] - fn spec_tests_f16() { - spec_test::(); - } - - #[test] - fn spec_tests_f32() { - spec_test::(); - } - - #[test] - fn 
spec_tests_f64() { - spec_test::(); - } - - #[test] - #[cfg(f128_enabled)] - fn spec_tests_f128() { - spec_test::(); - } -} diff --git a/libm/src/math/generic/fminimum.rs b/libm/src/math/generic/fminimum.rs index e01c88646..ee5493880 100644 --- a/libm/src/math/generic/fminimum.rs +++ b/libm/src/math/generic/fminimum.rs @@ -25,54 +25,3 @@ pub fn fminimum(x: F, y: F) -> F { // Canonicalize res * F::ONE } - -#[cfg(test)] -mod tests { - use super::*; - use crate::support::Hexf; - - fn spec_test() { - let cases = [ - (F::ZERO, F::ZERO, F::ZERO), - (F::ONE, F::ONE, F::ONE), - (F::ZERO, F::ONE, F::ZERO), - (F::ONE, F::ZERO, F::ZERO), - (F::ZERO, F::NEG_ONE, F::NEG_ONE), - (F::NEG_ONE, F::ZERO, F::NEG_ONE), - (F::INFINITY, F::ZERO, F::ZERO), - (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), - (F::NAN, F::ZERO, F::NAN), - (F::ZERO, F::NAN, F::NAN), - (F::NAN, F::NAN, F::NAN), - (F::ZERO, F::NEG_ZERO, F::NEG_ZERO), - (F::NEG_ZERO, F::ZERO, F::NEG_ZERO), - ]; - - for (x, y, res) in cases { - let val = fminimum(x, y); - assert_biteq!(val, res, "fminimum({}, {})", Hexf(x), Hexf(y)); - } - } - - #[test] - #[cfg(f16_enabled)] - fn spec_tests_f16() { - spec_test::(); - } - - #[test] - fn spec_tests_f32() { - spec_test::(); - } - - #[test] - fn spec_tests_f64() { - spec_test::(); - } - - #[test] - #[cfg(f128_enabled)] - fn spec_tests_f128() { - spec_test::(); - } -} diff --git a/libm/src/math/generic/fminimum_num.rs b/libm/src/math/generic/fminimum_num.rs index 3e97b893b..966618328 100644 --- a/libm/src/math/generic/fminimum_num.rs +++ b/libm/src/math/generic/fminimum_num.rs @@ -24,54 +24,3 @@ pub fn fminimum_num(x: F, y: F) -> F { // Canonicalize res * F::ONE } - -#[cfg(test)] -mod tests { - use super::*; - use crate::support::Hexf; - - fn spec_test() { - let cases = [ - (F::ZERO, F::ZERO, F::ZERO), - (F::ONE, F::ONE, F::ONE), - (F::ZERO, F::ONE, F::ZERO), - (F::ONE, F::ZERO, F::ZERO), - (F::ZERO, F::NEG_ONE, F::NEG_ONE), - (F::NEG_ONE, F::ZERO, F::NEG_ONE), - (F::INFINITY, F::ZERO, 
F::ZERO), - (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), - (F::NAN, F::ZERO, F::ZERO), - (F::ZERO, F::NAN, F::ZERO), - (F::NAN, F::NAN, F::NAN), - (F::ZERO, F::NEG_ZERO, F::NEG_ZERO), - (F::NEG_ZERO, F::ZERO, F::NEG_ZERO), - ]; - - for (x, y, res) in cases { - let val = fminimum_num(x, y); - assert_biteq!(val, res, "fminimum_num({}, {})", Hexf(x), Hexf(y)); - } - } - - #[test] - #[cfg(f16_enabled)] - fn spec_tests_f16() { - spec_test::(); - } - - #[test] - fn spec_tests_f32() { - spec_test::(); - } - - #[test] - fn spec_tests_f64() { - spec_test::(); - } - - #[test] - #[cfg(f128_enabled)] - fn spec_tests_f128() { - spec_test::(); - } -} diff --git a/libm/src/math/generic/scalbn.rs b/libm/src/math/generic/scalbn.rs index aaa243933..b2696e5cc 100644 --- a/libm/src/math/generic/scalbn.rs +++ b/libm/src/math/generic/scalbn.rs @@ -118,68 +118,3 @@ where let scale = F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero); x * scale } - -#[cfg(test)] -mod tests { - use super::*; - - // Tests against N3220 - fn spec_test() - where - u32: CastInto, - F::Int: CastFrom, - F::Int: CastFrom, - { - // `scalbn(±0, n)` returns `±0`. - assert_biteq!(scalbn(F::NEG_ZERO, 10), F::NEG_ZERO); - assert_biteq!(scalbn(F::NEG_ZERO, 0), F::NEG_ZERO); - assert_biteq!(scalbn(F::NEG_ZERO, -10), F::NEG_ZERO); - assert_biteq!(scalbn(F::ZERO, 10), F::ZERO); - assert_biteq!(scalbn(F::ZERO, 0), F::ZERO); - assert_biteq!(scalbn(F::ZERO, -10), F::ZERO); - - // `scalbn(x, 0)` returns `x`. - assert_biteq!(scalbn(F::MIN, 0), F::MIN); - assert_biteq!(scalbn(F::MAX, 0), F::MAX); - assert_biteq!(scalbn(F::INFINITY, 0), F::INFINITY); - assert_biteq!(scalbn(F::NEG_INFINITY, 0), F::NEG_INFINITY); - assert_biteq!(scalbn(F::ZERO, 0), F::ZERO); - assert_biteq!(scalbn(F::NEG_ZERO, 0), F::NEG_ZERO); - - // `scalbn(±∞, n)` returns `±∞`. 
- assert_biteq!(scalbn(F::INFINITY, 10), F::INFINITY); - assert_biteq!(scalbn(F::INFINITY, -10), F::INFINITY); - assert_biteq!(scalbn(F::NEG_INFINITY, 10), F::NEG_INFINITY); - assert_biteq!(scalbn(F::NEG_INFINITY, -10), F::NEG_INFINITY); - - // NaN should remain NaNs. - assert!(scalbn(F::NAN, 10).is_nan()); - assert!(scalbn(F::NAN, 0).is_nan()); - assert!(scalbn(F::NAN, -10).is_nan()); - assert!(scalbn(-F::NAN, 10).is_nan()); - assert!(scalbn(-F::NAN, 0).is_nan()); - assert!(scalbn(-F::NAN, -10).is_nan()); - } - - #[test] - #[cfg(f16_enabled)] - fn spec_test_f16() { - spec_test::(); - } - - #[test] - fn spec_test_f32() { - spec_test::(); - } - - #[test] - fn spec_test_f64() { - spec_test::(); - } - - #[test] - #[cfg(f128_enabled)] - fn spec_test_f128() { - spec_test::(); - } -} diff --git a/libm/src/math/ldexp.rs b/libm/src/math/ldexp.rs index e46242e55..24899ba30 100644 --- a/libm/src/math/ldexp.rs +++ b/libm/src/math/ldexp.rs @@ -1,4 +1,21 @@ +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf16(x: f16, n: i32) -> f16 { + super::scalbnf16(x, n) +} + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf(x: f32, n: i32) -> f32 { + super::scalbnf(x, n) +} + #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ldexp(x: f64, n: i32) -> f64 { super::scalbn(x, n) } + +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf128(x: f128, n: i32) -> f128 { + super::scalbnf128(x, n) +} diff --git a/libm/src/math/ldexpf.rs b/libm/src/math/ldexpf.rs deleted file mode 100644 index 95b27fc49..000000000 --- a/libm/src/math/ldexpf.rs +++ /dev/null @@ -1,4 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn ldexpf(x: f32, n: i32) -> f32 { - super::scalbnf(x, n) -} diff --git a/libm/src/math/ldexpf128.rs b/libm/src/math/ldexpf128.rs deleted file mode 100644 index b35277d15..000000000 --- a/libm/src/math/ldexpf128.rs +++ /dev/null @@ -1,4 
+0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn ldexpf128(x: f128, n: i32) -> f128 { - super::scalbnf128(x, n) -} diff --git a/libm/src/math/ldexpf16.rs b/libm/src/math/ldexpf16.rs deleted file mode 100644 index 8de6cffd6..000000000 --- a/libm/src/math/ldexpf16.rs +++ /dev/null @@ -1,4 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn ldexpf16(x: f16, n: i32) -> f16 { - super::scalbnf16(x, n) -} diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 5fc8fa0b3..023cf67a3 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -140,9 +140,7 @@ mod atanhf; mod cbrt; mod cbrtf; mod ceil; -mod ceilf; mod copysign; -mod copysignf; mod cos; mod cosf; mod cosh; @@ -158,11 +156,8 @@ mod expf; mod expm1; mod expm1f; mod fabs; -mod fabsf; mod fdim; -mod fdimf; mod floor; -mod floorf; mod fma; mod fma_wide; mod fmin_fmax; @@ -183,7 +178,6 @@ mod j1f; mod jn; mod jnf; mod ldexp; -mod ldexpf; mod lgamma; mod lgamma_r; mod lgammaf; @@ -209,9 +203,7 @@ mod remquof; mod rint; mod round; mod roundeven; -mod roundf; mod scalbn; -mod scalbnf; mod sin; mod sincos; mod sincosf; @@ -219,7 +211,6 @@ mod sinf; mod sinh; mod sinhf; mod sqrt; -mod sqrtf; mod tan; mod tanf; mod tanh; @@ -227,7 +218,6 @@ mod tanhf; mod tgamma; mod tgammaf; mod trunc; -mod truncf; // Use separated imports instead of {}-grouped imports for easier merging. 
pub use self::acos::acos; @@ -246,10 +236,8 @@ pub use self::atanh::atanh; pub use self::atanhf::atanhf; pub use self::cbrt::cbrt; pub use self::cbrtf::cbrtf; -pub use self::ceil::ceil; -pub use self::ceilf::ceilf; -pub use self::copysign::copysign; -pub use self::copysignf::copysignf; +pub use self::ceil::{ceil, ceilf}; +pub use self::copysign::{copysign, copysignf}; pub use self::cos::cos; pub use self::cosf::cosf; pub use self::cosh::cosh; @@ -264,12 +252,9 @@ pub use self::exp10f::exp10f; pub use self::expf::expf; pub use self::expm1::expm1; pub use self::expm1f::expm1f; -pub use self::fabs::fabs; -pub use self::fabsf::fabsf; -pub use self::fdim::fdim; -pub use self::fdimf::fdimf; -pub use self::floor::floor; -pub use self::floorf::floorf; +pub use self::fabs::{fabs, fabsf}; +pub use self::fdim::{fdim, fdimf}; +pub use self::floor::{floor, floorf}; pub use self::fma::fma; pub use self::fma_wide::fmaf; pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf}; @@ -289,8 +274,7 @@ pub use self::j1::{j1, y1}; pub use self::j1f::{j1f, y1f}; pub use self::jn::{jn, yn}; pub use self::jnf::{jnf, ynf}; -pub use self::ldexp::ldexp; -pub use self::ldexpf::ldexpf; +pub use self::ldexp::{ldexp, ldexpf}; pub use self::lgamma::lgamma; pub use self::lgamma_r::lgamma_r; pub use self::lgammaf::lgammaf; @@ -314,61 +298,47 @@ pub use self::remainderf::remainderf; pub use self::remquo::remquo; pub use self::remquof::remquof; pub use self::rint::{rint, rintf}; -pub use self::round::round; +pub use self::round::{round, roundf}; pub use self::roundeven::{roundeven, roundevenf}; -pub use self::roundf::roundf; -pub use self::scalbn::scalbn; -pub use self::scalbnf::scalbnf; +pub use self::scalbn::{scalbn, scalbnf}; pub use self::sin::sin; pub use self::sincos::sincos; pub use self::sincosf::sincosf; pub use self::sinf::sinf; pub use self::sinh::sinh; pub use self::sinhf::sinhf; -pub use self::sqrt::sqrt; -pub use self::sqrtf::sqrtf; +pub use self::sqrt::{sqrt, sqrtf}; pub use self::tan::tan; 
pub use self::tanf::tanf; pub use self::tanh::tanh; pub use self::tanhf::tanhf; pub use self::tgamma::tgamma; pub use self::tgammaf::tgammaf; -pub use self::trunc::trunc; -pub use self::truncf::truncf; +pub use self::trunc::{trunc, truncf}; cfg_if! { if #[cfg(f16_enabled)] { // verify-sorted-start - mod ceilf16; - mod copysignf16; - mod fabsf16; - mod fdimf16; - mod floorf16; mod fmodf16; - mod ldexpf16; - mod roundf16; - mod scalbnf16; - mod sqrtf16; - mod truncf16; // verify-sorted-end // verify-sorted-start - pub use self::ceilf16::ceilf16; - pub use self::copysignf16::copysignf16; - pub use self::fabsf16::fabsf16; - pub use self::fdimf16::fdimf16; - pub use self::floorf16::floorf16; + pub use self::ceil::ceilf16; + pub use self::copysign::copysignf16; + pub use self::fabs::fabsf16; + pub use self::fdim::fdimf16; + pub use self::floor::floorf16; pub use self::fmin_fmax::{fmaxf16, fminf16}; pub use self::fminimum_fmaximum::{fmaximumf16, fminimumf16}; pub use self::fminimum_fmaximum_num::{fmaximum_numf16, fminimum_numf16}; pub use self::fmodf16::fmodf16; - pub use self::ldexpf16::ldexpf16; + pub use self::ldexp::ldexpf16; pub use self::rint::rintf16; + pub use self::round::roundf16; pub use self::roundeven::roundevenf16; - pub use self::roundf16::roundf16; - pub use self::scalbnf16::scalbnf16; - pub use self::sqrtf16::sqrtf16; - pub use self::truncf16::truncf16; + pub use self::scalbn::scalbnf16; + pub use self::sqrt::sqrtf16; + pub use self::trunc::truncf16; // verify-sorted-end #[allow(unused_imports)] @@ -379,37 +349,27 @@ cfg_if! { cfg_if! 
{ if #[cfg(f128_enabled)] { // verify-sorted-start - mod ceilf128; - mod copysignf128; - mod fabsf128; - mod fdimf128; - mod floorf128; mod fmodf128; - mod ldexpf128; - mod roundf128; - mod scalbnf128; - mod sqrtf128; - mod truncf128; // verify-sorted-end // verify-sorted-start - pub use self::ceilf128::ceilf128; - pub use self::copysignf128::copysignf128; - pub use self::fabsf128::fabsf128; - pub use self::fdimf128::fdimf128; - pub use self::floorf128::floorf128; + pub use self::ceil::ceilf128; + pub use self::copysign::copysignf128; + pub use self::fabs::fabsf128; + pub use self::fdim::fdimf128; + pub use self::floor::floorf128; pub use self::fma::fmaf128; pub use self::fmin_fmax::{fmaxf128, fminf128}; pub use self::fminimum_fmaximum::{fmaximumf128, fminimumf128}; pub use self::fminimum_fmaximum_num::{fmaximum_numf128, fminimum_numf128}; pub use self::fmodf128::fmodf128; - pub use self::ldexpf128::ldexpf128; + pub use self::ldexp::ldexpf128; pub use self::rint::rintf128; + pub use self::round::roundf128; pub use self::roundeven::roundevenf128; - pub use self::roundf128::roundf128; - pub use self::scalbnf128::scalbnf128; - pub use self::sqrtf128::sqrtf128; - pub use self::truncf128::truncf128; + pub use self::scalbn::scalbnf128; + pub use self::sqrt::sqrtf128; + pub use self::trunc::truncf128; // verify-sorted-end } } diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs index 36e0eb1f2..6cd091cd7 100644 --- a/libm/src/math/round.rs +++ b/libm/src/math/round.rs @@ -1,5 +1,25 @@ +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf16(x: f16) -> f16 { + super::generic::round(x) +} + +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf(x: f32) -> f32 { + super::generic::round(x) +} + /// Round `x` to the nearest integer, breaking ties away from zero. 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn round(x: f64) -> f64 { super::generic::round(x) } + +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf128(x: f128) -> f128 { + super::generic::round(x) +} diff --git a/libm/src/math/roundf.rs b/libm/src/math/roundf.rs deleted file mode 100644 index b5d7c9d69..000000000 --- a/libm/src/math/roundf.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Round `x` to the nearest integer, breaking ties away from zero. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn roundf(x: f32) -> f32 { - super::generic::round(x) -} diff --git a/libm/src/math/roundf128.rs b/libm/src/math/roundf128.rs deleted file mode 100644 index fc3164929..000000000 --- a/libm/src/math/roundf128.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Round `x` to the nearest integer, breaking ties away from zero. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn roundf128(x: f128) -> f128 { - super::generic::round(x) -} diff --git a/libm/src/math/roundf16.rs b/libm/src/math/roundf16.rs deleted file mode 100644 index 8b356eaab..000000000 --- a/libm/src/math/roundf16.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Round `x` to the nearest integer, breaking ties away from zero. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn roundf16(x: f16) -> f16 { - super::generic::round(x) -} diff --git a/libm/src/math/scalbn.rs b/libm/src/math/scalbn.rs index f809dad51..ed73c3f94 100644 --- a/libm/src/math/scalbn.rs +++ b/libm/src/math/scalbn.rs @@ -1,4 +1,87 @@ +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf16(x: f16, n: i32) -> f16 { + super::generic::scalbn(x, n) +} + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf(x: f32, n: i32) -> f32 { + super::generic::scalbn(x, n) +} + #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn scalbn(x: f64, n: i32) -> f64 { super::generic::scalbn(x, n) } + +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf128(x: f128, n: i32) -> f128 { + super::generic::scalbn(x, n) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{CastFrom, CastInto, Float}; + + // Tests against N3220 + fn spec_test(f: impl Fn(F, i32) -> F) + where + u32: CastInto, + F::Int: CastFrom, + F::Int: CastFrom, + { + // `scalbn(±0, n)` returns `±0`. + assert_biteq!(f(F::NEG_ZERO, 10), F::NEG_ZERO); + assert_biteq!(f(F::NEG_ZERO, 0), F::NEG_ZERO); + assert_biteq!(f(F::NEG_ZERO, -10), F::NEG_ZERO); + assert_biteq!(f(F::ZERO, 10), F::ZERO); + assert_biteq!(f(F::ZERO, 0), F::ZERO); + assert_biteq!(f(F::ZERO, -10), F::ZERO); + + // `scalbn(x, 0)` returns `x`. + assert_biteq!(f(F::MIN, 0), F::MIN); + assert_biteq!(f(F::MAX, 0), F::MAX); + assert_biteq!(f(F::INFINITY, 0), F::INFINITY); + assert_biteq!(f(F::NEG_INFINITY, 0), F::NEG_INFINITY); + assert_biteq!(f(F::ZERO, 0), F::ZERO); + assert_biteq!(f(F::NEG_ZERO, 0), F::NEG_ZERO); + + // `scalbn(±∞, n)` returns `±∞`. 
+ assert_biteq!(f(F::INFINITY, 10), F::INFINITY); + assert_biteq!(f(F::INFINITY, -10), F::INFINITY); + assert_biteq!(f(F::NEG_INFINITY, 10), F::NEG_INFINITY); + assert_biteq!(f(F::NEG_INFINITY, -10), F::NEG_INFINITY); + + // NaN should remain NaNs. + assert!(f(F::NAN, 10).is_nan()); + assert!(f(F::NAN, 0).is_nan()); + assert!(f(F::NAN, -10).is_nan()); + assert!(f(-F::NAN, 10).is_nan()); + assert!(f(-F::NAN, 0).is_nan()); + assert!(f(-F::NAN, -10).is_nan()); + } + + #[test] + #[cfg(f16_enabled)] + fn spec_test_f16() { + spec_test::(scalbnf16); + } + + #[test] + fn spec_test_f32() { + spec_test::(scalbnf); + } + + #[test] + fn spec_test_f64() { + spec_test::(scalbn); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_test_f128() { + spec_test::(scalbnf128); + } +} diff --git a/libm/src/math/scalbnf.rs b/libm/src/math/scalbnf.rs deleted file mode 100644 index 57e7ba76f..000000000 --- a/libm/src/math/scalbnf.rs +++ /dev/null @@ -1,4 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn scalbnf(x: f32, n: i32) -> f32 { - super::generic::scalbn(x, n) -} diff --git a/libm/src/math/scalbnf128.rs b/libm/src/math/scalbnf128.rs deleted file mode 100644 index c1d2b4855..000000000 --- a/libm/src/math/scalbnf128.rs +++ /dev/null @@ -1,4 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn scalbnf128(x: f128, n: i32) -> f128 { - super::generic::scalbn(x, n) -} diff --git a/libm/src/math/scalbnf16.rs b/libm/src/math/scalbnf16.rs deleted file mode 100644 index 2209e1a17..000000000 --- a/libm/src/math/scalbnf16.rs +++ /dev/null @@ -1,4 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn scalbnf16(x: f16, n: i32) -> f16 { - super::generic::scalbn(x, n) -} diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 2bfc42bcf..76bc240cf 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -1,3 +1,32 @@ +/// The square root of `x` (f16). 
+#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf16(x: f16) -> f16 { + select_implementation! { + name: sqrtf16, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + + return super::generic::sqrt(x); +} + +/// The square root of `x` (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf(x: f32) -> f32 { + select_implementation! { + name: sqrtf, + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + target_feature = "sse2" + ), + args: x, + } + + super::generic::sqrt(x) +} + /// The square root of `x` (f64). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrt(x: f64) -> f64 { @@ -13,3 +42,10 @@ pub fn sqrt(x: f64) -> f64 { super::generic::sqrt(x) } + +/// The square root of `x` (f128). +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf128(x: f128) -> f128 { + return super::generic::sqrt(x); +} diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs deleted file mode 100644 index c28a705e3..000000000 --- a/libm/src/math/sqrtf.rs +++ /dev/null @@ -1,15 +0,0 @@ -/// The square root of `x` (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn sqrtf(x: f32) -> f32 { - select_implementation! { - name: sqrtf, - use_arch: any( - all(target_arch = "aarch64", target_feature = "neon"), - all(target_arch = "wasm32", intrinsics_enabled), - target_feature = "sse2" - ), - args: x, - } - - super::generic::sqrt(x) -} diff --git a/libm/src/math/sqrtf128.rs b/libm/src/math/sqrtf128.rs deleted file mode 100644 index eaef6ae0c..000000000 --- a/libm/src/math/sqrtf128.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// The square root of `x` (f128). 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn sqrtf128(x: f128) -> f128 { - return super::generic::sqrt(x); -} diff --git a/libm/src/math/sqrtf16.rs b/libm/src/math/sqrtf16.rs deleted file mode 100644 index 7bedb7f8b..000000000 --- a/libm/src/math/sqrtf16.rs +++ /dev/null @@ -1,11 +0,0 @@ -/// The square root of `x` (f16). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn sqrtf16(x: f16) -> f16 { - select_implementation! { - name: sqrtf16, - use_arch: all(target_arch = "aarch64", target_feature = "fp16"), - args: x, - } - - return super::generic::sqrt(x); -} diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs index 2cc8aaa7e..fa50d55e1 100644 --- a/libm/src/math/trunc.rs +++ b/libm/src/math/trunc.rs @@ -1,3 +1,26 @@ +/// Rounds the number toward 0 to the closest integral value (f16). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf16(x: f16) -> f16 { + super::generic::trunc(x) +} + +/// Rounds the number toward 0 to the closest integral value (f32). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf(x: f32) -> f32 { + select_implementation! { + name: truncf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::trunc(x) +} + /// Rounds the number toward 0 to the closest integral value (f64). /// /// This effectively removes the decimal part of the number, leaving the integral part. @@ -11,3 +34,20 @@ pub fn trunc(x: f64) -> f64 { super::generic::trunc(x) } + +/// Rounds the number toward 0 to the closest integral value (f128). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. 
+#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf128(x: f128) -> f128 { + super::generic::trunc(x) +} + +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::truncf(1.1), 1.0); + } +} diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs deleted file mode 100644 index 14533a267..000000000 --- a/libm/src/math/truncf.rs +++ /dev/null @@ -1,23 +0,0 @@ -/// Rounds the number toward 0 to the closest integral value (f32). -/// -/// This effectively removes the decimal part of the number, leaving the integral part. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn truncf(x: f32) -> f32 { - select_implementation! { - name: truncf, - use_arch: all(target_arch = "wasm32", intrinsics_enabled), - args: x, - } - - super::generic::trunc(x) -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - #[test] - fn sanity_check() { - assert_eq!(super::truncf(1.1), 1.0); - } -} diff --git a/libm/src/math/truncf128.rs b/libm/src/math/truncf128.rs deleted file mode 100644 index 9dccc0d0e..000000000 --- a/libm/src/math/truncf128.rs +++ /dev/null @@ -1,7 +0,0 @@ -/// Rounds the number toward 0 to the closest integral value (f128). -/// -/// This effectively removes the decimal part of the number, leaving the integral part. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn truncf128(x: f128) -> f128 { - super::generic::trunc(x) -} diff --git a/libm/src/math/truncf16.rs b/libm/src/math/truncf16.rs deleted file mode 100644 index d7c3d225c..000000000 --- a/libm/src/math/truncf16.rs +++ /dev/null @@ -1,7 +0,0 @@ -/// Rounds the number toward 0 to the closest integral value (f16). -/// -/// This effectively removes the decimal part of the number, leaving the integral part. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn truncf16(x: f16) -> f16 { - super::generic::trunc(x) -} From 0f01f5e9a4d4f0db83ef1a905e1e72b2a40455f9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Apr 2025 03:22:13 +0000 Subject: [PATCH 1269/1459] ci: Cancel jobs when a new push happens Jobs should just cancel automatically, it isn't ideal that extensive jobs can continue running for multiple hours after code has been updated. Use a solution from [1] to do this. [1]: https://stackoverflow.com/a/72408109/5380651 --- libm/.github/workflows/main.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index e86f936f7..93c56c9d4 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -5,6 +5,11 @@ on: - master pull_request: +concurrency: + # Make sure that new pushes cancel running jobs + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: CARGO_TERM_COLOR: always RUSTDOCFLAGS: -Dwarnings From 00e1768bae13aaba31f7753cf775ca703895d6f7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Apr 2025 01:58:05 +0000 Subject: [PATCH 1270/1459] ci: Allow skipping extensive tests with `ci: skip-extensive` Sometimes we do refactoring that moves things around and triggers an extensive test, even though the implementation didn't change. There isn't any need to run full extensive CI in these cases, so add a way to skip it from the PR message. 
--- libm/.github/workflows/main.yaml | 15 ++++-- libm/ci/ci-util.py | 88 +++++++++++++++++++++++--------- 2 files changed, 73 insertions(+), 30 deletions(-) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index 93c56c9d4..2b2891ab2 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -239,6 +239,9 @@ jobs: name: Calculate job matrix runs-on: ubuntu-24.04 timeout-minutes: 10 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} outputs: matrix: ${{ steps.script.outputs.matrix }} steps: @@ -267,7 +270,7 @@ jobs: # this is not currently possible https://github.com/actions/runner/issues/1985. include: ${{ fromJSON(needs.calculate_extensive_matrix.outputs.matrix).matrix }} env: - CHANGED: ${{ matrix.changed }} + TO_TEST: ${{ matrix.to_test }} steps: - uses: actions/checkout@v4 with: @@ -279,16 +282,18 @@ jobs: - uses: Swatinem/rust-cache@v2 - name: Run extensive tests run: | - echo "Changed: '$CHANGED'" - if [ -z "$CHANGED" ]; then + echo "Tests to run: '$TO_TEST'" + if [ -z "$TO_TEST" ]; then echo "No tests to run, exiting." 
exit fi + set -x + # Run the non-extensive tests first to catch any easy failures - cargo t --profile release-checked -- "$CHANGED" + cargo t --profile release-checked -- "$TO_TEST" - LIBM_EXTENSIVE_TESTS="$CHANGED" cargo t \ + LIBM_EXTENSIVE_TESTS="$TO_TEST" cargo t \ --features build-mpfr,unstable,force-soft-floats \ --profile release-checked \ -- extensive diff --git a/libm/ci/ci-util.py b/libm/ci/ci-util.py index 7464fd425..8b07dde31 100755 --- a/libm/ci/ci-util.py +++ b/libm/ci/ci-util.py @@ -6,6 +6,7 @@ """ import json +import os import subprocess as sp import sys from dataclasses import dataclass @@ -13,7 +14,7 @@ from inspect import cleandoc from os import getenv from pathlib import Path -from typing import TypedDict +from typing import TypedDict, Self USAGE = cleandoc( """ @@ -51,6 +52,8 @@ ARTIFACT_GLOB = "baseline-icount*" # Place this in a PR body to skip regression checks (must be at the start of a line). REGRESSION_DIRECTIVE = "ci: allow-regressions" +# Place this in a PR body to skip extensive tests +SKIP_EXTENSIVE_DIRECTIVE = "ci: skip-extensive" # Don't run exhaustive tests if these files change, even if they contaiin a function # definition. 
@@ -68,6 +71,39 @@ def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) +@dataclass +class PrInfo: + """GitHub response for PR query""" + + body: str + commits: list[str] + created_at: str + number: int + + @classmethod + def load(cls, pr_number: int | str) -> Self: + """For a given PR number, query the body and commit list""" + pr_info = sp.check_output( + [ + "gh", + "pr", + "view", + str(pr_number), + "--json=number,commits,body,createdAt", + # Flatten the commit list to only hashes, change a key to snake naming + "--jq=.commits |= map(.oid) | .created_at = .createdAt | del(.createdAt)", + ], + text=True, + ) + eprint("PR info:", json.dumps(pr_info, indent=4)) + return cls(**json.loads(pr_info)) + + def contains_directive(self, directive: str) -> bool: + """Return true if the provided directive is on a line in the PR body""" + lines = self.body.splitlines() + return any(line.startswith(directive) for line in lines) + + class FunctionDef(TypedDict): """Type for an entry in `function-definitions.json`""" @@ -149,7 +185,7 @@ def changed_routines(self) -> dict[str, list[str]]: eprint(f"changed files for {name}: {changed}") routines.add(name) - ret = {} + ret: dict[str, list[str]] = {} for r in sorted(routines): ret.setdefault(self.defs[r]["type"], []).append(r) @@ -159,13 +195,27 @@ def make_workflow_output(self) -> str: """Create a JSON object a list items for each type's changed files, if any did change, and the routines that were affected by the change. 
""" + + pr_number = os.environ.get("PR_NUMBER") + skip_tests = False + + if pr_number is not None: + pr = PrInfo.load(pr_number) + skip_tests = pr.contains_directive(SKIP_EXTENSIVE_DIRECTIVE) + + if skip_tests: + eprint("Skipping all extensive tests") + changed = self.changed_routines() ret = [] for ty in TYPES: ty_changed = changed.get(ty, []) + changed_str = ",".join(ty_changed) + item = { "ty": ty, - "changed": ",".join(ty_changed), + "changed": changed_str, + "to_test": "" if skip_tests else changed_str, } ret.append(item) output = json.dumps({"matrix": ret}, separators=(",", ":")) @@ -266,13 +316,13 @@ def check_iai_regressions(args: list[str]): found. """ - iai_home = "iai-home" - pr_number = False + iai_home_str = "iai-home" + pr_number = None while len(args) > 0: match args: case ["--home", home, *rest]: - iai_home = home + iai_home_str = home args = rest case ["--allow-pr-override", pr_num, *rest]: pr_number = pr_num @@ -281,10 +331,10 @@ def check_iai_regressions(args: list[str]): eprint(USAGE) exit(1) - iai_home = Path(iai_home) + iai_home = Path(iai_home_str) found_summaries = False - regressions = [] + regressions: list[dict] = [] for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True): found_summaries = True with open(iai_home / summary_path, "r") as f: @@ -292,7 +342,9 @@ def check_iai_regressions(args: list[str]): summary_regs = [] run = summary["callgrind_summary"]["callgrind_run"] - name_entry = {"name": f"{summary["function_name"]}.{summary["id"]}"} + fname = summary["function_name"] + id = summary["id"] + name_entry = {"name": f"{fname}.{id}"} for segment in run["segments"]: summary_regs.extend(segment["regressions"]) @@ -312,22 +364,8 @@ def check_iai_regressions(args: list[str]): eprint("Found regressions:", json.dumps(regressions, indent=4)) if pr_number is not None: - pr_info = sp.check_output( - [ - "gh", - "pr", - "view", - str(pr_number), - "--json=number,commits,body,createdAt", - "--jq=.commits |= map(.oid)", - 
], - text=True, - ) - pr = json.loads(pr_info) - eprint("PR info:", json.dumps(pr, indent=4)) - - lines = pr["body"].splitlines() - if any(line.startswith(REGRESSION_DIRECTIVE) for line in lines): + pr = PrInfo.load(pr_number) + if pr.contains_directive(REGRESSION_DIRECTIVE): eprint("PR allows regressions, returning") return From 8d3429fb00452331cb82b5adde6abd1fe11514c2 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Apr 2025 02:20:21 +0000 Subject: [PATCH 1271/1459] ci: Require `ci: allow-many-extensive` if a threshold is exceeded Error out when too many extensive tests would be run unless `ci: allow-many-extensive` is in the PR description. This allows us to set a much higher CI timeout with less risk that a 4+ hour job gets started by accident. --- libm/ci/ci-util.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/libm/ci/ci-util.py b/libm/ci/ci-util.py index 8b07dde31..aae791d0f 100755 --- a/libm/ci/ci-util.py +++ b/libm/ci/ci-util.py @@ -54,6 +54,11 @@ REGRESSION_DIRECTIVE = "ci: allow-regressions" # Place this in a PR body to skip extensive tests SKIP_EXTENSIVE_DIRECTIVE = "ci: skip-extensive" +# Place this in a PR body to allow running a large number of extensive tests. If not +# set, this script will error out if a threshold is exceeded in order to avoid +# accidentally spending huge amounts of CI time. +ALLOW_MANY_EXTENSIVE_DIRECTIVE = "ci: allow-many-extensive" +MANY_EXTENSIVE_THRESHOLD = 20 # Don't run exhaustive tests if these files change, even if they contaiin a function # definition. 
@@ -198,28 +203,45 @@ def make_workflow_output(self) -> str: pr_number = os.environ.get("PR_NUMBER") skip_tests = False + error_on_many_tests = False if pr_number is not None: pr = PrInfo.load(pr_number) skip_tests = pr.contains_directive(SKIP_EXTENSIVE_DIRECTIVE) + error_on_many_tests = not pr.contains_directive( + ALLOW_MANY_EXTENSIVE_DIRECTIVE + ) if skip_tests: eprint("Skipping all extensive tests") changed = self.changed_routines() ret = [] + total_to_test = 0 + for ty in TYPES: ty_changed = changed.get(ty, []) - changed_str = ",".join(ty_changed) + ty_to_test = [] if skip_tests else ty_changed + total_to_test += len(ty_to_test) item = { "ty": ty, - "changed": changed_str, - "to_test": "" if skip_tests else changed_str, + "changed": ",".join(ty_changed), + "to_test": ",".join(ty_to_test), } + ret.append(item) output = json.dumps({"matrix": ret}, separators=(",", ":")) eprint(f"output: {output}") + eprint(f"total extensive tests: {total_to_test}") + + if error_on_many_tests and total_to_test > MANY_EXTENSIVE_THRESHOLD: + eprint( + f"More than {MANY_EXTENSIVE_THRESHOLD} tests would be run; add" + f" `{ALLOW_MANY_EXTENSIVE_DIRECTIVE}` to the PR body if this is intentional" + ) + exit(1) + return output From d966c8d50d0e973d302c00c60bd2b5def3c5dfff Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Apr 2025 01:24:38 +0000 Subject: [PATCH 1272/1459] ci: Increase the timeout for extensive tests The reorganization PR has caused this to fail once before because every file shows up as changed. Increase the timeout so this doesn't happen. We now cancel the job if too many extensive tests are run unless `ci: allow-many-extensive` is in the PR description, so this helps prevent the limit being hit by accident. 
--- libm/.github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index 2b2891ab2..c925e63aa 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -261,7 +261,7 @@ jobs: - clippy - calculate_extensive_matrix runs-on: ubuntu-24.04 - timeout-minutes: 180 + timeout-minutes: 240 # 4 hours strategy: matrix: # Use the output from `calculate_extensive_matrix` to calculate the matrix From 9d0ee21075ffcb87cbcb369b0b023daedbfebc53 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Apr 2025 06:15:10 +0000 Subject: [PATCH 1273/1459] Ensure configure.rs changes trigger rebuilds --- libm/crates/compiler-builtins-smoke-test/build.rs | 1 + libm/crates/libm-test/build.rs | 1 + libm/crates/util/build.rs | 1 + 3 files changed, 3 insertions(+) diff --git a/libm/crates/compiler-builtins-smoke-test/build.rs b/libm/crates/compiler-builtins-smoke-test/build.rs index 401b7e1eb..4ee5ab585 100644 --- a/libm/crates/compiler-builtins-smoke-test/build.rs +++ b/libm/crates/compiler-builtins-smoke-test/build.rs @@ -2,6 +2,7 @@ mod configure; fn main() { + println!("cargo:rerun-if-changed=../../configure.rs"); let cfg = configure::Config::from_env(); configure::emit_libm_config(&cfg); } diff --git a/libm/crates/libm-test/build.rs b/libm/crates/libm-test/build.rs index 134fb11ce..d2d0df9cb 100644 --- a/libm/crates/libm-test/build.rs +++ b/libm/crates/libm-test/build.rs @@ -3,6 +3,7 @@ mod configure; use configure::Config; fn main() { + println!("cargo:rerun-if-changed=../../configure.rs"); let cfg = Config::from_env(); configure::emit_test_config(&cfg); } diff --git a/libm/crates/util/build.rs b/libm/crates/util/build.rs index 0745ef3dd..b6cceb5f1 100644 --- a/libm/crates/util/build.rs +++ b/libm/crates/util/build.rs @@ -4,6 +4,7 @@ mod configure; fn main() { + println!("cargo:rerun-if-changed=../../configure.rs"); let cfg = configure::Config::from_env(); 
configure::emit_libm_config(&cfg); } From 1a0c1726c08b71f740f260391f89d25d5c655121 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Apr 2025 06:43:19 +0000 Subject: [PATCH 1274/1459] ci: Account for `PR_NUMBER` being set to an empty string This is the case for CI after merge that is no longer associated with a pull request. --- libm/ci/ci-util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/ci/ci-util.py b/libm/ci/ci-util.py index aae791d0f..7468fd690 100755 --- a/libm/ci/ci-util.py +++ b/libm/ci/ci-util.py @@ -205,7 +205,7 @@ def make_workflow_output(self) -> str: skip_tests = False error_on_many_tests = False - if pr_number is not None: + if pr_number is not None and len(pr_number) > 0: pr = PrInfo.load(pr_number) skip_tests = pr.contains_directive(SKIP_EXTENSIVE_DIRECTIVE) error_on_many_tests = not pr.contains_directive( From eda2148dbda406dc398013069e662631d7cb4148 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Apr 2025 07:05:13 +0000 Subject: [PATCH 1275/1459] Ensure all public functions are marked `no_panic` Fixes: https://github.com/rust-lang/libm/issues/201 --- libm/src/math/frexpf.rs | 1 + libm/src/math/j0.rs | 2 ++ libm/src/math/j0f.rs | 2 ++ libm/src/math/j1.rs | 2 ++ libm/src/math/j1f.rs | 2 ++ libm/src/math/jn.rs | 2 ++ libm/src/math/jnf.rs | 2 ++ libm/src/math/modf.rs | 1 + libm/src/math/modff.rs | 1 + 9 files changed, 15 insertions(+) diff --git a/libm/src/math/frexpf.rs b/libm/src/math/frexpf.rs index 2919c0ab0..0ec91c2d3 100644 --- a/libm/src/math/frexpf.rs +++ b/libm/src/math/frexpf.rs @@ -1,3 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn frexpf(x: f32) -> (f32, i32) { let mut y = x.to_bits(); let ee: i32 = ((y >> 23) & 0xff) as i32; diff --git a/libm/src/math/j0.rs b/libm/src/math/j0.rs index 5e5e839f8..99d656f0d 100644 --- a/libm/src/math/j0.rs +++ b/libm/src/math/j0.rs @@ -110,6 +110,7 @@ const S03: f64 = 5.13546550207318111446e-07; /* 0x3EA13B54, 0xCE84D5A9 */ const 
S04: f64 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */ /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn j0(mut x: f64) -> f64 { let z: f64; let r: f64; @@ -164,6 +165,7 @@ const V03: f64 = 2.59150851840457805467e-07; /* 0x3E91642D, 0x7FF202FD */ const V04: f64 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */ /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn y0(x: f64) -> f64 { let z: f64; let u: f64; diff --git a/libm/src/math/j0f.rs b/libm/src/math/j0f.rs index afb6ee9ba..25e5b325c 100644 --- a/libm/src/math/j0f.rs +++ b/libm/src/math/j0f.rs @@ -63,6 +63,7 @@ const S03: f32 = 5.1354652442e-07; /* 0x3509daa6 */ const S04: f32 = 1.1661400734e-09; /* 0x30a045e8 */ /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn j0f(mut x: f32) -> f32 { let z: f32; let r: f32; @@ -109,6 +110,7 @@ const V03: f32 = 2.5915085189e-07; /* 0x348b216c */ const V04: f32 = 4.4111031494e-10; /* 0x2ff280c2 */ /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn y0f(x: f32) -> f32 { let z: f32; let u: f32; diff --git a/libm/src/math/j1.rs b/libm/src/math/j1.rs index 578ae59d3..9b604d9e4 100644 --- a/libm/src/math/j1.rs +++ b/libm/src/math/j1.rs @@ -114,6 +114,7 @@ const S04: f64 = 5.04636257076217042715e-09; /* 0x3E35AC88, 0xC97DFF2C */ const S05: f64 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */ /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64). 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn j1(x: f64) -> f64 { let mut z: f64; let r: f64; @@ -160,6 +161,7 @@ const V0: [f64; 5] = [ ]; /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn y1(x: f64) -> f64 { let z: f64; let u: f64; diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs index 02a3efd24..a47472401 100644 --- a/libm/src/math/j1f.rs +++ b/libm/src/math/j1f.rs @@ -64,6 +64,7 @@ const S04: f32 = 5.0463624390e-09; /* 0x31ad6446 */ const S05: f32 = 1.2354227016e-11; /* 0x2d59567e */ /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn j1f(x: f32) -> f32 { let mut z: f32; let r: f32; @@ -109,6 +110,7 @@ const V0: [f32; 5] = [ ]; /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn y1f(x: f32) -> f32 { let z: f32; let u: f32; diff --git a/libm/src/math/jn.rs b/libm/src/math/jn.rs index d228781d1..31f8d9c53 100644 --- a/libm/src/math/jn.rs +++ b/libm/src/math/jn.rs @@ -39,6 +39,7 @@ use super::{cos, fabs, get_high_word, get_low_word, j0, j1, log, sin, sqrt, y0, const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn jn(n: i32, mut x: f64) -> f64 { let mut ix: u32; let lx: u32; @@ -248,6 +249,7 @@ pub fn jn(n: i32, mut x: f64) -> f64 { } /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64). 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn yn(n: i32, x: f64) -> f64 { let mut ix: u32; let lx: u32; diff --git a/libm/src/math/jnf.rs b/libm/src/math/jnf.rs index 754f8f33b..52cf7d8a8 100644 --- a/libm/src/math/jnf.rs +++ b/libm/src/math/jnf.rs @@ -16,6 +16,7 @@ use super::{fabsf, j0f, j1f, logf, y0f, y1f}; /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn jnf(n: i32, mut x: f32) -> f32 { let mut ix: u32; let mut nm1: i32; @@ -191,6 +192,7 @@ pub fn jnf(n: i32, mut x: f32) -> f32 { } /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ynf(n: i32, x: f32) -> f32 { let mut ix: u32; let mut ib: u32; diff --git a/libm/src/math/modf.rs b/libm/src/math/modf.rs index e29e80ccf..6541862cd 100644 --- a/libm/src/math/modf.rs +++ b/libm/src/math/modf.rs @@ -1,3 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn modf(x: f64) -> (f64, f64) { let rv2: f64; let mut u = x.to_bits(); diff --git a/libm/src/math/modff.rs b/libm/src/math/modff.rs index fac60abaa..90c6bca7d 100644 --- a/libm/src/math/modff.rs +++ b/libm/src/math/modff.rs @@ -1,3 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn modff(x: f32) -> (f32, f32) { let rv2: f32; let mut u: u32 = x.to_bits(); From c3c363ffc34ce0be317db166a3d9c7aefc31b0ac Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Apr 2025 07:15:26 +0000 Subject: [PATCH 1276/1459] Combine the source files for `fmod` Since `fmod` is generic, there isn't any need to have the small wrappers in separate files. Most operations were done in [1] but `fmod` was omitted until now.
[1]: https://github.com/rust-lang/libm/pull/537 --- libm/etc/function-definitions.json | 6 +++--- libm/src/math/fmod.rs | 20 ++++++++++++++++++++ libm/src/math/fmodf.rs | 5 ----- libm/src/math/fmodf128.rs | 5 ----- libm/src/math/fmodf16.rs | 5 ----- libm/src/math/frexp.rs | 1 + libm/src/math/mod.rs | 16 +++------------- 7 files changed, 27 insertions(+), 31 deletions(-) delete mode 100644 libm/src/math/fmodf.rs delete mode 100644 libm/src/math/fmodf128.rs delete mode 100644 libm/src/math/fmodf16.rs diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index 801e74b22..ead1f807f 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -537,21 +537,21 @@ }, "fmodf": { "sources": [ - "src/math/fmodf.rs", + "src/math/fmod.rs", "src/math/generic/fmod.rs" ], "type": "f32" }, "fmodf128": { "sources": [ - "src/math/fmodf128.rs", + "src/math/fmod.rs", "src/math/generic/fmod.rs" ], "type": "f128" }, "fmodf16": { "sources": [ - "src/math/fmodf16.rs", + "src/math/fmod.rs", "src/math/generic/fmod.rs" ], "type": "f16" diff --git a/libm/src/math/fmod.rs b/libm/src/math/fmod.rs index d9786b53d..c4752b925 100644 --- a/libm/src/math/fmod.rs +++ b/libm/src/math/fmod.rs @@ -1,5 +1,25 @@ +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf16(x: f16, y: f16) -> f16 { + super::generic::fmod(x, y) +} + +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf(x: f32, y: f32) -> f32 { + super::generic::fmod(x, y) +} + /// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmod(x: f64, y: f64) -> f64 { super::generic::fmod(x, y) } + +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf128(x: f128, y: f128) -> f128 { + super::generic::fmod(x, y) +} diff --git a/libm/src/math/fmodf.rs b/libm/src/math/fmodf.rs deleted file mode 100644 index 4e95696e2..000000000 --- a/libm/src/math/fmodf.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmodf(x: f32, y: f32) -> f32 { - super::generic::fmod(x, y) -} diff --git a/libm/src/math/fmodf128.rs b/libm/src/math/fmodf128.rs deleted file mode 100644 index ff0e0493e..000000000 --- a/libm/src/math/fmodf128.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmodf128(x: f128, y: f128) -> f128 { - super::generic::fmod(x, y) -} diff --git a/libm/src/math/fmodf16.rs b/libm/src/math/fmodf16.rs deleted file mode 100644 index 11972a7de..000000000 --- a/libm/src/math/fmodf16.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmodf16(x: f16, y: f16) -> f16 { - super::generic::fmod(x, y) -} diff --git a/libm/src/math/frexp.rs b/libm/src/math/frexp.rs index badad786a..de7a64fda 100644 --- a/libm/src/math/frexp.rs +++ b/libm/src/math/frexp.rs @@ -1,3 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn frexp(x: f64) -> (f64, i32) { let mut y = x.to_bits(); let ee = ((y >> 52) & 0x7ff) as i32; diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 023cf67a3..949c18b40 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -164,7 +164,6 @@ mod fmin_fmax; mod fminimum_fmaximum; mod fminimum_fmaximum_num; mod fmod; -mod fmodf; mod frexp; mod frexpf; mod hypot; @@ -260,8 +259,7 @@ pub use self::fma_wide::fmaf; pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf}; pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf}; pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf}; -pub use self::fmod::fmod; -pub use self::fmodf::fmodf; +pub use self::fmod::{fmod, fmodf}; pub use self::frexp::frexp; pub use self::frexpf::frexpf; pub use self::hypot::hypot; @@ -318,10 +316,6 @@ pub use self::trunc::{trunc, truncf}; cfg_if! { if #[cfg(f16_enabled)] { - // verify-sorted-start - mod fmodf16; - // verify-sorted-end - // verify-sorted-start pub use self::ceil::ceilf16; pub use self::copysign::copysignf16; @@ -331,7 +325,7 @@ cfg_if! { pub use self::fmin_fmax::{fmaxf16, fminf16}; pub use self::fminimum_fmaximum::{fmaximumf16, fminimumf16}; pub use self::fminimum_fmaximum_num::{fmaximum_numf16, fminimum_numf16}; - pub use self::fmodf16::fmodf16; + pub use self::fmod::fmodf16; pub use self::ldexp::ldexpf16; pub use self::rint::rintf16; pub use self::round::roundf16; @@ -348,10 +342,6 @@ cfg_if! { cfg_if! 
{ if #[cfg(f128_enabled)] { - // verify-sorted-start - mod fmodf128; - // verify-sorted-end - // verify-sorted-start pub use self::ceil::ceilf128; pub use self::copysign::copysignf128; @@ -362,7 +352,7 @@ cfg_if! { pub use self::fmin_fmax::{fmaxf128, fminf128}; pub use self::fminimum_fmaximum::{fmaximumf128, fminimumf128}; pub use self::fminimum_fmaximum_num::{fmaximum_numf128, fminimum_numf128}; - pub use self::fmodf128::fmodf128; + pub use self::fmod::fmodf128; pub use self::ldexp::ldexpf128; pub use self::rint::rintf128; pub use self::round::roundf128; From 161036abde295edb0cc6cf51e15bd7879b6f0bc6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Apr 2025 19:19:24 +0000 Subject: [PATCH 1277/1459] Mark generic functions `#[inline]` Benchmarks for [1] seemed to indicate that repository organization for some reason had an effect on performance, even though the exact same rustc commands were running (though some with a different order). After investigating more, it appears that dependencies may have an effect on inlining thresholds for generic functions. It is surprising that this happens; we more or less expect that public functions will be standalone but everything they call will be inlined. To help ensure this, mark all generic functions `#[inline]` if they should be merged into the public function. Zulip discussion at [2].
[1]: https://github.com/rust-lang/libm/pull/533 [2]: https://rust-lang.zulipchat.com/#narrow/channel/182449-t-compiler.2Fhelp/topic/Dependencies.20affecting.20codegen/with/513079387 --- libm/src/math/fma.rs | 1 + libm/src/math/fma_wide.rs | 1 + libm/src/math/generic/ceil.rs | 2 ++ libm/src/math/generic/copysign.rs | 1 + libm/src/math/generic/fabs.rs | 1 + libm/src/math/generic/fdim.rs | 1 + libm/src/math/generic/floor.rs | 2 ++ libm/src/math/generic/fmax.rs | 2 +- libm/src/math/generic/fmaximum.rs | 1 + libm/src/math/generic/fmaximum_num.rs | 1 + libm/src/math/generic/fmin.rs | 1 + libm/src/math/generic/fminimum.rs | 1 + libm/src/math/generic/fminimum_num.rs | 1 + libm/src/math/generic/fmod.rs | 2 +- libm/src/math/generic/mod.rs | 3 +++ libm/src/math/generic/rint.rs | 1 + libm/src/math/generic/round.rs | 1 + libm/src/math/generic/scalbn.rs | 1 + libm/src/math/generic/sqrt.rs | 2 ++ libm/src/math/generic/trunc.rs | 2 ++ libm/src/math/roundeven.rs | 1 + 21 files changed, 27 insertions(+), 2 deletions(-) diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index 789b0836a..e0b3347ac 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -29,6 +29,7 @@ pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 { /// Fused multiply-add that works when there is not a larger float size available. Computes /// `(x * y) + z`. +#[inline] pub fn fma_round(x: F, y: F, z: F, _round: Round) -> FpResult where F: Float, diff --git a/libm/src/math/fma_wide.rs b/libm/src/math/fma_wide.rs index 8e908a14f..08b78b022 100644 --- a/libm/src/math/fma_wide.rs +++ b/libm/src/math/fma_wide.rs @@ -28,6 +28,7 @@ pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { /// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`, /// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding. 
+#[inline] pub fn fma_wide_round(x: F, y: F, z: F, round: Round) -> FpResult where F: Float + HFloat, diff --git a/libm/src/math/generic/ceil.rs b/libm/src/math/generic/ceil.rs index bf7e1d8e2..5c5bb4763 100644 --- a/libm/src/math/generic/ceil.rs +++ b/libm/src/math/generic/ceil.rs @@ -10,10 +10,12 @@ use super::super::support::{FpResult, Status}; use super::super::{Float, Int, IntTy, MinInt}; +#[inline] pub fn ceil(x: F) -> F { ceil_status(x).val } +#[inline] pub fn ceil_status(x: F) -> FpResult { let zero = IntTy::::ZERO; diff --git a/libm/src/math/generic/copysign.rs b/libm/src/math/generic/copysign.rs index 04864a359..a61af22f0 100644 --- a/libm/src/math/generic/copysign.rs +++ b/libm/src/math/generic/copysign.rs @@ -1,6 +1,7 @@ use super::super::Float; /// Copy the sign of `y` to `x`. +#[inline] pub fn copysign(x: F, y: F) -> F { let mut ux = x.to_bits(); let uy = y.to_bits(); diff --git a/libm/src/math/generic/fabs.rs b/libm/src/math/generic/fabs.rs index 75b473107..0fa0edf9b 100644 --- a/libm/src/math/generic/fabs.rs +++ b/libm/src/math/generic/fabs.rs @@ -1,6 +1,7 @@ use super::super::Float; /// Absolute value. 
+#[inline] pub fn fabs(x: F) -> F { let abs_mask = !F::SIGN_MASK; F::from_bits(x.to_bits() & abs_mask) diff --git a/libm/src/math/generic/fdim.rs b/libm/src/math/generic/fdim.rs index bf971cd7d..a63007b19 100644 --- a/libm/src/math/generic/fdim.rs +++ b/libm/src/math/generic/fdim.rs @@ -1,5 +1,6 @@ use super::super::Float; +#[inline] pub fn fdim(x: F, y: F) -> F { if x <= y { F::ZERO } else { x - y } } diff --git a/libm/src/math/generic/floor.rs b/libm/src/math/generic/floor.rs index 779955164..243804625 100644 --- a/libm/src/math/generic/floor.rs +++ b/libm/src/math/generic/floor.rs @@ -10,10 +10,12 @@ use super::super::support::{FpResult, Status}; use super::super::{Float, Int, IntTy, MinInt}; +#[inline] pub fn floor(x: F) -> F { floor_status(x).val } +#[inline] pub fn floor_status(x: F) -> FpResult { let zero = IntTy::::ZERO; diff --git a/libm/src/math/generic/fmax.rs b/libm/src/math/generic/fmax.rs index 29a031100..bf3f847e8 100644 --- a/libm/src/math/generic/fmax.rs +++ b/libm/src/math/generic/fmax.rs @@ -16,7 +16,7 @@ use super::super::Float; -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[inline] pub fn fmax(x: F, y: F) -> F { let res = if x.is_nan() || x < y { y } else { x }; // Canonicalize diff --git a/libm/src/math/generic/fmaximum.rs b/libm/src/math/generic/fmaximum.rs index 9e8d1739f..387055af2 100644 --- a/libm/src/math/generic/fmaximum.rs +++ b/libm/src/math/generic/fmaximum.rs @@ -11,6 +11,7 @@ use super::super::Float; +#[inline] pub fn fmaximum(x: F, y: F) -> F { let res = if x.is_nan() { x diff --git a/libm/src/math/generic/fmaximum_num.rs b/libm/src/math/generic/fmaximum_num.rs index 756ef5d9f..f7efdde80 100644 --- a/libm/src/math/generic/fmaximum_num.rs +++ b/libm/src/math/generic/fmaximum_num.rs @@ -13,6 +13,7 @@ use super::super::Float; +#[inline] pub fn fmaximum_num(x: F, y: F) -> F { let res = if x.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) { diff --git a/libm/src/math/generic/fmin.rs 
b/libm/src/math/generic/fmin.rs index 69fbf85a1..cd3caeee4 100644 --- a/libm/src/math/generic/fmin.rs +++ b/libm/src/math/generic/fmin.rs @@ -16,6 +16,7 @@ use super::super::Float; +#[inline] pub fn fmin(x: F, y: F) -> F { let res = if y.is_nan() || x < y { x } else { y }; // Canonicalize diff --git a/libm/src/math/generic/fminimum.rs b/libm/src/math/generic/fminimum.rs index ee5493880..4ddb36455 100644 --- a/libm/src/math/generic/fminimum.rs +++ b/libm/src/math/generic/fminimum.rs @@ -11,6 +11,7 @@ use super::super::Float; +#[inline] pub fn fminimum(x: F, y: F) -> F { let res = if x.is_nan() { x diff --git a/libm/src/math/generic/fminimum_num.rs b/libm/src/math/generic/fminimum_num.rs index 966618328..441c204a9 100644 --- a/libm/src/math/generic/fminimum_num.rs +++ b/libm/src/math/generic/fminimum_num.rs @@ -13,6 +13,7 @@ use super::super::Float; +#[inline] pub fn fminimum_num(x: F, y: F) -> F { let res = if y.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) { diff --git a/libm/src/math/generic/fmod.rs b/libm/src/math/generic/fmod.rs index cd23350ea..6414bbd25 100644 --- a/libm/src/math/generic/fmod.rs +++ b/libm/src/math/generic/fmod.rs @@ -3,7 +3,7 @@ use super::super::{CastFrom, Float, Int, MinInt}; -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[inline] pub fn fmod(x: F, y: F) -> F { let zero = F::Int::ZERO; let one = F::Int::ONE; diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index 9be185f80..35846351a 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -1,3 +1,6 @@ +// Note: generic functions are marked `#[inline]` because, even though generic functions are +// typically inlined, this does not seem to always be the case. 
+ mod ceil; mod copysign; mod fabs; diff --git a/libm/src/math/generic/rint.rs b/libm/src/math/generic/rint.rs index 45d2f3138..9cdeb1185 100644 --- a/libm/src/math/generic/rint.rs +++ b/libm/src/math/generic/rint.rs @@ -6,6 +6,7 @@ use super::super::support::{FpResult, Round}; /// IEEE 754-2019 `roundToIntegralExact`, which respects rounding mode and raises inexact if /// applicable. +#[inline] pub fn rint_round(x: F, _round: Round) -> FpResult { let toint = F::ONE / F::EPSILON; let e = x.ex(); diff --git a/libm/src/math/generic/round.rs b/libm/src/math/generic/round.rs index 8b5138188..01314ac70 100644 --- a/libm/src/math/generic/round.rs +++ b/libm/src/math/generic/round.rs @@ -1,6 +1,7 @@ use super::super::{Float, MinInt}; use super::{copysign, trunc}; +#[inline] pub fn round(x: F) -> F { let f0p5 = F::from_parts(false, F::EXP_BIAS - 1, F::Int::ZERO); // 0.5 let f0p25 = F::from_parts(false, F::EXP_BIAS - 2, F::Int::ZERO); // 0.25 diff --git a/libm/src/math/generic/scalbn.rs b/libm/src/math/generic/scalbn.rs index b2696e5cc..a45db1b4a 100644 --- a/libm/src/math/generic/scalbn.rs +++ b/libm/src/math/generic/scalbn.rs @@ -16,6 +16,7 @@ use super::super::{CastFrom, CastInto, Float, IntTy, MinInt}; /// > /// > If the calculation does not overflow or underflow, the returned value is exact and /// > independent of the current rounding direction mode. 
+#[inline] pub fn scalbn(mut x: F, mut n: i32) -> F where u32: CastInto, diff --git a/libm/src/math/generic/sqrt.rs b/libm/src/math/generic/sqrt.rs index 5918025bc..ec9ff22df 100644 --- a/libm/src/math/generic/sqrt.rs +++ b/libm/src/math/generic/sqrt.rs @@ -44,6 +44,7 @@ use super::super::support::{FpResult, IntTy, Round, Status, cold_path}; use super::super::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt}; +#[inline] pub fn sqrt(x: F) -> F where F: Float + SqrtHelper, @@ -57,6 +58,7 @@ where sqrt_round(x, Round::Nearest).val } +#[inline] pub fn sqrt_round(x: F, _round: Round) -> FpResult where F: Float + SqrtHelper, diff --git a/libm/src/math/generic/trunc.rs b/libm/src/math/generic/trunc.rs index 0fb3fa5ad..25414ecf4 100644 --- a/libm/src/math/generic/trunc.rs +++ b/libm/src/math/generic/trunc.rs @@ -4,10 +4,12 @@ use super::super::support::{FpResult, Status}; use super::super::{Float, Int, IntTy, MinInt}; +#[inline] pub fn trunc(x: F) -> F { trunc_status(x).val } +#[inline] pub fn trunc_status(x: F) -> FpResult { let mut xi: F::Int = x.to_bits(); let e: i32 = x.exp_unbiased(); diff --git a/libm/src/math/roundeven.rs b/libm/src/math/roundeven.rs index ec1738285..6e621d762 100644 --- a/libm/src/math/roundeven.rs +++ b/libm/src/math/roundeven.rs @@ -30,6 +30,7 @@ pub fn roundevenf128(x: f128) -> f128 { roundeven_impl(x) } +#[inline] pub fn roundeven_impl(x: F) -> F { super::generic::rint_round(x, Round::Nearest).val } From 54bac411c894c793007ded934584ed77148e7c38 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 9 Apr 2025 01:21:33 +0000 Subject: [PATCH 1278/1459] refactor: Move the `libm` crate to a subdirectory In preparation for switching to a virtual manifest, move the `libm` crate into a subdirectory and update paths to match. Updating `Cargo.toml` is done in the next commit so git tracks the moved file correctly. 
--- libm/.github/workflows/main.yaml | 2 +- libm/ci/ci-util.py | 6 +-- .../compiler-builtins-smoke-test/build.rs | 4 +- .../compiler-builtins-smoke-test/src/math.rs | 2 +- libm/crates/libm-test/Cargo.toml | 2 +- libm/crates/libm-test/build.rs | 4 +- libm/crates/musl-math-sys/Cargo.toml | 2 +- libm/crates/util/Cargo.toml | 2 +- libm/crates/util/build.rs | 4 +- libm/etc/update-api-list.py | 18 ++++----- libm/{ => libm}/Cargo.toml | 0 libm/{ => libm}/build.rs | 1 + libm/{ => libm}/configure.rs | 0 libm/{ => libm}/src/lib.rs | 0 libm/{ => libm}/src/libm_helper.rs | 0 libm/{ => libm}/src/math/acos.rs | 0 libm/{ => libm}/src/math/acosf.rs | 0 libm/{ => libm}/src/math/acosh.rs | 0 libm/{ => libm}/src/math/acoshf.rs | 0 libm/{ => libm}/src/math/arch/aarch64.rs | 0 libm/{ => libm}/src/math/arch/i586.rs | 0 libm/{ => libm}/src/math/arch/i686.rs | 0 libm/{ => libm}/src/math/arch/mod.rs | 0 libm/{ => libm}/src/math/arch/wasm32.rs | 0 libm/{ => libm}/src/math/asin.rs | 0 libm/{ => libm}/src/math/asinf.rs | 0 libm/{ => libm}/src/math/asinh.rs | 0 libm/{ => libm}/src/math/asinhf.rs | 0 libm/{ => libm}/src/math/atan.rs | 0 libm/{ => libm}/src/math/atan2.rs | 0 libm/{ => libm}/src/math/atan2f.rs | 0 libm/{ => libm}/src/math/atanf.rs | 0 libm/{ => libm}/src/math/atanh.rs | 0 libm/{ => libm}/src/math/atanhf.rs | 0 libm/{ => libm}/src/math/cbrt.rs | 0 libm/{ => libm}/src/math/cbrtf.rs | 0 libm/{ => libm}/src/math/ceil.rs | 0 libm/{ => libm}/src/math/copysign.rs | 0 libm/libm/src/math/copysignf.rs | 8 ++++ libm/libm/src/math/copysignf128.rs | 8 ++++ libm/libm/src/math/copysignf16.rs | 8 ++++ libm/{ => libm}/src/math/cos.rs | 0 libm/{ => libm}/src/math/cosf.rs | 0 libm/{ => libm}/src/math/cosh.rs | 0 libm/{ => libm}/src/math/coshf.rs | 0 libm/{ => libm}/src/math/erf.rs | 0 libm/{ => libm}/src/math/erff.rs | 0 libm/{ => libm}/src/math/exp.rs | 0 libm/{ => libm}/src/math/exp10.rs | 0 libm/{ => libm}/src/math/exp10f.rs | 0 libm/{ => libm}/src/math/exp2.rs | 0 libm/{ => 
libm}/src/math/exp2f.rs | 0 libm/{ => libm}/src/math/expf.rs | 0 libm/{ => libm}/src/math/expm1.rs | 0 libm/{ => libm}/src/math/expm1f.rs | 0 libm/{ => libm}/src/math/expo2.rs | 0 libm/{ => libm}/src/math/fabs.rs | 0 libm/libm/src/math/fabsf.rs | 39 +++++++++++++++++++ libm/libm/src/math/fabsf128.rs | 31 +++++++++++++++ libm/libm/src/math/fabsf16.rs | 31 +++++++++++++++ libm/{ => libm}/src/math/fdim.rs | 0 libm/libm/src/math/fdimf.rs | 12 ++++++ libm/libm/src/math/fdimf128.rs | 12 ++++++ libm/libm/src/math/fdimf16.rs | 12 ++++++ libm/{ => libm}/src/math/floor.rs | 0 libm/libm/src/math/floorf.rs | 13 +++++++ libm/libm/src/math/floorf128.rs | 7 ++++ libm/libm/src/math/floorf16.rs | 7 ++++ libm/{ => libm}/src/math/fma.rs | 0 libm/{ => libm}/src/math/fma_wide.rs | 0 libm/{ => libm}/src/math/fmin_fmax.rs | 0 libm/{ => libm}/src/math/fminimum_fmaximum.rs | 0 .../src/math/fminimum_fmaximum_num.rs | 0 libm/{ => libm}/src/math/fmod.rs | 0 libm/libm/src/math/fmodf.rs | 5 +++ libm/libm/src/math/fmodf128.rs | 5 +++ libm/libm/src/math/fmodf16.rs | 5 +++ libm/{ => libm}/src/math/frexp.rs | 0 libm/{ => libm}/src/math/frexpf.rs | 0 libm/{ => libm}/src/math/generic/ceil.rs | 0 libm/{ => libm}/src/math/generic/copysign.rs | 0 libm/{ => libm}/src/math/generic/fabs.rs | 0 libm/{ => libm}/src/math/generic/fdim.rs | 0 libm/{ => libm}/src/math/generic/floor.rs | 0 libm/{ => libm}/src/math/generic/fmax.rs | 0 libm/{ => libm}/src/math/generic/fmaximum.rs | 0 .../src/math/generic/fmaximum_num.rs | 0 libm/{ => libm}/src/math/generic/fmin.rs | 0 libm/{ => libm}/src/math/generic/fminimum.rs | 0 .../src/math/generic/fminimum_num.rs | 0 libm/{ => libm}/src/math/generic/fmod.rs | 0 libm/{ => libm}/src/math/generic/mod.rs | 0 libm/{ => libm}/src/math/generic/rint.rs | 0 libm/{ => libm}/src/math/generic/round.rs | 0 libm/{ => libm}/src/math/generic/scalbn.rs | 0 libm/{ => libm}/src/math/generic/sqrt.rs | 0 libm/{ => libm}/src/math/generic/trunc.rs | 0 libm/{ => libm}/src/math/hypot.rs | 0 libm/{ => 
libm}/src/math/hypotf.rs | 0 libm/{ => libm}/src/math/ilogb.rs | 0 libm/{ => libm}/src/math/ilogbf.rs | 0 libm/{ => libm}/src/math/j0.rs | 0 libm/{ => libm}/src/math/j0f.rs | 0 libm/{ => libm}/src/math/j1.rs | 0 libm/{ => libm}/src/math/j1f.rs | 0 libm/{ => libm}/src/math/jn.rs | 0 libm/{ => libm}/src/math/jnf.rs | 0 libm/{ => libm}/src/math/k_cos.rs | 0 libm/{ => libm}/src/math/k_cosf.rs | 0 libm/{ => libm}/src/math/k_expo2.rs | 0 libm/{ => libm}/src/math/k_expo2f.rs | 0 libm/{ => libm}/src/math/k_sin.rs | 0 libm/{ => libm}/src/math/k_sinf.rs | 0 libm/{ => libm}/src/math/k_tan.rs | 0 libm/{ => libm}/src/math/k_tanf.rs | 0 libm/{ => libm}/src/math/ldexp.rs | 0 libm/libm/src/math/ldexpf.rs | 4 ++ libm/libm/src/math/ldexpf128.rs | 4 ++ libm/libm/src/math/ldexpf16.rs | 4 ++ libm/{ => libm}/src/math/lgamma.rs | 0 libm/{ => libm}/src/math/lgamma_r.rs | 0 libm/{ => libm}/src/math/lgammaf.rs | 0 libm/{ => libm}/src/math/lgammaf_r.rs | 0 libm/{ => libm}/src/math/log.rs | 0 libm/{ => libm}/src/math/log10.rs | 0 libm/{ => libm}/src/math/log10f.rs | 0 libm/{ => libm}/src/math/log1p.rs | 0 libm/{ => libm}/src/math/log1pf.rs | 0 libm/{ => libm}/src/math/log2.rs | 0 libm/{ => libm}/src/math/log2f.rs | 0 libm/{ => libm}/src/math/logf.rs | 0 libm/{ => libm}/src/math/mod.rs | 0 libm/{ => libm}/src/math/modf.rs | 0 libm/{ => libm}/src/math/modff.rs | 0 libm/{ => libm}/src/math/nextafter.rs | 0 libm/{ => libm}/src/math/nextafterf.rs | 0 libm/{ => libm}/src/math/pow.rs | 0 libm/{ => libm}/src/math/powf.rs | 0 libm/{ => libm}/src/math/rem_pio2.rs | 0 libm/{ => libm}/src/math/rem_pio2_large.rs | 0 libm/{ => libm}/src/math/rem_pio2f.rs | 0 libm/{ => libm}/src/math/remainder.rs | 0 libm/{ => libm}/src/math/remainderf.rs | 0 libm/{ => libm}/src/math/remquo.rs | 0 libm/{ => libm}/src/math/remquof.rs | 0 libm/{ => libm}/src/math/rint.rs | 0 libm/{ => libm}/src/math/round.rs | 0 libm/{ => libm}/src/math/roundeven.rs | 0 libm/libm/src/math/roundf.rs | 5 +++ libm/libm/src/math/roundf128.rs | 5 
+++ libm/libm/src/math/roundf16.rs | 5 +++ libm/{ => libm}/src/math/scalbn.rs | 0 libm/libm/src/math/scalbnf.rs | 4 ++ libm/libm/src/math/scalbnf128.rs | 4 ++ libm/libm/src/math/scalbnf16.rs | 4 ++ libm/{ => libm}/src/math/sin.rs | 0 libm/{ => libm}/src/math/sincos.rs | 0 libm/{ => libm}/src/math/sincosf.rs | 0 libm/{ => libm}/src/math/sinf.rs | 0 libm/{ => libm}/src/math/sinh.rs | 0 libm/{ => libm}/src/math/sinhf.rs | 0 libm/{ => libm}/src/math/sqrt.rs | 0 libm/libm/src/math/sqrtf.rs | 15 +++++++ libm/libm/src/math/sqrtf128.rs | 5 +++ libm/libm/src/math/sqrtf16.rs | 11 ++++++ libm/{ => libm}/src/math/support/big.rs | 0 libm/{ => libm}/src/math/support/big/tests.rs | 0 libm/{ => libm}/src/math/support/env.rs | 0 .../src/math/support/float_traits.rs | 0 libm/{ => libm}/src/math/support/hex_float.rs | 0 .../{ => libm}/src/math/support/int_traits.rs | 0 libm/{ => libm}/src/math/support/macros.rs | 0 libm/{ => libm}/src/math/support/mod.rs | 0 libm/{ => libm}/src/math/tan.rs | 0 libm/{ => libm}/src/math/tanf.rs | 0 libm/{ => libm}/src/math/tanh.rs | 0 libm/{ => libm}/src/math/tanhf.rs | 0 libm/{ => libm}/src/math/tgamma.rs | 0 libm/{ => libm}/src/math/tgammaf.rs | 0 libm/{ => libm}/src/math/trunc.rs | 0 libm/libm/src/math/truncf.rs | 23 +++++++++++ libm/libm/src/math/truncf128.rs | 7 ++++ libm/libm/src/math/truncf16.rs | 7 ++++ 183 files changed, 334 insertions(+), 23 deletions(-) rename libm/{ => libm}/Cargo.toml (100%) rename libm/{ => libm}/build.rs (88%) rename libm/{ => libm}/configure.rs (100%) rename libm/{ => libm}/src/lib.rs (100%) rename libm/{ => libm}/src/libm_helper.rs (100%) rename libm/{ => libm}/src/math/acos.rs (100%) rename libm/{ => libm}/src/math/acosf.rs (100%) rename libm/{ => libm}/src/math/acosh.rs (100%) rename libm/{ => libm}/src/math/acoshf.rs (100%) rename libm/{ => libm}/src/math/arch/aarch64.rs (100%) rename libm/{ => libm}/src/math/arch/i586.rs (100%) rename libm/{ => libm}/src/math/arch/i686.rs (100%) rename libm/{ => 
libm}/src/math/arch/mod.rs (100%) rename libm/{ => libm}/src/math/arch/wasm32.rs (100%) rename libm/{ => libm}/src/math/asin.rs (100%) rename libm/{ => libm}/src/math/asinf.rs (100%) rename libm/{ => libm}/src/math/asinh.rs (100%) rename libm/{ => libm}/src/math/asinhf.rs (100%) rename libm/{ => libm}/src/math/atan.rs (100%) rename libm/{ => libm}/src/math/atan2.rs (100%) rename libm/{ => libm}/src/math/atan2f.rs (100%) rename libm/{ => libm}/src/math/atanf.rs (100%) rename libm/{ => libm}/src/math/atanh.rs (100%) rename libm/{ => libm}/src/math/atanhf.rs (100%) rename libm/{ => libm}/src/math/cbrt.rs (100%) rename libm/{ => libm}/src/math/cbrtf.rs (100%) rename libm/{ => libm}/src/math/ceil.rs (100%) rename libm/{ => libm}/src/math/copysign.rs (100%) create mode 100644 libm/libm/src/math/copysignf.rs create mode 100644 libm/libm/src/math/copysignf128.rs create mode 100644 libm/libm/src/math/copysignf16.rs rename libm/{ => libm}/src/math/cos.rs (100%) rename libm/{ => libm}/src/math/cosf.rs (100%) rename libm/{ => libm}/src/math/cosh.rs (100%) rename libm/{ => libm}/src/math/coshf.rs (100%) rename libm/{ => libm}/src/math/erf.rs (100%) rename libm/{ => libm}/src/math/erff.rs (100%) rename libm/{ => libm}/src/math/exp.rs (100%) rename libm/{ => libm}/src/math/exp10.rs (100%) rename libm/{ => libm}/src/math/exp10f.rs (100%) rename libm/{ => libm}/src/math/exp2.rs (100%) rename libm/{ => libm}/src/math/exp2f.rs (100%) rename libm/{ => libm}/src/math/expf.rs (100%) rename libm/{ => libm}/src/math/expm1.rs (100%) rename libm/{ => libm}/src/math/expm1f.rs (100%) rename libm/{ => libm}/src/math/expo2.rs (100%) rename libm/{ => libm}/src/math/fabs.rs (100%) create mode 100644 libm/libm/src/math/fabsf.rs create mode 100644 libm/libm/src/math/fabsf128.rs create mode 100644 libm/libm/src/math/fabsf16.rs rename libm/{ => libm}/src/math/fdim.rs (100%) create mode 100644 libm/libm/src/math/fdimf.rs create mode 100644 libm/libm/src/math/fdimf128.rs create mode 100644 
libm/libm/src/math/fdimf16.rs rename libm/{ => libm}/src/math/floor.rs (100%) create mode 100644 libm/libm/src/math/floorf.rs create mode 100644 libm/libm/src/math/floorf128.rs create mode 100644 libm/libm/src/math/floorf16.rs rename libm/{ => libm}/src/math/fma.rs (100%) rename libm/{ => libm}/src/math/fma_wide.rs (100%) rename libm/{ => libm}/src/math/fmin_fmax.rs (100%) rename libm/{ => libm}/src/math/fminimum_fmaximum.rs (100%) rename libm/{ => libm}/src/math/fminimum_fmaximum_num.rs (100%) rename libm/{ => libm}/src/math/fmod.rs (100%) create mode 100644 libm/libm/src/math/fmodf.rs create mode 100644 libm/libm/src/math/fmodf128.rs create mode 100644 libm/libm/src/math/fmodf16.rs rename libm/{ => libm}/src/math/frexp.rs (100%) rename libm/{ => libm}/src/math/frexpf.rs (100%) rename libm/{ => libm}/src/math/generic/ceil.rs (100%) rename libm/{ => libm}/src/math/generic/copysign.rs (100%) rename libm/{ => libm}/src/math/generic/fabs.rs (100%) rename libm/{ => libm}/src/math/generic/fdim.rs (100%) rename libm/{ => libm}/src/math/generic/floor.rs (100%) rename libm/{ => libm}/src/math/generic/fmax.rs (100%) rename libm/{ => libm}/src/math/generic/fmaximum.rs (100%) rename libm/{ => libm}/src/math/generic/fmaximum_num.rs (100%) rename libm/{ => libm}/src/math/generic/fmin.rs (100%) rename libm/{ => libm}/src/math/generic/fminimum.rs (100%) rename libm/{ => libm}/src/math/generic/fminimum_num.rs (100%) rename libm/{ => libm}/src/math/generic/fmod.rs (100%) rename libm/{ => libm}/src/math/generic/mod.rs (100%) rename libm/{ => libm}/src/math/generic/rint.rs (100%) rename libm/{ => libm}/src/math/generic/round.rs (100%) rename libm/{ => libm}/src/math/generic/scalbn.rs (100%) rename libm/{ => libm}/src/math/generic/sqrt.rs (100%) rename libm/{ => libm}/src/math/generic/trunc.rs (100%) rename libm/{ => libm}/src/math/hypot.rs (100%) rename libm/{ => libm}/src/math/hypotf.rs (100%) rename libm/{ => libm}/src/math/ilogb.rs (100%) rename libm/{ => libm}/src/math/ilogbf.rs 
(100%) rename libm/{ => libm}/src/math/j0.rs (100%) rename libm/{ => libm}/src/math/j0f.rs (100%) rename libm/{ => libm}/src/math/j1.rs (100%) rename libm/{ => libm}/src/math/j1f.rs (100%) rename libm/{ => libm}/src/math/jn.rs (100%) rename libm/{ => libm}/src/math/jnf.rs (100%) rename libm/{ => libm}/src/math/k_cos.rs (100%) rename libm/{ => libm}/src/math/k_cosf.rs (100%) rename libm/{ => libm}/src/math/k_expo2.rs (100%) rename libm/{ => libm}/src/math/k_expo2f.rs (100%) rename libm/{ => libm}/src/math/k_sin.rs (100%) rename libm/{ => libm}/src/math/k_sinf.rs (100%) rename libm/{ => libm}/src/math/k_tan.rs (100%) rename libm/{ => libm}/src/math/k_tanf.rs (100%) rename libm/{ => libm}/src/math/ldexp.rs (100%) create mode 100644 libm/libm/src/math/ldexpf.rs create mode 100644 libm/libm/src/math/ldexpf128.rs create mode 100644 libm/libm/src/math/ldexpf16.rs rename libm/{ => libm}/src/math/lgamma.rs (100%) rename libm/{ => libm}/src/math/lgamma_r.rs (100%) rename libm/{ => libm}/src/math/lgammaf.rs (100%) rename libm/{ => libm}/src/math/lgammaf_r.rs (100%) rename libm/{ => libm}/src/math/log.rs (100%) rename libm/{ => libm}/src/math/log10.rs (100%) rename libm/{ => libm}/src/math/log10f.rs (100%) rename libm/{ => libm}/src/math/log1p.rs (100%) rename libm/{ => libm}/src/math/log1pf.rs (100%) rename libm/{ => libm}/src/math/log2.rs (100%) rename libm/{ => libm}/src/math/log2f.rs (100%) rename libm/{ => libm}/src/math/logf.rs (100%) rename libm/{ => libm}/src/math/mod.rs (100%) rename libm/{ => libm}/src/math/modf.rs (100%) rename libm/{ => libm}/src/math/modff.rs (100%) rename libm/{ => libm}/src/math/nextafter.rs (100%) rename libm/{ => libm}/src/math/nextafterf.rs (100%) rename libm/{ => libm}/src/math/pow.rs (100%) rename libm/{ => libm}/src/math/powf.rs (100%) rename libm/{ => libm}/src/math/rem_pio2.rs (100%) rename libm/{ => libm}/src/math/rem_pio2_large.rs (100%) rename libm/{ => libm}/src/math/rem_pio2f.rs (100%) rename libm/{ => libm}/src/math/remainder.rs 
(100%) rename libm/{ => libm}/src/math/remainderf.rs (100%) rename libm/{ => libm}/src/math/remquo.rs (100%) rename libm/{ => libm}/src/math/remquof.rs (100%) rename libm/{ => libm}/src/math/rint.rs (100%) rename libm/{ => libm}/src/math/round.rs (100%) rename libm/{ => libm}/src/math/roundeven.rs (100%) create mode 100644 libm/libm/src/math/roundf.rs create mode 100644 libm/libm/src/math/roundf128.rs create mode 100644 libm/libm/src/math/roundf16.rs rename libm/{ => libm}/src/math/scalbn.rs (100%) create mode 100644 libm/libm/src/math/scalbnf.rs create mode 100644 libm/libm/src/math/scalbnf128.rs create mode 100644 libm/libm/src/math/scalbnf16.rs rename libm/{ => libm}/src/math/sin.rs (100%) rename libm/{ => libm}/src/math/sincos.rs (100%) rename libm/{ => libm}/src/math/sincosf.rs (100%) rename libm/{ => libm}/src/math/sinf.rs (100%) rename libm/{ => libm}/src/math/sinh.rs (100%) rename libm/{ => libm}/src/math/sinhf.rs (100%) rename libm/{ => libm}/src/math/sqrt.rs (100%) create mode 100644 libm/libm/src/math/sqrtf.rs create mode 100644 libm/libm/src/math/sqrtf128.rs create mode 100644 libm/libm/src/math/sqrtf16.rs rename libm/{ => libm}/src/math/support/big.rs (100%) rename libm/{ => libm}/src/math/support/big/tests.rs (100%) rename libm/{ => libm}/src/math/support/env.rs (100%) rename libm/{ => libm}/src/math/support/float_traits.rs (100%) rename libm/{ => libm}/src/math/support/hex_float.rs (100%) rename libm/{ => libm}/src/math/support/int_traits.rs (100%) rename libm/{ => libm}/src/math/support/macros.rs (100%) rename libm/{ => libm}/src/math/support/mod.rs (100%) rename libm/{ => libm}/src/math/tan.rs (100%) rename libm/{ => libm}/src/math/tanf.rs (100%) rename libm/{ => libm}/src/math/tanh.rs (100%) rename libm/{ => libm}/src/math/tanhf.rs (100%) rename libm/{ => libm}/src/math/tgamma.rs (100%) rename libm/{ => libm}/src/math/tgammaf.rs (100%) rename libm/{ => libm}/src/math/trunc.rs (100%) create mode 100644 libm/libm/src/math/truncf.rs create mode 
100644 libm/libm/src/math/truncf128.rs create mode 100644 libm/libm/src/math/truncf16.rs diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index c925e63aa..a717c3ea8 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -213,7 +213,7 @@ jobs: steps: - uses: actions/checkout@master - run: | - msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' Cargo.toml)" + msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' libm/Cargo.toml)" echo "MSRV: $msrv" echo "MSRV=$msrv" >> "$GITHUB_ENV" - name: Install Rust diff --git a/libm/ci/ci-util.py b/libm/ci/ci-util.py index 7468fd690..ed63d6dee 100755 --- a/libm/ci/ci-util.py +++ b/libm/ci/ci-util.py @@ -63,9 +63,9 @@ # Don't run exhaustive tests if these files change, even if they contaiin a function # definition. IGNORE_FILES = [ - "src/math/support/", - "src/libm_helper.rs", - "src/math/arch/intrinsics.rs", + "libm/src/math/support/", + "libm/src/libm_helper.rs", + "libm/src/math/arch/intrinsics.rs", ] TYPES = ["f16", "f32", "f64", "f128"] diff --git a/libm/crates/compiler-builtins-smoke-test/build.rs b/libm/crates/compiler-builtins-smoke-test/build.rs index 4ee5ab585..ef8d613c9 100644 --- a/libm/crates/compiler-builtins-smoke-test/build.rs +++ b/libm/crates/compiler-builtins-smoke-test/build.rs @@ -1,8 +1,8 @@ -#[path = "../../configure.rs"] +#[path = "../../libm/configure.rs"] mod configure; fn main() { - println!("cargo:rerun-if-changed=../../configure.rs"); + println!("cargo:rerun-if-changed=../../libm/configure.rs"); let cfg = configure::Config::from_env(); configure::emit_libm_config(&cfg); } diff --git a/libm/crates/compiler-builtins-smoke-test/src/math.rs b/libm/crates/compiler-builtins-smoke-test/src/math.rs index 7e0146998..f17fc1231 100644 --- a/libm/crates/compiler-builtins-smoke-test/src/math.rs +++ b/libm/crates/compiler-builtins-smoke-test/src/math.rs @@ -3,7 +3,7 @@ use core::ffi::c_int; #[allow(dead_code)] #[allow(clippy::all)] // 
We don't get `libm`'s list of `allow`s, so just ignore Clippy. #[allow(unused_imports)] -#[path = "../../../src/math/mod.rs"] +#[path = "../../../libm/src/math/mod.rs"] pub mod libm; /// Mark functions `#[no_mangle]` and with the C ABI. diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 98da73cea..1bcc163ed 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -32,7 +32,7 @@ anyhow = "1.0.97" gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false } iai-callgrind = { version = "0.14.0", optional = true } indicatif = { version = "0.17.11", default-features = false } -libm = { path = "../..", features = ["unstable-public-internals"] } +libm = { path = "../../libm", features = ["unstable-public-internals"] } libm-macros = { path = "../libm-macros" } musl-math-sys = { path = "../musl-math-sys", optional = true } paste = "1.0.15" diff --git a/libm/crates/libm-test/build.rs b/libm/crates/libm-test/build.rs index d2d0df9cb..f75e3dda5 100644 --- a/libm/crates/libm-test/build.rs +++ b/libm/crates/libm-test/build.rs @@ -1,9 +1,9 @@ -#[path = "../../configure.rs"] +#[path = "../../libm/configure.rs"] mod configure; use configure::Config; fn main() { - println!("cargo:rerun-if-changed=../../configure.rs"); + println!("cargo:rerun-if-changed=../../libm/configure.rs"); let cfg = Config::from_env(); configure::emit_test_config(&cfg); } diff --git a/libm/crates/musl-math-sys/Cargo.toml b/libm/crates/musl-math-sys/Cargo.toml index 34682b74c..ad73578d8 100644 --- a/libm/crates/musl-math-sys/Cargo.toml +++ b/libm/crates/musl-math-sys/Cargo.toml @@ -7,7 +7,7 @@ publish = false [dependencies] [dev-dependencies] -libm = { path = "../../" } +libm = { path = "../../libm" } [build-dependencies] cc = "1.2.16" diff --git a/libm/crates/util/Cargo.toml b/libm/crates/util/Cargo.toml index 8005459db..94c7f1033 100644 --- a/libm/crates/util/Cargo.toml +++ b/libm/crates/util/Cargo.toml @@ -11,7 +11,7 @@ 
build-mpfr = ["libm-test/build-mpfr", "dep:rug"] unstable-float = ["libm/unstable-float", "libm-test/unstable-float", "rug?/nightly-float"] [dependencies] -libm = { path = "../..", default-features = false } +libm = { path = "../../libm", default-features = false } libm-macros = { path = "../libm-macros" } libm-test = { path = "../libm-test", default-features = false } musl-math-sys = { path = "../musl-math-sys", optional = true } diff --git a/libm/crates/util/build.rs b/libm/crates/util/build.rs index b6cceb5f1..a1be41275 100644 --- a/libm/crates/util/build.rs +++ b/libm/crates/util/build.rs @@ -1,10 +1,10 @@ #![allow(unexpected_cfgs)] -#[path = "../../configure.rs"] +#[path = "../../libm/configure.rs"] mod configure; fn main() { - println!("cargo:rerun-if-changed=../../configure.rs"); + println!("cargo:rerun-if-changed=../../libm/configure.rs"); let cfg = configure::Config::from_env(); configure::emit_libm_config(&cfg); } diff --git a/libm/etc/update-api-list.py b/libm/etc/update-api-list.py index b4ce2c453..950824fc4 100755 --- a/libm/etc/update-api-list.py +++ b/libm/etc/update-api-list.py @@ -18,7 +18,7 @@ SELF_PATH = Path(__file__) ETC_DIR = SELF_PATH.parent -ROOT_DIR = ETC_DIR.parent +LIBM_DIR = ETC_DIR.parent.joinpath("libm") # These files do not trigger a retest. IGNORED_SOURCES = ["src/libm_helper.rs", "src/math/support/float_traits.rs"] @@ -75,7 +75,7 @@ def get_rustdoc_json() -> dict[Any, Any]: "-Zunstable-options", "-o-", ], - cwd=ROOT_DIR, + cwd=LIBM_DIR, text=True, ) j = json.loads(j) @@ -121,8 +121,8 @@ def _init_defs(self, index: IndexTy) -> None: # A lot of the `arch` module is often configured out so doesn't show up in docs. Use # string matching as a fallback. 
- for fname in glob("src/math/arch/**.rs", root_dir=ROOT_DIR): - contents = (ROOT_DIR.joinpath(fname)).read_text() + for fname in glob("src/math/arch/**.rs", root_dir=LIBM_DIR): + contents = (LIBM_DIR.joinpath(fname)).read_text() for name in self.public_functions: if f"fn {name}" in contents: @@ -188,10 +188,10 @@ def tidy_lists(self) -> None: include all public API. """ - flist = sp.check_output(["git", "ls-files"], cwd=ROOT_DIR, text=True) + flist = sp.check_output(["git", "ls-files"], cwd=LIBM_DIR, text=True) for path in flist.splitlines(): - fpath = ROOT_DIR.joinpath(path) + fpath = LIBM_DIR.joinpath(path) if fpath.is_dir() or fpath == SELF_PATH: continue @@ -229,7 +229,7 @@ def ensure_contains_api(self, fpath: Path, line_num: int, lines: list[str]): if len(not_found) == 0: return - relpath = fpath.relative_to(ROOT_DIR) + relpath = fpath.relative_to(LIBM_DIR) eprint(f"functions not found at {relpath}:{line_num}: {not_found}") exit(1) @@ -244,7 +244,7 @@ def validate_delimited_block( """Identify blocks of code wrapped within `start` and `end`, collect their contents to a list of strings, and call `validate` for each of those lists. 
""" - relpath = fpath.relative_to(ROOT_DIR) + relpath = fpath.relative_to(LIBM_DIR) block_lines = [] block_start_line: None | int = None for line_num, line in enumerate(lines): @@ -274,7 +274,7 @@ def validate_delimited_block( def ensure_sorted(fpath: Path, block_start_line: int, lines: list[str]) -> None: """Ensure that a list of lines is sorted, otherwise print a diff and exit.""" - relpath = fpath.relative_to(ROOT_DIR) + relpath = fpath.relative_to(LIBM_DIR) diff_and_exit( "\n".join(lines), "\n".join(sorted(lines)), diff --git a/libm/Cargo.toml b/libm/libm/Cargo.toml similarity index 100% rename from libm/Cargo.toml rename to libm/libm/Cargo.toml diff --git a/libm/build.rs b/libm/libm/build.rs similarity index 88% rename from libm/build.rs rename to libm/libm/build.rs index 7042b54d7..07d08ed43 100644 --- a/libm/build.rs +++ b/libm/libm/build.rs @@ -6,6 +6,7 @@ fn main() { let cfg = configure::Config::from_env(); println!("cargo:rerun-if-changed=build.rs"); + println!("cargo:rerun-if-changed=configure.rs"); println!("cargo:rustc-check-cfg=cfg(assert_no_panic)"); // If set, enable `no-panic`. Requires LTO (`release-opt` profile). 
diff --git a/libm/configure.rs b/libm/libm/configure.rs similarity index 100% rename from libm/configure.rs rename to libm/libm/configure.rs diff --git a/libm/src/lib.rs b/libm/libm/src/lib.rs similarity index 100% rename from libm/src/lib.rs rename to libm/libm/src/lib.rs diff --git a/libm/src/libm_helper.rs b/libm/libm/src/libm_helper.rs similarity index 100% rename from libm/src/libm_helper.rs rename to libm/libm/src/libm_helper.rs diff --git a/libm/src/math/acos.rs b/libm/libm/src/math/acos.rs similarity index 100% rename from libm/src/math/acos.rs rename to libm/libm/src/math/acos.rs diff --git a/libm/src/math/acosf.rs b/libm/libm/src/math/acosf.rs similarity index 100% rename from libm/src/math/acosf.rs rename to libm/libm/src/math/acosf.rs diff --git a/libm/src/math/acosh.rs b/libm/libm/src/math/acosh.rs similarity index 100% rename from libm/src/math/acosh.rs rename to libm/libm/src/math/acosh.rs diff --git a/libm/src/math/acoshf.rs b/libm/libm/src/math/acoshf.rs similarity index 100% rename from libm/src/math/acoshf.rs rename to libm/libm/src/math/acoshf.rs diff --git a/libm/src/math/arch/aarch64.rs b/libm/libm/src/math/arch/aarch64.rs similarity index 100% rename from libm/src/math/arch/aarch64.rs rename to libm/libm/src/math/arch/aarch64.rs diff --git a/libm/src/math/arch/i586.rs b/libm/libm/src/math/arch/i586.rs similarity index 100% rename from libm/src/math/arch/i586.rs rename to libm/libm/src/math/arch/i586.rs diff --git a/libm/src/math/arch/i686.rs b/libm/libm/src/math/arch/i686.rs similarity index 100% rename from libm/src/math/arch/i686.rs rename to libm/libm/src/math/arch/i686.rs diff --git a/libm/src/math/arch/mod.rs b/libm/libm/src/math/arch/mod.rs similarity index 100% rename from libm/src/math/arch/mod.rs rename to libm/libm/src/math/arch/mod.rs diff --git a/libm/src/math/arch/wasm32.rs b/libm/libm/src/math/arch/wasm32.rs similarity index 100% rename from libm/src/math/arch/wasm32.rs rename to libm/libm/src/math/arch/wasm32.rs diff --git 
a/libm/src/math/asin.rs b/libm/libm/src/math/asin.rs similarity index 100% rename from libm/src/math/asin.rs rename to libm/libm/src/math/asin.rs diff --git a/libm/src/math/asinf.rs b/libm/libm/src/math/asinf.rs similarity index 100% rename from libm/src/math/asinf.rs rename to libm/libm/src/math/asinf.rs diff --git a/libm/src/math/asinh.rs b/libm/libm/src/math/asinh.rs similarity index 100% rename from libm/src/math/asinh.rs rename to libm/libm/src/math/asinh.rs diff --git a/libm/src/math/asinhf.rs b/libm/libm/src/math/asinhf.rs similarity index 100% rename from libm/src/math/asinhf.rs rename to libm/libm/src/math/asinhf.rs diff --git a/libm/src/math/atan.rs b/libm/libm/src/math/atan.rs similarity index 100% rename from libm/src/math/atan.rs rename to libm/libm/src/math/atan.rs diff --git a/libm/src/math/atan2.rs b/libm/libm/src/math/atan2.rs similarity index 100% rename from libm/src/math/atan2.rs rename to libm/libm/src/math/atan2.rs diff --git a/libm/src/math/atan2f.rs b/libm/libm/src/math/atan2f.rs similarity index 100% rename from libm/src/math/atan2f.rs rename to libm/libm/src/math/atan2f.rs diff --git a/libm/src/math/atanf.rs b/libm/libm/src/math/atanf.rs similarity index 100% rename from libm/src/math/atanf.rs rename to libm/libm/src/math/atanf.rs diff --git a/libm/src/math/atanh.rs b/libm/libm/src/math/atanh.rs similarity index 100% rename from libm/src/math/atanh.rs rename to libm/libm/src/math/atanh.rs diff --git a/libm/src/math/atanhf.rs b/libm/libm/src/math/atanhf.rs similarity index 100% rename from libm/src/math/atanhf.rs rename to libm/libm/src/math/atanhf.rs diff --git a/libm/src/math/cbrt.rs b/libm/libm/src/math/cbrt.rs similarity index 100% rename from libm/src/math/cbrt.rs rename to libm/libm/src/math/cbrt.rs diff --git a/libm/src/math/cbrtf.rs b/libm/libm/src/math/cbrtf.rs similarity index 100% rename from libm/src/math/cbrtf.rs rename to libm/libm/src/math/cbrtf.rs diff --git a/libm/src/math/ceil.rs b/libm/libm/src/math/ceil.rs similarity 
index 100% rename from libm/src/math/ceil.rs rename to libm/libm/src/math/ceil.rs diff --git a/libm/src/math/copysign.rs b/libm/libm/src/math/copysign.rs similarity index 100% rename from libm/src/math/copysign.rs rename to libm/libm/src/math/copysign.rs diff --git a/libm/libm/src/math/copysignf.rs b/libm/libm/src/math/copysignf.rs new file mode 100644 index 000000000..8b9bed4c0 --- /dev/null +++ b/libm/libm/src/math/copysignf.rs @@ -0,0 +1,8 @@ +/// Sign of Y, magnitude of X (f32) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf(x: f32, y: f32) -> f32 { + super::generic::copysign(x, y) +} diff --git a/libm/libm/src/math/copysignf128.rs b/libm/libm/src/math/copysignf128.rs new file mode 100644 index 000000000..7bd81d42b --- /dev/null +++ b/libm/libm/src/math/copysignf128.rs @@ -0,0 +1,8 @@ +/// Sign of Y, magnitude of X (f128) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf128(x: f128, y: f128) -> f128 { + super::generic::copysign(x, y) +} diff --git a/libm/libm/src/math/copysignf16.rs b/libm/libm/src/math/copysignf16.rs new file mode 100644 index 000000000..820658686 --- /dev/null +++ b/libm/libm/src/math/copysignf16.rs @@ -0,0 +1,8 @@ +/// Sign of Y, magnitude of X (f16) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf16(x: f16, y: f16) -> f16 { + super::generic::copysign(x, y) +} diff --git a/libm/src/math/cos.rs b/libm/libm/src/math/cos.rs similarity index 100% rename from libm/src/math/cos.rs rename to libm/libm/src/math/cos.rs diff --git a/libm/src/math/cosf.rs b/libm/libm/src/math/cosf.rs similarity index 100% rename from libm/src/math/cosf.rs rename to libm/libm/src/math/cosf.rs diff --git a/libm/src/math/cosh.rs b/libm/libm/src/math/cosh.rs similarity index 100% rename from libm/src/math/cosh.rs rename to libm/libm/src/math/cosh.rs diff --git a/libm/src/math/coshf.rs b/libm/libm/src/math/coshf.rs similarity index 100% rename from libm/src/math/coshf.rs rename to libm/libm/src/math/coshf.rs diff --git a/libm/src/math/erf.rs b/libm/libm/src/math/erf.rs similarity index 100% rename from libm/src/math/erf.rs rename to libm/libm/src/math/erf.rs diff --git a/libm/src/math/erff.rs b/libm/libm/src/math/erff.rs similarity index 100% rename from libm/src/math/erff.rs rename to libm/libm/src/math/erff.rs diff --git a/libm/src/math/exp.rs b/libm/libm/src/math/exp.rs similarity index 100% rename from libm/src/math/exp.rs rename to libm/libm/src/math/exp.rs diff --git a/libm/src/math/exp10.rs b/libm/libm/src/math/exp10.rs similarity index 100% rename from libm/src/math/exp10.rs rename to libm/libm/src/math/exp10.rs diff --git a/libm/src/math/exp10f.rs b/libm/libm/src/math/exp10f.rs similarity index 100% rename from libm/src/math/exp10f.rs rename to libm/libm/src/math/exp10f.rs diff --git a/libm/src/math/exp2.rs b/libm/libm/src/math/exp2.rs similarity index 100% rename from libm/src/math/exp2.rs rename to libm/libm/src/math/exp2.rs diff --git a/libm/src/math/exp2f.rs b/libm/libm/src/math/exp2f.rs similarity index 100% rename from libm/src/math/exp2f.rs rename to libm/libm/src/math/exp2f.rs diff --git a/libm/src/math/expf.rs b/libm/libm/src/math/expf.rs similarity index 100% rename from libm/src/math/expf.rs 
rename to libm/libm/src/math/expf.rs diff --git a/libm/src/math/expm1.rs b/libm/libm/src/math/expm1.rs similarity index 100% rename from libm/src/math/expm1.rs rename to libm/libm/src/math/expm1.rs diff --git a/libm/src/math/expm1f.rs b/libm/libm/src/math/expm1f.rs similarity index 100% rename from libm/src/math/expm1f.rs rename to libm/libm/src/math/expm1f.rs diff --git a/libm/src/math/expo2.rs b/libm/libm/src/math/expo2.rs similarity index 100% rename from libm/src/math/expo2.rs rename to libm/libm/src/math/expo2.rs diff --git a/libm/src/math/fabs.rs b/libm/libm/src/math/fabs.rs similarity index 100% rename from libm/src/math/fabs.rs rename to libm/libm/src/math/fabs.rs diff --git a/libm/libm/src/math/fabsf.rs b/libm/libm/src/math/fabsf.rs new file mode 100644 index 000000000..e5820a26c --- /dev/null +++ b/libm/libm/src/math/fabsf.rs @@ -0,0 +1,39 @@ +/// Absolute value (magnitude) (f32) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf(x: f32) -> f32 { + select_implementation! 
{ + name: fabsf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::fabs(x) +} + +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sanity_check() { + assert_eq!(fabsf(-1.0), 1.0); + assert_eq!(fabsf(2.8), 2.8); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs + #[test] + fn spec_tests() { + assert!(fabsf(f32::NAN).is_nan()); + for f in [0.0, -0.0].iter().copied() { + assert_eq!(fabsf(f), 0.0); + } + for f in [f32::INFINITY, f32::NEG_INFINITY].iter().copied() { + assert_eq!(fabsf(f), f32::INFINITY); + } + } +} diff --git a/libm/libm/src/math/fabsf128.rs b/libm/libm/src/math/fabsf128.rs new file mode 100644 index 000000000..46429ca49 --- /dev/null +++ b/libm/libm/src/math/fabsf128.rs @@ -0,0 +1,31 @@ +/// Absolute value (magnitude) (f128) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf128(x: f128) -> f128 { + super::generic::fabs(x) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sanity_check() { + assert_eq!(fabsf128(-1.0), 1.0); + assert_eq!(fabsf128(2.8), 2.8); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs + #[test] + fn spec_tests() { + assert!(fabsf128(f128::NAN).is_nan()); + for f in [0.0, -0.0].iter().copied() { + assert_eq!(fabsf128(f), 0.0); + } + for f in [f128::INFINITY, f128::NEG_INFINITY].iter().copied() { + assert_eq!(fabsf128(f), f128::INFINITY); + } + } +} diff --git a/libm/libm/src/math/fabsf16.rs b/libm/libm/src/math/fabsf16.rs new file mode 100644 index 000000000..eee42ac6a --- /dev/null +++ b/libm/libm/src/math/fabsf16.rs @@ -0,0 +1,31 @@ +/// Absolute value (magnitude) (f16) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf16(x: f16) -> f16 { + super::generic::fabs(x) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sanity_check() { + assert_eq!(fabsf16(-1.0), 1.0); + assert_eq!(fabsf16(2.8), 2.8); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs + #[test] + fn spec_tests() { + assert!(fabsf16(f16::NAN).is_nan()); + for f in [0.0, -0.0].iter().copied() { + assert_eq!(fabsf16(f), 0.0); + } + for f in [f16::INFINITY, f16::NEG_INFINITY].iter().copied() { + assert_eq!(fabsf16(f), f16::INFINITY); + } + } +} diff --git a/libm/src/math/fdim.rs b/libm/libm/src/math/fdim.rs similarity index 100% rename from libm/src/math/fdim.rs rename to libm/libm/src/math/fdim.rs diff --git a/libm/libm/src/math/fdimf.rs b/libm/libm/src/math/fdimf.rs new file mode 100644 index 000000000..367ef517c --- /dev/null +++ b/libm/libm/src/math/fdimf.rs @@ -0,0 +1,12 @@ +/// Positive difference (f32) +/// +/// Determines the positive difference 
between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf(x: f32, y: f32) -> f32 { + super::generic::fdim(x, y) +} diff --git a/libm/libm/src/math/fdimf128.rs b/libm/libm/src/math/fdimf128.rs new file mode 100644 index 000000000..6f3d1d0ff --- /dev/null +++ b/libm/libm/src/math/fdimf128.rs @@ -0,0 +1,12 @@ +/// Positive difference (f128) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf128(x: f128, y: f128) -> f128 { + super::generic::fdim(x, y) +} diff --git a/libm/libm/src/math/fdimf16.rs b/libm/libm/src/math/fdimf16.rs new file mode 100644 index 000000000..37bd68858 --- /dev/null +++ b/libm/libm/src/math/fdimf16.rs @@ -0,0 +1,12 @@ +/// Positive difference (f16) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf16(x: f16, y: f16) -> f16 { + super::generic::fdim(x, y) +} diff --git a/libm/src/math/floor.rs b/libm/libm/src/math/floor.rs similarity index 100% rename from libm/src/math/floor.rs rename to libm/libm/src/math/floor.rs diff --git a/libm/libm/src/math/floorf.rs b/libm/libm/src/math/floorf.rs new file mode 100644 index 000000000..16957b7f3 --- /dev/null +++ b/libm/libm/src/math/floorf.rs @@ -0,0 +1,13 @@ +/// Floor (f32) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf(x: f32) -> f32 { + select_implementation! 
{ + name: floorf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + return super::generic::floor(x); +} diff --git a/libm/libm/src/math/floorf128.rs b/libm/libm/src/math/floorf128.rs new file mode 100644 index 000000000..9a9fe4151 --- /dev/null +++ b/libm/libm/src/math/floorf128.rs @@ -0,0 +1,7 @@ +/// Floor (f128) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf128(x: f128) -> f128 { + return super::generic::floor(x); +} diff --git a/libm/libm/src/math/floorf16.rs b/libm/libm/src/math/floorf16.rs new file mode 100644 index 000000000..f9b868e04 --- /dev/null +++ b/libm/libm/src/math/floorf16.rs @@ -0,0 +1,7 @@ +/// Floor (f16) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf16(x: f16) -> f16 { + return super::generic::floor(x); +} diff --git a/libm/src/math/fma.rs b/libm/libm/src/math/fma.rs similarity index 100% rename from libm/src/math/fma.rs rename to libm/libm/src/math/fma.rs diff --git a/libm/src/math/fma_wide.rs b/libm/libm/src/math/fma_wide.rs similarity index 100% rename from libm/src/math/fma_wide.rs rename to libm/libm/src/math/fma_wide.rs diff --git a/libm/src/math/fmin_fmax.rs b/libm/libm/src/math/fmin_fmax.rs similarity index 100% rename from libm/src/math/fmin_fmax.rs rename to libm/libm/src/math/fmin_fmax.rs diff --git a/libm/src/math/fminimum_fmaximum.rs b/libm/libm/src/math/fminimum_fmaximum.rs similarity index 100% rename from libm/src/math/fminimum_fmaximum.rs rename to libm/libm/src/math/fminimum_fmaximum.rs diff --git a/libm/src/math/fminimum_fmaximum_num.rs b/libm/libm/src/math/fminimum_fmaximum_num.rs similarity index 100% rename from libm/src/math/fminimum_fmaximum_num.rs rename to libm/libm/src/math/fminimum_fmaximum_num.rs diff --git a/libm/src/math/fmod.rs b/libm/libm/src/math/fmod.rs similarity index 100% rename from 
libm/src/math/fmod.rs rename to libm/libm/src/math/fmod.rs diff --git a/libm/libm/src/math/fmodf.rs b/libm/libm/src/math/fmodf.rs new file mode 100644 index 000000000..4e95696e2 --- /dev/null +++ b/libm/libm/src/math/fmodf.rs @@ -0,0 +1,5 @@ +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf(x: f32, y: f32) -> f32 { + super::generic::fmod(x, y) +} diff --git a/libm/libm/src/math/fmodf128.rs b/libm/libm/src/math/fmodf128.rs new file mode 100644 index 000000000..ff0e0493e --- /dev/null +++ b/libm/libm/src/math/fmodf128.rs @@ -0,0 +1,5 @@ +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf128(x: f128, y: f128) -> f128 { + super::generic::fmod(x, y) +} diff --git a/libm/libm/src/math/fmodf16.rs b/libm/libm/src/math/fmodf16.rs new file mode 100644 index 000000000..11972a7de --- /dev/null +++ b/libm/libm/src/math/fmodf16.rs @@ -0,0 +1,5 @@ +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf16(x: f16, y: f16) -> f16 { + super::generic::fmod(x, y) +} diff --git a/libm/src/math/frexp.rs b/libm/libm/src/math/frexp.rs similarity index 100% rename from libm/src/math/frexp.rs rename to libm/libm/src/math/frexp.rs diff --git a/libm/src/math/frexpf.rs b/libm/libm/src/math/frexpf.rs similarity index 100% rename from libm/src/math/frexpf.rs rename to libm/libm/src/math/frexpf.rs diff --git a/libm/src/math/generic/ceil.rs b/libm/libm/src/math/generic/ceil.rs similarity index 100% rename from libm/src/math/generic/ceil.rs rename to libm/libm/src/math/generic/ceil.rs diff --git a/libm/src/math/generic/copysign.rs b/libm/libm/src/math/generic/copysign.rs similarity index 100% rename from libm/src/math/generic/copysign.rs rename to libm/libm/src/math/generic/copysign.rs diff --git a/libm/src/math/generic/fabs.rs b/libm/libm/src/math/generic/fabs.rs similarity index 100% rename from libm/src/math/generic/fabs.rs rename to libm/libm/src/math/generic/fabs.rs diff --git a/libm/src/math/generic/fdim.rs b/libm/libm/src/math/generic/fdim.rs similarity index 100% rename from libm/src/math/generic/fdim.rs rename to libm/libm/src/math/generic/fdim.rs diff --git a/libm/src/math/generic/floor.rs b/libm/libm/src/math/generic/floor.rs similarity index 100% rename from libm/src/math/generic/floor.rs rename to libm/libm/src/math/generic/floor.rs diff --git a/libm/src/math/generic/fmax.rs b/libm/libm/src/math/generic/fmax.rs similarity index 100% rename from libm/src/math/generic/fmax.rs rename to libm/libm/src/math/generic/fmax.rs diff --git a/libm/src/math/generic/fmaximum.rs b/libm/libm/src/math/generic/fmaximum.rs similarity index 100% rename from libm/src/math/generic/fmaximum.rs rename to libm/libm/src/math/generic/fmaximum.rs diff --git a/libm/src/math/generic/fmaximum_num.rs b/libm/libm/src/math/generic/fmaximum_num.rs similarity index 100% rename from libm/src/math/generic/fmaximum_num.rs rename to 
libm/libm/src/math/generic/fmaximum_num.rs diff --git a/libm/src/math/generic/fmin.rs b/libm/libm/src/math/generic/fmin.rs similarity index 100% rename from libm/src/math/generic/fmin.rs rename to libm/libm/src/math/generic/fmin.rs diff --git a/libm/src/math/generic/fminimum.rs b/libm/libm/src/math/generic/fminimum.rs similarity index 100% rename from libm/src/math/generic/fminimum.rs rename to libm/libm/src/math/generic/fminimum.rs diff --git a/libm/src/math/generic/fminimum_num.rs b/libm/libm/src/math/generic/fminimum_num.rs similarity index 100% rename from libm/src/math/generic/fminimum_num.rs rename to libm/libm/src/math/generic/fminimum_num.rs diff --git a/libm/src/math/generic/fmod.rs b/libm/libm/src/math/generic/fmod.rs similarity index 100% rename from libm/src/math/generic/fmod.rs rename to libm/libm/src/math/generic/fmod.rs diff --git a/libm/src/math/generic/mod.rs b/libm/libm/src/math/generic/mod.rs similarity index 100% rename from libm/src/math/generic/mod.rs rename to libm/libm/src/math/generic/mod.rs diff --git a/libm/src/math/generic/rint.rs b/libm/libm/src/math/generic/rint.rs similarity index 100% rename from libm/src/math/generic/rint.rs rename to libm/libm/src/math/generic/rint.rs diff --git a/libm/src/math/generic/round.rs b/libm/libm/src/math/generic/round.rs similarity index 100% rename from libm/src/math/generic/round.rs rename to libm/libm/src/math/generic/round.rs diff --git a/libm/src/math/generic/scalbn.rs b/libm/libm/src/math/generic/scalbn.rs similarity index 100% rename from libm/src/math/generic/scalbn.rs rename to libm/libm/src/math/generic/scalbn.rs diff --git a/libm/src/math/generic/sqrt.rs b/libm/libm/src/math/generic/sqrt.rs similarity index 100% rename from libm/src/math/generic/sqrt.rs rename to libm/libm/src/math/generic/sqrt.rs diff --git a/libm/src/math/generic/trunc.rs b/libm/libm/src/math/generic/trunc.rs similarity index 100% rename from libm/src/math/generic/trunc.rs rename to libm/libm/src/math/generic/trunc.rs diff 
--git a/libm/src/math/hypot.rs b/libm/libm/src/math/hypot.rs similarity index 100% rename from libm/src/math/hypot.rs rename to libm/libm/src/math/hypot.rs diff --git a/libm/src/math/hypotf.rs b/libm/libm/src/math/hypotf.rs similarity index 100% rename from libm/src/math/hypotf.rs rename to libm/libm/src/math/hypotf.rs diff --git a/libm/src/math/ilogb.rs b/libm/libm/src/math/ilogb.rs similarity index 100% rename from libm/src/math/ilogb.rs rename to libm/libm/src/math/ilogb.rs diff --git a/libm/src/math/ilogbf.rs b/libm/libm/src/math/ilogbf.rs similarity index 100% rename from libm/src/math/ilogbf.rs rename to libm/libm/src/math/ilogbf.rs diff --git a/libm/src/math/j0.rs b/libm/libm/src/math/j0.rs similarity index 100% rename from libm/src/math/j0.rs rename to libm/libm/src/math/j0.rs diff --git a/libm/src/math/j0f.rs b/libm/libm/src/math/j0f.rs similarity index 100% rename from libm/src/math/j0f.rs rename to libm/libm/src/math/j0f.rs diff --git a/libm/src/math/j1.rs b/libm/libm/src/math/j1.rs similarity index 100% rename from libm/src/math/j1.rs rename to libm/libm/src/math/j1.rs diff --git a/libm/src/math/j1f.rs b/libm/libm/src/math/j1f.rs similarity index 100% rename from libm/src/math/j1f.rs rename to libm/libm/src/math/j1f.rs diff --git a/libm/src/math/jn.rs b/libm/libm/src/math/jn.rs similarity index 100% rename from libm/src/math/jn.rs rename to libm/libm/src/math/jn.rs diff --git a/libm/src/math/jnf.rs b/libm/libm/src/math/jnf.rs similarity index 100% rename from libm/src/math/jnf.rs rename to libm/libm/src/math/jnf.rs diff --git a/libm/src/math/k_cos.rs b/libm/libm/src/math/k_cos.rs similarity index 100% rename from libm/src/math/k_cos.rs rename to libm/libm/src/math/k_cos.rs diff --git a/libm/src/math/k_cosf.rs b/libm/libm/src/math/k_cosf.rs similarity index 100% rename from libm/src/math/k_cosf.rs rename to libm/libm/src/math/k_cosf.rs diff --git a/libm/src/math/k_expo2.rs b/libm/libm/src/math/k_expo2.rs similarity index 100% rename from 
libm/src/math/k_expo2.rs rename to libm/libm/src/math/k_expo2.rs diff --git a/libm/src/math/k_expo2f.rs b/libm/libm/src/math/k_expo2f.rs similarity index 100% rename from libm/src/math/k_expo2f.rs rename to libm/libm/src/math/k_expo2f.rs diff --git a/libm/src/math/k_sin.rs b/libm/libm/src/math/k_sin.rs similarity index 100% rename from libm/src/math/k_sin.rs rename to libm/libm/src/math/k_sin.rs diff --git a/libm/src/math/k_sinf.rs b/libm/libm/src/math/k_sinf.rs similarity index 100% rename from libm/src/math/k_sinf.rs rename to libm/libm/src/math/k_sinf.rs diff --git a/libm/src/math/k_tan.rs b/libm/libm/src/math/k_tan.rs similarity index 100% rename from libm/src/math/k_tan.rs rename to libm/libm/src/math/k_tan.rs diff --git a/libm/src/math/k_tanf.rs b/libm/libm/src/math/k_tanf.rs similarity index 100% rename from libm/src/math/k_tanf.rs rename to libm/libm/src/math/k_tanf.rs diff --git a/libm/src/math/ldexp.rs b/libm/libm/src/math/ldexp.rs similarity index 100% rename from libm/src/math/ldexp.rs rename to libm/libm/src/math/ldexp.rs diff --git a/libm/libm/src/math/ldexpf.rs b/libm/libm/src/math/ldexpf.rs new file mode 100644 index 000000000..95b27fc49 --- /dev/null +++ b/libm/libm/src/math/ldexpf.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf(x: f32, n: i32) -> f32 { + super::scalbnf(x, n) +} diff --git a/libm/libm/src/math/ldexpf128.rs b/libm/libm/src/math/ldexpf128.rs new file mode 100644 index 000000000..b35277d15 --- /dev/null +++ b/libm/libm/src/math/ldexpf128.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf128(x: f128, n: i32) -> f128 { + super::scalbnf128(x, n) +} diff --git a/libm/libm/src/math/ldexpf16.rs b/libm/libm/src/math/ldexpf16.rs new file mode 100644 index 000000000..8de6cffd6 --- /dev/null +++ b/libm/libm/src/math/ldexpf16.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf16(x: f16, n: i32) -> f16 { + 
super::scalbnf16(x, n) +} diff --git a/libm/src/math/lgamma.rs b/libm/libm/src/math/lgamma.rs similarity index 100% rename from libm/src/math/lgamma.rs rename to libm/libm/src/math/lgamma.rs diff --git a/libm/src/math/lgamma_r.rs b/libm/libm/src/math/lgamma_r.rs similarity index 100% rename from libm/src/math/lgamma_r.rs rename to libm/libm/src/math/lgamma_r.rs diff --git a/libm/src/math/lgammaf.rs b/libm/libm/src/math/lgammaf.rs similarity index 100% rename from libm/src/math/lgammaf.rs rename to libm/libm/src/math/lgammaf.rs diff --git a/libm/src/math/lgammaf_r.rs b/libm/libm/src/math/lgammaf_r.rs similarity index 100% rename from libm/src/math/lgammaf_r.rs rename to libm/libm/src/math/lgammaf_r.rs diff --git a/libm/src/math/log.rs b/libm/libm/src/math/log.rs similarity index 100% rename from libm/src/math/log.rs rename to libm/libm/src/math/log.rs diff --git a/libm/src/math/log10.rs b/libm/libm/src/math/log10.rs similarity index 100% rename from libm/src/math/log10.rs rename to libm/libm/src/math/log10.rs diff --git a/libm/src/math/log10f.rs b/libm/libm/src/math/log10f.rs similarity index 100% rename from libm/src/math/log10f.rs rename to libm/libm/src/math/log10f.rs diff --git a/libm/src/math/log1p.rs b/libm/libm/src/math/log1p.rs similarity index 100% rename from libm/src/math/log1p.rs rename to libm/libm/src/math/log1p.rs diff --git a/libm/src/math/log1pf.rs b/libm/libm/src/math/log1pf.rs similarity index 100% rename from libm/src/math/log1pf.rs rename to libm/libm/src/math/log1pf.rs diff --git a/libm/src/math/log2.rs b/libm/libm/src/math/log2.rs similarity index 100% rename from libm/src/math/log2.rs rename to libm/libm/src/math/log2.rs diff --git a/libm/src/math/log2f.rs b/libm/libm/src/math/log2f.rs similarity index 100% rename from libm/src/math/log2f.rs rename to libm/libm/src/math/log2f.rs diff --git a/libm/src/math/logf.rs b/libm/libm/src/math/logf.rs similarity index 100% rename from libm/src/math/logf.rs rename to libm/libm/src/math/logf.rs diff 
--git a/libm/src/math/mod.rs b/libm/libm/src/math/mod.rs similarity index 100% rename from libm/src/math/mod.rs rename to libm/libm/src/math/mod.rs diff --git a/libm/src/math/modf.rs b/libm/libm/src/math/modf.rs similarity index 100% rename from libm/src/math/modf.rs rename to libm/libm/src/math/modf.rs diff --git a/libm/src/math/modff.rs b/libm/libm/src/math/modff.rs similarity index 100% rename from libm/src/math/modff.rs rename to libm/libm/src/math/modff.rs diff --git a/libm/src/math/nextafter.rs b/libm/libm/src/math/nextafter.rs similarity index 100% rename from libm/src/math/nextafter.rs rename to libm/libm/src/math/nextafter.rs diff --git a/libm/src/math/nextafterf.rs b/libm/libm/src/math/nextafterf.rs similarity index 100% rename from libm/src/math/nextafterf.rs rename to libm/libm/src/math/nextafterf.rs diff --git a/libm/src/math/pow.rs b/libm/libm/src/math/pow.rs similarity index 100% rename from libm/src/math/pow.rs rename to libm/libm/src/math/pow.rs diff --git a/libm/src/math/powf.rs b/libm/libm/src/math/powf.rs similarity index 100% rename from libm/src/math/powf.rs rename to libm/libm/src/math/powf.rs diff --git a/libm/src/math/rem_pio2.rs b/libm/libm/src/math/rem_pio2.rs similarity index 100% rename from libm/src/math/rem_pio2.rs rename to libm/libm/src/math/rem_pio2.rs diff --git a/libm/src/math/rem_pio2_large.rs b/libm/libm/src/math/rem_pio2_large.rs similarity index 100% rename from libm/src/math/rem_pio2_large.rs rename to libm/libm/src/math/rem_pio2_large.rs diff --git a/libm/src/math/rem_pio2f.rs b/libm/libm/src/math/rem_pio2f.rs similarity index 100% rename from libm/src/math/rem_pio2f.rs rename to libm/libm/src/math/rem_pio2f.rs diff --git a/libm/src/math/remainder.rs b/libm/libm/src/math/remainder.rs similarity index 100% rename from libm/src/math/remainder.rs rename to libm/libm/src/math/remainder.rs diff --git a/libm/src/math/remainderf.rs b/libm/libm/src/math/remainderf.rs similarity index 100% rename from libm/src/math/remainderf.rs 
rename to libm/libm/src/math/remainderf.rs diff --git a/libm/src/math/remquo.rs b/libm/libm/src/math/remquo.rs similarity index 100% rename from libm/src/math/remquo.rs rename to libm/libm/src/math/remquo.rs diff --git a/libm/src/math/remquof.rs b/libm/libm/src/math/remquof.rs similarity index 100% rename from libm/src/math/remquof.rs rename to libm/libm/src/math/remquof.rs diff --git a/libm/src/math/rint.rs b/libm/libm/src/math/rint.rs similarity index 100% rename from libm/src/math/rint.rs rename to libm/libm/src/math/rint.rs diff --git a/libm/src/math/round.rs b/libm/libm/src/math/round.rs similarity index 100% rename from libm/src/math/round.rs rename to libm/libm/src/math/round.rs diff --git a/libm/src/math/roundeven.rs b/libm/libm/src/math/roundeven.rs similarity index 100% rename from libm/src/math/roundeven.rs rename to libm/libm/src/math/roundeven.rs diff --git a/libm/libm/src/math/roundf.rs b/libm/libm/src/math/roundf.rs new file mode 100644 index 000000000..b5d7c9d69 --- /dev/null +++ b/libm/libm/src/math/roundf.rs @@ -0,0 +1,5 @@ +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf(x: f32) -> f32 { + super::generic::round(x) +} diff --git a/libm/libm/src/math/roundf128.rs b/libm/libm/src/math/roundf128.rs new file mode 100644 index 000000000..fc3164929 --- /dev/null +++ b/libm/libm/src/math/roundf128.rs @@ -0,0 +1,5 @@ +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf128(x: f128) -> f128 { + super::generic::round(x) +} diff --git a/libm/libm/src/math/roundf16.rs b/libm/libm/src/math/roundf16.rs new file mode 100644 index 000000000..8b356eaab --- /dev/null +++ b/libm/libm/src/math/roundf16.rs @@ -0,0 +1,5 @@ +/// Round `x` to the nearest integer, breaking ties away from zero. 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf16(x: f16) -> f16 { + super::generic::round(x) +} diff --git a/libm/src/math/scalbn.rs b/libm/libm/src/math/scalbn.rs similarity index 100% rename from libm/src/math/scalbn.rs rename to libm/libm/src/math/scalbn.rs diff --git a/libm/libm/src/math/scalbnf.rs b/libm/libm/src/math/scalbnf.rs new file mode 100644 index 000000000..57e7ba76f --- /dev/null +++ b/libm/libm/src/math/scalbnf.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf(x: f32, n: i32) -> f32 { + super::generic::scalbn(x, n) +} diff --git a/libm/libm/src/math/scalbnf128.rs b/libm/libm/src/math/scalbnf128.rs new file mode 100644 index 000000000..c1d2b4855 --- /dev/null +++ b/libm/libm/src/math/scalbnf128.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf128(x: f128, n: i32) -> f128 { + super::generic::scalbn(x, n) +} diff --git a/libm/libm/src/math/scalbnf16.rs b/libm/libm/src/math/scalbnf16.rs new file mode 100644 index 000000000..2209e1a17 --- /dev/null +++ b/libm/libm/src/math/scalbnf16.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf16(x: f16, n: i32) -> f16 { + super::generic::scalbn(x, n) +} diff --git a/libm/src/math/sin.rs b/libm/libm/src/math/sin.rs similarity index 100% rename from libm/src/math/sin.rs rename to libm/libm/src/math/sin.rs diff --git a/libm/src/math/sincos.rs b/libm/libm/src/math/sincos.rs similarity index 100% rename from libm/src/math/sincos.rs rename to libm/libm/src/math/sincos.rs diff --git a/libm/src/math/sincosf.rs b/libm/libm/src/math/sincosf.rs similarity index 100% rename from libm/src/math/sincosf.rs rename to libm/libm/src/math/sincosf.rs diff --git a/libm/src/math/sinf.rs b/libm/libm/src/math/sinf.rs similarity index 100% rename from libm/src/math/sinf.rs rename to libm/libm/src/math/sinf.rs diff --git a/libm/src/math/sinh.rs b/libm/libm/src/math/sinh.rs 
similarity index 100% rename from libm/src/math/sinh.rs rename to libm/libm/src/math/sinh.rs diff --git a/libm/src/math/sinhf.rs b/libm/libm/src/math/sinhf.rs similarity index 100% rename from libm/src/math/sinhf.rs rename to libm/libm/src/math/sinhf.rs diff --git a/libm/src/math/sqrt.rs b/libm/libm/src/math/sqrt.rs similarity index 100% rename from libm/src/math/sqrt.rs rename to libm/libm/src/math/sqrt.rs diff --git a/libm/libm/src/math/sqrtf.rs b/libm/libm/src/math/sqrtf.rs new file mode 100644 index 000000000..c28a705e3 --- /dev/null +++ b/libm/libm/src/math/sqrtf.rs @@ -0,0 +1,15 @@ +/// The square root of `x` (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf(x: f32) -> f32 { + select_implementation! { + name: sqrtf, + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + target_feature = "sse2" + ), + args: x, + } + + super::generic::sqrt(x) +} diff --git a/libm/libm/src/math/sqrtf128.rs b/libm/libm/src/math/sqrtf128.rs new file mode 100644 index 000000000..eaef6ae0c --- /dev/null +++ b/libm/libm/src/math/sqrtf128.rs @@ -0,0 +1,5 @@ +/// The square root of `x` (f128). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf128(x: f128) -> f128 { + return super::generic::sqrt(x); +} diff --git a/libm/libm/src/math/sqrtf16.rs b/libm/libm/src/math/sqrtf16.rs new file mode 100644 index 000000000..7bedb7f8b --- /dev/null +++ b/libm/libm/src/math/sqrtf16.rs @@ -0,0 +1,11 @@ +/// The square root of `x` (f16). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf16(x: f16) -> f16 { + select_implementation! 
{ + name: sqrtf16, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + + return super::generic::sqrt(x); +} diff --git a/libm/src/math/support/big.rs b/libm/libm/src/math/support/big.rs similarity index 100% rename from libm/src/math/support/big.rs rename to libm/libm/src/math/support/big.rs diff --git a/libm/src/math/support/big/tests.rs b/libm/libm/src/math/support/big/tests.rs similarity index 100% rename from libm/src/math/support/big/tests.rs rename to libm/libm/src/math/support/big/tests.rs diff --git a/libm/src/math/support/env.rs b/libm/libm/src/math/support/env.rs similarity index 100% rename from libm/src/math/support/env.rs rename to libm/libm/src/math/support/env.rs diff --git a/libm/src/math/support/float_traits.rs b/libm/libm/src/math/support/float_traits.rs similarity index 100% rename from libm/src/math/support/float_traits.rs rename to libm/libm/src/math/support/float_traits.rs diff --git a/libm/src/math/support/hex_float.rs b/libm/libm/src/math/support/hex_float.rs similarity index 100% rename from libm/src/math/support/hex_float.rs rename to libm/libm/src/math/support/hex_float.rs diff --git a/libm/src/math/support/int_traits.rs b/libm/libm/src/math/support/int_traits.rs similarity index 100% rename from libm/src/math/support/int_traits.rs rename to libm/libm/src/math/support/int_traits.rs diff --git a/libm/src/math/support/macros.rs b/libm/libm/src/math/support/macros.rs similarity index 100% rename from libm/src/math/support/macros.rs rename to libm/libm/src/math/support/macros.rs diff --git a/libm/src/math/support/mod.rs b/libm/libm/src/math/support/mod.rs similarity index 100% rename from libm/src/math/support/mod.rs rename to libm/libm/src/math/support/mod.rs diff --git a/libm/src/math/tan.rs b/libm/libm/src/math/tan.rs similarity index 100% rename from libm/src/math/tan.rs rename to libm/libm/src/math/tan.rs diff --git a/libm/src/math/tanf.rs b/libm/libm/src/math/tanf.rs similarity index 100% rename from 
libm/src/math/tanf.rs rename to libm/libm/src/math/tanf.rs diff --git a/libm/src/math/tanh.rs b/libm/libm/src/math/tanh.rs similarity index 100% rename from libm/src/math/tanh.rs rename to libm/libm/src/math/tanh.rs diff --git a/libm/src/math/tanhf.rs b/libm/libm/src/math/tanhf.rs similarity index 100% rename from libm/src/math/tanhf.rs rename to libm/libm/src/math/tanhf.rs diff --git a/libm/src/math/tgamma.rs b/libm/libm/src/math/tgamma.rs similarity index 100% rename from libm/src/math/tgamma.rs rename to libm/libm/src/math/tgamma.rs diff --git a/libm/src/math/tgammaf.rs b/libm/libm/src/math/tgammaf.rs similarity index 100% rename from libm/src/math/tgammaf.rs rename to libm/libm/src/math/tgammaf.rs diff --git a/libm/src/math/trunc.rs b/libm/libm/src/math/trunc.rs similarity index 100% rename from libm/src/math/trunc.rs rename to libm/libm/src/math/trunc.rs diff --git a/libm/libm/src/math/truncf.rs b/libm/libm/src/math/truncf.rs new file mode 100644 index 000000000..14533a267 --- /dev/null +++ b/libm/libm/src/math/truncf.rs @@ -0,0 +1,23 @@ +/// Rounds the number toward 0 to the closest integral value (f32). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf(x: f32) -> f32 { + select_implementation! { + name: truncf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::trunc(x) +} + +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::truncf(1.1), 1.0); + } +} diff --git a/libm/libm/src/math/truncf128.rs b/libm/libm/src/math/truncf128.rs new file mode 100644 index 000000000..9dccc0d0e --- /dev/null +++ b/libm/libm/src/math/truncf128.rs @@ -0,0 +1,7 @@ +/// Rounds the number toward 0 to the closest integral value (f128). 
+/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf128(x: f128) -> f128 { + super::generic::trunc(x) +} diff --git a/libm/libm/src/math/truncf16.rs b/libm/libm/src/math/truncf16.rs new file mode 100644 index 000000000..d7c3d225c --- /dev/null +++ b/libm/libm/src/math/truncf16.rs @@ -0,0 +1,7 @@ +/// Rounds the number toward 0 to the closest integral value (f16). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf16(x: f16) -> f16 { + super::generic::trunc(x) +} From deb6c91cfe20257b1e054b0de8ea9ea0213f4bde Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 9 Apr 2025 01:21:44 +0000 Subject: [PATCH 1279/1459] refactor: Introduce a virtual manifest Move the workspace configuration to a virtual manifest. This reorganization makes a more clear separation between package contents and support files that don't get distributed. It will also make it easier to merge this repository with `compiler-builtins` which is planned (builtins had a similar update done in [1]). LICENSE.txt and README.md are symlinked into the new directory to ensure they get included in the package. 
[1]: https://github.com/rust-lang/compiler-builtins/pull/702 --- libm/Cargo.toml | 37 +++ libm/ci/ci-util.py | 2 +- libm/etc/function-definitions.json | 502 ++++++++++++++--------------- libm/etc/update-api-list.py | 26 +- libm/libm/Cargo.toml | 38 --- libm/libm/LICENSE.txt | 1 + libm/libm/README.md | 1 + 7 files changed, 305 insertions(+), 302 deletions(-) create mode 100644 libm/Cargo.toml create mode 120000 libm/libm/LICENSE.txt create mode 120000 libm/libm/README.md diff --git a/libm/Cargo.toml b/libm/Cargo.toml new file mode 100644 index 000000000..268b6fb0e --- /dev/null +++ b/libm/Cargo.toml @@ -0,0 +1,37 @@ +[workspace] +resolver = "2" +members = [ + "libm", + "crates/libm-macros", + "crates/libm-test", + "crates/musl-math-sys", + "crates/util", +] +default-members = [ + "libm", + "crates/libm-macros", + "crates/libm-test" +] +exclude = [ + # Requires `panic = abort` so can't be a member of the workspace + "crates/compiler-builtins-smoke-test", +] + +# The default release profile is unchanged. + +# Release mode with debug assertions +[profile.release-checked] +inherits = "release" +debug-assertions = true +overflow-checks = true + +# Release with maximum optimizations, which is very slow to build. This is also +# what is needed to check `no-panic`. +[profile.release-opt] +inherits = "release" +codegen-units = 1 +lto = "fat" + +[profile.bench] +# Required for iai-callgrind +debug = true diff --git a/libm/ci/ci-util.py b/libm/ci/ci-util.py index ed63d6dee..d9e402d6b 100755 --- a/libm/ci/ci-util.py +++ b/libm/ci/ci-util.py @@ -25,7 +25,7 @@ COMMAND: generate-matrix Calculate a matrix of which functions had source change, print that as - a JSON object. + a JSON object. 
locate-baseline [--download] [--extract] Locate the most recent benchmark baseline available in CI and, if flags diff --git a/libm/etc/function-definitions.json b/libm/etc/function-definitions.json index ead1f807f..3e33343c4 100644 --- a/libm/etc/function-definitions.json +++ b/libm/etc/function-definitions.json @@ -2,1067 +2,1067 @@ "__comment": "Autogenerated by update-api-list.py. List of files that define a function with a given name. This file is checked in to make it obvious if refactoring breaks things", "acos": { "sources": [ - "src/math/acos.rs" + "libm/src/math/acos.rs" ], "type": "f64" }, "acosf": { "sources": [ - "src/math/acosf.rs" + "libm/src/math/acosf.rs" ], "type": "f32" }, "acosh": { "sources": [ - "src/math/acosh.rs" + "libm/src/math/acosh.rs" ], "type": "f64" }, "acoshf": { "sources": [ - "src/math/acoshf.rs" + "libm/src/math/acoshf.rs" ], "type": "f32" }, "asin": { "sources": [ - "src/math/asin.rs" + "libm/src/math/asin.rs" ], "type": "f64" }, "asinf": { "sources": [ - "src/math/asinf.rs" + "libm/src/math/asinf.rs" ], "type": "f32" }, "asinh": { "sources": [ - "src/math/asinh.rs" + "libm/src/math/asinh.rs" ], "type": "f64" }, "asinhf": { "sources": [ - "src/math/asinhf.rs" + "libm/src/math/asinhf.rs" ], "type": "f32" }, "atan": { "sources": [ - "src/math/atan.rs" + "libm/src/math/atan.rs" ], "type": "f64" }, "atan2": { "sources": [ - "src/math/atan2.rs" + "libm/src/math/atan2.rs" ], "type": "f64" }, "atan2f": { "sources": [ - "src/math/atan2f.rs" + "libm/src/math/atan2f.rs" ], "type": "f32" }, "atanf": { "sources": [ - "src/math/atanf.rs" + "libm/src/math/atanf.rs" ], "type": "f32" }, "atanh": { "sources": [ - "src/math/atanh.rs" + "libm/src/math/atanh.rs" ], "type": "f64" }, "atanhf": { "sources": [ - "src/math/atanhf.rs" + "libm/src/math/atanhf.rs" ], "type": "f32" }, "cbrt": { "sources": [ - "src/math/cbrt.rs" + "libm/src/math/cbrt.rs" ], "type": "f64" }, "cbrtf": { "sources": [ - "src/math/cbrtf.rs" + "libm/src/math/cbrtf.rs" ], "type": 
"f32" }, "ceil": { "sources": [ - "src/math/arch/i586.rs", - "src/math/arch/wasm32.rs", - "src/math/ceil.rs", - "src/math/generic/ceil.rs" + "libm/src/math/arch/i586.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/ceil.rs", + "libm/src/math/generic/ceil.rs" ], "type": "f64" }, "ceilf": { "sources": [ - "src/math/arch/wasm32.rs", - "src/math/ceil.rs", - "src/math/generic/ceil.rs" + "libm/src/math/arch/wasm32.rs", + "libm/src/math/ceil.rs", + "libm/src/math/generic/ceil.rs" ], "type": "f32" }, "ceilf128": { "sources": [ - "src/math/ceil.rs", - "src/math/generic/ceil.rs" + "libm/src/math/ceil.rs", + "libm/src/math/generic/ceil.rs" ], "type": "f128" }, "ceilf16": { "sources": [ - "src/math/ceil.rs", - "src/math/generic/ceil.rs" + "libm/src/math/ceil.rs", + "libm/src/math/generic/ceil.rs" ], "type": "f16" }, "copysign": { "sources": [ - "src/math/copysign.rs", - "src/math/generic/copysign.rs" + "libm/src/math/copysign.rs", + "libm/src/math/generic/copysign.rs" ], "type": "f64" }, "copysignf": { "sources": [ - "src/math/copysign.rs", - "src/math/generic/copysign.rs" + "libm/src/math/copysign.rs", + "libm/src/math/generic/copysign.rs" ], "type": "f32" }, "copysignf128": { "sources": [ - "src/math/copysign.rs", - "src/math/generic/copysign.rs" + "libm/src/math/copysign.rs", + "libm/src/math/generic/copysign.rs" ], "type": "f128" }, "copysignf16": { "sources": [ - "src/math/copysign.rs", - "src/math/generic/copysign.rs" + "libm/src/math/copysign.rs", + "libm/src/math/generic/copysign.rs" ], "type": "f16" }, "cos": { "sources": [ - "src/math/cos.rs" + "libm/src/math/cos.rs" ], "type": "f64" }, "cosf": { "sources": [ - "src/math/cosf.rs" + "libm/src/math/cosf.rs" ], "type": "f32" }, "cosh": { "sources": [ - "src/math/cosh.rs" + "libm/src/math/cosh.rs" ], "type": "f64" }, "coshf": { "sources": [ - "src/math/coshf.rs" + "libm/src/math/coshf.rs" ], "type": "f32" }, "erf": { "sources": [ - "src/math/erf.rs" + "libm/src/math/erf.rs" ], "type": "f64" }, "erfc": { "sources": 
[ - "src/math/erf.rs" + "libm/src/math/erf.rs" ], "type": "f64" }, "erfcf": { "sources": [ - "src/math/erff.rs" + "libm/src/math/erff.rs" ], "type": "f32" }, "erff": { "sources": [ - "src/math/erff.rs" + "libm/src/math/erff.rs" ], "type": "f32" }, "exp": { "sources": [ - "src/math/exp.rs" + "libm/src/math/exp.rs" ], "type": "f64" }, "exp10": { "sources": [ - "src/math/exp10.rs" + "libm/src/math/exp10.rs" ], "type": "f64" }, "exp10f": { "sources": [ - "src/math/exp10f.rs" + "libm/src/math/exp10f.rs" ], "type": "f32" }, "exp2": { "sources": [ - "src/math/exp2.rs" + "libm/src/math/exp2.rs" ], "type": "f64" }, "exp2f": { "sources": [ - "src/math/exp2f.rs" + "libm/src/math/exp2f.rs" ], "type": "f32" }, "expf": { "sources": [ - "src/math/expf.rs" + "libm/src/math/expf.rs" ], "type": "f32" }, "expm1": { "sources": [ - "src/math/expm1.rs" + "libm/src/math/expm1.rs" ], "type": "f64" }, "expm1f": { "sources": [ - "src/math/expm1f.rs" + "libm/src/math/expm1f.rs" ], "type": "f32" }, "fabs": { "sources": [ - "src/math/arch/wasm32.rs", - "src/math/fabs.rs", - "src/math/generic/fabs.rs" + "libm/src/math/arch/wasm32.rs", + "libm/src/math/fabs.rs", + "libm/src/math/generic/fabs.rs" ], "type": "f64" }, "fabsf": { "sources": [ - "src/math/arch/wasm32.rs", - "src/math/fabs.rs", - "src/math/generic/fabs.rs" + "libm/src/math/arch/wasm32.rs", + "libm/src/math/fabs.rs", + "libm/src/math/generic/fabs.rs" ], "type": "f32" }, "fabsf128": { "sources": [ - "src/math/fabs.rs", - "src/math/generic/fabs.rs" + "libm/src/math/fabs.rs", + "libm/src/math/generic/fabs.rs" ], "type": "f128" }, "fabsf16": { "sources": [ - "src/math/fabs.rs", - "src/math/generic/fabs.rs" + "libm/src/math/fabs.rs", + "libm/src/math/generic/fabs.rs" ], "type": "f16" }, "fdim": { "sources": [ - "src/math/fdim.rs", - "src/math/generic/fdim.rs" + "libm/src/math/fdim.rs", + "libm/src/math/generic/fdim.rs" ], "type": "f64" }, "fdimf": { "sources": [ - "src/math/fdim.rs", - "src/math/generic/fdim.rs" + "libm/src/math/fdim.rs", + 
"libm/src/math/generic/fdim.rs" ], "type": "f32" }, "fdimf128": { "sources": [ - "src/math/fdim.rs", - "src/math/generic/fdim.rs" + "libm/src/math/fdim.rs", + "libm/src/math/generic/fdim.rs" ], "type": "f128" }, "fdimf16": { "sources": [ - "src/math/fdim.rs", - "src/math/generic/fdim.rs" + "libm/src/math/fdim.rs", + "libm/src/math/generic/fdim.rs" ], "type": "f16" }, "floor": { "sources": [ - "src/math/arch/i586.rs", - "src/math/arch/wasm32.rs", - "src/math/floor.rs", - "src/math/generic/floor.rs" + "libm/src/math/arch/i586.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/floor.rs", + "libm/src/math/generic/floor.rs" ], "type": "f64" }, "floorf": { "sources": [ - "src/math/arch/wasm32.rs", - "src/math/floor.rs", - "src/math/generic/floor.rs" + "libm/src/math/arch/wasm32.rs", + "libm/src/math/floor.rs", + "libm/src/math/generic/floor.rs" ], "type": "f32" }, "floorf128": { "sources": [ - "src/math/floor.rs", - "src/math/generic/floor.rs" + "libm/src/math/floor.rs", + "libm/src/math/generic/floor.rs" ], "type": "f128" }, "floorf16": { "sources": [ - "src/math/floor.rs", - "src/math/generic/floor.rs" + "libm/src/math/floor.rs", + "libm/src/math/generic/floor.rs" ], "type": "f16" }, "fma": { "sources": [ - "src/math/arch/aarch64.rs", - "src/math/fma.rs" + "libm/src/math/arch/aarch64.rs", + "libm/src/math/fma.rs" ], "type": "f64" }, "fmaf": { "sources": [ - "src/math/arch/aarch64.rs", - "src/math/fma_wide.rs" + "libm/src/math/arch/aarch64.rs", + "libm/src/math/fma_wide.rs" ], "type": "f32" }, "fmaf128": { "sources": [ - "src/math/fma.rs" + "libm/src/math/fma.rs" ], "type": "f128" }, "fmax": { "sources": [ - "src/math/fmin_fmax.rs", - "src/math/generic/fmax.rs" + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmax.rs" ], "type": "f64" }, "fmaxf": { "sources": [ - "src/math/fmin_fmax.rs", - "src/math/generic/fmax.rs" + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmax.rs" ], "type": "f32" }, "fmaxf128": { "sources": [ - "src/math/fmin_fmax.rs", - 
"src/math/generic/fmax.rs" + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmax.rs" ], "type": "f128" }, "fmaxf16": { "sources": [ - "src/math/fmin_fmax.rs", - "src/math/generic/fmax.rs" + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmax.rs" ], "type": "f16" }, "fmaximum": { "sources": [ - "src/math/fminimum_fmaximum.rs", - "src/math/generic/fmaximum.rs" + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fmaximum.rs" ], "type": "f64" }, "fmaximum_num": { "sources": [ - "src/math/fminimum_fmaximum_num.rs", - "src/math/generic/fmaximum_num.rs" + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fmaximum_num.rs" ], "type": "f64" }, "fmaximum_numf": { "sources": [ - "src/math/fminimum_fmaximum_num.rs", - "src/math/generic/fmaximum_num.rs" + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fmaximum_num.rs" ], "type": "f32" }, "fmaximum_numf128": { "sources": [ - "src/math/fminimum_fmaximum_num.rs", - "src/math/generic/fmaximum_num.rs" + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fmaximum_num.rs" ], "type": "f128" }, "fmaximum_numf16": { "sources": [ - "src/math/fminimum_fmaximum_num.rs", - "src/math/generic/fmaximum_num.rs" + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fmaximum_num.rs" ], "type": "f16" }, "fmaximumf": { "sources": [ - "src/math/fminimum_fmaximum.rs", - "src/math/generic/fmaximum.rs" + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fmaximum.rs" ], "type": "f32" }, "fmaximumf128": { "sources": [ - "src/math/fminimum_fmaximum.rs", - "src/math/generic/fmaximum.rs" + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fmaximum.rs" ], "type": "f128" }, "fmaximumf16": { "sources": [ - "src/math/fminimum_fmaximum.rs", - "src/math/generic/fmaximum.rs" + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fmaximum.rs" ], "type": "f16" }, "fmin": { "sources": [ - "src/math/fmin_fmax.rs", - 
"src/math/generic/fmin.rs" + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmin.rs" ], "type": "f64" }, "fminf": { "sources": [ - "src/math/fmin_fmax.rs", - "src/math/generic/fmin.rs" + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmin.rs" ], "type": "f32" }, "fminf128": { "sources": [ - "src/math/fmin_fmax.rs", - "src/math/generic/fmin.rs" + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmin.rs" ], "type": "f128" }, "fminf16": { "sources": [ - "src/math/fmin_fmax.rs", - "src/math/generic/fmin.rs" + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmin.rs" ], "type": "f16" }, "fminimum": { "sources": [ - "src/math/fminimum_fmaximum.rs", - "src/math/generic/fminimum.rs" + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fminimum.rs" ], "type": "f64" }, "fminimum_num": { "sources": [ - "src/math/fminimum_fmaximum_num.rs", - "src/math/generic/fminimum_num.rs" + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fminimum_num.rs" ], "type": "f64" }, "fminimum_numf": { "sources": [ - "src/math/fminimum_fmaximum_num.rs", - "src/math/generic/fminimum_num.rs" + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fminimum_num.rs" ], "type": "f32" }, "fminimum_numf128": { "sources": [ - "src/math/fminimum_fmaximum_num.rs", - "src/math/generic/fminimum_num.rs" + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fminimum_num.rs" ], "type": "f128" }, "fminimum_numf16": { "sources": [ - "src/math/fminimum_fmaximum_num.rs", - "src/math/generic/fminimum_num.rs" + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fminimum_num.rs" ], "type": "f16" }, "fminimumf": { "sources": [ - "src/math/fminimum_fmaximum.rs", - "src/math/generic/fminimum.rs" + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fminimum.rs" ], "type": "f32" }, "fminimumf128": { "sources": [ - "src/math/fminimum_fmaximum.rs", - "src/math/generic/fminimum.rs" + 
"libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fminimum.rs" ], "type": "f128" }, "fminimumf16": { "sources": [ - "src/math/fminimum_fmaximum.rs", - "src/math/generic/fminimum.rs" + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fminimum.rs" ], "type": "f16" }, "fmod": { "sources": [ - "src/math/fmod.rs", - "src/math/generic/fmod.rs" + "libm/src/math/fmod.rs", + "libm/src/math/generic/fmod.rs" ], "type": "f64" }, "fmodf": { "sources": [ - "src/math/fmod.rs", - "src/math/generic/fmod.rs" + "libm/src/math/fmod.rs", + "libm/src/math/generic/fmod.rs" ], "type": "f32" }, "fmodf128": { "sources": [ - "src/math/fmod.rs", - "src/math/generic/fmod.rs" + "libm/src/math/fmod.rs", + "libm/src/math/generic/fmod.rs" ], "type": "f128" }, "fmodf16": { "sources": [ - "src/math/fmod.rs", - "src/math/generic/fmod.rs" + "libm/src/math/fmod.rs", + "libm/src/math/generic/fmod.rs" ], "type": "f16" }, "frexp": { "sources": [ - "src/math/frexp.rs" + "libm/src/math/frexp.rs" ], "type": "f64" }, "frexpf": { "sources": [ - "src/math/frexpf.rs" + "libm/src/math/frexpf.rs" ], "type": "f32" }, "hypot": { "sources": [ - "src/math/hypot.rs" + "libm/src/math/hypot.rs" ], "type": "f64" }, "hypotf": { "sources": [ - "src/math/hypotf.rs" + "libm/src/math/hypotf.rs" ], "type": "f32" }, "ilogb": { "sources": [ - "src/math/ilogb.rs" + "libm/src/math/ilogb.rs" ], "type": "f64" }, "ilogbf": { "sources": [ - "src/math/ilogbf.rs" + "libm/src/math/ilogbf.rs" ], "type": "f32" }, "j0": { "sources": [ - "src/math/j0.rs" + "libm/src/math/j0.rs" ], "type": "f64" }, "j0f": { "sources": [ - "src/math/j0f.rs" + "libm/src/math/j0f.rs" ], "type": "f32" }, "j1": { "sources": [ - "src/math/j1.rs" + "libm/src/math/j1.rs" ], "type": "f64" }, "j1f": { "sources": [ - "src/math/j1f.rs" + "libm/src/math/j1f.rs" ], "type": "f32" }, "jn": { "sources": [ - "src/math/jn.rs" + "libm/src/math/jn.rs" ], "type": "f64" }, "jnf": { "sources": [ - "src/math/jnf.rs" + "libm/src/math/jnf.rs" ], "type": "f32" 
}, "ldexp": { "sources": [ - "src/math/ldexp.rs" + "libm/src/math/ldexp.rs" ], "type": "f64" }, "ldexpf": { "sources": [ - "src/math/ldexp.rs" + "libm/src/math/ldexp.rs" ], "type": "f32" }, "ldexpf128": { "sources": [ - "src/math/ldexp.rs" + "libm/src/math/ldexp.rs" ], "type": "f128" }, "ldexpf16": { "sources": [ - "src/math/ldexp.rs" + "libm/src/math/ldexp.rs" ], "type": "f16" }, "lgamma": { "sources": [ - "src/math/lgamma.rs" + "libm/src/math/lgamma.rs" ], "type": "f64" }, "lgamma_r": { "sources": [ - "src/math/lgamma_r.rs" + "libm/src/math/lgamma_r.rs" ], "type": "f64" }, "lgammaf": { "sources": [ - "src/math/lgammaf.rs" + "libm/src/math/lgammaf.rs" ], "type": "f32" }, "lgammaf_r": { "sources": [ - "src/math/lgammaf_r.rs" + "libm/src/math/lgammaf_r.rs" ], "type": "f32" }, "log": { "sources": [ - "src/math/log.rs" + "libm/src/math/log.rs" ], "type": "f64" }, "log10": { "sources": [ - "src/math/log10.rs" + "libm/src/math/log10.rs" ], "type": "f64" }, "log10f": { "sources": [ - "src/math/log10f.rs" + "libm/src/math/log10f.rs" ], "type": "f32" }, "log1p": { "sources": [ - "src/math/log1p.rs" + "libm/src/math/log1p.rs" ], "type": "f64" }, "log1pf": { "sources": [ - "src/math/log1pf.rs" + "libm/src/math/log1pf.rs" ], "type": "f32" }, "log2": { "sources": [ - "src/math/log2.rs" + "libm/src/math/log2.rs" ], "type": "f64" }, "log2f": { "sources": [ - "src/math/log2f.rs" + "libm/src/math/log2f.rs" ], "type": "f32" }, "logf": { "sources": [ - "src/math/logf.rs" + "libm/src/math/logf.rs" ], "type": "f32" }, "modf": { "sources": [ - "src/math/modf.rs" + "libm/src/math/modf.rs" ], "type": "f64" }, "modff": { "sources": [ - "src/math/modff.rs" + "libm/src/math/modff.rs" ], "type": "f32" }, "nextafter": { "sources": [ - "src/math/nextafter.rs" + "libm/src/math/nextafter.rs" ], "type": "f64" }, "nextafterf": { "sources": [ - "src/math/nextafterf.rs" + "libm/src/math/nextafterf.rs" ], "type": "f32" }, "pow": { "sources": [ - "src/math/pow.rs" + "libm/src/math/pow.rs" ], "type": 
"f64" }, "powf": { "sources": [ - "src/math/powf.rs" + "libm/src/math/powf.rs" ], "type": "f32" }, "remainder": { "sources": [ - "src/math/remainder.rs" + "libm/src/math/remainder.rs" ], "type": "f64" }, "remainderf": { "sources": [ - "src/math/remainderf.rs" + "libm/src/math/remainderf.rs" ], "type": "f32" }, "remquo": { "sources": [ - "src/math/remquo.rs" + "libm/src/math/remquo.rs" ], "type": "f64" }, "remquof": { "sources": [ - "src/math/remquof.rs" + "libm/src/math/remquof.rs" ], "type": "f32" }, "rint": { "sources": [ - "src/math/arch/aarch64.rs", - "src/math/arch/wasm32.rs", - "src/math/rint.rs" + "libm/src/math/arch/aarch64.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/rint.rs" ], "type": "f64" }, "rintf": { "sources": [ - "src/math/arch/aarch64.rs", - "src/math/arch/wasm32.rs", - "src/math/rint.rs" + "libm/src/math/arch/aarch64.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/rint.rs" ], "type": "f32" }, "rintf128": { "sources": [ - "src/math/rint.rs" + "libm/src/math/rint.rs" ], "type": "f128" }, "rintf16": { "sources": [ - "src/math/arch/aarch64.rs", - "src/math/rint.rs" + "libm/src/math/arch/aarch64.rs", + "libm/src/math/rint.rs" ], "type": "f16" }, "round": { "sources": [ - "src/math/generic/round.rs", - "src/math/round.rs" + "libm/src/math/generic/round.rs", + "libm/src/math/round.rs" ], "type": "f64" }, "roundeven": { "sources": [ - "src/math/roundeven.rs" + "libm/src/math/roundeven.rs" ], "type": "f64" }, "roundevenf": { "sources": [ - "src/math/roundeven.rs" + "libm/src/math/roundeven.rs" ], "type": "f32" }, "roundevenf128": { "sources": [ - "src/math/roundeven.rs" + "libm/src/math/roundeven.rs" ], "type": "f128" }, "roundevenf16": { "sources": [ - "src/math/roundeven.rs" + "libm/src/math/roundeven.rs" ], "type": "f16" }, "roundf": { "sources": [ - "src/math/generic/round.rs", - "src/math/round.rs" + "libm/src/math/generic/round.rs", + "libm/src/math/round.rs" ], "type": "f32" }, "roundf128": { "sources": [ - 
"src/math/generic/round.rs", - "src/math/round.rs" + "libm/src/math/generic/round.rs", + "libm/src/math/round.rs" ], "type": "f128" }, "roundf16": { "sources": [ - "src/math/generic/round.rs", - "src/math/round.rs" + "libm/src/math/generic/round.rs", + "libm/src/math/round.rs" ], "type": "f16" }, "scalbn": { "sources": [ - "src/math/generic/scalbn.rs", - "src/math/scalbn.rs" + "libm/src/math/generic/scalbn.rs", + "libm/src/math/scalbn.rs" ], "type": "f64" }, "scalbnf": { "sources": [ - "src/math/generic/scalbn.rs", - "src/math/scalbn.rs" + "libm/src/math/generic/scalbn.rs", + "libm/src/math/scalbn.rs" ], "type": "f32" }, "scalbnf128": { "sources": [ - "src/math/generic/scalbn.rs", - "src/math/scalbn.rs" + "libm/src/math/generic/scalbn.rs", + "libm/src/math/scalbn.rs" ], "type": "f128" }, "scalbnf16": { "sources": [ - "src/math/generic/scalbn.rs", - "src/math/scalbn.rs" + "libm/src/math/generic/scalbn.rs", + "libm/src/math/scalbn.rs" ], "type": "f16" }, "sin": { "sources": [ - "src/math/sin.rs" + "libm/src/math/sin.rs" ], "type": "f64" }, "sincos": { "sources": [ - "src/math/sincos.rs" + "libm/src/math/sincos.rs" ], "type": "f64" }, "sincosf": { "sources": [ - "src/math/sincosf.rs" + "libm/src/math/sincosf.rs" ], "type": "f32" }, "sinf": { "sources": [ - "src/math/sinf.rs" + "libm/src/math/sinf.rs" ], "type": "f32" }, "sinh": { "sources": [ - "src/math/sinh.rs" + "libm/src/math/sinh.rs" ], "type": "f64" }, "sinhf": { "sources": [ - "src/math/sinhf.rs" + "libm/src/math/sinhf.rs" ], "type": "f32" }, "sqrt": { "sources": [ - "src/math/arch/aarch64.rs", - "src/math/arch/i686.rs", - "src/math/arch/wasm32.rs", - "src/math/generic/sqrt.rs", - "src/math/sqrt.rs" + "libm/src/math/arch/aarch64.rs", + "libm/src/math/arch/i686.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/generic/sqrt.rs", + "libm/src/math/sqrt.rs" ], "type": "f64" }, "sqrtf": { "sources": [ - "src/math/arch/aarch64.rs", - "src/math/arch/i686.rs", - "src/math/arch/wasm32.rs", - 
"src/math/generic/sqrt.rs", - "src/math/sqrt.rs" + "libm/src/math/arch/aarch64.rs", + "libm/src/math/arch/i686.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/generic/sqrt.rs", + "libm/src/math/sqrt.rs" ], "type": "f32" }, "sqrtf128": { "sources": [ - "src/math/generic/sqrt.rs", - "src/math/sqrt.rs" + "libm/src/math/generic/sqrt.rs", + "libm/src/math/sqrt.rs" ], "type": "f128" }, "sqrtf16": { "sources": [ - "src/math/arch/aarch64.rs", - "src/math/generic/sqrt.rs", - "src/math/sqrt.rs" + "libm/src/math/arch/aarch64.rs", + "libm/src/math/generic/sqrt.rs", + "libm/src/math/sqrt.rs" ], "type": "f16" }, "tan": { "sources": [ - "src/math/tan.rs" + "libm/src/math/tan.rs" ], "type": "f64" }, "tanf": { "sources": [ - "src/math/tanf.rs" + "libm/src/math/tanf.rs" ], "type": "f32" }, "tanh": { "sources": [ - "src/math/tanh.rs" + "libm/src/math/tanh.rs" ], "type": "f64" }, "tanhf": { "sources": [ - "src/math/tanhf.rs" + "libm/src/math/tanhf.rs" ], "type": "f32" }, "tgamma": { "sources": [ - "src/math/tgamma.rs" + "libm/src/math/tgamma.rs" ], "type": "f64" }, "tgammaf": { "sources": [ - "src/math/tgammaf.rs" + "libm/src/math/tgammaf.rs" ], "type": "f32" }, "trunc": { "sources": [ - "src/math/arch/wasm32.rs", - "src/math/generic/trunc.rs", - "src/math/trunc.rs" + "libm/src/math/arch/wasm32.rs", + "libm/src/math/generic/trunc.rs", + "libm/src/math/trunc.rs" ], "type": "f64" }, "truncf": { "sources": [ - "src/math/arch/wasm32.rs", - "src/math/generic/trunc.rs", - "src/math/trunc.rs" + "libm/src/math/arch/wasm32.rs", + "libm/src/math/generic/trunc.rs", + "libm/src/math/trunc.rs" ], "type": "f32" }, "truncf128": { "sources": [ - "src/math/generic/trunc.rs", - "src/math/trunc.rs" + "libm/src/math/generic/trunc.rs", + "libm/src/math/trunc.rs" ], "type": "f128" }, "truncf16": { "sources": [ - "src/math/generic/trunc.rs", - "src/math/trunc.rs" + "libm/src/math/generic/trunc.rs", + "libm/src/math/trunc.rs" ], "type": "f16" }, "y0": { "sources": [ - "src/math/j0.rs" + 
"libm/src/math/j0.rs" ], "type": "f64" }, "y0f": { "sources": [ - "src/math/j0f.rs" + "libm/src/math/j0f.rs" ], "type": "f32" }, "y1": { "sources": [ - "src/math/j1.rs" + "libm/src/math/j1.rs" ], "type": "f64" }, "y1f": { "sources": [ - "src/math/j1f.rs" + "libm/src/math/j1f.rs" ], "type": "f32" }, "yn": { "sources": [ - "src/math/jn.rs" + "libm/src/math/jn.rs" ], "type": "f64" }, "ynf": { "sources": [ - "src/math/jnf.rs" + "libm/src/math/jnf.rs" ], "type": "f32" } diff --git a/libm/etc/update-api-list.py b/libm/etc/update-api-list.py index 950824fc4..0770a8b20 100755 --- a/libm/etc/update-api-list.py +++ b/libm/etc/update-api-list.py @@ -18,10 +18,10 @@ SELF_PATH = Path(__file__) ETC_DIR = SELF_PATH.parent -LIBM_DIR = ETC_DIR.parent.joinpath("libm") +ROOT_DIR = ETC_DIR.parent # These files do not trigger a retest. -IGNORED_SOURCES = ["src/libm_helper.rs", "src/math/support/float_traits.rs"] +IGNORED_SOURCES = ["libm/src/libm_helper.rs", "libm/src/math/support/float_traits.rs"] IndexTy: TypeAlias = dict[str, dict[str, Any]] """Type of the `index` item in rustdoc's JSON output""" @@ -66,7 +66,7 @@ def get_rustdoc_json() -> dict[Any, Any]: j = sp.check_output( [ "rustdoc", - "src/lib.rs", + "libm/src/lib.rs", "--edition=2021", "--document-private-items", "--output-format=json", @@ -75,7 +75,7 @@ def get_rustdoc_json() -> dict[Any, Any]: "-Zunstable-options", "-o-", ], - cwd=LIBM_DIR, + cwd=ROOT_DIR, text=True, ) j = json.loads(j) @@ -94,7 +94,9 @@ def _init_function_list(self, index: IndexTy) -> None: # Collect a list of source IDs for reexported items in `lib.rs` or `mod math`. 
use = (i for i in public if "use" in i["inner"]) use = ( - i for i in use if i["span"]["filename"] in ["src/math/mod.rs", "src/lib.rs"] + i + for i in use + if i["span"]["filename"] in ["libm/src/math/mod.rs", "libm/src/lib.rs"] ) reexported_ids = [item["inner"]["use"]["id"] for item in use] @@ -121,8 +123,8 @@ def _init_defs(self, index: IndexTy) -> None: # A lot of the `arch` module is often configured out so doesn't show up in docs. Use # string matching as a fallback. - for fname in glob("src/math/arch/**.rs", root_dir=LIBM_DIR): - contents = (LIBM_DIR.joinpath(fname)).read_text() + for fname in glob("libm/src/math/arch/**.rs", root_dir=ROOT_DIR): + contents = (ROOT_DIR.joinpath(fname)).read_text() for name in self.public_functions: if f"fn {name}" in contents: @@ -188,10 +190,10 @@ def tidy_lists(self) -> None: include all public API. """ - flist = sp.check_output(["git", "ls-files"], cwd=LIBM_DIR, text=True) + flist = sp.check_output(["git", "ls-files"], cwd=ROOT_DIR, text=True) for path in flist.splitlines(): - fpath = LIBM_DIR.joinpath(path) + fpath = ROOT_DIR.joinpath(path) if fpath.is_dir() or fpath == SELF_PATH: continue @@ -229,7 +231,7 @@ def ensure_contains_api(self, fpath: Path, line_num: int, lines: list[str]): if len(not_found) == 0: return - relpath = fpath.relative_to(LIBM_DIR) + relpath = fpath.relative_to(ROOT_DIR) eprint(f"functions not found at {relpath}:{line_num}: {not_found}") exit(1) @@ -244,7 +246,7 @@ def validate_delimited_block( """Identify blocks of code wrapped within `start` and `end`, collect their contents to a list of strings, and call `validate` for each of those lists. 
""" - relpath = fpath.relative_to(LIBM_DIR) + relpath = fpath.relative_to(ROOT_DIR) block_lines = [] block_start_line: None | int = None for line_num, line in enumerate(lines): @@ -274,7 +276,7 @@ def validate_delimited_block( def ensure_sorted(fpath: Path, block_start_line: int, lines: list[str]) -> None: """Ensure that a list of lines is sorted, otherwise print a diff and exit.""" - relpath = fpath.relative_to(LIBM_DIR) + relpath = fpath.relative_to(ROOT_DIR) diff_and_exit( "\n".join(lines), "\n".join(sorted(lines)), diff --git a/libm/libm/Cargo.toml b/libm/libm/Cargo.toml index e0aeb07d5..44154c1a8 100644 --- a/libm/libm/Cargo.toml +++ b/libm/libm/Cargo.toml @@ -10,7 +10,6 @@ readme = "README.md" repository = "https://github.com/rust-lang/libm" version = "0.2.11" edition = "2021" -exclude = ["/ci/", "/.github/workflows/"] rust-version = "1.63" [features] @@ -40,24 +39,6 @@ unstable-float = [] # hard float operations. force-soft-floats = [] -[workspace] -resolver = "2" -members = [ - "crates/libm-macros", - "crates/libm-test", - "crates/musl-math-sys", - "crates/util", -] -default-members = [ - ".", - "crates/libm-macros", - "crates/libm-test", -] -exclude = [ - # Requires `panic = abort` so can't be a member of the workspace - "crates/compiler-builtins-smoke-test", -] - [dev-dependencies] no-panic = "0.1.35" @@ -66,22 +47,3 @@ unexpected_cfgs = { level = "warn", check-cfg = [ # compiler-builtins sets this feature, but we use it in `libm` 'cfg(feature, values("compiler-builtins"))', ] } - -# The default release profile is unchanged. - -# Release mode with debug assertions -[profile.release-checked] -inherits = "release" -debug-assertions = true -overflow-checks = true - -# Release with maximum optimizations, which is very slow to build. This is also -# what is needed to check `no-panic`. 
-[profile.release-opt] -inherits = "release" -codegen-units = 1 -lto = "fat" - -[profile.bench] -# Required for iai-callgrind -debug = true diff --git a/libm/libm/LICENSE.txt b/libm/libm/LICENSE.txt new file mode 120000 index 000000000..4ab43736a --- /dev/null +++ b/libm/libm/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/libm/libm/README.md b/libm/libm/README.md new file mode 120000 index 000000000..32d46ee88 --- /dev/null +++ b/libm/libm/README.md @@ -0,0 +1 @@ +../README.md \ No newline at end of file From 15fb6307f6dc295fb965d1c4f486571cc18ab6b3 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 19 Mar 2025 00:11:39 +0000 Subject: [PATCH 1280/1459] Migrate all crates except `libm` to edition 2024 Unfortunately this means we lose use of the convenient name `gen`, so this includes a handful of renaming. We can't increase the edition for `libm` yet due to MSRV, but we can enable `unsafe_op_in_unsafe_fn` to help make that change smoother in the future. 
--- libm/.github/workflows/main.yaml | 13 ++++++++----- .../crates/compiler-builtins-smoke-test/src/math.rs | 8 ++++---- libm/crates/libm-macros/Cargo.toml | 2 +- libm/crates/libm-test/Cargo.toml | 2 +- libm/crates/libm-test/benches/icount.rs | 2 +- libm/crates/libm-test/benches/random.rs | 4 ++-- libm/crates/libm-test/examples/plot_domains.rs | 8 ++++---- libm/crates/libm-test/src/{gen.rs => generate.rs} | 0 .../libm-test/src/{gen => generate}/case_list.rs | 0 .../libm-test/src/{gen => generate}/edge_cases.rs | 2 +- .../libm-test/src/{gen => generate}/random.rs | 0 .../libm-test/src/{gen => generate}/spaced.rs | 0 libm/crates/libm-test/src/lib.rs | 2 +- libm/crates/libm-test/src/run_cfg.rs | 4 ++-- libm/crates/libm-test/tests/compare_built_musl.rs | 2 +- libm/crates/libm-test/tests/multiprecision.rs | 2 +- libm/crates/libm-test/tests/standalone.rs | 2 +- libm/crates/libm-test/tests/u256.rs | 2 +- libm/crates/libm-test/tests/z_extensive/run.rs | 2 +- libm/crates/musl-math-sys/Cargo.toml | 2 +- libm/crates/musl-math-sys/src/lib.rs | 2 +- libm/crates/util/Cargo.toml | 2 +- libm/libm/src/lib.rs | 1 + 23 files changed, 34 insertions(+), 30 deletions(-) rename libm/crates/libm-test/src/{gen.rs => generate.rs} (100%) rename libm/crates/libm-test/src/{gen => generate}/case_list.rs (100%) rename libm/crates/libm-test/src/{gen => generate}/edge_cases.rs (99%) rename libm/crates/libm-test/src/{gen => generate}/random.rs (100%) rename libm/crates/libm-test/src/{gen => generate}/spaced.rs (100%) diff --git a/libm/.github/workflows/main.yaml b/libm/.github/workflows/main.yaml index a717c3ea8..5ce0dbc26 100644 --- a/libm/.github/workflows/main.yaml +++ b/libm/.github/workflows/main.yaml @@ -212,14 +212,17 @@ jobs: RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings` steps: - uses: actions/checkout@master - - run: | + - name: Install Rust + run: | msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' libm/Cargo.toml)" echo "MSRV: $msrv" - echo 
"MSRV=$msrv" >> "$GITHUB_ENV" - - name: Install Rust - run: rustup update "$MSRV" --no-self-update && rustup default "$MSRV" + rustup update "$msrv" --no-self-update && rustup default "$msrv" - uses: Swatinem/rust-cache@v2 - - run: cargo build -p libm + - run: | + # FIXME(msrv): Remove the workspace Cargo.toml so 1.63 cargo doesn't see + # `edition = "2024"` and get spooked. + rm Cargo.toml + cargo build --manifest-path libm/Cargo.toml rustfmt: name: Rustfmt diff --git a/libm/crates/compiler-builtins-smoke-test/src/math.rs b/libm/crates/compiler-builtins-smoke-test/src/math.rs index f17fc1231..58a5bfbb9 100644 --- a/libm/crates/compiler-builtins-smoke-test/src/math.rs +++ b/libm/crates/compiler-builtins-smoke-test/src/math.rs @@ -14,7 +14,7 @@ macro_rules! no_mangle { // Handle simple functions with single return types (@inner $name:ident( $($arg:ident: $aty:ty),+ ) -> $ret:ty) => { - #[no_mangle] + #[unsafe(no_mangle)] extern "C" fn $name($($arg: $aty),+) -> $ret { libm::$name($($arg),+) } @@ -26,7 +26,7 @@ macro_rules! no_mangle { ( @inner $name:ident( $($arg:ident: $aty:ty),+ | $($rarg:ident: $rty:ty),+) -> $ret:ty ) => { - #[no_mangle] + #[unsafe(no_mangle)] extern "C" fn $name($($arg: $aty,)+ $($rarg: $rty),+) -> $ret { let ret; (ret, $(*$rarg),+) = libm::$name($($arg),+); @@ -166,12 +166,12 @@ no_mangle! 
{ /* sincos has no direct return type, not worth handling in the macro */ -#[no_mangle] +#[unsafe(no_mangle)] extern "C" fn sincos(x: f64, s: &mut f64, c: &mut f64) { (*s, *c) = libm::sincos(x); } -#[no_mangle] +#[unsafe(no_mangle)] extern "C" fn sincosf(x: f32, s: &mut f32, c: &mut f32) { (*s, *c) = libm::sincosf(x); } diff --git a/libm/crates/libm-macros/Cargo.toml b/libm/crates/libm-macros/Cargo.toml index 314f4ae37..50c869db7 100644 --- a/libm/crates/libm-macros/Cargo.toml +++ b/libm/crates/libm-macros/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "libm-macros" version = "0.1.0" -edition = "2021" +edition = "2024" publish = false [lib] diff --git a/libm/crates/libm-test/Cargo.toml b/libm/crates/libm-test/Cargo.toml index 1bcc163ed..5d150b4ae 100644 --- a/libm/crates/libm-test/Cargo.toml +++ b/libm/crates/libm-test/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "libm-test" version = "0.1.0" -edition = "2021" +edition = "2024" publish = false [features] diff --git a/libm/crates/libm-test/benches/icount.rs b/libm/crates/libm-test/benches/icount.rs index 4a10ec383..da8c6bfd1 100644 --- a/libm/crates/libm-test/benches/icount.rs +++ b/libm/crates/libm-test/benches/icount.rs @@ -4,7 +4,7 @@ use std::hint::black_box; use iai_callgrind::{library_benchmark, library_benchmark_group, main}; use libm::support::{HInt, u256}; -use libm_test::gen::spaced; +use libm_test::generate::spaced; use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op}; const BENCH_ITER_ITEMS: u64 = 500; diff --git a/libm/crates/libm-test/benches/random.rs b/libm/crates/libm-test/benches/random.rs index 17e4e0d55..63d7e5c6d 100644 --- a/libm/crates/libm-test/benches/random.rs +++ b/libm/crates/libm-test/benches/random.rs @@ -2,8 +2,8 @@ use std::hint::black_box; use std::time::Duration; use criterion::{Criterion, criterion_main}; -use libm_test::gen::random; -use libm_test::gen::random::RandomInput; +use libm_test::generate::random; +use 
libm_test::generate::random::RandomInput; use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, TupleCall}; /// Benchmark with this many items to get a variety diff --git a/libm/crates/libm-test/examples/plot_domains.rs b/libm/crates/libm-test/examples/plot_domains.rs index 441889c69..78524761e 100644 --- a/libm/crates/libm-test/examples/plot_domains.rs +++ b/libm/crates/libm-test/examples/plot_domains.rs @@ -12,8 +12,8 @@ use std::path::Path; use std::process::Command; use std::{env, fs}; -use libm_test::gen::spaced::SpacedInput; -use libm_test::gen::{edge_cases, spaced}; +use libm_test::generate::spaced::SpacedInput; +use libm_test::generate::{edge_cases, spaced}; use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, op}; const JL_PLOT: &str = "examples/plot_file.jl"; @@ -73,7 +73,7 @@ fn plot_one_generator( ctx: &CheckCtx, gen_name: &str, config: &mut String, - gen: impl Iterator, + generator: impl Iterator, ) { let fn_name = ctx.base_name_str; let text_file = out_dir.join(format!("input-{fn_name}-{gen_name}.txt")); @@ -82,7 +82,7 @@ fn plot_one_generator( let mut w = BufWriter::new(f); let mut count = 0u64; - for input in gen { + for input in generator { writeln!(w, "{:e}", input.0).unwrap(); count += 1; } diff --git a/libm/crates/libm-test/src/gen.rs b/libm/crates/libm-test/src/generate.rs similarity index 100% rename from libm/crates/libm-test/src/gen.rs rename to libm/crates/libm-test/src/generate.rs diff --git a/libm/crates/libm-test/src/gen/case_list.rs b/libm/crates/libm-test/src/generate/case_list.rs similarity index 100% rename from libm/crates/libm-test/src/gen/case_list.rs rename to libm/crates/libm-test/src/generate/case_list.rs diff --git a/libm/crates/libm-test/src/gen/edge_cases.rs b/libm/crates/libm-test/src/generate/edge_cases.rs similarity index 99% rename from libm/crates/libm-test/src/gen/edge_cases.rs rename to libm/crates/libm-test/src/generate/edge_cases.rs index 69b59a105..56cc9fa9a 100644 --- 
a/libm/crates/libm-test/src/gen/edge_cases.rs +++ b/libm/crates/libm-test/src/generate/edge_cases.rs @@ -3,7 +3,7 @@ use libm::support::{CastInto, Float, Int, MinInt}; use crate::domain::get_domain; -use crate::gen::KnownSize; +use crate::generate::KnownSize; use crate::op::OpITy; use crate::run_cfg::{check_near_count, check_point_count}; use crate::{BaseName, CheckCtx, FloatExt, FloatTy, MathOp, test_log}; diff --git a/libm/crates/libm-test/src/gen/random.rs b/libm/crates/libm-test/src/generate/random.rs similarity index 100% rename from libm/crates/libm-test/src/gen/random.rs rename to libm/crates/libm-test/src/generate/random.rs diff --git a/libm/crates/libm-test/src/gen/spaced.rs b/libm/crates/libm-test/src/generate/spaced.rs similarity index 100% rename from libm/crates/libm-test/src/gen/spaced.rs rename to libm/crates/libm-test/src/generate/spaced.rs diff --git a/libm/crates/libm-test/src/lib.rs b/libm/crates/libm-test/src/lib.rs index 824f09a33..485c01a47 100644 --- a/libm/crates/libm-test/src/lib.rs +++ b/libm/crates/libm-test/src/lib.rs @@ -4,7 +4,7 @@ pub mod domain; mod f8_impl; -pub mod gen; +pub mod generate; #[cfg(feature = "build-mpfr")] pub mod mpfloat; mod num; diff --git a/libm/crates/libm-test/src/run_cfg.rs b/libm/crates/libm-test/src/run_cfg.rs index 8e4fff53c..b36164b00 100644 --- a/libm/crates/libm-test/src/run_cfg.rs +++ b/libm/crates/libm-test/src/run_cfg.rs @@ -4,7 +4,7 @@ use std::ops::RangeInclusive; use std::sync::LazyLock; use std::{env, str}; -use crate::gen::random::{SEED, SEED_ENV}; +use crate::generate::random::{SEED, SEED_ENV}; use crate::{BaseName, FloatTy, Identifier, test_log}; /// The environment variable indicating which extensive tests should be run. @@ -241,7 +241,7 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { // Some tests are significantly slower than others and need to be further reduced. 
if let Some((_id, _gen, scale)) = EXTEMELY_SLOW_TESTS .iter() - .find(|(id, gen, _scale)| *id == ctx.fn_ident && *gen == ctx.gen_kind) + .find(|(id, generator, _scale)| *id == ctx.fn_ident && *generator == ctx.gen_kind) { // However, do not override if the extensive iteration count has been manually set. if !(ctx.gen_kind == GeneratorKind::Extensive && EXTENSIVE_ITER_OVERRIDE.is_some()) { diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm/crates/libm-test/tests/compare_built_musl.rs index 897dfc26e..cbb4bd49b 100644 --- a/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/libm/crates/libm-test/tests/compare_built_musl.rs @@ -9,7 +9,7 @@ // There are some targets we can't build musl for #![cfg(feature = "build-musl")] -use libm_test::gen::{case_list, edge_cases, random, spaced}; +use libm_test::generate::{case_list, edge_cases, random, spaced}; use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall}; const BASIS: CheckBasis = CheckBasis::Musl; diff --git a/libm/crates/libm-test/tests/multiprecision.rs b/libm/crates/libm-test/tests/multiprecision.rs index 0ab4b64da..80b2c7868 100644 --- a/libm/crates/libm-test/tests/multiprecision.rs +++ b/libm/crates/libm-test/tests/multiprecision.rs @@ -2,7 +2,7 @@ #![cfg(feature = "build-mpfr")] -use libm_test::gen::{case_list, edge_cases, random, spaced}; +use libm_test::generate::{case_list, edge_cases, random, spaced}; use libm_test::mpfloat::MpOp; use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall}; diff --git a/libm/crates/libm-test/tests/standalone.rs b/libm/crates/libm-test/tests/standalone.rs index 7d694843e..7b30a3b48 100644 --- a/libm/crates/libm-test/tests/standalone.rs +++ b/libm/crates/libm-test/tests/standalone.rs @@ -1,6 +1,6 @@ //! Test cases that have both an input and an output, so do not require a basis. 
-use libm_test::gen::case_list; +use libm_test::generate::case_list; use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall}; const BASIS: CheckBasis = CheckBasis::None; diff --git a/libm/crates/libm-test/tests/u256.rs b/libm/crates/libm-test/tests/u256.rs index 460353424..4444036d0 100644 --- a/libm/crates/libm-test/tests/u256.rs +++ b/libm/crates/libm-test/tests/u256.rs @@ -9,7 +9,7 @@ use libm::support::{HInt, u256}; type BigInt = rug::Integer; use libm_test::bigint_fuzz_iteration_count; -use libm_test::gen::random::SEED; +use libm_test::generate::random::SEED; use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; use rug::Assign; diff --git a/libm/crates/libm-test/tests/z_extensive/run.rs b/libm/crates/libm-test/tests/z_extensive/run.rs index 786546a9d..b10c231d1 100644 --- a/libm/crates/libm-test/tests/z_extensive/run.rs +++ b/libm/crates/libm-test/tests/z_extensive/run.rs @@ -6,7 +6,7 @@ use std::sync::atomic::{AtomicU64, Ordering}; use std::time::Duration; use indicatif::{ProgressBar, ProgressStyle}; -use libm_test::gen::spaced; +use libm_test::generate::spaced; use libm_test::mpfloat::MpOp; use libm_test::{ CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TestResult, TupleCall, diff --git a/libm/crates/musl-math-sys/Cargo.toml b/libm/crates/musl-math-sys/Cargo.toml index ad73578d8..9e866a970 100644 --- a/libm/crates/musl-math-sys/Cargo.toml +++ b/libm/crates/musl-math-sys/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "musl-math-sys" version = "0.1.0" -edition = "2021" +edition = "2024" publish = false [dependencies] diff --git a/libm/crates/musl-math-sys/src/lib.rs b/libm/crates/musl-math-sys/src/lib.rs index 07277ef3e..6a4bf4859 100644 --- a/libm/crates/musl-math-sys/src/lib.rs +++ b/libm/crates/musl-math-sys/src/lib.rs @@ -10,7 +10,7 @@ macro_rules! 
functions { $( #[$meta:meta] )* $pfx_name:ident: $name:ident( $($arg:ident: $aty:ty),+ ) -> $rty:ty; )* ) => { - extern "C" { + unsafe extern "C" { $( fn $pfx_name( $($arg: $aty),+ ) -> $rty; )* } diff --git a/libm/crates/util/Cargo.toml b/libm/crates/util/Cargo.toml index 94c7f1033..4bcb97472 100644 --- a/libm/crates/util/Cargo.toml +++ b/libm/crates/util/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "util" version = "0.1.0" -edition = "2021" +edition = "2024" publish = false [features] diff --git a/libm/libm/src/lib.rs b/libm/libm/src/lib.rs index b0e431211..7e56bd079 100644 --- a/libm/libm/src/lib.rs +++ b/libm/libm/src/lib.rs @@ -17,6 +17,7 @@ #![allow(clippy::needless_return)] #![allow(clippy::unreadable_literal)] #![allow(clippy::zero_divided_by_zero)] +#![forbid(unsafe_op_in_unsafe_fn)] mod libm_helper; mod math; From 38bbfbf1bfcfa60521285bc759e6df4e26ada54f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 18 Mar 2025 09:52:53 +0000 Subject: [PATCH 1281/1459] Rename `testcrate` to `builtins-test` The repo will soon have `libm` as a top-level crate, so make it clear that this is only the test crate for `compiler-builtins`. 
--- Cargo.toml | 8 ++++---- {testcrate => builtins-test}/Cargo.toml | 7 ++++--- {testcrate => builtins-test}/benches/float_add.rs | 2 +- {testcrate => builtins-test}/benches/float_cmp.rs | 2 +- {testcrate => builtins-test}/benches/float_conv.rs | 2 +- {testcrate => builtins-test}/benches/float_div.rs | 2 +- {testcrate => builtins-test}/benches/float_extend.rs | 2 +- {testcrate => builtins-test}/benches/float_mul.rs | 2 +- {testcrate => builtins-test}/benches/float_pow.rs | 2 +- {testcrate => builtins-test}/benches/float_sub.rs | 2 +- {testcrate => builtins-test}/benches/float_trunc.rs | 2 +- {testcrate => builtins-test}/benches/mem.rs | 0 {testcrate => builtins-test}/benches/mem_icount.rs | 0 {testcrate => builtins-test}/build.rs | 0 {testcrate => builtins-test}/src/bench.rs | 0 {testcrate => builtins-test}/src/lib.rs | 4 ++-- {testcrate => builtins-test}/tests/addsub.rs | 2 +- {testcrate => builtins-test}/tests/aeabi_memclr.rs | 0 {testcrate => builtins-test}/tests/aeabi_memcpy.rs | 0 {testcrate => builtins-test}/tests/aeabi_memset.rs | 0 {testcrate => builtins-test}/tests/big.rs | 0 {testcrate => builtins-test}/tests/cmp.rs | 2 +- {testcrate => builtins-test}/tests/conv.rs | 2 +- {testcrate => builtins-test}/tests/div_rem.rs | 2 +- {testcrate => builtins-test}/tests/float_pow.rs | 2 +- {testcrate => builtins-test}/tests/lse.rs | 6 +++--- {testcrate => builtins-test}/tests/mem.rs | 0 {testcrate => builtins-test}/tests/misc.rs | 2 +- {testcrate => builtins-test}/tests/mul.rs | 2 +- {testcrate => builtins-test}/tests/shift.rs | 2 +- ci/miri.sh | 2 +- ci/run.sh | 6 +++--- compiler-builtins/Cargo.toml | 2 +- compiler-builtins/configure.rs | 2 +- compiler-builtins/src/int/traits.rs | 4 ++-- 35 files changed, 38 insertions(+), 37 deletions(-) rename {testcrate => builtins-test}/Cargo.toml (93%) rename {testcrate => builtins-test}/benches/float_add.rs (98%) rename {testcrate => builtins-test}/benches/float_cmp.rs (99%) rename {testcrate => 
builtins-test}/benches/float_conv.rs (99%) rename {testcrate => builtins-test}/benches/float_div.rs (98%) rename {testcrate => builtins-test}/benches/float_extend.rs (99%) rename {testcrate => builtins-test}/benches/float_mul.rs (98%) rename {testcrate => builtins-test}/benches/float_pow.rs (97%) rename {testcrate => builtins-test}/benches/float_sub.rs (98%) rename {testcrate => builtins-test}/benches/float_trunc.rs (99%) rename {testcrate => builtins-test}/benches/mem.rs (100%) rename {testcrate => builtins-test}/benches/mem_icount.rs (100%) rename {testcrate => builtins-test}/build.rs (100%) rename {testcrate => builtins-test}/src/bench.rs (100%) rename {testcrate => builtins-test}/src/lib.rs (99%) rename {testcrate => builtins-test}/tests/addsub.rs (99%) rename {testcrate => builtins-test}/tests/aeabi_memclr.rs (100%) rename {testcrate => builtins-test}/tests/aeabi_memcpy.rs (100%) rename {testcrate => builtins-test}/tests/aeabi_memset.rs (100%) rename {testcrate => builtins-test}/tests/big.rs (100%) rename {testcrate => builtins-test}/tests/cmp.rs (99%) rename {testcrate => builtins-test}/tests/conv.rs (99%) rename {testcrate => builtins-test}/tests/div_rem.rs (99%) rename {testcrate => builtins-test}/tests/float_pow.rs (99%) rename {testcrate => builtins-test}/tests/lse.rs (93%) rename {testcrate => builtins-test}/tests/mem.rs (100%) rename {testcrate => builtins-test}/tests/misc.rs (99%) rename {testcrate => builtins-test}/tests/mul.rs (99%) rename {testcrate => builtins-test}/tests/shift.rs (97%) diff --git a/Cargo.toml b/Cargo.toml index db4c45dfa..2e17c303a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,16 +2,16 @@ resolver = "3" members = [ # Note that builtins-test-intrinsics cannot be a default member because it - # needs the `mangled-names` feature disabled, while `testcrate` needs it - # enabled. + # needs the `mangled-names` feature disabled, while `builtins-test` needs + # it enabled. 
+ "builtins-test", "builtins-test-intrinsics", "compiler-builtins", - "testcrate", ] default-members = [ "compiler-builtins", - "testcrate", + "builtins-test", ] [profile.release] diff --git a/testcrate/Cargo.toml b/builtins-test/Cargo.toml similarity index 93% rename from testcrate/Cargo.toml rename to builtins-test/Cargo.toml index bda2b641d..526e9b18a 100644 --- a/testcrate/Cargo.toml +++ b/builtins-test/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "testcrate" +name = "builtins-test" version = "0.1.0" authors = ["Alex Crichton "] edition = "2024" @@ -56,8 +56,9 @@ icount = ["dep:iai-callgrind"] benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] # NOTE: benchmarks must be run with `--no-default-features` or with -# `-p testcrate`, otherwise the default `compiler-builtins` feature of the -# `compiler_builtins` crate gets activated, resulting in linker errors. +# `-p builtins-test`, otherwise the default `compiler-builtins` feature +# of the `compiler_builtins` crate gets activated, resulting in linker +# errors. [[bench]] name = "float_add" diff --git a/testcrate/benches/float_add.rs b/builtins-test/benches/float_add.rs similarity index 98% rename from testcrate/benches/float_add.rs rename to builtins-test/benches/float_add.rs index a578655f8..197f90b31 100644 --- a/testcrate/benches/float_add.rs +++ b/builtins-test/benches/float_add.rs @@ -1,8 +1,8 @@ #![cfg_attr(f128_enabled, feature(f128))] +use builtins_test::float_bench; use compiler_builtins::float::add; use criterion::{Criterion, criterion_main}; -use testcrate::float_bench; float_bench! 
{ name: add_f32, diff --git a/testcrate/benches/float_cmp.rs b/builtins-test/benches/float_cmp.rs similarity index 99% rename from testcrate/benches/float_cmp.rs rename to builtins-test/benches/float_cmp.rs index 4c269e488..4493765ec 100644 --- a/testcrate/benches/float_cmp.rs +++ b/builtins-test/benches/float_cmp.rs @@ -1,7 +1,7 @@ #![cfg_attr(f128_enabled, feature(f128))] +use builtins_test::float_bench; use criterion::{Criterion, criterion_main}; -use testcrate::float_bench; use compiler_builtins::float::cmp; diff --git a/testcrate/benches/float_conv.rs b/builtins-test/benches/float_conv.rs similarity index 99% rename from testcrate/benches/float_conv.rs rename to builtins-test/benches/float_conv.rs index e3f2af863..d4a7346d1 100644 --- a/testcrate/benches/float_conv.rs +++ b/builtins-test/benches/float_conv.rs @@ -1,9 +1,9 @@ #![allow(improper_ctypes)] #![cfg_attr(f128_enabled, feature(f128))] +use builtins_test::float_bench; use compiler_builtins::float::conv; use criterion::{Criterion, criterion_main}; -use testcrate::float_bench; /* unsigned int -> float */ diff --git a/testcrate/benches/float_div.rs b/builtins-test/benches/float_div.rs similarity index 98% rename from testcrate/benches/float_div.rs rename to builtins-test/benches/float_div.rs index c42f3f386..d5b0ad0fd 100644 --- a/testcrate/benches/float_div.rs +++ b/builtins-test/benches/float_div.rs @@ -1,8 +1,8 @@ #![cfg_attr(f128_enabled, feature(f128))] +use builtins_test::float_bench; use compiler_builtins::float::div; use criterion::{Criterion, criterion_main}; -use testcrate::float_bench; float_bench! 
{ name: div_f32, diff --git a/testcrate/benches/float_extend.rs b/builtins-test/benches/float_extend.rs similarity index 99% rename from testcrate/benches/float_extend.rs rename to builtins-test/benches/float_extend.rs index 1e7fedefe..fc44e80c9 100644 --- a/testcrate/benches/float_extend.rs +++ b/builtins-test/benches/float_extend.rs @@ -2,9 +2,9 @@ #![cfg_attr(f128_enabled, feature(f128))] #![cfg_attr(f16_enabled, feature(f16))] +use builtins_test::float_bench; use compiler_builtins::float::extend; use criterion::{Criterion, criterion_main}; -use testcrate::float_bench; #[cfg(f16_enabled)] float_bench! { diff --git a/testcrate/benches/float_mul.rs b/builtins-test/benches/float_mul.rs similarity index 98% rename from testcrate/benches/float_mul.rs rename to builtins-test/benches/float_mul.rs index 0857a68a2..a7a2d34aa 100644 --- a/testcrate/benches/float_mul.rs +++ b/builtins-test/benches/float_mul.rs @@ -1,8 +1,8 @@ #![cfg_attr(f128_enabled, feature(f128))] +use builtins_test::float_bench; use compiler_builtins::float::mul; use criterion::{Criterion, criterion_main}; -use testcrate::float_bench; float_bench! { name: mul_f32, diff --git a/testcrate/benches/float_pow.rs b/builtins-test/benches/float_pow.rs similarity index 97% rename from testcrate/benches/float_pow.rs rename to builtins-test/benches/float_pow.rs index e84fee51c..64e37dd32 100644 --- a/testcrate/benches/float_pow.rs +++ b/builtins-test/benches/float_pow.rs @@ -1,8 +1,8 @@ #![cfg_attr(f128_enabled, feature(f128))] +use builtins_test::float_bench; use compiler_builtins::float::pow; use criterion::{Criterion, criterion_main}; -use testcrate::float_bench; float_bench! 
{ name: powi_f32, diff --git a/testcrate/benches/float_sub.rs b/builtins-test/benches/float_sub.rs similarity index 98% rename from testcrate/benches/float_sub.rs rename to builtins-test/benches/float_sub.rs index 7a6c05ea5..8bae294cd 100644 --- a/testcrate/benches/float_sub.rs +++ b/builtins-test/benches/float_sub.rs @@ -1,8 +1,8 @@ #![cfg_attr(f128_enabled, feature(f128))] +use builtins_test::float_bench; use compiler_builtins::float::sub; use criterion::{Criterion, criterion_main}; -use testcrate::float_bench; float_bench! { name: sub_f32, diff --git a/testcrate/benches/float_trunc.rs b/builtins-test/benches/float_trunc.rs similarity index 99% rename from testcrate/benches/float_trunc.rs rename to builtins-test/benches/float_trunc.rs index 4ceb62ab0..43310c7cf 100644 --- a/testcrate/benches/float_trunc.rs +++ b/builtins-test/benches/float_trunc.rs @@ -1,9 +1,9 @@ #![cfg_attr(f128_enabled, feature(f128))] #![cfg_attr(f16_enabled, feature(f16))] +use builtins_test::float_bench; use compiler_builtins::float::trunc; use criterion::{Criterion, criterion_main}; -use testcrate::float_bench; #[cfg(f16_enabled)] float_bench! 
{ diff --git a/testcrate/benches/mem.rs b/builtins-test/benches/mem.rs similarity index 100% rename from testcrate/benches/mem.rs rename to builtins-test/benches/mem.rs diff --git a/testcrate/benches/mem_icount.rs b/builtins-test/benches/mem_icount.rs similarity index 100% rename from testcrate/benches/mem_icount.rs rename to builtins-test/benches/mem_icount.rs diff --git a/testcrate/build.rs b/builtins-test/build.rs similarity index 100% rename from testcrate/build.rs rename to builtins-test/build.rs diff --git a/testcrate/src/bench.rs b/builtins-test/src/bench.rs similarity index 100% rename from testcrate/src/bench.rs rename to builtins-test/src/bench.rs diff --git a/testcrate/src/lib.rs b/builtins-test/src/lib.rs similarity index 99% rename from testcrate/src/lib.rs rename to builtins-test/src/lib.rs index c61618755..a83aea562 100644 --- a/testcrate/src/lib.rs +++ b/builtins-test/src/lib.rs @@ -7,8 +7,8 @@ //! edge case testing is crucial for checking cases like where both inputs are equal or equal to //! special values such as `i128::MIN`, which is unlikely for the random fuzzer by itself to //! encounter. The randomized fuzz testing is specially designed to cover wide swaths of search -//! space in as few iterations as possible. See `fuzz_values` in `testcrate/tests/misc.rs` for an -//! example. +//! space in as few iterations as possible. See `fuzz_values` in `builtins-test/tests/misc.rs` for +//! an example. //! //! Some floating point tests are disabled for specific architectures, because they do not have //! correct rounding. 
diff --git a/testcrate/tests/addsub.rs b/builtins-test/tests/addsub.rs similarity index 99% rename from testcrate/tests/addsub.rs rename to builtins-test/tests/addsub.rs index 3c0e20f77..865b9e472 100644 --- a/testcrate/tests/addsub.rs +++ b/builtins-test/tests/addsub.rs @@ -1,7 +1,7 @@ #![allow(unused_macros)] #![cfg_attr(f128_enabled, feature(f128))] -use testcrate::*; +use builtins_test::*; mod int_addsub { use super::*; diff --git a/testcrate/tests/aeabi_memclr.rs b/builtins-test/tests/aeabi_memclr.rs similarity index 100% rename from testcrate/tests/aeabi_memclr.rs rename to builtins-test/tests/aeabi_memclr.rs diff --git a/testcrate/tests/aeabi_memcpy.rs b/builtins-test/tests/aeabi_memcpy.rs similarity index 100% rename from testcrate/tests/aeabi_memcpy.rs rename to builtins-test/tests/aeabi_memcpy.rs diff --git a/testcrate/tests/aeabi_memset.rs b/builtins-test/tests/aeabi_memset.rs similarity index 100% rename from testcrate/tests/aeabi_memset.rs rename to builtins-test/tests/aeabi_memset.rs diff --git a/testcrate/tests/big.rs b/builtins-test/tests/big.rs similarity index 100% rename from testcrate/tests/big.rs rename to builtins-test/tests/big.rs diff --git a/testcrate/tests/cmp.rs b/builtins-test/tests/cmp.rs similarity index 99% rename from testcrate/tests/cmp.rs rename to builtins-test/tests/cmp.rs index 19d90c664..dbedd213e 100644 --- a/testcrate/tests/cmp.rs +++ b/builtins-test/tests/cmp.rs @@ -2,7 +2,7 @@ #![allow(unreachable_code)] #![cfg_attr(f128_enabled, feature(f128))] -use testcrate::*; +use builtins_test::*; mod float_comparisons { use super::*; diff --git a/testcrate/tests/conv.rs b/builtins-test/tests/conv.rs similarity index 99% rename from testcrate/tests/conv.rs rename to builtins-test/tests/conv.rs index 381d3e155..491915d9b 100644 --- a/testcrate/tests/conv.rs +++ b/builtins-test/tests/conv.rs @@ -4,9 +4,9 @@ #![allow(unused_macros)] #![allow(unused_imports)] +use builtins_test::*; use compiler_builtins::float::Float; use 
rustc_apfloat::{Float as _, FloatConvert as _}; -use testcrate::*; mod i_to_f { use super::*; diff --git a/testcrate/tests/div_rem.rs b/builtins-test/tests/div_rem.rs similarity index 99% rename from testcrate/tests/div_rem.rs rename to builtins-test/tests/div_rem.rs index ac87eb630..6c0280a32 100644 --- a/testcrate/tests/div_rem.rs +++ b/builtins-test/tests/div_rem.rs @@ -4,7 +4,7 @@ use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4}; use compiler_builtins::int::udiv::{__udivmoddi4, __udivmodsi4, __udivmodti4, u128_divide_sparc}; -use testcrate::*; +use builtins_test::*; // Division algorithms have by far the nastiest and largest number of edge cases, and experience shows // that sometimes 100_000 iterations of the random fuzzer is needed. diff --git a/testcrate/tests/float_pow.rs b/builtins-test/tests/float_pow.rs similarity index 99% rename from testcrate/tests/float_pow.rs rename to builtins-test/tests/float_pow.rs index 8d86392f5..8209543e6 100644 --- a/testcrate/tests/float_pow.rs +++ b/builtins-test/tests/float_pow.rs @@ -2,7 +2,7 @@ #![cfg_attr(f128_enabled, feature(f128))] #![cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] -use testcrate::*; +use builtins_test::*; // This is approximate because of issues related to // https://github.com/rust-lang/rust/issues/73920. 
diff --git a/testcrate/tests/lse.rs b/builtins-test/tests/lse.rs similarity index 93% rename from testcrate/tests/lse.rs rename to builtins-test/tests/lse.rs index cbecd6143..53167d98f 100644 --- a/testcrate/tests/lse.rs +++ b/builtins-test/tests/lse.rs @@ -14,7 +14,7 @@ mod cas { pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) { #[test] fn $name() { - testcrate::fuzz_2(10000, |expected: super::int_ty!($bytes), new| { + builtins_test::fuzz_2(10000, |expected: super::int_ty!($bytes), new| { let mut target = expected.wrapping_add(10); assert_eq!( unsafe { @@ -50,7 +50,7 @@ mod swap { pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) { #[test] fn $name() { - testcrate::fuzz_2(10000, |left: super::int_ty!($bytes), mut right| { + builtins_test::fuzz_2(10000, |left: super::int_ty!($bytes), mut right| { let orig_right = right; assert_eq!( unsafe { compiler_builtins::aarch64_linux::$name::$name(left, &mut right) }, @@ -69,7 +69,7 @@ macro_rules! test_op { ($_ordering:ident, $bytes:tt, $name:ident) => { #[test] fn $name() { - testcrate::fuzz_2(10000, |old, val| { + builtins_test::fuzz_2(10000, |old, val| { let mut target = old; let op: fn(super::int_ty!($bytes), super::int_ty!($bytes)) -> _ = $($op)*; let expected = op(old, val); diff --git a/testcrate/tests/mem.rs b/builtins-test/tests/mem.rs similarity index 100% rename from testcrate/tests/mem.rs rename to builtins-test/tests/mem.rs diff --git a/testcrate/tests/misc.rs b/builtins-test/tests/misc.rs similarity index 99% rename from testcrate/tests/misc.rs rename to builtins-test/tests/misc.rs index edbd3684d..b8c75c026 100644 --- a/testcrate/tests/misc.rs +++ b/builtins-test/tests/misc.rs @@ -1,7 +1,7 @@ // makes configuration easier #![allow(unused_macros)] -use testcrate::*; +use builtins_test::*; /// Make sure that the the edge case tester and randomized tester don't break, and list examples of /// fuzz values for documentation purposes. 
diff --git a/testcrate/tests/mul.rs b/builtins-test/tests/mul.rs similarity index 99% rename from testcrate/tests/mul.rs rename to builtins-test/tests/mul.rs index 2113b177d..198cacb34 100644 --- a/testcrate/tests/mul.rs +++ b/builtins-test/tests/mul.rs @@ -1,7 +1,7 @@ #![allow(unused_macros)] #![cfg_attr(f128_enabled, feature(f128))] -use testcrate::*; +use builtins_test::*; mod int_mul { use super::*; diff --git a/testcrate/tests/shift.rs b/builtins-test/tests/shift.rs similarity index 97% rename from testcrate/tests/shift.rs rename to builtins-test/tests/shift.rs index 23e3395ed..0f2483855 100644 --- a/testcrate/tests/shift.rs +++ b/builtins-test/tests/shift.rs @@ -1,4 +1,4 @@ -use testcrate::*; +use builtins_test::*; macro_rules! shift { ($($i:ty, $fn_std:ident, $fn_builtins:ident);*;) => { diff --git a/ci/miri.sh b/ci/miri.sh index f9a1240a4..79e660bab 100755 --- a/ci/miri.sh +++ b/ci/miri.sh @@ -12,5 +12,5 @@ TARGETS=(x86_64-unknown-linux-gnu s390x-unknown-linux-gnu) for TARGET in "${TARGETS[@]}"; do # Only run the `mem` tests to avoid this taking too long. 
- cargo miri test --manifest-path testcrate/Cargo.toml --features no-asm --target $TARGET -- mem + cargo miri test --manifest-path builtins-test/Cargo.toml --features no-asm --target $TARGET -- mem done diff --git a/ci/run.sh b/ci/run.sh index 9abbf25a7..8dcb139d4 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -23,7 +23,7 @@ fi if [ "${NO_STD:-}" = "1" ]; then echo "nothing to do for no_std" else - run="cargo test --package testcrate --no-fail-fast --target $target" + run="cargo test --package builtins-test --no-fail-fast --target $target" $run $run --release $run --features c @@ -37,8 +37,8 @@ else fi if [ "${TEST_VERBATIM:-}" = "1" ]; then - verb_path=$(cmd.exe //C echo \\\\?\\%cd%\\testcrate\\target2) - cargo build --package testcrate \ + verb_path=$(cmd.exe //C echo \\\\?\\%cd%\\builtins-test\\target2) + cargo build --package builtins-test \ --target "$target" --target-dir "$verb_path" --features c fi diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index eb5b2b9cc..3151546ab 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -68,7 +68,7 @@ mangled-names = [] rustc-dep-of-std = ['compiler-builtins', 'core'] # This makes certain traits and function specializations public that -# are not normally public but are required by the `testcrate` +# are not normally public but are required by the `builtins-test` public-test-deps = [] [lints.rust] diff --git a/compiler-builtins/configure.rs b/compiler-builtins/configure.rs index 4be0b3ca2..d825f35a9 100644 --- a/compiler-builtins/configure.rs +++ b/compiler-builtins/configure.rs @@ -1,4 +1,4 @@ -// Configuration that is shared between `compiler_builtins` and `testcrate`. +// Configuration that is shared between `compiler_builtins` and `builtins_test`. 
use std::env; diff --git a/compiler-builtins/src/int/traits.rs b/compiler-builtins/src/int/traits.rs index 9b079e2aa..152cb2eee 100644 --- a/compiler-builtins/src/int/traits.rs +++ b/compiler-builtins/src/int/traits.rs @@ -48,8 +48,8 @@ pub trait Int: + ops::BitAnd { /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing - /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, - /// 112,119,120,125,126,127]. + /// in `builtins-test`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96, + /// 111,112,119,120,125,126,127]. const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(::BITS); /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. From 4f2dde87586ebc11497f8164f2df19b1cd812ed3 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 19 Apr 2025 03:16:02 +0000 Subject: [PATCH 1282/1459] ci: Make CI configuration more similar to `libm` Apply a handful of changes to reduce the diff between the two: * Cancel running jobs on new pushes * Enable log color and backtraces * Add timeouts * Specify CI runner versions * Add an armv7 job * Replace the name NO_STD with BUILD_ONLY * Update the extension to the canonical .yaml * Set AR_ and CC_ environments in docker * Install requirements to build MPFR --- .github/workflows/{main.yml => main.yaml} | 173 ++++++++---------- .../workflows/{publish.yml => publish.yaml} | 0 .../aarch64-unknown-linux-gnu/Dockerfile | 9 +- .../arm-unknown-linux-gnueabi/Dockerfile | 7 +- .../arm-unknown-linux-gnueabihf/Dockerfile | 7 +- .../armv7-unknown-linux-gnueabihf/Dockerfile | 7 +- ci/docker/i586-unknown-linux-gnu/Dockerfile | 3 +- ci/docker/i686-unknown-linux-gnu/Dockerfile | 3 +- .../loongarch64-unknown-linux-gnu/Dockerfile | 1 + ci/docker/mips-unknown-linux-gnu/Dockerfile | 5 +- .../mips64-unknown-linux-gnuabi64/Dockerfile | 8 +- .../Dockerfile | 8 +- ci/docker/mipsel-unknown-linux-gnu/Dockerfile | 5 +- 
.../powerpc-unknown-linux-gnu/Dockerfile | 5 +- .../powerpc64-unknown-linux-gnu/Dockerfile | 6 +- .../powerpc64le-unknown-linux-gnu/Dockerfile | 5 +- .../riscv64gc-unknown-linux-gnu/Dockerfile | 5 +- ci/docker/thumbv6m-none-eabi/Dockerfile | 3 +- ci/docker/thumbv7em-none-eabi/Dockerfile | 3 +- ci/docker/thumbv7em-none-eabihf/Dockerfile | 3 +- ci/docker/thumbv7m-none-eabi/Dockerfile | 3 +- ci/docker/wasm32-unknown-unknown/Dockerfile | 1 + ci/docker/x86_64-unknown-linux-gnu/Dockerfile | 3 +- ci/run.sh | 7 +- 24 files changed, 161 insertions(+), 119 deletions(-) rename .github/workflows/{main.yml => main.yaml} (62%) rename .github/workflows/{publish.yml => publish.yaml} (100%) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yaml similarity index 62% rename from .github/workflows/main.yml rename to .github/workflows/main.yaml index 003102d59..eec747a24 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yaml @@ -1,105 +1,85 @@ name: CI -on: [push, pull_request] +on: + push: { branches: [master] } + pull_request: + +concurrency: + # Make sure that new pushes cancel running jobs + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true env: + CARGO_TERM_COLOR: always RUSTDOCFLAGS: -Dwarnings RUSTFLAGS: -Dwarnings + RUST_BACKTRACE: full jobs: test: - name: Test - runs-on: ${{ matrix.os }} + name: Build and test + timeout-minutes: 60 strategy: fail-fast: false matrix: include: - target: aarch64-apple-darwin - os: macos-latest - rust: nightly + os: macos-15 - target: aarch64-unknown-linux-gnu - os: ubuntu-latest - rust: nightly + os: ubuntu-24.04-arm - target: aarch64-pc-windows-msvc - os: windows-latest - rust: nightly + os: windows-2025 test_verbatim: 1 - no_std: 1 + build_only: 1 - target: arm-unknown-linux-gnueabi - os: ubuntu-latest - rust: nightly + os: ubuntu-24.04 - target: arm-unknown-linux-gnueabihf - os: ubuntu-latest - rust: nightly + os: ubuntu-24.04 + - target: 
armv7-unknown-linux-gnueabihf + os: ubuntu-24.04 - target: i586-unknown-linux-gnu - os: ubuntu-latest - rust: nightly + os: ubuntu-24.04 - target: i686-unknown-linux-gnu - os: ubuntu-latest - rust: nightly + os: ubuntu-24.04 - target: loongarch64-unknown-linux-gnu - os: ubuntu-latest - rust: nightly - # MIPS targets disabled since they are dropped to tier 3. - # See https://github.com/rust-lang/compiler-team/issues/648 - #- target: mips-unknown-linux-gnu - # os: ubuntu-latest - # rust: nightly - #- target: mips64-unknown-linux-gnuabi64 - # os: ubuntu-latest - # rust: nightly - #- target: mips64el-unknown-linux-gnuabi64 - # os: ubuntu-latest - # rust: nightly - #- target: mipsel-unknown-linux-gnu - # os: ubuntu-latest - # rust: nightly + os: ubuntu-24.04 - target: powerpc-unknown-linux-gnu - os: ubuntu-latest - rust: nightly + os: ubuntu-24.04 - target: powerpc64-unknown-linux-gnu - os: ubuntu-latest - rust: nightly + os: ubuntu-24.04 - target: powerpc64le-unknown-linux-gnu - os: ubuntu-latest - rust: nightly + os: ubuntu-24.04 - target: riscv64gc-unknown-linux-gnu - os: ubuntu-latest - rust: nightly + os: ubuntu-24.04 - target: thumbv6m-none-eabi - os: ubuntu-latest - rust: nightly + os: ubuntu-24.04 - target: thumbv7em-none-eabi - os: ubuntu-latest - rust: nightly + os: ubuntu-24.04 - target: thumbv7em-none-eabihf - os: ubuntu-latest - rust: nightly + os: ubuntu-24.04 - target: thumbv7m-none-eabi - os: ubuntu-latest - rust: nightly + os: ubuntu-24.04 - target: wasm32-unknown-unknown - os: ubuntu-latest - rust: nightly + os: ubuntu-24.04 - target: x86_64-unknown-linux-gnu - os: ubuntu-latest - rust: nightly + os: ubuntu-24.04 - target: x86_64-apple-darwin os: macos-13 - rust: nightly - target: i686-pc-windows-msvc - os: windows-latest - rust: nightly + os: windows-2025 test_verbatim: 1 - target: x86_64-pc-windows-msvc - os: windows-latest - rust: nightly + os: windows-2025 test_verbatim: 1 - target: i686-pc-windows-gnu - os: windows-latest - rust: nightly-i686-gnu 
+ os: windows-2025 + channel: nightly-i686-gnu - target: x86_64-pc-windows-gnu - os: windows-latest - rust: nightly-x86_64-gnu + os: windows-2025 + channel: nightly-x86_64-gnu + runs-on: ${{ matrix.os }} + env: + BUILD_ONLY: ${{ matrix.build_only }} + TEST_VERBATIM: ${{ matrix.test_verbatim }} steps: - name: Print runner information run: uname -a @@ -107,16 +87,21 @@ jobs: with: submodules: true - name: Install Rust (rustup) - run: rustup update ${{ matrix.rust }} --no-self-update && rustup default ${{ matrix.rust }} shell: bash - - run: rustup target add ${{ matrix.target }} - - run: rustup component add llvm-tools-preview + run: | + channel="nightly" + # Account for channels that have required components (MinGW) + [ -n "${{ matrix.channel }}" ] && channel="${{ matrix.channel }}" + rustup update "$channel" --no-self-update + rustup default "$channel" + rustup target add "${{ matrix.target }}" + rustup component add llvm-tools-preview - uses: Swatinem/rust-cache@v2 with: key: ${{ matrix.target }} - name: Cache Docker layers uses: actions/cache@v4 - if: matrix.os == 'ubuntu-latest' + if: matrix.os == 'ubuntu-24.04' with: path: /tmp/.buildx-cache key: ${{ matrix.target }}-buildx-${{ github.sha }} @@ -136,33 +121,49 @@ jobs: shell: bash # Non-linux tests just use our raw script - - run: ./ci/run.sh ${{ matrix.target }} - if: matrix.os != 'ubuntu-latest' + - name: Run locally + if: matrix.os != 'ubuntu-24.04' shell: bash - env: - NO_STD: ${{ matrix.no_std }} - TEST_VERBATIM: ${{ matrix.test_verbatim }} + run: ./ci/run.sh ${{ matrix.target }} # Configure buildx to use Docker layer caching - uses: docker/setup-buildx-action@v3 - if: matrix.os == 'ubuntu-latest' + if: matrix.os == 'ubuntu-24.04' # Otherwise we use our docker containers to run builds - - run: cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} - if: matrix.os == 'ubuntu-latest' + - name: Run in Docker + if: matrix.os == 'ubuntu-24.04' + run: cargo generate-lockfile && ./ci/run-docker.sh ${{ 
matrix.target }} # Workaround to keep Docker cache smaller # https://github.com/docker/build-push-action/issues/252 # https://github.com/moby/buildkit/issues/1896 - name: Move Docker cache - if: matrix.os == 'ubuntu-latest' + if: matrix.os == 'ubuntu-24.04' run: | rm -rf /tmp/.buildx-cache mv /tmp/.buildx-cache-new /tmp/.buildx-cache + clippy: + name: Clippy + runs-on: ubuntu-24.04 + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + with: + submodules: true + # Unlike rustfmt, stable clippy does not work on code with nightly features. + - name: Install nightly `clippy` + run: | + rustup set profile minimal + rustup default nightly + rustup component add clippy + - uses: Swatinem/rust-cache@v2 + - run: cargo clippy -- -D clippy::all + miri: name: Miri - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 with: @@ -177,7 +178,7 @@ jobs: rustfmt: name: Rustfmt - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 with: @@ -186,27 +187,13 @@ jobs: run: rustup set profile minimal && rustup default stable && rustup component add rustfmt - run: cargo fmt -- --check - clippy: - name: Clippy - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - submodules: true - # Unlike rustfmt, stable clippy does not work on code with nightly features. - - name: Install nightly `clippy` - run: | - rustup set profile minimal && rustup default nightly && rustup component add clippy - - uses: Swatinem/rust-cache@v2 - - run: cargo clippy -- -D clippy::all - success: needs: - test - rustfmt - clippy - miri - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency # failed" as success. So we have to do some contortions to ensure the job fails if any of its # dependencies fails. 
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yaml similarity index 100% rename from .github/workflows/publish.yml rename to .github/workflows/publish.yaml diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index 1aef14a96..df71804ba 100644 --- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -1,11 +1,16 @@ ARG IMAGE=ubuntu:24.04 FROM $IMAGE + RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ - gcc-aarch64-linux-gnu libc6-dev-arm64-cross \ + gcc-aarch64-linux-gnu m4 make libc6-dev-arm64-cross \ qemu-user-static -ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ + +ENV TOOLCHAIN_PREFIX=aarch64-linux-gnu- +ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-aarch64-static \ + AR_aarch64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_aarch64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/aarch64-linux-gnu \ RUST_TEST_THREADS=1 diff --git a/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/ci/docker/arm-unknown-linux-gnueabi/Dockerfile index fc9803777..38ad1a136 100644 --- a/ci/docker/arm-unknown-linux-gnueabi/Dockerfile +++ b/ci/docker/arm-unknown-linux-gnueabi/Dockerfile @@ -1,10 +1,15 @@ ARG IMAGE=ubuntu:24.04 FROM $IMAGE + RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ gcc-arm-linux-gnueabi libc6-dev-armel-cross qemu-user-static -ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc \ + +ENV TOOLCHAIN_PREFIX=arm-linux-gnueabi- +ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_RUNNER=qemu-arm-static \ + AR_arm_unknown_linux_gnueabi="$TOOLCHAIN_PREFIX"ar \ + CC_arm_unknown_linux_gnueabi="$TOOLCHAIN_PREFIX"gcc \ 
QEMU_LD_PREFIX=/usr/arm-linux-gnueabi \ RUST_TEST_THREADS=1 diff --git a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile index a127f67cb..ffead05d5 100644 --- a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile +++ b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile @@ -1,10 +1,15 @@ ARG IMAGE=ubuntu:24.04 FROM $IMAGE + RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ gcc-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-user-static -ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ + +ENV TOOLCHAIN_PREFIX=arm-linux-gnueabihf- +ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER=qemu-arm-static \ + AR_arm_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"ar \ + CC_arm_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf \ RUST_TEST_THREADS=1 diff --git a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile index 67a3e51a9..9ab49e46e 100644 --- a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +++ b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile @@ -1,10 +1,15 @@ ARG IMAGE=ubuntu:24.04 FROM $IMAGE + RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ gcc-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-user-static -ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ + +ENV TOOLCHAIN_PREFIX=arm-linux-gnueabihf- +ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER=qemu-arm-static \ + AR_armv7_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"ar \ + CC_armv7_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf \ RUST_TEST_THREADS=1 diff --git a/ci/docker/i586-unknown-linux-gnu/Dockerfile 
b/ci/docker/i586-unknown-linux-gnu/Dockerfile index 15285d9bb..d12ced325 100644 --- a/ci/docker/i586-unknown-linux-gnu/Dockerfile +++ b/ci/docker/i586-unknown-linux-gnu/Dockerfile @@ -1,5 +1,6 @@ ARG IMAGE=ubuntu:24.04 FROM $IMAGE + RUN apt-get update && \ apt-get install -y --no-install-recommends \ - gcc-multilib libc6-dev ca-certificates + gcc-multilib m4 make libc6-dev ca-certificates diff --git a/ci/docker/i686-unknown-linux-gnu/Dockerfile b/ci/docker/i686-unknown-linux-gnu/Dockerfile index 15285d9bb..d12ced325 100644 --- a/ci/docker/i686-unknown-linux-gnu/Dockerfile +++ b/ci/docker/i686-unknown-linux-gnu/Dockerfile @@ -1,5 +1,6 @@ ARG IMAGE=ubuntu:24.04 FROM $IMAGE + RUN apt-get update && \ apt-get install -y --no-install-recommends \ - gcc-multilib libc6-dev ca-certificates + gcc-multilib m4 make libc6-dev ca-certificates diff --git a/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile b/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile index 5107d20a2..62b43da9e 100644 --- a/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile @@ -8,6 +8,7 @@ RUN apt-get update && \ ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc-14 \ CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-loongarch64-static \ + AR_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-ar \ CC_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-gcc-14 \ QEMU_LD_PREFIX=/usr/loongarch64-linux-gnu \ RUST_TEST_THREADS=1 diff --git a/ci/docker/mips-unknown-linux-gnu/Dockerfile b/ci/docker/mips-unknown-linux-gnu/Dockerfile index a47dd9f19..c02a94672 100644 --- a/ci/docker/mips-unknown-linux-gnu/Dockerfile +++ b/ci/docker/mips-unknown-linux-gnu/Dockerfile @@ -7,7 +7,10 @@ RUN apt-get update && \ gcc-mips-linux-gnu libc6-dev-mips-cross \ binfmt-support qemu-user-static qemu-system-mips -ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \ +ENV TOOLCHAIN_PREFIX=mips-linux-gnu- +ENV 
CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER=qemu-mips-static \ + AR_mips_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_mips_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/mips-linux-gnu \ RUST_TEST_THREADS=1 diff --git a/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile index 688aa1ab2..6d8b96069 100644 --- a/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile +++ b/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile @@ -1,5 +1,6 @@ ARG IMAGE=ubuntu:24.04 FROM $IMAGE + RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates \ @@ -9,8 +10,11 @@ RUN apt-get update && \ libc6-dev-mips64-cross \ qemu-user-static \ qemu-system-mips -ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \ + +ENV TOOLCHAIN_PREFIX=mips64-linux-gnuabi64- +ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER=qemu-mips64-static \ - CC_mips64_unknown_linux_gnuabi64=mips64-linux-gnuabi64-gcc \ + AR_mips64_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"ar \ + CC_mips64_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/mips64-linux-gnuabi64 \ RUST_TEST_THREADS=1 diff --git a/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile index 27d032a14..7e6ac7c3b 100644 --- a/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile +++ b/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile @@ -1,5 +1,6 @@ ARG IMAGE=ubuntu:24.04 FROM $IMAGE + RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates \ @@ -8,8 +9,11 @@ RUN apt-get update && \ libc6-dev \ libc6-dev-mips64el-cross \ qemu-user-static -ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc \ + +ENV TOOLCHAIN_PREFIX=mips64el-linux-gnuabi64- +ENV 
CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER=qemu-mips64el-static \ - CC_mips64el_unknown_linux_gnuabi64=mips64el-linux-gnuabi64-gcc \ + AR_mips64el_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"ar \ + CC_mips64el_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/mips64el-linux-gnuabi64 \ RUST_TEST_THREADS=1 diff --git a/ci/docker/mipsel-unknown-linux-gnu/Dockerfile b/ci/docker/mipsel-unknown-linux-gnu/Dockerfile index 4d18a6edb..9feadc7b5 100644 --- a/ci/docker/mipsel-unknown-linux-gnu/Dockerfile +++ b/ci/docker/mipsel-unknown-linux-gnu/Dockerfile @@ -7,7 +7,10 @@ RUN apt-get update && \ gcc-mipsel-linux-gnu libc6-dev-mipsel-cross \ binfmt-support qemu-user-static -ENV CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_GNU_LINKER=mipsel-linux-gnu-gcc \ +ENV TOOLCHAIN_PREFIX=mipsel-linux-gnu- +ENV CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_GNU_RUNNER=qemu-mipsel-static \ + AR_mipsel_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_mipsel_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/mipsel-linux-gnu \ RUST_TEST_THREADS=1 diff --git a/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc-unknown-linux-gnu/Dockerfile index 5225b833c..84dcaf47e 100644 --- a/ci/docker/powerpc-unknown-linux-gnu/Dockerfile +++ b/ci/docker/powerpc-unknown-linux-gnu/Dockerfile @@ -7,7 +7,10 @@ RUN apt-get update && \ gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \ qemu-system-ppc -ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \ +ENV TOOLCHAIN_PREFIX=powerpc-linux-gnu- +ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc-static \ + AR_powerpc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_powerpc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/powerpc-linux-gnu \ RUST_TEST_THREADS=1 diff --git 
a/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile index cbd78eac4..b90fd5ec5 100644 --- a/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile @@ -7,8 +7,10 @@ RUN apt-get update && \ gcc-powerpc64-linux-gnu libc6-dev-ppc64-cross \ binfmt-support qemu-user-static qemu-system-ppc -ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc \ +ENV TOOLCHAIN_PREFIX=powerpc64-linux-gnu- +ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64-static \ - CC_powerpc64_unknown_linux_gnu=powerpc64-linux-gnu-gcc \ + AR_powerpc64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_powerpc64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/powerpc64-linux-gnu \ RUST_TEST_THREADS=1 diff --git a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile index bad064297..e6d1d1cd0 100644 --- a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile +++ b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile @@ -7,8 +7,11 @@ RUN apt-get update && \ gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \ qemu-system-ppc -ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc \ +ENV TOOLCHAIN_PREFIX=powerpc64le-linux-gnu- +ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64le-static \ + AR_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ QEMU_CPU=POWER8 \ QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu \ RUST_TEST_THREADS=1 diff --git a/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile b/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile index 4d4a194fd..eeb4ed019 100644 --- a/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile +++ b/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile 
@@ -7,7 +7,10 @@ RUN apt-get update && \ gcc-riscv64-linux-gnu libc6-dev-riscv64-cross \ qemu-system-riscv64 -ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER=riscv64-linux-gnu-gcc \ +ENV TOOLCHAIN_PREFIX=riscv64-linux-gnu- +ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64-static \ + AR_riscv64gc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_riscv64gc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ QEMU_LD_PREFIX=/usr/riscv64-linux-gnu \ RUST_TEST_THREADS=1 diff --git a/ci/docker/thumbv6m-none-eabi/Dockerfile b/ci/docker/thumbv6m-none-eabi/Dockerfile index f966b2b9f..ad0d4351e 100644 --- a/ci/docker/thumbv6m-none-eabi/Dockerfile +++ b/ci/docker/thumbv6m-none-eabi/Dockerfile @@ -1,8 +1,9 @@ ARG IMAGE=ubuntu:24.04 FROM $IMAGE + RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ gcc-arm-none-eabi \ libnewlib-arm-none-eabi -ENV NO_STD=1 +ENV BUILD_ONLY=1 diff --git a/ci/docker/thumbv7em-none-eabi/Dockerfile b/ci/docker/thumbv7em-none-eabi/Dockerfile index f966b2b9f..ad0d4351e 100644 --- a/ci/docker/thumbv7em-none-eabi/Dockerfile +++ b/ci/docker/thumbv7em-none-eabi/Dockerfile @@ -1,8 +1,9 @@ ARG IMAGE=ubuntu:24.04 FROM $IMAGE + RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ gcc-arm-none-eabi \ libnewlib-arm-none-eabi -ENV NO_STD=1 +ENV BUILD_ONLY=1 diff --git a/ci/docker/thumbv7em-none-eabihf/Dockerfile b/ci/docker/thumbv7em-none-eabihf/Dockerfile index f966b2b9f..ad0d4351e 100644 --- a/ci/docker/thumbv7em-none-eabihf/Dockerfile +++ b/ci/docker/thumbv7em-none-eabihf/Dockerfile @@ -1,8 +1,9 @@ ARG IMAGE=ubuntu:24.04 FROM $IMAGE + RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ gcc-arm-none-eabi \ libnewlib-arm-none-eabi -ENV NO_STD=1 +ENV BUILD_ONLY=1 diff --git a/ci/docker/thumbv7m-none-eabi/Dockerfile 
b/ci/docker/thumbv7m-none-eabi/Dockerfile index f966b2b9f..ad0d4351e 100644 --- a/ci/docker/thumbv7m-none-eabi/Dockerfile +++ b/ci/docker/thumbv7m-none-eabi/Dockerfile @@ -1,8 +1,9 @@ ARG IMAGE=ubuntu:24.04 FROM $IMAGE + RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ gcc-arm-none-eabi \ libnewlib-arm-none-eabi -ENV NO_STD=1 +ENV BUILD_ONLY=1 diff --git a/ci/docker/wasm32-unknown-unknown/Dockerfile b/ci/docker/wasm32-unknown-unknown/Dockerfile index 4d12b6ff4..2813d3186 100644 --- a/ci/docker/wasm32-unknown-unknown/Dockerfile +++ b/ci/docker/wasm32-unknown-unknown/Dockerfile @@ -1,5 +1,6 @@ ARG IMAGE=ubuntu:20.04 FROM $IMAGE + RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc clang libc6-dev ca-certificates diff --git a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index 670c24397..c590adcdd 100644 --- a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -1,5 +1,6 @@ ARG IMAGE=ubuntu:24.04 FROM $IMAGE + RUN apt-get update && \ apt-get install -y --no-install-recommends \ - gcc libc6-dev ca-certificates + gcc m4 make libc6-dev ca-certificates diff --git a/ci/run.sh b/ci/run.sh index 8dcb139d4..49cc16286 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -2,9 +2,10 @@ set -eux -target="${1:-}" - export RUST_BACKTRACE="${RUST_BACKTRACE:-full}" +export NEXTEST_STATUS_LEVEL=all + +target="${1:-}" if [ -z "$target" ]; then host_target=$(rustc -vV | awk '/^host/ { print $2 }') @@ -20,7 +21,7 @@ if [ "${USING_CONTAINER_RUSTC:-}" = 1 ]; then fi # Test our implementation -if [ "${NO_STD:-}" = "1" ]; then +if [ "${BUILD_ONLY:-}" = "1" ]; then echo "nothing to do for no_std" else run="cargo test --package builtins-test --no-fail-fast --target $target" From 8ee1aa9114e9bdec0282c970e95cd5743815fd0e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 19 Apr 2025 04:55:59 +0000 Subject: [PATCH 1283/1459] ci: Update 
`ci/run-docker.sh` to match libm Prepare for having the repositories combined by ensuring EMULATED, RUST_BACKTRACE, and CI are set or forwarded as applicable. Also re-indent the file to four spaces and do some reorganization. --- ci/run-docker.sh | 124 ++++++++++++++++++++++++++--------------------- 1 file changed, 69 insertions(+), 55 deletions(-) diff --git a/ci/run-docker.sh b/ci/run-docker.sh index 5e19cf4d0..2c27ab795 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -5,88 +5,102 @@ set -euxo pipefail +host_arch="$(uname -m | sed 's/arm64/aarch64/')" + run() { local target="$1" - echo "TESTING TARGET: $target" + echo "testing target: $target" + + emulated="" + target_arch="$(echo "$target" | cut -d'-' -f1)" + if [ "$target_arch" != "$host_arch" ]; then + emulated=1 + echo "target is emulated" + fi # This directory needs to exist before calling docker, otherwise docker will create it but it # will be owned by root mkdir -p target + run_cmd="HOME=/tmp" + + if [ "${GITHUB_ACTIONS:-}" = "true" ]; then + # Enable Docker image caching on GHA + build_cmd=("buildx" "build") + build_args=( + "--cache-from" "type=local,src=/tmp/.buildx-cache" + "--cache-to" "type=local,dest=/tmp/.buildx-cache-new" + # This is the beautiful bash syntax for expanding an array but neither + # raising an error nor returning an empty string if the array is empty. + "${build_args[@]:+"${build_args[@]}"}" + "--load" + ) + fi + if [ "$(uname -s)" = "Linux" ] && [ -z "${DOCKER_BASE_IMAGE:-}" ]; then - # Share the host rustc and target. Do this only on Linux and if the image - # isn't overridden - run_args=( - --user "$(id -u):$(id -g)" - -e "CARGO_HOME=/cargo" - -v "${HOME}/.cargo:/cargo" - -v "$(pwd)/target:/builtins-target" - -v "$(rustc --print sysroot):/rust:ro" - ) - run_cmd="HOME=/tmp PATH=\$PATH:/rust/bin ci/run.sh $target" + # Share the host rustc and target. 
Do this only on Linux and if the image + # isn't overridden + run_args=( + --user "$(id -u):$(id -g)" + -e "CARGO_HOME=/cargo" + -v "${HOME}/.cargo:/cargo" + -v "$(pwd)/target:/builtins-target" + -v "$(rustc --print sysroot):/rust:ro" + ) + run_cmd="$run_cmd PATH=\$PATH:/rust/bin" else - # Use rustc provided by a docker image - docker volume create compiler-builtins-cache - build_args=( - "--build-arg" "IMAGE=${DOCKER_BASE_IMAGE:-rustlang/rust:nightly}" - ) - run_args=( - -v "compiler-builtins-cache:/builtins-target" - ) - run_cmd="HOME=/tmp USING_CONTAINER_RUSTC=1 ci/run.sh $target" + # Use rustc provided by a docker image + docker volume create compiler-builtins-cache + build_args=( + "--build-arg" "IMAGE=${DOCKER_BASE_IMAGE:-rustlang/rust:nightly}" + ) + run_args=(-v "compiler-builtins-cache:/builtins-target") + run_cmd="$run_cmd HOME=/tmp" "USING_CONTAINER_RUSTC=1" fi if [ -d compiler-rt ]; then - export RUST_COMPILER_RT_ROOT="/checkout/compiler-rt" + export RUST_COMPILER_RT_ROOT="/checkout/compiler-rt" fi - if [ "${GITHUB_ACTIONS:-}" = "true" ]; then - # Enable Docker image caching on GHA - - build_cmd=("buildx" "build") - build_args=( - "--cache-from" "type=local,src=/tmp/.buildx-cache" - "--cache-to" "type=local,dest=/tmp/.buildx-cache-new" - # This is the beautiful bash syntax for expanding an array but neither - # raising an error nor returning an empty string if the array is empty. 
- "${build_args[@]:+"${build_args[@]}"}" - "--load" - ) - fi + run_cmd="$run_cmd ci/run.sh $target" docker "${build_cmd[@]:-build}" \ - -t "builtins-$target" \ - "${build_args[@]:-}" \ - "ci/docker/$target" + -t "builtins-$target" \ + "${build_args[@]:-}" \ + "ci/docker/$target" docker run \ - --rm \ - -e RUST_COMPILER_RT_ROOT \ - -e RUSTFLAGS \ - -e "CARGO_TARGET_DIR=/builtins-target" \ - -v "$(pwd):/checkout:ro" \ - -w /checkout \ - "${run_args[@]:-}" \ - --init \ - "builtins-$target" \ - sh -c "$run_cmd" + --rm \ + -e CI \ + -e CARGO_TARGET_DIR=/builtins-target \ + -e CARGO_TERM_COLOR \ + -e RUSTFLAGS \ + -e RUST_BACKTRACE \ + -e RUST_COMPILER_RT_ROOT \ + -e "EMULATED=$emulated" \ + -v "$(pwd):/checkout:ro" \ + -w /checkout \ + "${run_args[@]:-}" \ + --init \ + "builtins-$target" \ + sh -c "$run_cmd" } if [ "${1:-}" = "--help" ] || [ "$#" -gt 1 ]; then - set +x - echo "\ + set +x + echo "\ usage: ./ci/run-docker.sh [target] you can also set DOCKER_BASE_IMAGE to use something other than the default ubuntu:24.04 (or rustlang/rust:nightly). 
- " - exit + " + exit fi if [ -z "${1:-}" ]; then - for d in ci/docker/*; do - run $(basename "$d") - done + for d in ci/docker/*; do + run $(basename "$d") + done else - run "$1" + run "$1" fi From cf4fd0290b67b45149f17d4ca6f46dc51a061dfc Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Apr 2025 23:13:44 +0000 Subject: [PATCH 1284/1459] Remove the libm submodule --- .gitmodules | 3 --- compiler-builtins/libm | 1 - 2 files changed, 4 deletions(-) delete mode 160000 compiler-builtins/libm diff --git a/.gitmodules b/.gitmodules index a0b0d021d..e69de29bb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "compiler-builtins/libm"] - path = compiler-builtins/libm - url = https://github.com/rust-lang/libm.git diff --git a/compiler-builtins/libm b/compiler-builtins/libm deleted file mode 160000 index 96d140032..000000000 --- a/compiler-builtins/libm +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 96d1400326f47381858f8149451a2b2fd8de2ea4 From b66086f9eb49d29e6ff25ef5223f6530b78ee085 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Apr 2025 23:42:14 +0000 Subject: [PATCH 1285/1459] Update submodules after the `libm` merge --- .gitmodules | 4 ++++ libm/.gitmodules | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) delete mode 100644 libm/.gitmodules diff --git a/.gitmodules b/.gitmodules index e69de29bb..ee941a47e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "musl"] + path = libm/crates/musl-math-sys/musl + url = https://git.musl-libc.org/git/musl + shallow = true diff --git a/libm/.gitmodules b/libm/.gitmodules deleted file mode 100644 index 35b269ead..000000000 --- a/libm/.gitmodules +++ /dev/null @@ -1,4 +0,0 @@ -[submodule "musl"] - path = crates/musl-math-sys/musl - url = https://git.musl-libc.org/git/musl - shallow = true From 0e4ba2a9b13cb55cbef87b1bd03be3b392414376 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Apr 2025 23:44:45 +0000 Subject: [PATCH 1286/1459] Update path to libm after 
the merge --- compiler-builtins/src/math.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-builtins/src/math.rs b/compiler-builtins/src/math.rs index ccd9c5421..bd52a749e 100644 --- a/compiler-builtins/src/math.rs +++ b/compiler-builtins/src/math.rs @@ -2,7 +2,7 @@ #[allow(dead_code)] #[allow(unused_imports)] #[allow(clippy::all)] -#[path = "../libm/src/math/mod.rs"] +#[path = "../../libm/libm/src/math/mod.rs"] pub(crate) mod libm; macro_rules! libm_intrinsics { From 52e748b7537688e565d0c0cabb064cc966aac8c4 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Apr 2025 23:48:27 +0000 Subject: [PATCH 1287/1459] Update .git-blame-ignore-revs after the libm merge --- libm/.git-blame-ignore-revs => .git-blame-ignore-revs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) rename libm/.git-blame-ignore-revs => .git-blame-ignore-revs (58%) diff --git a/libm/.git-blame-ignore-revs b/.git-blame-ignore-revs similarity index 58% rename from libm/.git-blame-ignore-revs rename to .git-blame-ignore-revs index c1e43134f..2ede10da5 100644 --- a/libm/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -2,4 +2,5 @@ # `git blame` ignore the following commits. # Reformat with a new `.rustfmt.toml` -5882cabb83c30bf7c36023f9a55a80583636b0e8 +# In rust-lang/libm this was 5882cabb83c30bf7c36023f9a55a80583636b0e8 +4bb07a6275cc628ef81c65ac971dc6479963322f From 18f4821a645f87e581fc7dbd8429c9d0df1e60ce Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 19 Apr 2025 20:44:21 +0000 Subject: [PATCH 1288/1459] libm: Remove compiler-builtins-smoke-test Since `libm` is now part of the `compiler-builtins` repo, the crate to test that they work together is no longer needed. 
--- .../compiler-builtins-smoke-test/Cargo.toml | 38 ---- .../compiler-builtins-smoke-test/build.rs | 8 - .../compiler-builtins-smoke-test/src/lib.rs | 17 -- .../compiler-builtins-smoke-test/src/math.rs | 182 ------------------ 4 files changed, 245 deletions(-) delete mode 100644 libm/crates/compiler-builtins-smoke-test/Cargo.toml delete mode 100644 libm/crates/compiler-builtins-smoke-test/build.rs delete mode 100644 libm/crates/compiler-builtins-smoke-test/src/lib.rs delete mode 100644 libm/crates/compiler-builtins-smoke-test/src/math.rs diff --git a/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/libm/crates/compiler-builtins-smoke-test/Cargo.toml deleted file mode 100644 index 38a511669..000000000 --- a/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ /dev/null @@ -1,38 +0,0 @@ -[package] -name = "cb" -version = "0.1.0" -authors = ["Jorge Aparicio "] -edition = "2021" -publish = false - -[lib] -crate-type = ["staticlib"] -test = false -bench = false - -[features] -default = ["arch", "compiler-builtins", "unstable-float"] - -# Copied from `libm`'s root `Cargo.toml`' -arch = [] -compiler-builtins = [] -unstable-float = [] - -[lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = [ - "cfg(arch_enabled)", - "cfg(assert_no_panic)", - "cfg(intrinsics_enabled)", - 'cfg(feature, values("force-soft-floats"))', - 'cfg(feature, values("unstable"))', - 'cfg(feature, values("unstable-intrinsics"))', - 'cfg(feature, values("unstable-public-internals"))', -] } - -[profile.dev] -panic = "abort" - -[profile.release] -panic = "abort" -codegen-units = 1 -lto = "fat" diff --git a/libm/crates/compiler-builtins-smoke-test/build.rs b/libm/crates/compiler-builtins-smoke-test/build.rs deleted file mode 100644 index ef8d613c9..000000000 --- a/libm/crates/compiler-builtins-smoke-test/build.rs +++ /dev/null @@ -1,8 +0,0 @@ -#[path = "../../libm/configure.rs"] -mod configure; - -fn main() { - println!("cargo:rerun-if-changed=../../libm/configure.rs"); - let cfg = 
configure::Config::from_env(); - configure::emit_libm_config(&cfg); -} diff --git a/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/libm/crates/compiler-builtins-smoke-test/src/lib.rs deleted file mode 100644 index e70f6d9e0..000000000 --- a/libm/crates/compiler-builtins-smoke-test/src/lib.rs +++ /dev/null @@ -1,17 +0,0 @@ -//! Fake compiler-builtins crate -//! -//! This is used to test that we can source import `libm` into the compiler-builtins crate. -//! Additionally, it provides a `#[no_mangle]` C API that can be easier to inspect than the -//! default `.rlib`. - -#![compiler_builtins] -#![feature(core_intrinsics)] -#![feature(compiler_builtins)] -#![feature(f16)] -#![feature(f128)] -#![allow(internal_features)] -#![no_std] - -mod math; -// Required for macro paths. -use math::libm::support; diff --git a/libm/crates/compiler-builtins-smoke-test/src/math.rs b/libm/crates/compiler-builtins-smoke-test/src/math.rs deleted file mode 100644 index 58a5bfbb9..000000000 --- a/libm/crates/compiler-builtins-smoke-test/src/math.rs +++ /dev/null @@ -1,182 +0,0 @@ -use core::ffi::c_int; - -#[allow(dead_code)] -#[allow(clippy::all)] // We don't get `libm`'s list of `allow`s, so just ignore Clippy. -#[allow(unused_imports)] -#[path = "../../../libm/src/math/mod.rs"] -pub mod libm; - -/// Mark functions `#[no_mangle]` and with the C ABI. -macro_rules! no_mangle { - ($( $name:ident( $($tt:tt)+ ) -> $ret:ty; )+) => { - $( no_mangle!(@inner $name( $($tt)+ ) -> $ret); )+ - }; - - // Handle simple functions with single return types - (@inner $name:ident( $($arg:ident: $aty:ty),+ ) -> $ret:ty) => { - #[unsafe(no_mangle)] - extern "C" fn $name($($arg: $aty),+) -> $ret { - libm::$name($($arg),+) - } - }; - - - // Functions with `&mut` return values need to be handled differently, use `|` to - // separate inputs vs. outputs. 
- ( - @inner $name:ident( $($arg:ident: $aty:ty),+ | $($rarg:ident: $rty:ty),+) -> $ret:ty - ) => { - #[unsafe(no_mangle)] - extern "C" fn $name($($arg: $aty,)+ $($rarg: $rty),+) -> $ret { - let ret; - (ret, $(*$rarg),+) = libm::$name($($arg),+); - ret - } - }; -} - -no_mangle! { - frexp(x: f64 | y: &mut c_int) -> f64; - frexpf(x: f32 | y: &mut c_int) -> f32; - acos(x: f64) -> f64; - acosf(x: f32) -> f32; - acosh(x: f64) -> f64; - acoshf(x: f32) -> f32; - asin(x: f64) -> f64; - asinf(x: f32) -> f32; - asinh(x: f64) -> f64; - asinhf(x: f32) -> f32; - atan(x: f64) -> f64; - atan2(x: f64, y: f64) -> f64; - atan2f(x: f32, y: f32) -> f32; - atanf(x: f32) -> f32; - atanh(x: f64) -> f64; - atanhf(x: f32) -> f32; - cbrt(x: f64) -> f64; - cbrtf(x: f32) -> f32; - ceil(x: f64) -> f64; - ceilf(x: f32) -> f32; - ceilf128(x: f128) -> f128; - ceilf16(x: f16) -> f16; - copysign(x: f64, y: f64) -> f64; - copysignf(x: f32, y: f32) -> f32; - copysignf128(x: f128, y: f128) -> f128; - copysignf16(x: f16, y: f16) -> f16; - cos(x: f64) -> f64; - cosf(x: f32) -> f32; - cosh(x: f64) -> f64; - coshf(x: f32) -> f32; - erf(x: f64) -> f64; - erfc(x: f64) -> f64; - erfcf(x: f32) -> f32; - erff(x: f32) -> f32; - exp(x: f64) -> f64; - exp10(x: f64) -> f64; - exp10f(x: f32) -> f32; - exp2(x: f64) -> f64; - exp2f(x: f32) -> f32; - expf(x: f32) -> f32; - expm1(x: f64) -> f64; - expm1f(x: f32) -> f32; - fabs(x: f64) -> f64; - fabsf(x: f32) -> f32; - fabsf128(x: f128) -> f128; - fabsf16(x: f16) -> f16; - fdim(x: f64, y: f64) -> f64; - fdimf(x: f32, y: f32) -> f32; - fdimf128(x: f128, y: f128) -> f128; - fdimf16(x: f16, y: f16) -> f16; - floor(x: f64) -> f64; - floorf(x: f32) -> f32; - floorf128(x: f128) -> f128; - floorf16(x: f16) -> f16; - fma(x: f64, y: f64, z: f64) -> f64; - fmaf(x: f32, y: f32, z: f32) -> f32; - fmax(x: f64, y: f64) -> f64; - fmaxf(x: f32, y: f32) -> f32; - fmin(x: f64, y: f64) -> f64; - fminf(x: f32, y: f32) -> f32; - fmod(x: f64, y: f64) -> f64; - fmodf(x: f32, y: f32) -> f32; - 
hypot(x: f64, y: f64) -> f64; - hypotf(x: f32, y: f32) -> f32; - ilogb(x: f64) -> c_int; - ilogbf(x: f32) -> c_int; - j0(x: f64) -> f64; - j0f(x: f32) -> f32; - j1(x: f64) -> f64; - j1f(x: f32) -> f32; - jn(x: c_int, y: f64) -> f64; - jnf(x: c_int, y: f32) -> f32; - ldexp(x: f64, y: c_int) -> f64; - ldexpf(x: f32, y: c_int) -> f32; - lgamma(x: f64) -> f64; - lgamma_r(x: f64 | r: &mut c_int) -> f64; - lgammaf(x: f32) -> f32; - lgammaf_r(x: f32 | r: &mut c_int) -> f32; - log(x: f64) -> f64; - log10(x: f64) -> f64; - log10f(x: f32) -> f32; - log1p(x: f64) -> f64; - log1pf(x: f32) -> f32; - log2(x: f64) -> f64; - log2f(x: f32) -> f32; - logf(x: f32) -> f32; - modf(x: f64 | r: &mut f64) -> f64; - modff(x: f32 | r: &mut f32) -> f32; - nextafter(x: f64, y: f64) -> f64; - nextafterf(x: f32, y: f32) -> f32; - pow(x: f64, y: f64) -> f64; - powf(x: f32, y: f32) -> f32; - remainder(x: f64, y: f64) -> f64; - remainderf(x: f32, y: f32) -> f32; - remquo(x: f64, y: f64 | q: &mut c_int) -> f64; - remquof(x: f32, y: f32 | q: &mut c_int) -> f32; - rint(x: f64) -> f64; - rintf(x: f32) -> f32; - rintf128(x: f128) -> f128; - rintf16(x: f16) -> f16; - round(x: f64) -> f64; - roundf(x: f32) -> f32; - scalbn(x: f64, y: c_int) -> f64; - scalbnf(x: f32, y: c_int) -> f32; - sin(x: f64) -> f64; - sinf(x: f32) -> f32; - sinh(x: f64) -> f64; - sinhf(x: f32) -> f32; - sqrt(x: f64) -> f64; - sqrtf(x: f32) -> f32; - tan(x: f64) -> f64; - tanf(x: f32) -> f32; - tanh(x: f64) -> f64; - tanhf(x: f32) -> f32; - tgamma(x: f64) -> f64; - tgammaf(x: f32) -> f32; - trunc(x: f64) -> f64; - truncf(x: f32) -> f32; - truncf128(x: f128) -> f128; - truncf16(x: f16) -> f16; - y0(x: f64) -> f64; - y0f(x: f32) -> f32; - y1(x: f64) -> f64; - y1f(x: f32) -> f32; - yn(x: c_int, y: f64) -> f64; - ynf(x: c_int, y: f32) -> f32; -} - -/* sincos has no direct return type, not worth handling in the macro */ - -#[unsafe(no_mangle)] -extern "C" fn sincos(x: f64, s: &mut f64, c: &mut f64) { - (*s, *c) = libm::sincos(x); -} - 
-#[unsafe(no_mangle)] -extern "C" fn sincosf(x: f32, s: &mut f32, c: &mut f32) { - (*s, *c) = libm::sincosf(x); -} - -#[panic_handler] -fn panic(_info: &core::panic::PanicInfo) -> ! { - loop {} -} From 8f8dfb5bd9a9be6bd2d67b0f423a2d3bddbbdab3 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 19 Apr 2025 20:58:25 +0000 Subject: [PATCH 1289/1459] libm: Reorganize into compiler-builtins Distribute everything from `libm/` to better locations in the repo. `libm/libm/*` has not moved yet to avoid Git seeing the move as an edit to `Cargo.toml`. Files that remain to be merged somehow are in `etc/libm`. --- .../crates => crates}/libm-macros/Cargo.toml | 0 .../libm-macros/src/enums.rs | 0 .../crates => crates}/libm-macros/src/lib.rs | 0 .../libm-macros/src/parse.rs | 0 .../libm-macros/src/shared.rs | 0 .../libm-macros/tests/basic.rs | 0 .../libm-macros/tests/enum.rs | 0 .../musl-math-sys/Cargo.toml | 0 .../crates => crates}/musl-math-sys/build.rs | 0 .../musl-math-sys/c_patches/alias.c | 0 .../musl-math-sys/c_patches/features.h | 0 .../musl-math-sys/src/lib.rs | 0 {libm/crates => crates}/util/Cargo.toml | 0 {libm/crates => crates}/util/build.rs | 0 {libm/crates => crates}/util/src/main.rs | 0 {libm/etc => etc}/function-definitions.json | 0 {libm/etc => etc}/function-list.txt | 0 {libm => etc/libm}/.editorconfig | 0 .../libm}/.github/workflows/main.yaml | 0 .../libm}/.github/workflows/publish.yaml | 0 {libm => etc/libm}/.gitignore | 0 {libm => etc/libm}/Cargo.toml | 0 {libm => etc/libm}/ci/bench-icount.sh | 0 {libm => etc/libm}/ci/ci-util.py | 0 .../aarch64-unknown-linux-gnu/Dockerfile | 0 .../arm-unknown-linux-gnueabi/Dockerfile | 0 .../arm-unknown-linux-gnueabihf/Dockerfile | 0 .../armv7-unknown-linux-gnueabihf/Dockerfile | 0 .../docker/i586-unknown-linux-gnu/Dockerfile | 0 .../docker/i686-unknown-linux-gnu/Dockerfile | 0 .../loongarch64-unknown-linux-gnu/Dockerfile | 0 .../docker/mips-unknown-linux-gnu/Dockerfile | 0 .../mips64-unknown-linux-gnuabi64/Dockerfile | 0 
.../Dockerfile | 0 .../mipsel-unknown-linux-gnu/Dockerfile | 0 .../powerpc-unknown-linux-gnu/Dockerfile | 0 .../powerpc64-unknown-linux-gnu/Dockerfile | 0 .../powerpc64le-unknown-linux-gnu/Dockerfile | 0 .../riscv64gc-unknown-linux-gnu/Dockerfile | 0 .../ci/docker/thumbv6m-none-eabi/Dockerfile | 0 .../ci/docker/thumbv7em-none-eabi/Dockerfile | 0 .../docker/thumbv7em-none-eabihf/Dockerfile | 0 .../ci/docker/thumbv7m-none-eabi/Dockerfile | 0 .../x86_64-unknown-linux-gnu/Dockerfile | 0 {libm => etc/libm}/ci/run-docker.sh | 0 {libm => etc/libm}/ci/run.sh | 0 {libm/etc => etc}/update-api-list.py | 0 .../crates/libm-test => libm-test}/Cargo.toml | 0 .../libm-test => libm-test}/benches/icount.rs | 0 .../libm-test => libm-test}/benches/random.rs | 0 {libm/crates/libm-test => libm-test}/build.rs | 0 .../examples/plot_domains.rs | 0 .../examples/plot_file.jl | 0 .../libm-test => libm-test}/src/domain.rs | 0 .../libm-test => libm-test}/src/f8_impl.rs | 0 .../libm-test => libm-test}/src/generate.rs | 0 .../src/generate/case_list.rs | 0 .../src/generate/edge_cases.rs | 0 .../src/generate/random.rs | 0 .../src/generate/spaced.rs | 0 .../crates/libm-test => libm-test}/src/lib.rs | 0 .../libm-test => libm-test}/src/mpfloat.rs | 0 .../crates/libm-test => libm-test}/src/num.rs | 0 .../crates/libm-test => libm-test}/src/op.rs | 0 .../libm-test => libm-test}/src/precision.rs | 0 .../libm-test => libm-test}/src/run_cfg.rs | 0 .../src/test_traits.rs | 0 .../tests/check_coverage.rs | 0 .../tests/compare_built_musl.rs | 0 .../tests/multiprecision.rs | 0 .../tests/standalone.rs | 0 .../libm-test => libm-test}/tests/u256.rs | 0 .../tests/z_extensive/main.rs | 0 .../tests/z_extensive/run.rs | 0 libm/LICENSE.txt | 258 ----------------- libm/README.md | 56 ---- libm/crates/musl-math-sys/musl | 1 - libm/libm/LICENSE.txt | 259 +++++++++++++++++- libm/libm/README.md | 57 +++- 79 files changed, 314 insertions(+), 317 deletions(-) rename {libm/crates => crates}/libm-macros/Cargo.toml (100%) rename 
{libm/crates => crates}/libm-macros/src/enums.rs (100%) rename {libm/crates => crates}/libm-macros/src/lib.rs (100%) rename {libm/crates => crates}/libm-macros/src/parse.rs (100%) rename {libm/crates => crates}/libm-macros/src/shared.rs (100%) rename {libm/crates => crates}/libm-macros/tests/basic.rs (100%) rename {libm/crates => crates}/libm-macros/tests/enum.rs (100%) rename {libm/crates => crates}/musl-math-sys/Cargo.toml (100%) rename {libm/crates => crates}/musl-math-sys/build.rs (100%) rename {libm/crates => crates}/musl-math-sys/c_patches/alias.c (100%) rename {libm/crates => crates}/musl-math-sys/c_patches/features.h (100%) rename {libm/crates => crates}/musl-math-sys/src/lib.rs (100%) rename {libm/crates => crates}/util/Cargo.toml (100%) rename {libm/crates => crates}/util/build.rs (100%) rename {libm/crates => crates}/util/src/main.rs (100%) rename {libm/etc => etc}/function-definitions.json (100%) rename {libm/etc => etc}/function-list.txt (100%) rename {libm => etc/libm}/.editorconfig (100%) rename {libm => etc/libm}/.github/workflows/main.yaml (100%) rename {libm => etc/libm}/.github/workflows/publish.yaml (100%) rename {libm => etc/libm}/.gitignore (100%) rename {libm => etc/libm}/Cargo.toml (100%) rename {libm => etc/libm}/ci/bench-icount.sh (100%) rename {libm => etc/libm}/ci/ci-util.py (100%) rename {libm => etc/libm}/ci/docker/aarch64-unknown-linux-gnu/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/arm-unknown-linux-gnueabi/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/i586-unknown-linux-gnu/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/i686-unknown-linux-gnu/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/mips-unknown-linux-gnu/Dockerfile (100%) rename {libm => 
etc/libm}/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/mipsel-unknown-linux-gnu/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/powerpc-unknown-linux-gnu/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/thumbv6m-none-eabi/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/thumbv7em-none-eabi/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/thumbv7em-none-eabihf/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/thumbv7m-none-eabi/Dockerfile (100%) rename {libm => etc/libm}/ci/docker/x86_64-unknown-linux-gnu/Dockerfile (100%) rename {libm => etc/libm}/ci/run-docker.sh (100%) rename {libm => etc/libm}/ci/run.sh (100%) rename {libm/etc => etc}/update-api-list.py (100%) rename {libm/crates/libm-test => libm-test}/Cargo.toml (100%) rename {libm/crates/libm-test => libm-test}/benches/icount.rs (100%) rename {libm/crates/libm-test => libm-test}/benches/random.rs (100%) rename {libm/crates/libm-test => libm-test}/build.rs (100%) rename {libm/crates/libm-test => libm-test}/examples/plot_domains.rs (100%) rename {libm/crates/libm-test => libm-test}/examples/plot_file.jl (100%) rename {libm/crates/libm-test => libm-test}/src/domain.rs (100%) rename {libm/crates/libm-test => libm-test}/src/f8_impl.rs (100%) rename {libm/crates/libm-test => libm-test}/src/generate.rs (100%) rename {libm/crates/libm-test => libm-test}/src/generate/case_list.rs (100%) rename {libm/crates/libm-test => libm-test}/src/generate/edge_cases.rs (100%) rename {libm/crates/libm-test => libm-test}/src/generate/random.rs (100%) rename {libm/crates/libm-test => libm-test}/src/generate/spaced.rs (100%) rename 
{libm/crates/libm-test => libm-test}/src/lib.rs (100%) rename {libm/crates/libm-test => libm-test}/src/mpfloat.rs (100%) rename {libm/crates/libm-test => libm-test}/src/num.rs (100%) rename {libm/crates/libm-test => libm-test}/src/op.rs (100%) rename {libm/crates/libm-test => libm-test}/src/precision.rs (100%) rename {libm/crates/libm-test => libm-test}/src/run_cfg.rs (100%) rename {libm/crates/libm-test => libm-test}/src/test_traits.rs (100%) rename {libm/crates/libm-test => libm-test}/tests/check_coverage.rs (100%) rename {libm/crates/libm-test => libm-test}/tests/compare_built_musl.rs (100%) rename {libm/crates/libm-test => libm-test}/tests/multiprecision.rs (100%) rename {libm/crates/libm-test => libm-test}/tests/standalone.rs (100%) rename {libm/crates/libm-test => libm-test}/tests/u256.rs (100%) rename {libm/crates/libm-test => libm-test}/tests/z_extensive/main.rs (100%) rename {libm/crates/libm-test => libm-test}/tests/z_extensive/run.rs (100%) delete mode 100644 libm/LICENSE.txt delete mode 100644 libm/README.md delete mode 160000 libm/crates/musl-math-sys/musl mode change 120000 => 100644 libm/libm/LICENSE.txt mode change 120000 => 100644 libm/libm/README.md diff --git a/libm/crates/libm-macros/Cargo.toml b/crates/libm-macros/Cargo.toml similarity index 100% rename from libm/crates/libm-macros/Cargo.toml rename to crates/libm-macros/Cargo.toml diff --git a/libm/crates/libm-macros/src/enums.rs b/crates/libm-macros/src/enums.rs similarity index 100% rename from libm/crates/libm-macros/src/enums.rs rename to crates/libm-macros/src/enums.rs diff --git a/libm/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs similarity index 100% rename from libm/crates/libm-macros/src/lib.rs rename to crates/libm-macros/src/lib.rs diff --git a/libm/crates/libm-macros/src/parse.rs b/crates/libm-macros/src/parse.rs similarity index 100% rename from libm/crates/libm-macros/src/parse.rs rename to crates/libm-macros/src/parse.rs diff --git 
a/libm/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs similarity index 100% rename from libm/crates/libm-macros/src/shared.rs rename to crates/libm-macros/src/shared.rs diff --git a/libm/crates/libm-macros/tests/basic.rs b/crates/libm-macros/tests/basic.rs similarity index 100% rename from libm/crates/libm-macros/tests/basic.rs rename to crates/libm-macros/tests/basic.rs diff --git a/libm/crates/libm-macros/tests/enum.rs b/crates/libm-macros/tests/enum.rs similarity index 100% rename from libm/crates/libm-macros/tests/enum.rs rename to crates/libm-macros/tests/enum.rs diff --git a/libm/crates/musl-math-sys/Cargo.toml b/crates/musl-math-sys/Cargo.toml similarity index 100% rename from libm/crates/musl-math-sys/Cargo.toml rename to crates/musl-math-sys/Cargo.toml diff --git a/libm/crates/musl-math-sys/build.rs b/crates/musl-math-sys/build.rs similarity index 100% rename from libm/crates/musl-math-sys/build.rs rename to crates/musl-math-sys/build.rs diff --git a/libm/crates/musl-math-sys/c_patches/alias.c b/crates/musl-math-sys/c_patches/alias.c similarity index 100% rename from libm/crates/musl-math-sys/c_patches/alias.c rename to crates/musl-math-sys/c_patches/alias.c diff --git a/libm/crates/musl-math-sys/c_patches/features.h b/crates/musl-math-sys/c_patches/features.h similarity index 100% rename from libm/crates/musl-math-sys/c_patches/features.h rename to crates/musl-math-sys/c_patches/features.h diff --git a/libm/crates/musl-math-sys/src/lib.rs b/crates/musl-math-sys/src/lib.rs similarity index 100% rename from libm/crates/musl-math-sys/src/lib.rs rename to crates/musl-math-sys/src/lib.rs diff --git a/libm/crates/util/Cargo.toml b/crates/util/Cargo.toml similarity index 100% rename from libm/crates/util/Cargo.toml rename to crates/util/Cargo.toml diff --git a/libm/crates/util/build.rs b/crates/util/build.rs similarity index 100% rename from libm/crates/util/build.rs rename to crates/util/build.rs diff --git a/libm/crates/util/src/main.rs 
b/crates/util/src/main.rs similarity index 100% rename from libm/crates/util/src/main.rs rename to crates/util/src/main.rs diff --git a/libm/etc/function-definitions.json b/etc/function-definitions.json similarity index 100% rename from libm/etc/function-definitions.json rename to etc/function-definitions.json diff --git a/libm/etc/function-list.txt b/etc/function-list.txt similarity index 100% rename from libm/etc/function-list.txt rename to etc/function-list.txt diff --git a/libm/.editorconfig b/etc/libm/.editorconfig similarity index 100% rename from libm/.editorconfig rename to etc/libm/.editorconfig diff --git a/libm/.github/workflows/main.yaml b/etc/libm/.github/workflows/main.yaml similarity index 100% rename from libm/.github/workflows/main.yaml rename to etc/libm/.github/workflows/main.yaml diff --git a/libm/.github/workflows/publish.yaml b/etc/libm/.github/workflows/publish.yaml similarity index 100% rename from libm/.github/workflows/publish.yaml rename to etc/libm/.github/workflows/publish.yaml diff --git a/libm/.gitignore b/etc/libm/.gitignore similarity index 100% rename from libm/.gitignore rename to etc/libm/.gitignore diff --git a/libm/Cargo.toml b/etc/libm/Cargo.toml similarity index 100% rename from libm/Cargo.toml rename to etc/libm/Cargo.toml diff --git a/libm/ci/bench-icount.sh b/etc/libm/ci/bench-icount.sh similarity index 100% rename from libm/ci/bench-icount.sh rename to etc/libm/ci/bench-icount.sh diff --git a/libm/ci/ci-util.py b/etc/libm/ci/ci-util.py similarity index 100% rename from libm/ci/ci-util.py rename to etc/libm/ci/ci-util.py diff --git a/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile similarity index 100% rename from libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile rename to etc/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile diff --git a/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/etc/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile similarity 
index 100% rename from libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile rename to etc/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile diff --git a/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/etc/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile similarity index 100% rename from libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile rename to etc/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile diff --git a/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/etc/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile similarity index 100% rename from libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile rename to etc/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile diff --git a/libm/ci/docker/i586-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/i586-unknown-linux-gnu/Dockerfile similarity index 100% rename from libm/ci/docker/i586-unknown-linux-gnu/Dockerfile rename to etc/libm/ci/docker/i586-unknown-linux-gnu/Dockerfile diff --git a/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile similarity index 100% rename from libm/ci/docker/i686-unknown-linux-gnu/Dockerfile rename to etc/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile diff --git a/libm/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile similarity index 100% rename from libm/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile rename to etc/libm/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile diff --git a/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile similarity index 100% rename from libm/ci/docker/mips-unknown-linux-gnu/Dockerfile rename to etc/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile diff --git a/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/etc/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile similarity index 100% rename from 
libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile rename to etc/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile diff --git a/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/etc/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile similarity index 100% rename from libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile rename to etc/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile diff --git a/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile similarity index 100% rename from libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile rename to etc/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile diff --git a/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile similarity index 100% rename from libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile rename to etc/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile diff --git a/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile similarity index 100% rename from libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile rename to etc/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile diff --git a/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile similarity index 100% rename from libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile rename to etc/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile diff --git a/libm/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile similarity index 100% rename from libm/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile rename to etc/libm/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile diff --git a/libm/ci/docker/thumbv6m-none-eabi/Dockerfile b/etc/libm/ci/docker/thumbv6m-none-eabi/Dockerfile similarity index 100% rename from 
libm/ci/docker/thumbv6m-none-eabi/Dockerfile rename to etc/libm/ci/docker/thumbv6m-none-eabi/Dockerfile diff --git a/libm/ci/docker/thumbv7em-none-eabi/Dockerfile b/etc/libm/ci/docker/thumbv7em-none-eabi/Dockerfile similarity index 100% rename from libm/ci/docker/thumbv7em-none-eabi/Dockerfile rename to etc/libm/ci/docker/thumbv7em-none-eabi/Dockerfile diff --git a/libm/ci/docker/thumbv7em-none-eabihf/Dockerfile b/etc/libm/ci/docker/thumbv7em-none-eabihf/Dockerfile similarity index 100% rename from libm/ci/docker/thumbv7em-none-eabihf/Dockerfile rename to etc/libm/ci/docker/thumbv7em-none-eabihf/Dockerfile diff --git a/libm/ci/docker/thumbv7m-none-eabi/Dockerfile b/etc/libm/ci/docker/thumbv7m-none-eabi/Dockerfile similarity index 100% rename from libm/ci/docker/thumbv7m-none-eabi/Dockerfile rename to etc/libm/ci/docker/thumbv7m-none-eabi/Dockerfile diff --git a/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile similarity index 100% rename from libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile rename to etc/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile diff --git a/libm/ci/run-docker.sh b/etc/libm/ci/run-docker.sh similarity index 100% rename from libm/ci/run-docker.sh rename to etc/libm/ci/run-docker.sh diff --git a/libm/ci/run.sh b/etc/libm/ci/run.sh similarity index 100% rename from libm/ci/run.sh rename to etc/libm/ci/run.sh diff --git a/libm/etc/update-api-list.py b/etc/update-api-list.py similarity index 100% rename from libm/etc/update-api-list.py rename to etc/update-api-list.py diff --git a/libm/crates/libm-test/Cargo.toml b/libm-test/Cargo.toml similarity index 100% rename from libm/crates/libm-test/Cargo.toml rename to libm-test/Cargo.toml diff --git a/libm/crates/libm-test/benches/icount.rs b/libm-test/benches/icount.rs similarity index 100% rename from libm/crates/libm-test/benches/icount.rs rename to libm-test/benches/icount.rs diff --git a/libm/crates/libm-test/benches/random.rs 
b/libm-test/benches/random.rs similarity index 100% rename from libm/crates/libm-test/benches/random.rs rename to libm-test/benches/random.rs diff --git a/libm/crates/libm-test/build.rs b/libm-test/build.rs similarity index 100% rename from libm/crates/libm-test/build.rs rename to libm-test/build.rs diff --git a/libm/crates/libm-test/examples/plot_domains.rs b/libm-test/examples/plot_domains.rs similarity index 100% rename from libm/crates/libm-test/examples/plot_domains.rs rename to libm-test/examples/plot_domains.rs diff --git a/libm/crates/libm-test/examples/plot_file.jl b/libm-test/examples/plot_file.jl similarity index 100% rename from libm/crates/libm-test/examples/plot_file.jl rename to libm-test/examples/plot_file.jl diff --git a/libm/crates/libm-test/src/domain.rs b/libm-test/src/domain.rs similarity index 100% rename from libm/crates/libm-test/src/domain.rs rename to libm-test/src/domain.rs diff --git a/libm/crates/libm-test/src/f8_impl.rs b/libm-test/src/f8_impl.rs similarity index 100% rename from libm/crates/libm-test/src/f8_impl.rs rename to libm-test/src/f8_impl.rs diff --git a/libm/crates/libm-test/src/generate.rs b/libm-test/src/generate.rs similarity index 100% rename from libm/crates/libm-test/src/generate.rs rename to libm-test/src/generate.rs diff --git a/libm/crates/libm-test/src/generate/case_list.rs b/libm-test/src/generate/case_list.rs similarity index 100% rename from libm/crates/libm-test/src/generate/case_list.rs rename to libm-test/src/generate/case_list.rs diff --git a/libm/crates/libm-test/src/generate/edge_cases.rs b/libm-test/src/generate/edge_cases.rs similarity index 100% rename from libm/crates/libm-test/src/generate/edge_cases.rs rename to libm-test/src/generate/edge_cases.rs diff --git a/libm/crates/libm-test/src/generate/random.rs b/libm-test/src/generate/random.rs similarity index 100% rename from libm/crates/libm-test/src/generate/random.rs rename to libm-test/src/generate/random.rs diff --git 
a/libm/crates/libm-test/src/generate/spaced.rs b/libm-test/src/generate/spaced.rs similarity index 100% rename from libm/crates/libm-test/src/generate/spaced.rs rename to libm-test/src/generate/spaced.rs diff --git a/libm/crates/libm-test/src/lib.rs b/libm-test/src/lib.rs similarity index 100% rename from libm/crates/libm-test/src/lib.rs rename to libm-test/src/lib.rs diff --git a/libm/crates/libm-test/src/mpfloat.rs b/libm-test/src/mpfloat.rs similarity index 100% rename from libm/crates/libm-test/src/mpfloat.rs rename to libm-test/src/mpfloat.rs diff --git a/libm/crates/libm-test/src/num.rs b/libm-test/src/num.rs similarity index 100% rename from libm/crates/libm-test/src/num.rs rename to libm-test/src/num.rs diff --git a/libm/crates/libm-test/src/op.rs b/libm-test/src/op.rs similarity index 100% rename from libm/crates/libm-test/src/op.rs rename to libm-test/src/op.rs diff --git a/libm/crates/libm-test/src/precision.rs b/libm-test/src/precision.rs similarity index 100% rename from libm/crates/libm-test/src/precision.rs rename to libm-test/src/precision.rs diff --git a/libm/crates/libm-test/src/run_cfg.rs b/libm-test/src/run_cfg.rs similarity index 100% rename from libm/crates/libm-test/src/run_cfg.rs rename to libm-test/src/run_cfg.rs diff --git a/libm/crates/libm-test/src/test_traits.rs b/libm-test/src/test_traits.rs similarity index 100% rename from libm/crates/libm-test/src/test_traits.rs rename to libm-test/src/test_traits.rs diff --git a/libm/crates/libm-test/tests/check_coverage.rs b/libm-test/tests/check_coverage.rs similarity index 100% rename from libm/crates/libm-test/tests/check_coverage.rs rename to libm-test/tests/check_coverage.rs diff --git a/libm/crates/libm-test/tests/compare_built_musl.rs b/libm-test/tests/compare_built_musl.rs similarity index 100% rename from libm/crates/libm-test/tests/compare_built_musl.rs rename to libm-test/tests/compare_built_musl.rs diff --git a/libm/crates/libm-test/tests/multiprecision.rs 
b/libm-test/tests/multiprecision.rs similarity index 100% rename from libm/crates/libm-test/tests/multiprecision.rs rename to libm-test/tests/multiprecision.rs diff --git a/libm/crates/libm-test/tests/standalone.rs b/libm-test/tests/standalone.rs similarity index 100% rename from libm/crates/libm-test/tests/standalone.rs rename to libm-test/tests/standalone.rs diff --git a/libm/crates/libm-test/tests/u256.rs b/libm-test/tests/u256.rs similarity index 100% rename from libm/crates/libm-test/tests/u256.rs rename to libm-test/tests/u256.rs diff --git a/libm/crates/libm-test/tests/z_extensive/main.rs b/libm-test/tests/z_extensive/main.rs similarity index 100% rename from libm/crates/libm-test/tests/z_extensive/main.rs rename to libm-test/tests/z_extensive/main.rs diff --git a/libm/crates/libm-test/tests/z_extensive/run.rs b/libm-test/tests/z_extensive/run.rs similarity index 100% rename from libm/crates/libm-test/tests/z_extensive/run.rs rename to libm-test/tests/z_extensive/run.rs diff --git a/libm/LICENSE.txt b/libm/LICENSE.txt deleted file mode 100644 index 2f8e41f14..000000000 --- a/libm/LICENSE.txt +++ /dev/null @@ -1,258 +0,0 @@ -rust-lang/libm as a whole is available for use under the MIT license: - ------------------------------------------------------------------------------- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. ------------------------------------------------------------------------------- - -As a contributor, you agree that your code can be used under either the MIT -license or the Apache-2.0 license: - ------------------------------------------------------------------------------- - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ------------------------------------------------------------------------------- - -This Rust library contains the following copyrights: - - Copyright (c) 2018 Jorge Aparicio - -Portions of this software are derived from third-party works licensed under -terms compatible with the above MIT license: - -* musl libc https://www.musl-libc.org/. This library contains the following - copyright: - - Copyright © 2005-2020 Rich Felker, et al. - -* The CORE-MATH project https://core-math.gitlabpages.inria.fr/. CORE-MATH - routines are available under the MIT license on a per-file basis. - -The musl libc COPYRIGHT file also includes the following notice relevant to -math portions of the library: - ------------------------------------------------------------------------------- -Much of the math library code (src/math/* and src/complex/*) is -Copyright © 1993,2004 Sun Microsystems or -Copyright © 2003-2011 David Schultz or -Copyright © 2003-2009 Steven G. Kargl or -Copyright © 2003-2009 Bruce D. Evans or -Copyright © 2008 Stephen L. Moshier or -Copyright © 2017-2018 Arm Limited -and labelled as such in comments in the individual source files. All -have been licensed under extremely permissive terms. ------------------------------------------------------------------------------- - -Copyright notices are retained in src/* files where relevant. 
diff --git a/libm/README.md b/libm/README.md deleted file mode 100644 index 52d760a4f..000000000 --- a/libm/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# `libm` - -A port of [MUSL]'s libm to Rust. - -[MUSL]: https://musl.libc.org/ - -## Goals - -The short term goal of this library is to [enable math support (e.g. `sin`, `atan2`) for the -`wasm32-unknown-unknown` target][wasm] (cf. [rust-lang/compiler-builtins][pr]). The longer -term goal is to enable [math support in the `core` crate][core]. - -[wasm]: https://github.com/rust-lang/libm/milestone/1 -[pr]: https://github.com/rust-lang/compiler-builtins/pull/248 -[core]: https://github.com/rust-lang/libm/milestone/2 - -## Already usable - -This crate is [on crates.io] and can be used today in stable `#![no_std]` programs. - -The API documentation can be found [here](https://docs.rs/libm). - -[on crates.io]: https://crates.io/crates/libm - -## Benchmark -[benchmark]: #benchmark - -The benchmarks are located in `crates/libm-bench` and require a nightly Rust toolchain. -To run all benchmarks: - -> cargo +nightly bench --all - -## Contributing - -Please check [CONTRIBUTING.md](CONTRIBUTING.md) - -## Minimum Rust version policy - -This crate supports rustc 1.63 and newer. - -## License - -Usage is licensed under the MIT license ([LICENSE-MIT](LICENSE-MIT) or -https://opensource.org/licenses/MIT). - - -### Contribution - -Contributions are licensed under both the MIT license and the Apache License, -Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or -https://www.apache.org/licenses/LICENSE-2.0). Unless you explicitly state -otherwise, any contribution intentionally submitted for inclusion in the work -by you, as defined in the Apache-2.0 license, shall be dual licensed as -mentioned, without any additional terms or conditions. - -See `LICENSE.txt` for full details. 
diff --git a/libm/crates/musl-math-sys/musl b/libm/crates/musl-math-sys/musl deleted file mode 160000 index 61399d4bd..000000000 --- a/libm/crates/musl-math-sys/musl +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 61399d4bd02ae1ec03068445aa7ffe9174466bfd diff --git a/libm/libm/LICENSE.txt b/libm/libm/LICENSE.txt deleted file mode 120000 index 4ab43736a..000000000 --- a/libm/libm/LICENSE.txt +++ /dev/null @@ -1 +0,0 @@ -../LICENSE.txt \ No newline at end of file diff --git a/libm/libm/LICENSE.txt b/libm/libm/LICENSE.txt new file mode 100644 index 000000000..2f8e41f14 --- /dev/null +++ b/libm/libm/LICENSE.txt @@ -0,0 +1,258 @@ +rust-lang/libm as a whole is available for use under the MIT license: + +------------------------------------------------------------------------------ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+------------------------------------------------------------------------------ + +As a contributor, you agree that your code can be used under either the MIT +license or the Apache-2.0 license: + +------------------------------------------------------------------------------ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +------------------------------------------------------------------------------ + +This Rust library contains the following copyrights: + + Copyright (c) 2018 Jorge Aparicio + +Portions of this software are derived from third-party works licensed under +terms compatible with the above MIT license: + +* musl libc https://www.musl-libc.org/. This library contains the following + copyright: + + Copyright © 2005-2020 Rich Felker, et al. + +* The CORE-MATH project https://core-math.gitlabpages.inria.fr/. CORE-MATH + routines are available under the MIT license on a per-file basis. 
+ +The musl libc COPYRIGHT file also includes the following notice relevant to +math portions of the library: + +------------------------------------------------------------------------------ +Much of the math library code (src/math/* and src/complex/*) is +Copyright © 1993,2004 Sun Microsystems or +Copyright © 2003-2011 David Schultz or +Copyright © 2003-2009 Steven G. Kargl or +Copyright © 2003-2009 Bruce D. Evans or +Copyright © 2008 Stephen L. Moshier or +Copyright © 2017-2018 Arm Limited +and labelled as such in comments in the individual source files. All +have been licensed under extremely permissive terms. +------------------------------------------------------------------------------ + +Copyright notices are retained in src/* files where relevant. diff --git a/libm/libm/README.md b/libm/libm/README.md deleted file mode 120000 index 32d46ee88..000000000 --- a/libm/libm/README.md +++ /dev/null @@ -1 +0,0 @@ -../README.md \ No newline at end of file diff --git a/libm/libm/README.md b/libm/libm/README.md new file mode 100644 index 000000000..52d760a4f --- /dev/null +++ b/libm/libm/README.md @@ -0,0 +1,56 @@ +# `libm` + +A port of [MUSL]'s libm to Rust. + +[MUSL]: https://musl.libc.org/ + +## Goals + +The short term goal of this library is to [enable math support (e.g. `sin`, `atan2`) for the +`wasm32-unknown-unknown` target][wasm] (cf. [rust-lang/compiler-builtins][pr]). The longer +term goal is to enable [math support in the `core` crate][core]. + +[wasm]: https://github.com/rust-lang/libm/milestone/1 +[pr]: https://github.com/rust-lang/compiler-builtins/pull/248 +[core]: https://github.com/rust-lang/libm/milestone/2 + +## Already usable + +This crate is [on crates.io] and can be used today in stable `#![no_std]` programs. + +The API documentation can be found [here](https://docs.rs/libm). 
+ +[on crates.io]: https://crates.io/crates/libm + +## Benchmark +[benchmark]: #benchmark + +The benchmarks are located in `crates/libm-bench` and require a nightly Rust toolchain. +To run all benchmarks: + +> cargo +nightly bench --all + +## Contributing + +Please check [CONTRIBUTING.md](CONTRIBUTING.md) + +## Minimum Rust version policy + +This crate supports rustc 1.63 and newer. + +## License + +Usage is licensed under the MIT license ([LICENSE-MIT](LICENSE-MIT) or +https://opensource.org/licenses/MIT). + + +### Contribution + +Contributions are licensed under both the MIT license and the Apache License, +Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or +https://www.apache.org/licenses/LICENSE-2.0). Unless you explicitly state +otherwise, any contribution intentionally submitted for inclusion in the work +by you, as defined in the Apache-2.0 license, shall be dual licensed as +mentioned, without any additional terms or conditions. + +See `LICENSE.txt` for full details. From 7c5d42be04181e3af3942dc8f0c0869a409d37b3 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 19 Apr 2025 21:09:49 +0000 Subject: [PATCH 1290/1459] libm: Flatten the `libm/libm` directory --- compiler-builtins/src/math.rs | 2 +- libm/{libm => }/Cargo.toml | 0 libm/{libm => }/LICENSE.txt | 0 libm/{libm => }/README.md | 0 libm/{libm => }/build.rs | 0 libm/{libm => }/configure.rs | 0 libm/{libm => }/src/lib.rs | 0 libm/{libm => }/src/libm_helper.rs | 0 libm/{libm => }/src/math/acos.rs | 0 libm/{libm => }/src/math/acosf.rs | 0 libm/{libm => }/src/math/acosh.rs | 0 libm/{libm => }/src/math/acoshf.rs | 0 libm/{libm => }/src/math/arch/aarch64.rs | 0 libm/{libm => }/src/math/arch/i586.rs | 0 libm/{libm => }/src/math/arch/i686.rs | 0 libm/{libm => }/src/math/arch/mod.rs | 0 libm/{libm => }/src/math/arch/wasm32.rs | 0 libm/{libm => }/src/math/asin.rs | 0 libm/{libm => }/src/math/asinf.rs | 0 libm/{libm => }/src/math/asinh.rs | 0 libm/{libm => }/src/math/asinhf.rs | 0 libm/{libm => }/src/math/atan.rs 
| 0 libm/{libm => }/src/math/atan2.rs | 0 libm/{libm => }/src/math/atan2f.rs | 0 libm/{libm => }/src/math/atanf.rs | 0 libm/{libm => }/src/math/atanh.rs | 0 libm/{libm => }/src/math/atanhf.rs | 0 libm/{libm => }/src/math/cbrt.rs | 0 libm/{libm => }/src/math/cbrtf.rs | 0 libm/{libm => }/src/math/ceil.rs | 0 libm/{libm => }/src/math/copysign.rs | 0 libm/{libm => }/src/math/copysignf.rs | 0 libm/{libm => }/src/math/copysignf128.rs | 0 libm/{libm => }/src/math/copysignf16.rs | 0 libm/{libm => }/src/math/cos.rs | 0 libm/{libm => }/src/math/cosf.rs | 0 libm/{libm => }/src/math/cosh.rs | 0 libm/{libm => }/src/math/coshf.rs | 0 libm/{libm => }/src/math/erf.rs | 0 libm/{libm => }/src/math/erff.rs | 0 libm/{libm => }/src/math/exp.rs | 0 libm/{libm => }/src/math/exp10.rs | 0 libm/{libm => }/src/math/exp10f.rs | 0 libm/{libm => }/src/math/exp2.rs | 0 libm/{libm => }/src/math/exp2f.rs | 0 libm/{libm => }/src/math/expf.rs | 0 libm/{libm => }/src/math/expm1.rs | 0 libm/{libm => }/src/math/expm1f.rs | 0 libm/{libm => }/src/math/expo2.rs | 0 libm/{libm => }/src/math/fabs.rs | 0 libm/{libm => }/src/math/fabsf.rs | 0 libm/{libm => }/src/math/fabsf128.rs | 0 libm/{libm => }/src/math/fabsf16.rs | 0 libm/{libm => }/src/math/fdim.rs | 0 libm/{libm => }/src/math/fdimf.rs | 0 libm/{libm => }/src/math/fdimf128.rs | 0 libm/{libm => }/src/math/fdimf16.rs | 0 libm/{libm => }/src/math/floor.rs | 0 libm/{libm => }/src/math/floorf.rs | 0 libm/{libm => }/src/math/floorf128.rs | 0 libm/{libm => }/src/math/floorf16.rs | 0 libm/{libm => }/src/math/fma.rs | 0 libm/{libm => }/src/math/fma_wide.rs | 0 libm/{libm => }/src/math/fmin_fmax.rs | 0 libm/{libm => }/src/math/fminimum_fmaximum.rs | 0 libm/{libm => }/src/math/fminimum_fmaximum_num.rs | 0 libm/{libm => }/src/math/fmod.rs | 0 libm/{libm => }/src/math/fmodf.rs | 0 libm/{libm => }/src/math/fmodf128.rs | 0 libm/{libm => }/src/math/fmodf16.rs | 0 libm/{libm => }/src/math/frexp.rs | 0 libm/{libm => }/src/math/frexpf.rs | 0 libm/{libm => 
}/src/math/generic/ceil.rs | 0 libm/{libm => }/src/math/generic/copysign.rs | 0 libm/{libm => }/src/math/generic/fabs.rs | 0 libm/{libm => }/src/math/generic/fdim.rs | 0 libm/{libm => }/src/math/generic/floor.rs | 0 libm/{libm => }/src/math/generic/fmax.rs | 0 libm/{libm => }/src/math/generic/fmaximum.rs | 0 libm/{libm => }/src/math/generic/fmaximum_num.rs | 0 libm/{libm => }/src/math/generic/fmin.rs | 0 libm/{libm => }/src/math/generic/fminimum.rs | 0 libm/{libm => }/src/math/generic/fminimum_num.rs | 0 libm/{libm => }/src/math/generic/fmod.rs | 0 libm/{libm => }/src/math/generic/mod.rs | 0 libm/{libm => }/src/math/generic/rint.rs | 0 libm/{libm => }/src/math/generic/round.rs | 0 libm/{libm => }/src/math/generic/scalbn.rs | 0 libm/{libm => }/src/math/generic/sqrt.rs | 0 libm/{libm => }/src/math/generic/trunc.rs | 0 libm/{libm => }/src/math/hypot.rs | 0 libm/{libm => }/src/math/hypotf.rs | 0 libm/{libm => }/src/math/ilogb.rs | 0 libm/{libm => }/src/math/ilogbf.rs | 0 libm/{libm => }/src/math/j0.rs | 0 libm/{libm => }/src/math/j0f.rs | 0 libm/{libm => }/src/math/j1.rs | 0 libm/{libm => }/src/math/j1f.rs | 0 libm/{libm => }/src/math/jn.rs | 0 libm/{libm => }/src/math/jnf.rs | 0 libm/{libm => }/src/math/k_cos.rs | 0 libm/{libm => }/src/math/k_cosf.rs | 0 libm/{libm => }/src/math/k_expo2.rs | 0 libm/{libm => }/src/math/k_expo2f.rs | 0 libm/{libm => }/src/math/k_sin.rs | 0 libm/{libm => }/src/math/k_sinf.rs | 0 libm/{libm => }/src/math/k_tan.rs | 0 libm/{libm => }/src/math/k_tanf.rs | 0 libm/{libm => }/src/math/ldexp.rs | 0 libm/{libm => }/src/math/ldexpf.rs | 0 libm/{libm => }/src/math/ldexpf128.rs | 0 libm/{libm => }/src/math/ldexpf16.rs | 0 libm/{libm => }/src/math/lgamma.rs | 0 libm/{libm => }/src/math/lgamma_r.rs | 0 libm/{libm => }/src/math/lgammaf.rs | 0 libm/{libm => }/src/math/lgammaf_r.rs | 0 libm/{libm => }/src/math/log.rs | 0 libm/{libm => }/src/math/log10.rs | 0 libm/{libm => }/src/math/log10f.rs | 0 libm/{libm => }/src/math/log1p.rs | 0 libm/{libm => 
}/src/math/log1pf.rs | 0 libm/{libm => }/src/math/log2.rs | 0 libm/{libm => }/src/math/log2f.rs | 0 libm/{libm => }/src/math/logf.rs | 0 libm/{libm => }/src/math/mod.rs | 0 libm/{libm => }/src/math/modf.rs | 0 libm/{libm => }/src/math/modff.rs | 0 libm/{libm => }/src/math/nextafter.rs | 0 libm/{libm => }/src/math/nextafterf.rs | 0 libm/{libm => }/src/math/pow.rs | 0 libm/{libm => }/src/math/powf.rs | 0 libm/{libm => }/src/math/rem_pio2.rs | 0 libm/{libm => }/src/math/rem_pio2_large.rs | 0 libm/{libm => }/src/math/rem_pio2f.rs | 0 libm/{libm => }/src/math/remainder.rs | 0 libm/{libm => }/src/math/remainderf.rs | 0 libm/{libm => }/src/math/remquo.rs | 0 libm/{libm => }/src/math/remquof.rs | 0 libm/{libm => }/src/math/rint.rs | 0 libm/{libm => }/src/math/round.rs | 0 libm/{libm => }/src/math/roundeven.rs | 0 libm/{libm => }/src/math/roundf.rs | 0 libm/{libm => }/src/math/roundf128.rs | 0 libm/{libm => }/src/math/roundf16.rs | 0 libm/{libm => }/src/math/scalbn.rs | 0 libm/{libm => }/src/math/scalbnf.rs | 0 libm/{libm => }/src/math/scalbnf128.rs | 0 libm/{libm => }/src/math/scalbnf16.rs | 0 libm/{libm => }/src/math/sin.rs | 0 libm/{libm => }/src/math/sincos.rs | 0 libm/{libm => }/src/math/sincosf.rs | 0 libm/{libm => }/src/math/sinf.rs | 0 libm/{libm => }/src/math/sinh.rs | 0 libm/{libm => }/src/math/sinhf.rs | 0 libm/{libm => }/src/math/sqrt.rs | 0 libm/{libm => }/src/math/sqrtf.rs | 0 libm/{libm => }/src/math/sqrtf128.rs | 0 libm/{libm => }/src/math/sqrtf16.rs | 0 libm/{libm => }/src/math/support/big.rs | 0 libm/{libm => }/src/math/support/big/tests.rs | 0 libm/{libm => }/src/math/support/env.rs | 0 libm/{libm => }/src/math/support/float_traits.rs | 0 libm/{libm => }/src/math/support/hex_float.rs | 0 libm/{libm => }/src/math/support/int_traits.rs | 0 libm/{libm => }/src/math/support/macros.rs | 0 libm/{libm => }/src/math/support/mod.rs | 0 libm/{libm => }/src/math/tan.rs | 0 libm/{libm => }/src/math/tanf.rs | 0 libm/{libm => }/src/math/tanh.rs | 0 libm/{libm => 
}/src/math/tanhf.rs | 0 libm/{libm => }/src/math/tgamma.rs | 0 libm/{libm => }/src/math/tgammaf.rs | 0 libm/{libm => }/src/math/trunc.rs | 0 libm/{libm => }/src/math/truncf.rs | 0 libm/{libm => }/src/math/truncf128.rs | 0 libm/{libm => }/src/math/truncf16.rs | 0 176 files changed, 1 insertion(+), 1 deletion(-) rename libm/{libm => }/Cargo.toml (100%) rename libm/{libm => }/LICENSE.txt (100%) rename libm/{libm => }/README.md (100%) rename libm/{libm => }/build.rs (100%) rename libm/{libm => }/configure.rs (100%) rename libm/{libm => }/src/lib.rs (100%) rename libm/{libm => }/src/libm_helper.rs (100%) rename libm/{libm => }/src/math/acos.rs (100%) rename libm/{libm => }/src/math/acosf.rs (100%) rename libm/{libm => }/src/math/acosh.rs (100%) rename libm/{libm => }/src/math/acoshf.rs (100%) rename libm/{libm => }/src/math/arch/aarch64.rs (100%) rename libm/{libm => }/src/math/arch/i586.rs (100%) rename libm/{libm => }/src/math/arch/i686.rs (100%) rename libm/{libm => }/src/math/arch/mod.rs (100%) rename libm/{libm => }/src/math/arch/wasm32.rs (100%) rename libm/{libm => }/src/math/asin.rs (100%) rename libm/{libm => }/src/math/asinf.rs (100%) rename libm/{libm => }/src/math/asinh.rs (100%) rename libm/{libm => }/src/math/asinhf.rs (100%) rename libm/{libm => }/src/math/atan.rs (100%) rename libm/{libm => }/src/math/atan2.rs (100%) rename libm/{libm => }/src/math/atan2f.rs (100%) rename libm/{libm => }/src/math/atanf.rs (100%) rename libm/{libm => }/src/math/atanh.rs (100%) rename libm/{libm => }/src/math/atanhf.rs (100%) rename libm/{libm => }/src/math/cbrt.rs (100%) rename libm/{libm => }/src/math/cbrtf.rs (100%) rename libm/{libm => }/src/math/ceil.rs (100%) rename libm/{libm => }/src/math/copysign.rs (100%) rename libm/{libm => }/src/math/copysignf.rs (100%) rename libm/{libm => }/src/math/copysignf128.rs (100%) rename libm/{libm => }/src/math/copysignf16.rs (100%) rename libm/{libm => }/src/math/cos.rs (100%) rename libm/{libm => }/src/math/cosf.rs (100%) rename 
libm/{libm => }/src/math/cosh.rs (100%) rename libm/{libm => }/src/math/coshf.rs (100%) rename libm/{libm => }/src/math/erf.rs (100%) rename libm/{libm => }/src/math/erff.rs (100%) rename libm/{libm => }/src/math/exp.rs (100%) rename libm/{libm => }/src/math/exp10.rs (100%) rename libm/{libm => }/src/math/exp10f.rs (100%) rename libm/{libm => }/src/math/exp2.rs (100%) rename libm/{libm => }/src/math/exp2f.rs (100%) rename libm/{libm => }/src/math/expf.rs (100%) rename libm/{libm => }/src/math/expm1.rs (100%) rename libm/{libm => }/src/math/expm1f.rs (100%) rename libm/{libm => }/src/math/expo2.rs (100%) rename libm/{libm => }/src/math/fabs.rs (100%) rename libm/{libm => }/src/math/fabsf.rs (100%) rename libm/{libm => }/src/math/fabsf128.rs (100%) rename libm/{libm => }/src/math/fabsf16.rs (100%) rename libm/{libm => }/src/math/fdim.rs (100%) rename libm/{libm => }/src/math/fdimf.rs (100%) rename libm/{libm => }/src/math/fdimf128.rs (100%) rename libm/{libm => }/src/math/fdimf16.rs (100%) rename libm/{libm => }/src/math/floor.rs (100%) rename libm/{libm => }/src/math/floorf.rs (100%) rename libm/{libm => }/src/math/floorf128.rs (100%) rename libm/{libm => }/src/math/floorf16.rs (100%) rename libm/{libm => }/src/math/fma.rs (100%) rename libm/{libm => }/src/math/fma_wide.rs (100%) rename libm/{libm => }/src/math/fmin_fmax.rs (100%) rename libm/{libm => }/src/math/fminimum_fmaximum.rs (100%) rename libm/{libm => }/src/math/fminimum_fmaximum_num.rs (100%) rename libm/{libm => }/src/math/fmod.rs (100%) rename libm/{libm => }/src/math/fmodf.rs (100%) rename libm/{libm => }/src/math/fmodf128.rs (100%) rename libm/{libm => }/src/math/fmodf16.rs (100%) rename libm/{libm => }/src/math/frexp.rs (100%) rename libm/{libm => }/src/math/frexpf.rs (100%) rename libm/{libm => }/src/math/generic/ceil.rs (100%) rename libm/{libm => }/src/math/generic/copysign.rs (100%) rename libm/{libm => }/src/math/generic/fabs.rs (100%) rename libm/{libm => }/src/math/generic/fdim.rs (100%) rename 
libm/{libm => }/src/math/generic/floor.rs (100%) rename libm/{libm => }/src/math/generic/fmax.rs (100%) rename libm/{libm => }/src/math/generic/fmaximum.rs (100%) rename libm/{libm => }/src/math/generic/fmaximum_num.rs (100%) rename libm/{libm => }/src/math/generic/fmin.rs (100%) rename libm/{libm => }/src/math/generic/fminimum.rs (100%) rename libm/{libm => }/src/math/generic/fminimum_num.rs (100%) rename libm/{libm => }/src/math/generic/fmod.rs (100%) rename libm/{libm => }/src/math/generic/mod.rs (100%) rename libm/{libm => }/src/math/generic/rint.rs (100%) rename libm/{libm => }/src/math/generic/round.rs (100%) rename libm/{libm => }/src/math/generic/scalbn.rs (100%) rename libm/{libm => }/src/math/generic/sqrt.rs (100%) rename libm/{libm => }/src/math/generic/trunc.rs (100%) rename libm/{libm => }/src/math/hypot.rs (100%) rename libm/{libm => }/src/math/hypotf.rs (100%) rename libm/{libm => }/src/math/ilogb.rs (100%) rename libm/{libm => }/src/math/ilogbf.rs (100%) rename libm/{libm => }/src/math/j0.rs (100%) rename libm/{libm => }/src/math/j0f.rs (100%) rename libm/{libm => }/src/math/j1.rs (100%) rename libm/{libm => }/src/math/j1f.rs (100%) rename libm/{libm => }/src/math/jn.rs (100%) rename libm/{libm => }/src/math/jnf.rs (100%) rename libm/{libm => }/src/math/k_cos.rs (100%) rename libm/{libm => }/src/math/k_cosf.rs (100%) rename libm/{libm => }/src/math/k_expo2.rs (100%) rename libm/{libm => }/src/math/k_expo2f.rs (100%) rename libm/{libm => }/src/math/k_sin.rs (100%) rename libm/{libm => }/src/math/k_sinf.rs (100%) rename libm/{libm => }/src/math/k_tan.rs (100%) rename libm/{libm => }/src/math/k_tanf.rs (100%) rename libm/{libm => }/src/math/ldexp.rs (100%) rename libm/{libm => }/src/math/ldexpf.rs (100%) rename libm/{libm => }/src/math/ldexpf128.rs (100%) rename libm/{libm => }/src/math/ldexpf16.rs (100%) rename libm/{libm => }/src/math/lgamma.rs (100%) rename libm/{libm => }/src/math/lgamma_r.rs (100%) rename libm/{libm => }/src/math/lgammaf.rs (100%) 
rename libm/{libm => }/src/math/lgammaf_r.rs (100%) rename libm/{libm => }/src/math/log.rs (100%) rename libm/{libm => }/src/math/log10.rs (100%) rename libm/{libm => }/src/math/log10f.rs (100%) rename libm/{libm => }/src/math/log1p.rs (100%) rename libm/{libm => }/src/math/log1pf.rs (100%) rename libm/{libm => }/src/math/log2.rs (100%) rename libm/{libm => }/src/math/log2f.rs (100%) rename libm/{libm => }/src/math/logf.rs (100%) rename libm/{libm => }/src/math/mod.rs (100%) rename libm/{libm => }/src/math/modf.rs (100%) rename libm/{libm => }/src/math/modff.rs (100%) rename libm/{libm => }/src/math/nextafter.rs (100%) rename libm/{libm => }/src/math/nextafterf.rs (100%) rename libm/{libm => }/src/math/pow.rs (100%) rename libm/{libm => }/src/math/powf.rs (100%) rename libm/{libm => }/src/math/rem_pio2.rs (100%) rename libm/{libm => }/src/math/rem_pio2_large.rs (100%) rename libm/{libm => }/src/math/rem_pio2f.rs (100%) rename libm/{libm => }/src/math/remainder.rs (100%) rename libm/{libm => }/src/math/remainderf.rs (100%) rename libm/{libm => }/src/math/remquo.rs (100%) rename libm/{libm => }/src/math/remquof.rs (100%) rename libm/{libm => }/src/math/rint.rs (100%) rename libm/{libm => }/src/math/round.rs (100%) rename libm/{libm => }/src/math/roundeven.rs (100%) rename libm/{libm => }/src/math/roundf.rs (100%) rename libm/{libm => }/src/math/roundf128.rs (100%) rename libm/{libm => }/src/math/roundf16.rs (100%) rename libm/{libm => }/src/math/scalbn.rs (100%) rename libm/{libm => }/src/math/scalbnf.rs (100%) rename libm/{libm => }/src/math/scalbnf128.rs (100%) rename libm/{libm => }/src/math/scalbnf16.rs (100%) rename libm/{libm => }/src/math/sin.rs (100%) rename libm/{libm => }/src/math/sincos.rs (100%) rename libm/{libm => }/src/math/sincosf.rs (100%) rename libm/{libm => }/src/math/sinf.rs (100%) rename libm/{libm => }/src/math/sinh.rs (100%) rename libm/{libm => }/src/math/sinhf.rs (100%) rename libm/{libm => }/src/math/sqrt.rs (100%) rename libm/{libm => 
}/src/math/sqrtf.rs (100%) rename libm/{libm => }/src/math/sqrtf128.rs (100%) rename libm/{libm => }/src/math/sqrtf16.rs (100%) rename libm/{libm => }/src/math/support/big.rs (100%) rename libm/{libm => }/src/math/support/big/tests.rs (100%) rename libm/{libm => }/src/math/support/env.rs (100%) rename libm/{libm => }/src/math/support/float_traits.rs (100%) rename libm/{libm => }/src/math/support/hex_float.rs (100%) rename libm/{libm => }/src/math/support/int_traits.rs (100%) rename libm/{libm => }/src/math/support/macros.rs (100%) rename libm/{libm => }/src/math/support/mod.rs (100%) rename libm/{libm => }/src/math/tan.rs (100%) rename libm/{libm => }/src/math/tanf.rs (100%) rename libm/{libm => }/src/math/tanh.rs (100%) rename libm/{libm => }/src/math/tanhf.rs (100%) rename libm/{libm => }/src/math/tgamma.rs (100%) rename libm/{libm => }/src/math/tgammaf.rs (100%) rename libm/{libm => }/src/math/trunc.rs (100%) rename libm/{libm => }/src/math/truncf.rs (100%) rename libm/{libm => }/src/math/truncf128.rs (100%) rename libm/{libm => }/src/math/truncf16.rs (100%) diff --git a/compiler-builtins/src/math.rs b/compiler-builtins/src/math.rs index bd52a749e..722374f8e 100644 --- a/compiler-builtins/src/math.rs +++ b/compiler-builtins/src/math.rs @@ -2,7 +2,7 @@ #[allow(dead_code)] #[allow(unused_imports)] #[allow(clippy::all)] -#[path = "../../libm/libm/src/math/mod.rs"] +#[path = "../../libm/src/math/mod.rs"] pub(crate) mod libm; macro_rules! 
libm_intrinsics { diff --git a/libm/libm/Cargo.toml b/libm/Cargo.toml similarity index 100% rename from libm/libm/Cargo.toml rename to libm/Cargo.toml diff --git a/libm/libm/LICENSE.txt b/libm/LICENSE.txt similarity index 100% rename from libm/libm/LICENSE.txt rename to libm/LICENSE.txt diff --git a/libm/libm/README.md b/libm/README.md similarity index 100% rename from libm/libm/README.md rename to libm/README.md diff --git a/libm/libm/build.rs b/libm/build.rs similarity index 100% rename from libm/libm/build.rs rename to libm/build.rs diff --git a/libm/libm/configure.rs b/libm/configure.rs similarity index 100% rename from libm/libm/configure.rs rename to libm/configure.rs diff --git a/libm/libm/src/lib.rs b/libm/src/lib.rs similarity index 100% rename from libm/libm/src/lib.rs rename to libm/src/lib.rs diff --git a/libm/libm/src/libm_helper.rs b/libm/src/libm_helper.rs similarity index 100% rename from libm/libm/src/libm_helper.rs rename to libm/src/libm_helper.rs diff --git a/libm/libm/src/math/acos.rs b/libm/src/math/acos.rs similarity index 100% rename from libm/libm/src/math/acos.rs rename to libm/src/math/acos.rs diff --git a/libm/libm/src/math/acosf.rs b/libm/src/math/acosf.rs similarity index 100% rename from libm/libm/src/math/acosf.rs rename to libm/src/math/acosf.rs diff --git a/libm/libm/src/math/acosh.rs b/libm/src/math/acosh.rs similarity index 100% rename from libm/libm/src/math/acosh.rs rename to libm/src/math/acosh.rs diff --git a/libm/libm/src/math/acoshf.rs b/libm/src/math/acoshf.rs similarity index 100% rename from libm/libm/src/math/acoshf.rs rename to libm/src/math/acoshf.rs diff --git a/libm/libm/src/math/arch/aarch64.rs b/libm/src/math/arch/aarch64.rs similarity index 100% rename from libm/libm/src/math/arch/aarch64.rs rename to libm/src/math/arch/aarch64.rs diff --git a/libm/libm/src/math/arch/i586.rs b/libm/src/math/arch/i586.rs similarity index 100% rename from libm/libm/src/math/arch/i586.rs rename to libm/src/math/arch/i586.rs diff 
--git a/libm/libm/src/math/arch/i686.rs b/libm/src/math/arch/i686.rs similarity index 100% rename from libm/libm/src/math/arch/i686.rs rename to libm/src/math/arch/i686.rs diff --git a/libm/libm/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs similarity index 100% rename from libm/libm/src/math/arch/mod.rs rename to libm/src/math/arch/mod.rs diff --git a/libm/libm/src/math/arch/wasm32.rs b/libm/src/math/arch/wasm32.rs similarity index 100% rename from libm/libm/src/math/arch/wasm32.rs rename to libm/src/math/arch/wasm32.rs diff --git a/libm/libm/src/math/asin.rs b/libm/src/math/asin.rs similarity index 100% rename from libm/libm/src/math/asin.rs rename to libm/src/math/asin.rs diff --git a/libm/libm/src/math/asinf.rs b/libm/src/math/asinf.rs similarity index 100% rename from libm/libm/src/math/asinf.rs rename to libm/src/math/asinf.rs diff --git a/libm/libm/src/math/asinh.rs b/libm/src/math/asinh.rs similarity index 100% rename from libm/libm/src/math/asinh.rs rename to libm/src/math/asinh.rs diff --git a/libm/libm/src/math/asinhf.rs b/libm/src/math/asinhf.rs similarity index 100% rename from libm/libm/src/math/asinhf.rs rename to libm/src/math/asinhf.rs diff --git a/libm/libm/src/math/atan.rs b/libm/src/math/atan.rs similarity index 100% rename from libm/libm/src/math/atan.rs rename to libm/src/math/atan.rs diff --git a/libm/libm/src/math/atan2.rs b/libm/src/math/atan2.rs similarity index 100% rename from libm/libm/src/math/atan2.rs rename to libm/src/math/atan2.rs diff --git a/libm/libm/src/math/atan2f.rs b/libm/src/math/atan2f.rs similarity index 100% rename from libm/libm/src/math/atan2f.rs rename to libm/src/math/atan2f.rs diff --git a/libm/libm/src/math/atanf.rs b/libm/src/math/atanf.rs similarity index 100% rename from libm/libm/src/math/atanf.rs rename to libm/src/math/atanf.rs diff --git a/libm/libm/src/math/atanh.rs b/libm/src/math/atanh.rs similarity index 100% rename from libm/libm/src/math/atanh.rs rename to libm/src/math/atanh.rs diff --git 
a/libm/libm/src/math/atanhf.rs b/libm/src/math/atanhf.rs similarity index 100% rename from libm/libm/src/math/atanhf.rs rename to libm/src/math/atanhf.rs diff --git a/libm/libm/src/math/cbrt.rs b/libm/src/math/cbrt.rs similarity index 100% rename from libm/libm/src/math/cbrt.rs rename to libm/src/math/cbrt.rs diff --git a/libm/libm/src/math/cbrtf.rs b/libm/src/math/cbrtf.rs similarity index 100% rename from libm/libm/src/math/cbrtf.rs rename to libm/src/math/cbrtf.rs diff --git a/libm/libm/src/math/ceil.rs b/libm/src/math/ceil.rs similarity index 100% rename from libm/libm/src/math/ceil.rs rename to libm/src/math/ceil.rs diff --git a/libm/libm/src/math/copysign.rs b/libm/src/math/copysign.rs similarity index 100% rename from libm/libm/src/math/copysign.rs rename to libm/src/math/copysign.rs diff --git a/libm/libm/src/math/copysignf.rs b/libm/src/math/copysignf.rs similarity index 100% rename from libm/libm/src/math/copysignf.rs rename to libm/src/math/copysignf.rs diff --git a/libm/libm/src/math/copysignf128.rs b/libm/src/math/copysignf128.rs similarity index 100% rename from libm/libm/src/math/copysignf128.rs rename to libm/src/math/copysignf128.rs diff --git a/libm/libm/src/math/copysignf16.rs b/libm/src/math/copysignf16.rs similarity index 100% rename from libm/libm/src/math/copysignf16.rs rename to libm/src/math/copysignf16.rs diff --git a/libm/libm/src/math/cos.rs b/libm/src/math/cos.rs similarity index 100% rename from libm/libm/src/math/cos.rs rename to libm/src/math/cos.rs diff --git a/libm/libm/src/math/cosf.rs b/libm/src/math/cosf.rs similarity index 100% rename from libm/libm/src/math/cosf.rs rename to libm/src/math/cosf.rs diff --git a/libm/libm/src/math/cosh.rs b/libm/src/math/cosh.rs similarity index 100% rename from libm/libm/src/math/cosh.rs rename to libm/src/math/cosh.rs diff --git a/libm/libm/src/math/coshf.rs b/libm/src/math/coshf.rs similarity index 100% rename from libm/libm/src/math/coshf.rs rename to libm/src/math/coshf.rs diff --git 
a/libm/libm/src/math/erf.rs b/libm/src/math/erf.rs similarity index 100% rename from libm/libm/src/math/erf.rs rename to libm/src/math/erf.rs diff --git a/libm/libm/src/math/erff.rs b/libm/src/math/erff.rs similarity index 100% rename from libm/libm/src/math/erff.rs rename to libm/src/math/erff.rs diff --git a/libm/libm/src/math/exp.rs b/libm/src/math/exp.rs similarity index 100% rename from libm/libm/src/math/exp.rs rename to libm/src/math/exp.rs diff --git a/libm/libm/src/math/exp10.rs b/libm/src/math/exp10.rs similarity index 100% rename from libm/libm/src/math/exp10.rs rename to libm/src/math/exp10.rs diff --git a/libm/libm/src/math/exp10f.rs b/libm/src/math/exp10f.rs similarity index 100% rename from libm/libm/src/math/exp10f.rs rename to libm/src/math/exp10f.rs diff --git a/libm/libm/src/math/exp2.rs b/libm/src/math/exp2.rs similarity index 100% rename from libm/libm/src/math/exp2.rs rename to libm/src/math/exp2.rs diff --git a/libm/libm/src/math/exp2f.rs b/libm/src/math/exp2f.rs similarity index 100% rename from libm/libm/src/math/exp2f.rs rename to libm/src/math/exp2f.rs diff --git a/libm/libm/src/math/expf.rs b/libm/src/math/expf.rs similarity index 100% rename from libm/libm/src/math/expf.rs rename to libm/src/math/expf.rs diff --git a/libm/libm/src/math/expm1.rs b/libm/src/math/expm1.rs similarity index 100% rename from libm/libm/src/math/expm1.rs rename to libm/src/math/expm1.rs diff --git a/libm/libm/src/math/expm1f.rs b/libm/src/math/expm1f.rs similarity index 100% rename from libm/libm/src/math/expm1f.rs rename to libm/src/math/expm1f.rs diff --git a/libm/libm/src/math/expo2.rs b/libm/src/math/expo2.rs similarity index 100% rename from libm/libm/src/math/expo2.rs rename to libm/src/math/expo2.rs diff --git a/libm/libm/src/math/fabs.rs b/libm/src/math/fabs.rs similarity index 100% rename from libm/libm/src/math/fabs.rs rename to libm/src/math/fabs.rs diff --git a/libm/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs similarity index 100% rename from 
libm/libm/src/math/fabsf.rs rename to libm/src/math/fabsf.rs diff --git a/libm/libm/src/math/fabsf128.rs b/libm/src/math/fabsf128.rs similarity index 100% rename from libm/libm/src/math/fabsf128.rs rename to libm/src/math/fabsf128.rs diff --git a/libm/libm/src/math/fabsf16.rs b/libm/src/math/fabsf16.rs similarity index 100% rename from libm/libm/src/math/fabsf16.rs rename to libm/src/math/fabsf16.rs diff --git a/libm/libm/src/math/fdim.rs b/libm/src/math/fdim.rs similarity index 100% rename from libm/libm/src/math/fdim.rs rename to libm/src/math/fdim.rs diff --git a/libm/libm/src/math/fdimf.rs b/libm/src/math/fdimf.rs similarity index 100% rename from libm/libm/src/math/fdimf.rs rename to libm/src/math/fdimf.rs diff --git a/libm/libm/src/math/fdimf128.rs b/libm/src/math/fdimf128.rs similarity index 100% rename from libm/libm/src/math/fdimf128.rs rename to libm/src/math/fdimf128.rs diff --git a/libm/libm/src/math/fdimf16.rs b/libm/src/math/fdimf16.rs similarity index 100% rename from libm/libm/src/math/fdimf16.rs rename to libm/src/math/fdimf16.rs diff --git a/libm/libm/src/math/floor.rs b/libm/src/math/floor.rs similarity index 100% rename from libm/libm/src/math/floor.rs rename to libm/src/math/floor.rs diff --git a/libm/libm/src/math/floorf.rs b/libm/src/math/floorf.rs similarity index 100% rename from libm/libm/src/math/floorf.rs rename to libm/src/math/floorf.rs diff --git a/libm/libm/src/math/floorf128.rs b/libm/src/math/floorf128.rs similarity index 100% rename from libm/libm/src/math/floorf128.rs rename to libm/src/math/floorf128.rs diff --git a/libm/libm/src/math/floorf16.rs b/libm/src/math/floorf16.rs similarity index 100% rename from libm/libm/src/math/floorf16.rs rename to libm/src/math/floorf16.rs diff --git a/libm/libm/src/math/fma.rs b/libm/src/math/fma.rs similarity index 100% rename from libm/libm/src/math/fma.rs rename to libm/src/math/fma.rs diff --git a/libm/libm/src/math/fma_wide.rs b/libm/src/math/fma_wide.rs similarity index 100% rename from 
libm/libm/src/math/fma_wide.rs rename to libm/src/math/fma_wide.rs diff --git a/libm/libm/src/math/fmin_fmax.rs b/libm/src/math/fmin_fmax.rs similarity index 100% rename from libm/libm/src/math/fmin_fmax.rs rename to libm/src/math/fmin_fmax.rs diff --git a/libm/libm/src/math/fminimum_fmaximum.rs b/libm/src/math/fminimum_fmaximum.rs similarity index 100% rename from libm/libm/src/math/fminimum_fmaximum.rs rename to libm/src/math/fminimum_fmaximum.rs diff --git a/libm/libm/src/math/fminimum_fmaximum_num.rs b/libm/src/math/fminimum_fmaximum_num.rs similarity index 100% rename from libm/libm/src/math/fminimum_fmaximum_num.rs rename to libm/src/math/fminimum_fmaximum_num.rs diff --git a/libm/libm/src/math/fmod.rs b/libm/src/math/fmod.rs similarity index 100% rename from libm/libm/src/math/fmod.rs rename to libm/src/math/fmod.rs diff --git a/libm/libm/src/math/fmodf.rs b/libm/src/math/fmodf.rs similarity index 100% rename from libm/libm/src/math/fmodf.rs rename to libm/src/math/fmodf.rs diff --git a/libm/libm/src/math/fmodf128.rs b/libm/src/math/fmodf128.rs similarity index 100% rename from libm/libm/src/math/fmodf128.rs rename to libm/src/math/fmodf128.rs diff --git a/libm/libm/src/math/fmodf16.rs b/libm/src/math/fmodf16.rs similarity index 100% rename from libm/libm/src/math/fmodf16.rs rename to libm/src/math/fmodf16.rs diff --git a/libm/libm/src/math/frexp.rs b/libm/src/math/frexp.rs similarity index 100% rename from libm/libm/src/math/frexp.rs rename to libm/src/math/frexp.rs diff --git a/libm/libm/src/math/frexpf.rs b/libm/src/math/frexpf.rs similarity index 100% rename from libm/libm/src/math/frexpf.rs rename to libm/src/math/frexpf.rs diff --git a/libm/libm/src/math/generic/ceil.rs b/libm/src/math/generic/ceil.rs similarity index 100% rename from libm/libm/src/math/generic/ceil.rs rename to libm/src/math/generic/ceil.rs diff --git a/libm/libm/src/math/generic/copysign.rs b/libm/src/math/generic/copysign.rs similarity index 100% rename from 
libm/libm/src/math/generic/copysign.rs rename to libm/src/math/generic/copysign.rs diff --git a/libm/libm/src/math/generic/fabs.rs b/libm/src/math/generic/fabs.rs similarity index 100% rename from libm/libm/src/math/generic/fabs.rs rename to libm/src/math/generic/fabs.rs diff --git a/libm/libm/src/math/generic/fdim.rs b/libm/src/math/generic/fdim.rs similarity index 100% rename from libm/libm/src/math/generic/fdim.rs rename to libm/src/math/generic/fdim.rs diff --git a/libm/libm/src/math/generic/floor.rs b/libm/src/math/generic/floor.rs similarity index 100% rename from libm/libm/src/math/generic/floor.rs rename to libm/src/math/generic/floor.rs diff --git a/libm/libm/src/math/generic/fmax.rs b/libm/src/math/generic/fmax.rs similarity index 100% rename from libm/libm/src/math/generic/fmax.rs rename to libm/src/math/generic/fmax.rs diff --git a/libm/libm/src/math/generic/fmaximum.rs b/libm/src/math/generic/fmaximum.rs similarity index 100% rename from libm/libm/src/math/generic/fmaximum.rs rename to libm/src/math/generic/fmaximum.rs diff --git a/libm/libm/src/math/generic/fmaximum_num.rs b/libm/src/math/generic/fmaximum_num.rs similarity index 100% rename from libm/libm/src/math/generic/fmaximum_num.rs rename to libm/src/math/generic/fmaximum_num.rs diff --git a/libm/libm/src/math/generic/fmin.rs b/libm/src/math/generic/fmin.rs similarity index 100% rename from libm/libm/src/math/generic/fmin.rs rename to libm/src/math/generic/fmin.rs diff --git a/libm/libm/src/math/generic/fminimum.rs b/libm/src/math/generic/fminimum.rs similarity index 100% rename from libm/libm/src/math/generic/fminimum.rs rename to libm/src/math/generic/fminimum.rs diff --git a/libm/libm/src/math/generic/fminimum_num.rs b/libm/src/math/generic/fminimum_num.rs similarity index 100% rename from libm/libm/src/math/generic/fminimum_num.rs rename to libm/src/math/generic/fminimum_num.rs diff --git a/libm/libm/src/math/generic/fmod.rs b/libm/src/math/generic/fmod.rs similarity index 100% rename from 
libm/libm/src/math/generic/fmod.rs rename to libm/src/math/generic/fmod.rs diff --git a/libm/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs similarity index 100% rename from libm/libm/src/math/generic/mod.rs rename to libm/src/math/generic/mod.rs diff --git a/libm/libm/src/math/generic/rint.rs b/libm/src/math/generic/rint.rs similarity index 100% rename from libm/libm/src/math/generic/rint.rs rename to libm/src/math/generic/rint.rs diff --git a/libm/libm/src/math/generic/round.rs b/libm/src/math/generic/round.rs similarity index 100% rename from libm/libm/src/math/generic/round.rs rename to libm/src/math/generic/round.rs diff --git a/libm/libm/src/math/generic/scalbn.rs b/libm/src/math/generic/scalbn.rs similarity index 100% rename from libm/libm/src/math/generic/scalbn.rs rename to libm/src/math/generic/scalbn.rs diff --git a/libm/libm/src/math/generic/sqrt.rs b/libm/src/math/generic/sqrt.rs similarity index 100% rename from libm/libm/src/math/generic/sqrt.rs rename to libm/src/math/generic/sqrt.rs diff --git a/libm/libm/src/math/generic/trunc.rs b/libm/src/math/generic/trunc.rs similarity index 100% rename from libm/libm/src/math/generic/trunc.rs rename to libm/src/math/generic/trunc.rs diff --git a/libm/libm/src/math/hypot.rs b/libm/src/math/hypot.rs similarity index 100% rename from libm/libm/src/math/hypot.rs rename to libm/src/math/hypot.rs diff --git a/libm/libm/src/math/hypotf.rs b/libm/src/math/hypotf.rs similarity index 100% rename from libm/libm/src/math/hypotf.rs rename to libm/src/math/hypotf.rs diff --git a/libm/libm/src/math/ilogb.rs b/libm/src/math/ilogb.rs similarity index 100% rename from libm/libm/src/math/ilogb.rs rename to libm/src/math/ilogb.rs diff --git a/libm/libm/src/math/ilogbf.rs b/libm/src/math/ilogbf.rs similarity index 100% rename from libm/libm/src/math/ilogbf.rs rename to libm/src/math/ilogbf.rs diff --git a/libm/libm/src/math/j0.rs b/libm/src/math/j0.rs similarity index 100% rename from libm/libm/src/math/j0.rs rename 
to libm/src/math/j0.rs diff --git a/libm/libm/src/math/j0f.rs b/libm/src/math/j0f.rs similarity index 100% rename from libm/libm/src/math/j0f.rs rename to libm/src/math/j0f.rs diff --git a/libm/libm/src/math/j1.rs b/libm/src/math/j1.rs similarity index 100% rename from libm/libm/src/math/j1.rs rename to libm/src/math/j1.rs diff --git a/libm/libm/src/math/j1f.rs b/libm/src/math/j1f.rs similarity index 100% rename from libm/libm/src/math/j1f.rs rename to libm/src/math/j1f.rs diff --git a/libm/libm/src/math/jn.rs b/libm/src/math/jn.rs similarity index 100% rename from libm/libm/src/math/jn.rs rename to libm/src/math/jn.rs diff --git a/libm/libm/src/math/jnf.rs b/libm/src/math/jnf.rs similarity index 100% rename from libm/libm/src/math/jnf.rs rename to libm/src/math/jnf.rs diff --git a/libm/libm/src/math/k_cos.rs b/libm/src/math/k_cos.rs similarity index 100% rename from libm/libm/src/math/k_cos.rs rename to libm/src/math/k_cos.rs diff --git a/libm/libm/src/math/k_cosf.rs b/libm/src/math/k_cosf.rs similarity index 100% rename from libm/libm/src/math/k_cosf.rs rename to libm/src/math/k_cosf.rs diff --git a/libm/libm/src/math/k_expo2.rs b/libm/src/math/k_expo2.rs similarity index 100% rename from libm/libm/src/math/k_expo2.rs rename to libm/src/math/k_expo2.rs diff --git a/libm/libm/src/math/k_expo2f.rs b/libm/src/math/k_expo2f.rs similarity index 100% rename from libm/libm/src/math/k_expo2f.rs rename to libm/src/math/k_expo2f.rs diff --git a/libm/libm/src/math/k_sin.rs b/libm/src/math/k_sin.rs similarity index 100% rename from libm/libm/src/math/k_sin.rs rename to libm/src/math/k_sin.rs diff --git a/libm/libm/src/math/k_sinf.rs b/libm/src/math/k_sinf.rs similarity index 100% rename from libm/libm/src/math/k_sinf.rs rename to libm/src/math/k_sinf.rs diff --git a/libm/libm/src/math/k_tan.rs b/libm/src/math/k_tan.rs similarity index 100% rename from libm/libm/src/math/k_tan.rs rename to libm/src/math/k_tan.rs diff --git a/libm/libm/src/math/k_tanf.rs 
b/libm/src/math/k_tanf.rs similarity index 100% rename from libm/libm/src/math/k_tanf.rs rename to libm/src/math/k_tanf.rs diff --git a/libm/libm/src/math/ldexp.rs b/libm/src/math/ldexp.rs similarity index 100% rename from libm/libm/src/math/ldexp.rs rename to libm/src/math/ldexp.rs diff --git a/libm/libm/src/math/ldexpf.rs b/libm/src/math/ldexpf.rs similarity index 100% rename from libm/libm/src/math/ldexpf.rs rename to libm/src/math/ldexpf.rs diff --git a/libm/libm/src/math/ldexpf128.rs b/libm/src/math/ldexpf128.rs similarity index 100% rename from libm/libm/src/math/ldexpf128.rs rename to libm/src/math/ldexpf128.rs diff --git a/libm/libm/src/math/ldexpf16.rs b/libm/src/math/ldexpf16.rs similarity index 100% rename from libm/libm/src/math/ldexpf16.rs rename to libm/src/math/ldexpf16.rs diff --git a/libm/libm/src/math/lgamma.rs b/libm/src/math/lgamma.rs similarity index 100% rename from libm/libm/src/math/lgamma.rs rename to libm/src/math/lgamma.rs diff --git a/libm/libm/src/math/lgamma_r.rs b/libm/src/math/lgamma_r.rs similarity index 100% rename from libm/libm/src/math/lgamma_r.rs rename to libm/src/math/lgamma_r.rs diff --git a/libm/libm/src/math/lgammaf.rs b/libm/src/math/lgammaf.rs similarity index 100% rename from libm/libm/src/math/lgammaf.rs rename to libm/src/math/lgammaf.rs diff --git a/libm/libm/src/math/lgammaf_r.rs b/libm/src/math/lgammaf_r.rs similarity index 100% rename from libm/libm/src/math/lgammaf_r.rs rename to libm/src/math/lgammaf_r.rs diff --git a/libm/libm/src/math/log.rs b/libm/src/math/log.rs similarity index 100% rename from libm/libm/src/math/log.rs rename to libm/src/math/log.rs diff --git a/libm/libm/src/math/log10.rs b/libm/src/math/log10.rs similarity index 100% rename from libm/libm/src/math/log10.rs rename to libm/src/math/log10.rs diff --git a/libm/libm/src/math/log10f.rs b/libm/src/math/log10f.rs similarity index 100% rename from libm/libm/src/math/log10f.rs rename to libm/src/math/log10f.rs diff --git 
a/libm/libm/src/math/log1p.rs b/libm/src/math/log1p.rs similarity index 100% rename from libm/libm/src/math/log1p.rs rename to libm/src/math/log1p.rs diff --git a/libm/libm/src/math/log1pf.rs b/libm/src/math/log1pf.rs similarity index 100% rename from libm/libm/src/math/log1pf.rs rename to libm/src/math/log1pf.rs diff --git a/libm/libm/src/math/log2.rs b/libm/src/math/log2.rs similarity index 100% rename from libm/libm/src/math/log2.rs rename to libm/src/math/log2.rs diff --git a/libm/libm/src/math/log2f.rs b/libm/src/math/log2f.rs similarity index 100% rename from libm/libm/src/math/log2f.rs rename to libm/src/math/log2f.rs diff --git a/libm/libm/src/math/logf.rs b/libm/src/math/logf.rs similarity index 100% rename from libm/libm/src/math/logf.rs rename to libm/src/math/logf.rs diff --git a/libm/libm/src/math/mod.rs b/libm/src/math/mod.rs similarity index 100% rename from libm/libm/src/math/mod.rs rename to libm/src/math/mod.rs diff --git a/libm/libm/src/math/modf.rs b/libm/src/math/modf.rs similarity index 100% rename from libm/libm/src/math/modf.rs rename to libm/src/math/modf.rs diff --git a/libm/libm/src/math/modff.rs b/libm/src/math/modff.rs similarity index 100% rename from libm/libm/src/math/modff.rs rename to libm/src/math/modff.rs diff --git a/libm/libm/src/math/nextafter.rs b/libm/src/math/nextafter.rs similarity index 100% rename from libm/libm/src/math/nextafter.rs rename to libm/src/math/nextafter.rs diff --git a/libm/libm/src/math/nextafterf.rs b/libm/src/math/nextafterf.rs similarity index 100% rename from libm/libm/src/math/nextafterf.rs rename to libm/src/math/nextafterf.rs diff --git a/libm/libm/src/math/pow.rs b/libm/src/math/pow.rs similarity index 100% rename from libm/libm/src/math/pow.rs rename to libm/src/math/pow.rs diff --git a/libm/libm/src/math/powf.rs b/libm/src/math/powf.rs similarity index 100% rename from libm/libm/src/math/powf.rs rename to libm/src/math/powf.rs diff --git a/libm/libm/src/math/rem_pio2.rs 
b/libm/src/math/rem_pio2.rs similarity index 100% rename from libm/libm/src/math/rem_pio2.rs rename to libm/src/math/rem_pio2.rs diff --git a/libm/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs similarity index 100% rename from libm/libm/src/math/rem_pio2_large.rs rename to libm/src/math/rem_pio2_large.rs diff --git a/libm/libm/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs similarity index 100% rename from libm/libm/src/math/rem_pio2f.rs rename to libm/src/math/rem_pio2f.rs diff --git a/libm/libm/src/math/remainder.rs b/libm/src/math/remainder.rs similarity index 100% rename from libm/libm/src/math/remainder.rs rename to libm/src/math/remainder.rs diff --git a/libm/libm/src/math/remainderf.rs b/libm/src/math/remainderf.rs similarity index 100% rename from libm/libm/src/math/remainderf.rs rename to libm/src/math/remainderf.rs diff --git a/libm/libm/src/math/remquo.rs b/libm/src/math/remquo.rs similarity index 100% rename from libm/libm/src/math/remquo.rs rename to libm/src/math/remquo.rs diff --git a/libm/libm/src/math/remquof.rs b/libm/src/math/remquof.rs similarity index 100% rename from libm/libm/src/math/remquof.rs rename to libm/src/math/remquof.rs diff --git a/libm/libm/src/math/rint.rs b/libm/src/math/rint.rs similarity index 100% rename from libm/libm/src/math/rint.rs rename to libm/src/math/rint.rs diff --git a/libm/libm/src/math/round.rs b/libm/src/math/round.rs similarity index 100% rename from libm/libm/src/math/round.rs rename to libm/src/math/round.rs diff --git a/libm/libm/src/math/roundeven.rs b/libm/src/math/roundeven.rs similarity index 100% rename from libm/libm/src/math/roundeven.rs rename to libm/src/math/roundeven.rs diff --git a/libm/libm/src/math/roundf.rs b/libm/src/math/roundf.rs similarity index 100% rename from libm/libm/src/math/roundf.rs rename to libm/src/math/roundf.rs diff --git a/libm/libm/src/math/roundf128.rs b/libm/src/math/roundf128.rs similarity index 100% rename from libm/libm/src/math/roundf128.rs 
rename to libm/src/math/roundf128.rs diff --git a/libm/libm/src/math/roundf16.rs b/libm/src/math/roundf16.rs similarity index 100% rename from libm/libm/src/math/roundf16.rs rename to libm/src/math/roundf16.rs diff --git a/libm/libm/src/math/scalbn.rs b/libm/src/math/scalbn.rs similarity index 100% rename from libm/libm/src/math/scalbn.rs rename to libm/src/math/scalbn.rs diff --git a/libm/libm/src/math/scalbnf.rs b/libm/src/math/scalbnf.rs similarity index 100% rename from libm/libm/src/math/scalbnf.rs rename to libm/src/math/scalbnf.rs diff --git a/libm/libm/src/math/scalbnf128.rs b/libm/src/math/scalbnf128.rs similarity index 100% rename from libm/libm/src/math/scalbnf128.rs rename to libm/src/math/scalbnf128.rs diff --git a/libm/libm/src/math/scalbnf16.rs b/libm/src/math/scalbnf16.rs similarity index 100% rename from libm/libm/src/math/scalbnf16.rs rename to libm/src/math/scalbnf16.rs diff --git a/libm/libm/src/math/sin.rs b/libm/src/math/sin.rs similarity index 100% rename from libm/libm/src/math/sin.rs rename to libm/src/math/sin.rs diff --git a/libm/libm/src/math/sincos.rs b/libm/src/math/sincos.rs similarity index 100% rename from libm/libm/src/math/sincos.rs rename to libm/src/math/sincos.rs diff --git a/libm/libm/src/math/sincosf.rs b/libm/src/math/sincosf.rs similarity index 100% rename from libm/libm/src/math/sincosf.rs rename to libm/src/math/sincosf.rs diff --git a/libm/libm/src/math/sinf.rs b/libm/src/math/sinf.rs similarity index 100% rename from libm/libm/src/math/sinf.rs rename to libm/src/math/sinf.rs diff --git a/libm/libm/src/math/sinh.rs b/libm/src/math/sinh.rs similarity index 100% rename from libm/libm/src/math/sinh.rs rename to libm/src/math/sinh.rs diff --git a/libm/libm/src/math/sinhf.rs b/libm/src/math/sinhf.rs similarity index 100% rename from libm/libm/src/math/sinhf.rs rename to libm/src/math/sinhf.rs diff --git a/libm/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs similarity index 100% rename from libm/libm/src/math/sqrt.rs rename to 
libm/src/math/sqrt.rs diff --git a/libm/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs similarity index 100% rename from libm/libm/src/math/sqrtf.rs rename to libm/src/math/sqrtf.rs diff --git a/libm/libm/src/math/sqrtf128.rs b/libm/src/math/sqrtf128.rs similarity index 100% rename from libm/libm/src/math/sqrtf128.rs rename to libm/src/math/sqrtf128.rs diff --git a/libm/libm/src/math/sqrtf16.rs b/libm/src/math/sqrtf16.rs similarity index 100% rename from libm/libm/src/math/sqrtf16.rs rename to libm/src/math/sqrtf16.rs diff --git a/libm/libm/src/math/support/big.rs b/libm/src/math/support/big.rs similarity index 100% rename from libm/libm/src/math/support/big.rs rename to libm/src/math/support/big.rs diff --git a/libm/libm/src/math/support/big/tests.rs b/libm/src/math/support/big/tests.rs similarity index 100% rename from libm/libm/src/math/support/big/tests.rs rename to libm/src/math/support/big/tests.rs diff --git a/libm/libm/src/math/support/env.rs b/libm/src/math/support/env.rs similarity index 100% rename from libm/libm/src/math/support/env.rs rename to libm/src/math/support/env.rs diff --git a/libm/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs similarity index 100% rename from libm/libm/src/math/support/float_traits.rs rename to libm/src/math/support/float_traits.rs diff --git a/libm/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs similarity index 100% rename from libm/libm/src/math/support/hex_float.rs rename to libm/src/math/support/hex_float.rs diff --git a/libm/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs similarity index 100% rename from libm/libm/src/math/support/int_traits.rs rename to libm/src/math/support/int_traits.rs diff --git a/libm/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs similarity index 100% rename from libm/libm/src/math/support/macros.rs rename to libm/src/math/support/macros.rs diff --git a/libm/libm/src/math/support/mod.rs 
b/libm/src/math/support/mod.rs similarity index 100% rename from libm/libm/src/math/support/mod.rs rename to libm/src/math/support/mod.rs diff --git a/libm/libm/src/math/tan.rs b/libm/src/math/tan.rs similarity index 100% rename from libm/libm/src/math/tan.rs rename to libm/src/math/tan.rs diff --git a/libm/libm/src/math/tanf.rs b/libm/src/math/tanf.rs similarity index 100% rename from libm/libm/src/math/tanf.rs rename to libm/src/math/tanf.rs diff --git a/libm/libm/src/math/tanh.rs b/libm/src/math/tanh.rs similarity index 100% rename from libm/libm/src/math/tanh.rs rename to libm/src/math/tanh.rs diff --git a/libm/libm/src/math/tanhf.rs b/libm/src/math/tanhf.rs similarity index 100% rename from libm/libm/src/math/tanhf.rs rename to libm/src/math/tanhf.rs diff --git a/libm/libm/src/math/tgamma.rs b/libm/src/math/tgamma.rs similarity index 100% rename from libm/libm/src/math/tgamma.rs rename to libm/src/math/tgamma.rs diff --git a/libm/libm/src/math/tgammaf.rs b/libm/src/math/tgammaf.rs similarity index 100% rename from libm/libm/src/math/tgammaf.rs rename to libm/src/math/tgammaf.rs diff --git a/libm/libm/src/math/trunc.rs b/libm/src/math/trunc.rs similarity index 100% rename from libm/libm/src/math/trunc.rs rename to libm/src/math/trunc.rs diff --git a/libm/libm/src/math/truncf.rs b/libm/src/math/truncf.rs similarity index 100% rename from libm/libm/src/math/truncf.rs rename to libm/src/math/truncf.rs diff --git a/libm/libm/src/math/truncf128.rs b/libm/src/math/truncf128.rs similarity index 100% rename from libm/libm/src/math/truncf128.rs rename to libm/src/math/truncf128.rs diff --git a/libm/libm/src/math/truncf16.rs b/libm/src/math/truncf16.rs similarity index 100% rename from libm/libm/src/math/truncf16.rs rename to libm/src/math/truncf16.rs From d69c42a2fcb8553dd20d522d46830b77e18ad668 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 19 Apr 2025 21:25:19 +0000 Subject: [PATCH 1291/1459] Fix the release-plz job --- .github/workflows/publish.yaml | 9 
++------- etc/libm/.github/workflows/publish.yaml | 27 ------------------------- 2 files changed, 2 insertions(+), 34 deletions(-) delete mode 100644 etc/libm/.github/workflows/publish.yaml diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 7d6a8df03..85a33c039 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -5,24 +5,19 @@ permissions: contents: write on: - push: - branches: - - master + push: { branches: [master] } jobs: release-plz: name: Release-plz - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - name: Checkout repository uses: actions/checkout@v4 with: fetch-depth: 0 - submodules: true - name: Install Rust (rustup) run: rustup update nightly --no-self-update && rustup default nightly - - name: Publish `libm` as part of builtins, rather than its own crate - run: rm compiler-builtins/libm/Cargo.toml - name: Run release-plz uses: MarcoIeni/release-plz-action@v0.5 env: diff --git a/etc/libm/.github/workflows/publish.yaml b/etc/libm/.github/workflows/publish.yaml deleted file mode 100644 index 15904079d..000000000 --- a/etc/libm/.github/workflows/publish.yaml +++ /dev/null @@ -1,27 +0,0 @@ -name: Release-plz - -permissions: - pull-requests: write - contents: write - -on: - push: - branches: - - master - -jobs: - release-plz: - name: Release-plz - runs-on: ubuntu-24.04 - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Install Rust (rustup) - run: rustup update nightly --no-self-update && rustup default nightly - - name: Run release-plz - uses: MarcoIeni/release-plz-action@v0.5 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} From 6c51404e305485117d49b91633c7649a8b14925d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 19 Apr 2025 22:15:43 +0000 Subject: [PATCH 1292/1459] libm: Fix crate compilation Update paths and submodules to fix `libm-test` and `util` building so we 
will be able to add them to the workspace. --- .gitmodules | 4 ++-- crates/musl-math-sys/musl | 1 + crates/util/Cargo.toml | 2 +- libm-test/Cargo.toml | 6 +++--- libm-test/build.rs | 4 ++-- libm-test/src/op.rs | 4 ++-- 6 files changed, 11 insertions(+), 10 deletions(-) create mode 160000 crates/musl-math-sys/musl diff --git a/.gitmodules b/.gitmodules index ee941a47e..792ed9ab2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ -[submodule "musl"] - path = libm/crates/musl-math-sys/musl +[submodule "crates/musl-math-sys/musl"] + path = crates/musl-math-sys/musl url = https://git.musl-libc.org/git/musl shallow = true diff --git a/crates/musl-math-sys/musl b/crates/musl-math-sys/musl new file mode 160000 index 000000000..0784374d5 --- /dev/null +++ b/crates/musl-math-sys/musl @@ -0,0 +1 @@ +Subproject commit 0784374d561435f7c787a555aeab8ede699ed298 diff --git a/crates/util/Cargo.toml b/crates/util/Cargo.toml index 4bcb97472..ae37a7238 100644 --- a/crates/util/Cargo.toml +++ b/crates/util/Cargo.toml @@ -13,6 +13,6 @@ unstable-float = ["libm/unstable-float", "libm-test/unstable-float", "rug?/night [dependencies] libm = { path = "../../libm", default-features = false } libm-macros = { path = "../libm-macros" } -libm-test = { path = "../libm-test", default-features = false } +libm-test = { path = "../../libm-test", default-features = false } musl-math-sys = { path = "../musl-math-sys", optional = true } rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "std"] } diff --git a/libm-test/Cargo.toml b/libm-test/Cargo.toml index 5d150b4ae..c9b3b7958 100644 --- a/libm-test/Cargo.toml +++ b/libm-test/Cargo.toml @@ -32,9 +32,9 @@ anyhow = "1.0.97" gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false } iai-callgrind = { version = "0.14.0", optional = true } indicatif = { version = "0.17.11", default-features = false } -libm = { path = "../../libm", features = ["unstable-public-internals"] } -libm-macros = { 
path = "../libm-macros" } -musl-math-sys = { path = "../musl-math-sys", optional = true } +libm = { path = "../libm", features = ["unstable-public-internals"] } +libm-macros = { path = "../crates/libm-macros" } +musl-math-sys = { path = "../crates/musl-math-sys", optional = true } paste = "1.0.15" rand = "0.9.0" rand_chacha = "0.9.0" diff --git a/libm-test/build.rs b/libm-test/build.rs index f75e3dda5..510ba842f 100644 --- a/libm-test/build.rs +++ b/libm-test/build.rs @@ -1,9 +1,9 @@ -#[path = "../../libm/configure.rs"] +#[path = "../libm/configure.rs"] mod configure; use configure::Config; fn main() { - println!("cargo:rerun-if-changed=../../libm/configure.rs"); + println!("cargo:rerun-if-changed=../libm/configure.rs"); let cfg = Config::from_env(); configure::emit_test_config(&cfg); } diff --git a/libm-test/src/op.rs b/libm-test/src/op.rs index 47d72ae58..4f251f80d 100644 --- a/libm-test/src/op.rs +++ b/libm-test/src/op.rs @@ -16,12 +16,12 @@ use std::fmt; use std::panic::{RefUnwindSafe, UnwindSafe}; -pub use shared::{ALL_OPERATIONS, FloatTy, MathOpInfo, Ty}; +pub use shared::{FloatTy, MathOpInfo, Ty, ALL_OPERATIONS}; use crate::{CheckOutput, Float, TupleCall}; mod shared { - include!("../../libm-macros/src/shared.rs"); + include!("../../crates/libm-macros/src/shared.rs"); } /// An enum representing each possible symbol name (`sin`, `sinf`, `sinl`, etc). From a03560715ef6ec7312c368331db3d3ea89b605b7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 19 Apr 2025 21:33:14 +0000 Subject: [PATCH 1293/1459] Add libm and libm-macros to the workspace These should build and test correctly. `libm-test` and others that depend on it are excluded since the necessary CI is not yet set up. 
--- Cargo.toml | 36 +++++++++++++++++++++++++++++++++--- etc/libm/Cargo.toml | 37 ------------------------------------- 2 files changed, 33 insertions(+), 40 deletions(-) delete mode 100644 etc/libm/Cargo.toml diff --git a/Cargo.toml b/Cargo.toml index 2e17c303a..155fb00b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,15 +7,45 @@ members = [ "builtins-test", "builtins-test-intrinsics", "compiler-builtins", + "crates/libm-macros", + "libm", + # FIXME(libm): disabled until tests work in CI + # "libm-test", + # "crates/musl-math-sys", + # "crates/util", ] default-members = [ - "compiler-builtins", "builtins-test", + "compiler-builtins", + "crates/libm-macros", + # FIXME(libm): disabled until tests work in CI + # "crates/libm-test" + "libm", ] [profile.release] -panic = 'abort' +panic = "abort" [profile.dev] -panic = 'abort' +panic = "abort" + +# FIXME(libm): these profiles are needed for testing +# # The default release profile is unchanged. + +# # Release mode with debug assertions +# [profile.release-checked] +# inherits = "release" +# debug-assertions = true +# overflow-checks = true + +# # Release with maximum optimizations, which is very slow to build. This is also +# # what is needed to check `no-panic`. +# [profile.release-opt] +# inherits = "release" +# codegen-units = 1 +# lto = "fat" + +# [profile.bench] +# # Required for iai-callgrind +# debug = true diff --git a/etc/libm/Cargo.toml b/etc/libm/Cargo.toml deleted file mode 100644 index 268b6fb0e..000000000 --- a/etc/libm/Cargo.toml +++ /dev/null @@ -1,37 +0,0 @@ -[workspace] -resolver = "2" -members = [ - "libm", - "crates/libm-macros", - "crates/libm-test", - "crates/musl-math-sys", - "crates/util", -] -default-members = [ - "libm", - "crates/libm-macros", - "crates/libm-test" -] -exclude = [ - # Requires `panic = abort` so can't be a member of the workspace - "crates/compiler-builtins-smoke-test", -] - -# The default release profile is unchanged. 
- -# Release mode with debug assertions -[profile.release-checked] -inherits = "release" -debug-assertions = true -overflow-checks = true - -# Release with maximum optimizations, which is very slow to build. This is also -# what is needed to check `no-panic`. -[profile.release-opt] -inherits = "release" -codegen-units = 1 -lto = "fat" - -[profile.bench] -# Required for iai-callgrind -debug = true From 0bd4d0a56b6663c973c5e543ab5c3b063b8a2f34 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 19 Apr 2025 22:54:23 +0000 Subject: [PATCH 1294/1459] Add a .rustfmt.toml with style edition 2024 Use the 2024 style edition for all crates and enable import sorting. 2024 already applies some smaller heuristics that look good in compiler-builtins; I have dropped `use_small_heuristics` that was set in `libm` because it seems to negatively affect the readability of anything working with numbers (e.g. collapsing multiple small `if` expressions into a single line). --- libm/.rustfmt.toml => .rustfmt.toml | 1 - 1 file changed, 1 deletion(-) rename libm/.rustfmt.toml => .rustfmt.toml (79%) diff --git a/libm/.rustfmt.toml b/.rustfmt.toml similarity index 79% rename from libm/.rustfmt.toml rename to .rustfmt.toml index c73bb9301..79ac399c1 100644 --- a/libm/.rustfmt.toml +++ b/.rustfmt.toml @@ -1,5 +1,4 @@ # This matches rustc style_edition = "2024" -use_small_heuristics = "Max" group_imports = "StdExternalCrate" imports_granularity = "Module" From 40688f6a4adc6f2abc92556167276d175c286c77 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 19 Apr 2025 22:39:00 +0000 Subject: [PATCH 1295/1459] Run `cargo fmt` on all projects Apply the same formatting rules to both `libm` and `compiler-builtins`.
--- builtins-test-intrinsics/src/main.rs | 1 + builtins-test/benches/float_cmp.rs | 3 +- builtins-test/benches/mem_icount.rs | 3 +- builtins-test/src/bench.rs | 2 +- builtins-test/src/lib.rs | 1 - builtins-test/tests/cmp.rs | 1 - builtins-test/tests/div_rem.rs | 3 +- compiler-builtins/build.rs | 7 +- compiler-builtins/src/arm_linux.rs | 3 +- compiler-builtins/src/float/conv.rs | 3 +- compiler-builtins/src/float/div.rs | 7 +- compiler-builtins/src/float/mod.rs | 1 - compiler-builtins/src/float/pow.rs | 6 +- compiler-builtins/src/int/big.rs | 3 +- compiler-builtins/src/int/leading_zeros.rs | 6 +- compiler-builtins/src/int/mod.rs | 2 - .../src/int/specialized_div_rem/mod.rs | 1 - compiler-builtins/src/int/udiv.rs | 1 - compiler-builtins/src/mem/x86_64.rs | 3 +- crates/libm-macros/src/enums.rs | 31 +++- crates/libm-macros/src/lib.rs | 44 +++-- crates/libm-macros/src/parse.rs | 38 +++- crates/libm-macros/src/shared.rs | 175 ++++++++++++++---- crates/musl-math-sys/build.rs | 57 ++++-- crates/util/src/main.rs | 6 +- libm-test/benches/random.rs | 17 +- libm-test/examples/plot_domains.rs | 8 +- libm-test/src/domain.rs | 73 +++++--- libm-test/src/f8_impl.rs | 4 +- libm-test/src/generate.rs | 11 +- libm-test/src/generate/case_list.rs | 50 ++++- libm-test/src/generate/edge_cases.rs | 6 +- libm-test/src/generate/random.rs | 5 +- libm-test/src/generate/spaced.rs | 11 +- libm-test/src/lib.rs | 11 +- libm-test/src/num.rs | 93 ++++++++-- libm-test/src/op.rs | 2 +- libm-test/src/run_cfg.rs | 25 ++- libm-test/src/test_traits.rs | 10 +- libm-test/tests/u256.rs | 12 +- libm-test/tests/z_extensive/run.rs | 22 ++- libm/configure.rs | 10 +- libm/src/lib.rs | 5 +- libm/src/math/atanf.rs | 9 +- libm/src/math/cbrt.rs | 6 +- libm/src/math/erf.rs | 6 +- libm/src/math/erff.rs | 6 +- libm/src/math/exp10f.rs | 5 +- libm/src/math/expm1f.rs | 6 +- libm/src/math/fma.rs | 10 +- libm/src/math/fma_wide.rs | 11 +- libm/src/math/generic/ceil.rs | 9 +- libm/src/math/generic/floor.rs | 9 +- 
libm/src/math/generic/rint.rs | 15 +- libm/src/math/generic/sqrt.rs | 5 +- libm/src/math/generic/trunc.rs | 15 +- libm/src/math/ilogb.rs | 6 +- libm/src/math/k_sin.rs | 6 +- libm/src/math/log1p.rs | 6 +- libm/src/math/log1pf.rs | 6 +- libm/src/math/pow.rs | 48 +++-- libm/src/math/powf.rs | 12 +- libm/src/math/rem_pio2.rs | 20 +- libm/src/math/sinf.rs | 12 +- libm/src/math/support/big.rs | 30 ++- libm/src/math/support/big/tests.rs | 168 +++++++++++++++-- libm/src/math/support/env.rs | 5 +- libm/src/math/support/float_traits.rs | 81 +++++++- libm/src/math/support/hex_float.rs | 46 ++++- libm/src/math/tan.rs | 6 +- libm/src/math/tanf.rs | 6 +- 71 files changed, 1070 insertions(+), 283 deletions(-) diff --git a/builtins-test-intrinsics/src/main.rs b/builtins-test-intrinsics/src/main.rs index 21d0a083c..c4c026368 100644 --- a/builtins-test-intrinsics/src/main.rs +++ b/builtins-test-intrinsics/src/main.rs @@ -480,6 +480,7 @@ mod intrinsics { fn run() { use core::hint::black_box as bb; + use intrinsics::*; // FIXME(f16_f128): some PPC f128 <-> int conversion functions have the wrong names diff --git a/builtins-test/benches/float_cmp.rs b/builtins-test/benches/float_cmp.rs index 4493765ec..42d665239 100644 --- a/builtins-test/benches/float_cmp.rs +++ b/builtins-test/benches/float_cmp.rs @@ -1,9 +1,8 @@ #![cfg_attr(f128_enabled, feature(f128))] use builtins_test::float_bench; -use criterion::{Criterion, criterion_main}; - use compiler_builtins::float::cmp; +use criterion::{Criterion, criterion_main}; /// `gt` symbols are allowed to return differing results, they just get compared /// to 0. 
diff --git a/builtins-test/benches/mem_icount.rs b/builtins-test/benches/mem_icount.rs index 63045f6e1..bd88cf80c 100644 --- a/builtins-test/benches/mem_icount.rs +++ b/builtins-test/benches/mem_icount.rs @@ -239,9 +239,10 @@ mod mcmp { } mod mmove { - use super::*; use Spread::{Aligned, Large, Medium, Small}; + use super::*; + struct Cfg { len: usize, spread: Spread, diff --git a/builtins-test/src/bench.rs b/builtins-test/src/bench.rs index 45a3a1ad4..2348f6bc9 100644 --- a/builtins-test/src/bench.rs +++ b/builtins-test/src/bench.rs @@ -1,6 +1,6 @@ +use alloc::vec::Vec; use core::cell::RefCell; -use alloc::vec::Vec; use compiler_builtins::float::Float; /// Fuzz with these many items to ensure equal functions diff --git a/builtins-test/src/lib.rs b/builtins-test/src/lib.rs index a83aea562..c596ac213 100644 --- a/builtins-test/src/lib.rs +++ b/builtins-test/src/lib.rs @@ -21,7 +21,6 @@ extern crate alloc; use compiler_builtins::float::Float; use compiler_builtins::int::{Int, MinInt}; - use rand_xoshiro::Xoshiro128StarStar; use rand_xoshiro::rand_core::{RngCore, SeedableRng}; diff --git a/builtins-test/tests/cmp.rs b/builtins-test/tests/cmp.rs index dbedd213e..a904dc5f7 100644 --- a/builtins-test/tests/cmp.rs +++ b/builtins-test/tests/cmp.rs @@ -97,7 +97,6 @@ mod float_comparisons { __eqkf2 as __eqtf2, __gekf2 as __getf2, __gtkf2 as __gttf2, __lekf2 as __letf2, __ltkf2 as __lttf2, __nekf2 as __netf2, __unordkf2 as __unordtf2, }; - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] use compiler_builtins::float::cmp::{ __eqtf2, __getf2, __gttf2, __letf2, __lttf2, __netf2, __unordtf2, diff --git a/builtins-test/tests/div_rem.rs b/builtins-test/tests/div_rem.rs index 6c0280a32..5ae653cc9 100644 --- a/builtins-test/tests/div_rem.rs +++ b/builtins-test/tests/div_rem.rs @@ -1,11 +1,10 @@ #![feature(f128)] #![allow(unused_macros)] +use builtins_test::*; use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4}; use 
compiler_builtins::int::udiv::{__udivmoddi4, __udivmodsi4, __udivmodti4, u128_divide_sparc}; -use builtins_test::*; - // Division algorithms have by far the nastiest and largest number of edge cases, and experience shows // that sometimes 100_000 iterations of the random fuzzer is needed. diff --git a/compiler-builtins/build.rs b/compiler-builtins/build.rs index d627121f3..04369a4aa 100644 --- a/compiler-builtins/build.rs +++ b/compiler-builtins/build.rs @@ -1,8 +1,11 @@ mod configure; -use std::{collections::BTreeMap, env, path::PathBuf, sync::atomic::Ordering}; +use std::collections::BTreeMap; +use std::env; +use std::path::PathBuf; +use std::sync::atomic::Ordering; -use configure::{configure_aliases, configure_f16_f128, Target}; +use configure::{Target, configure_aliases, configure_f16_f128}; fn main() { println!("cargo::rerun-if-changed=build.rs"); diff --git a/compiler-builtins/src/arm_linux.rs b/compiler-builtins/src/arm_linux.rs index aeb3ff3e5..6ce67ba71 100644 --- a/compiler-builtins/src/arm_linux.rs +++ b/compiler-builtins/src/arm_linux.rs @@ -1,6 +1,5 @@ -use core::arch; -use core::mem; use core::sync::atomic::{AtomicU32, Ordering}; +use core::{arch, mem}; // Kernel-provided user-mode helper functions: // https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt diff --git a/compiler-builtins/src/float/conv.rs b/compiler-builtins/src/float/conv.rs index 42a526bd5..f5427a113 100644 --- a/compiler-builtins/src/float/conv.rs +++ b/compiler-builtins/src/float/conv.rs @@ -1,8 +1,7 @@ use core::ops::Neg; -use crate::int::{CastFrom, CastInto, Int, MinInt}; - use super::Float; +use crate::int::{CastFrom, CastInto, Int, MinInt}; /// Conversions from integers to floats. /// diff --git a/compiler-builtins/src/float/div.rs b/compiler-builtins/src/float/div.rs index 929f29197..5df637c7e 100644 --- a/compiler-builtins/src/float/div.rs +++ b/compiler-builtins/src/float/div.rs @@ -79,11 +79,12 @@ //! //! 
[Newton-Raphson method]: https://en.wikipedia.org/wiki/Newton%27s_method +use core::mem::size_of; +use core::ops; + use super::HalfRep; use crate::float::Float; use crate::int::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; -use core::mem::size_of; -use core::ops; fn div(a: F, b: F) -> F where @@ -487,7 +488,7 @@ where }; residual_lo += abs_result & one; // tie to even - // conditionally turns the below LT comparison into LTE + // conditionally turns the below LT comparison into LTE abs_result += u8::from(residual_lo > b_significand).into(); if F::BITS == 128 || (F::BITS == 32 && half_iterations > 0) { diff --git a/compiler-builtins/src/float/mod.rs b/compiler-builtins/src/float/mod.rs index 41b308626..f2c543bd2 100644 --- a/compiler-builtins/src/float/mod.rs +++ b/compiler-builtins/src/float/mod.rs @@ -11,6 +11,5 @@ pub mod trunc; #[cfg(not(feature = "public-test-deps"))] pub(crate) use traits::{Float, HalfRep}; - #[cfg(feature = "public-test-deps")] pub use traits::{Float, HalfRep}; diff --git a/compiler-builtins/src/float/pow.rs b/compiler-builtins/src/float/pow.rs index fe76060e0..45a4ad904 100644 --- a/compiler-builtins/src/float/pow.rs +++ b/compiler-builtins/src/float/pow.rs @@ -18,11 +18,7 @@ fn pow(a: F, b: i32) -> F { a *= a; } - if recip { - F::ONE / mul - } else { - mul - } + if recip { F::ONE / mul } else { mul } } intrinsics! 
{ diff --git a/compiler-builtins/src/int/big.rs b/compiler-builtins/src/int/big.rs index 0ef3caaed..61f1349d9 100644 --- a/compiler-builtins/src/int/big.rs +++ b/compiler-builtins/src/int/big.rs @@ -2,9 +2,10 @@ #![allow(unused)] -use crate::int::{DInt, HInt, Int, MinInt}; use core::{fmt, ops}; +use crate::int::{DInt, HInt, Int, MinInt}; + const WORD_LO_MASK: u64 = 0x00000000ffffffff; const WORD_HI_MASK: u64 = 0xffffffff00000000; const WORD_FULL_MASK: u64 = 0xffffffffffffffff; diff --git a/compiler-builtins/src/int/leading_zeros.rs b/compiler-builtins/src/int/leading_zeros.rs index ba735aa74..a57f88184 100644 --- a/compiler-builtins/src/int/leading_zeros.rs +++ b/compiler-builtins/src/int/leading_zeros.rs @@ -60,11 +60,7 @@ mod implementation { } // the last two bisections are combined into one conditional t = x >> 1; - if t != T::ZERO { - z - 2 - } else { - z - x.cast() - } + if t != T::ZERO { z - 2 } else { z - x.cast() } // We could potentially save a few cycles by using the LUT trick from // "https://embeddedgurus.com/state-space/2014/09/ diff --git a/compiler-builtins/src/int/mod.rs b/compiler-builtins/src/int/mod.rs index 1f1be711b..5633510d3 100644 --- a/compiler-builtins/src/int/mod.rs +++ b/compiler-builtins/src/int/mod.rs @@ -12,9 +12,7 @@ mod traits; pub mod udiv; pub use big::{i256, u256}; - #[cfg(not(feature = "public-test-deps"))] pub(crate) use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; - #[cfg(feature = "public-test-deps")] pub use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; diff --git a/compiler-builtins/src/int/specialized_div_rem/mod.rs b/compiler-builtins/src/int/specialized_div_rem/mod.rs index a91fe6632..b81f04698 100644 --- a/compiler-builtins/src/int/specialized_div_rem/mod.rs +++ b/compiler-builtins/src/int/specialized_div_rem/mod.rs @@ -58,7 +58,6 @@ mod delegate; #[allow(unused_imports)] #[cfg(not(feature = "public-test-deps"))] pub(crate) use self::delegate::u128_divide_sparc; - #[cfg(feature = "public-test-deps")] 
pub use self::delegate::u128_divide_sparc; diff --git a/compiler-builtins/src/int/udiv.rs b/compiler-builtins/src/int/udiv.rs index f18537b00..4e985ba47 100644 --- a/compiler-builtins/src/int/udiv.rs +++ b/compiler-builtins/src/int/udiv.rs @@ -1,6 +1,5 @@ #[cfg(not(feature = "public-test-deps"))] pub(crate) use crate::int::specialized_div_rem::*; - #[cfg(feature = "public-test-deps")] pub use crate::int::specialized_div_rem::*; diff --git a/compiler-builtins/src/mem/x86_64.rs b/compiler-builtins/src/mem/x86_64.rs index 40b67093f..5cbe83ab1 100644 --- a/compiler-builtins/src/mem/x86_64.rs +++ b/compiler-builtins/src/mem/x86_64.rs @@ -17,8 +17,7 @@ // Note that ERMSB does not enhance the backwards (DF=1) "rep movsb". use core::arch::asm; -use core::intrinsics; -use core::mem; +use core::{intrinsics, mem}; #[inline(always)] #[cfg(target_feature = "ermsb")] diff --git a/crates/libm-macros/src/enums.rs b/crates/libm-macros/src/enums.rs index 864b625ea..b4646f984 100644 --- a/crates/libm-macros/src/enums.rs +++ b/crates/libm-macros/src/enums.rs @@ -26,7 +26,10 @@ pub fn function_enum( }; if let Some(tt) = attr.next() { - return Err(syn::Error::new(tt.span(), "unexpected token after identifier")); + return Err(syn::Error::new( + tt.span(), + "unexpected token after identifier", + )); } let enum_name = &item.ident; @@ -46,8 +49,12 @@ pub fn function_enum( // Match arm for `fn base_name(self)` matcher base_arms.push(quote! 
{ Self::#ident => #base_enum::#bname_ident }); - let variant = - Variant { attrs: Vec::new(), ident, fields: Fields::Unit, discriminant: None }; + let variant = Variant { + attrs: Vec::new(), + ident, + fields: Fields::Unit, + discriminant: None, + }; item.variants.push(variant); } @@ -108,7 +115,10 @@ pub fn base_name_enum( return Err(syn::Error::new(sp.span(), "no attributes expected")); } - let mut base_names: Vec<_> = ALL_OPERATIONS.iter().map(|func| base_name(func.name)).collect(); + let mut base_names: Vec<_> = ALL_OPERATIONS + .iter() + .map(|func| base_name(func.name)) + .collect(); base_names.sort_unstable(); base_names.dedup(); @@ -121,8 +131,12 @@ pub fn base_name_enum( // Match arm for `fn as_str(self)` matcher as_str_arms.push(quote! { Self::#ident => #base_name }); - let variant = - Variant { attrs: Vec::new(), ident, fields: Fields::Unit, discriminant: None }; + let variant = Variant { + attrs: Vec::new(), + ident, + fields: Fields::Unit, + discriminant: None, + }; item.variants.push(variant); } @@ -147,7 +161,10 @@ pub fn base_name_enum( /// Verify that an enum is empty, otherwise return an error fn expect_empty_enum(item: &ItemEnum) -> syn::Result<()> { if !item.variants.is_empty() { - Err(syn::Error::new(item.variants.span(), "expected an empty enum")) + Err(syn::Error::new( + item.variants.span(), + "expected an empty enum", + )) } else { Ok(()) } diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs index 3cee5385b..3cdd364e8 100644 --- a/crates/libm-macros/src/lib.rs +++ b/crates/libm-macros/src/lib.rs @@ -11,7 +11,9 @@ use syn::spanned::Spanned; use syn::visit_mut::VisitMut; use syn::{Ident, ItemEnum}; -const KNOWN_TYPES: &[&str] = &["FTy", "CFn", "CArgs", "CRet", "RustFn", "RustArgs", "RustRet"]; +const KNOWN_TYPES: &[&str] = &[ + "FTy", "CFn", "CArgs", "CRet", "RustFn", "RustArgs", "RustRet", +]; /// Populate an enum with a variant representing function. Names are in upper camel case. 
/// @@ -142,10 +144,17 @@ fn validate(input: &mut StructuredInput) -> syn::Result .flat_map(|map_list| map_list.iter()) .flat_map(|attr_map| attr_map.names.iter()); let only_mentions = input.only.iter().flat_map(|only_list| only_list.iter()); - let fn_extra_mentions = - input.fn_extra.iter().flat_map(|v| v.keys()).filter(|name| *name != "_"); - let all_mentioned_fns = - input.skip.iter().chain(only_mentions).chain(attr_mentions).chain(fn_extra_mentions); + let fn_extra_mentions = input + .fn_extra + .iter() + .flat_map(|v| v.keys()) + .filter(|name| *name != "_"); + let all_mentioned_fns = input + .skip + .iter() + .chain(only_mentions) + .chain(attr_mentions) + .chain(fn_extra_mentions); // Make sure that every function mentioned is a real function for mentioned in all_mentioned_fns { @@ -171,7 +180,11 @@ fn validate(input: &mut StructuredInput) -> syn::Result for func in ALL_OPERATIONS.iter() { let fn_name = func.name; // If we have an `only` list and it does _not_ contain this function name, skip it - if input.only.as_ref().is_some_and(|only| !only.iter().any(|o| o == fn_name)) { + if input + .only + .as_ref() + .is_some_and(|only| !only.iter().any(|o| o == fn_name)) + { continue; } @@ -296,8 +309,11 @@ fn expand(input: StructuredInput, fn_list: &[&MathOpInfo]) -> syn::Result { - let mut fn_extra = - map.get(&fn_name).or_else(|| map.get(&default_ident)).unwrap().clone(); + let mut fn_extra = map + .get(&fn_name) + .or_else(|| map.get(&default_ident)) + .unwrap() + .clone(); let mut v = MacroReplace::new(func.name); v.visit_expr_mut(&mut fn_extra); @@ -357,7 +373,11 @@ struct MacroReplace { impl MacroReplace { fn new(name: &'static str) -> Self { let norm_name = base_name(name); - Self { fn_name: name, norm_name: norm_name.to_owned(), error: None } + Self { + fn_name: name, + norm_name: norm_name.to_owned(), + error: None, + } } fn finish(self) -> syn::Result<()> { @@ -377,8 +397,10 @@ impl MacroReplace { "MACRO_FN_NAME" => *i = Ident::new(self.fn_name, 
i.span()), "MACRO_FN_NAME_NORMALIZED" => *i = Ident::new(&self.norm_name, i.span()), _ => { - self.error = - Some(syn::Error::new(i.span(), format!("unrecognized meta expression `{s}`"))); + self.error = Some(syn::Error::new( + i.span(), + format!("unrecognized meta expression `{s}`"), + )); } } } diff --git a/crates/libm-macros/src/parse.rs b/crates/libm-macros/src/parse.rs index 369bbae2f..d60d1247a 100644 --- a/crates/libm-macros/src/parse.rs +++ b/crates/libm-macros/src/parse.rs @@ -16,7 +16,9 @@ pub struct Invocation { impl Parse for Invocation { fn parse(input: ParseStream) -> syn::Result { - Ok(Self { fields: input.parse_terminated(Mapping::parse, Token![,])? }) + Ok(Self { + fields: input.parse_terminated(Mapping::parse, Token![,])?, + }) } } @@ -30,7 +32,11 @@ struct Mapping { impl Parse for Mapping { fn parse(input: ParseStream) -> syn::Result { - Ok(Self { name: input.parse()?, _sep: input.parse()?, expr: input.parse()? }) + Ok(Self { + name: input.parse()?, + _sep: input.parse()?, + expr: input.parse()?, + }) } } @@ -133,7 +139,13 @@ fn extract_fn_extra_field(expr: Expr) -> syn::Result> { return Err(e); }; - let ExprMatch { attrs, match_token: _, expr, brace_token: _, arms } = mexpr; + let ExprMatch { + attrs, + match_token: _, + expr, + brace_token: _, + arms, + } = mexpr; expect_empty_attrs(&attrs)?; @@ -146,7 +158,14 @@ fn extract_fn_extra_field(expr: Expr) -> syn::Result> { let mut res = BTreeMap::new(); for arm in arms { - let Arm { attrs, pat, guard, fat_arrow_token: _, body, comma: _ } = arm; + let Arm { + attrs, + pat, + guard, + fat_arrow_token: _, + body, + comma: _, + } = arm; expect_empty_attrs(&attrs)?; @@ -177,15 +196,20 @@ fn expect_empty_attrs(attrs: &[Attribute]) -> syn::Result<()> { return Ok(()); } - let e = - syn::Error::new(attrs.first().unwrap().span(), "no attributes allowed in this position"); + let e = syn::Error::new( + attrs.first().unwrap().span(), + "no attributes allowed in this position", + ); Err(e) } /// Extract a named 
field from a map, raising an error if it doesn't exist. fn expect_field(v: &mut Vec, name: &str) -> syn::Result { let pos = v.iter().position(|v| v.name == name).ok_or_else(|| { - syn::Error::new(Span::call_site(), format!("missing expected field `{name}`")) + syn::Error::new( + Span::call_site(), + format!("missing expected field `{name}`"), + ) })?; Ok(v.remove(pos).expr) diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs index 5e58220eb..750ed1afb 100644 --- a/crates/libm-macros/src/shared.rs +++ b/crates/libm-macros/src/shared.rs @@ -7,7 +7,10 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] ( // `fn(f16) -> f16` FloatTy::F16, - Signature { args: &[Ty::F16], returns: &[Ty::F16] }, + Signature { + args: &[Ty::F16], + returns: &[Ty::F16], + }, None, &[ "ceilf16", @@ -23,7 +26,10 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] ( // `fn(f32) -> f32` FloatTy::F32, - Signature { args: &[Ty::F32], returns: &[Ty::F32] }, + Signature { + args: &[Ty::F32], + returns: &[Ty::F32], + }, None, &[ "acosf", @@ -68,7 +74,10 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] ( // `(f64) -> f64` FloatTy::F64, - Signature { args: &[Ty::F64], returns: &[Ty::F64] }, + Signature { + args: &[Ty::F64], + returns: &[Ty::F64], + }, None, &[ "acos", @@ -113,7 +122,10 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] ( // `fn(f128) -> f128` FloatTy::F128, - Signature { args: &[Ty::F128], returns: &[Ty::F128] }, + Signature { + args: &[Ty::F128], + returns: &[Ty::F128], + }, None, &[ "ceilf128", @@ -129,7 +141,10 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] ( // `(f16, f16) -> f16` FloatTy::F16, - Signature { args: &[Ty::F16, Ty::F16], returns: &[Ty::F16] }, + Signature { + args: &[Ty::F16, Ty::F16], + returns: &[Ty::F16], + }, None, &[ "copysignf16", @@ -146,7 +161,10 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] 
( // `(f32, f32) -> f32` FloatTy::F32, - Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32] }, + Signature { + args: &[Ty::F32, Ty::F32], + returns: &[Ty::F32], + }, None, &[ "atan2f", @@ -168,7 +186,10 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] ( // `(f64, f64) -> f64` FloatTy::F64, - Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64] }, + Signature { + args: &[Ty::F64, Ty::F64], + returns: &[Ty::F64], + }, None, &[ "atan2", @@ -190,7 +211,10 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] ( // `(f128, f128) -> f128` FloatTy::F128, - Signature { args: &[Ty::F128, Ty::F128], returns: &[Ty::F128] }, + Signature { + args: &[Ty::F128, Ty::F128], + returns: &[Ty::F128], + }, None, &[ "copysignf128", @@ -207,134 +231,215 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] ( // `(f32, f32, f32) -> f32` FloatTy::F32, - Signature { args: &[Ty::F32, Ty::F32, Ty::F32], returns: &[Ty::F32] }, + Signature { + args: &[Ty::F32, Ty::F32, Ty::F32], + returns: &[Ty::F32], + }, None, &["fmaf"], ), ( // `(f64, f64, f64) -> f64` FloatTy::F64, - Signature { args: &[Ty::F64, Ty::F64, Ty::F64], returns: &[Ty::F64] }, + Signature { + args: &[Ty::F64, Ty::F64, Ty::F64], + returns: &[Ty::F64], + }, None, &["fma"], ), ( // `(f128, f128, f128) -> f128` FloatTy::F128, - Signature { args: &[Ty::F128, Ty::F128, Ty::F128], returns: &[Ty::F128] }, + Signature { + args: &[Ty::F128, Ty::F128, Ty::F128], + returns: &[Ty::F128], + }, None, &["fmaf128"], ), ( // `(f32) -> i32` FloatTy::F32, - Signature { args: &[Ty::F32], returns: &[Ty::I32] }, + Signature { + args: &[Ty::F32], + returns: &[Ty::I32], + }, None, &["ilogbf"], ), ( // `(f64) -> i32` FloatTy::F64, - Signature { args: &[Ty::F64], returns: &[Ty::I32] }, + Signature { + args: &[Ty::F64], + returns: &[Ty::I32], + }, None, &["ilogb"], ), ( // `(i32, f32) -> f32` FloatTy::F32, - Signature { args: &[Ty::I32, Ty::F32], returns: &[Ty::F32] }, + Signature { + 
args: &[Ty::I32, Ty::F32], + returns: &[Ty::F32], + }, None, &["jnf", "ynf"], ), ( // `(i32, f64) -> f64` FloatTy::F64, - Signature { args: &[Ty::I32, Ty::F64], returns: &[Ty::F64] }, + Signature { + args: &[Ty::I32, Ty::F64], + returns: &[Ty::F64], + }, None, &["jn", "yn"], ), ( // `(f16, i32) -> f16` FloatTy::F16, - Signature { args: &[Ty::F16, Ty::I32], returns: &[Ty::F16] }, + Signature { + args: &[Ty::F16, Ty::I32], + returns: &[Ty::F16], + }, None, &["ldexpf16", "scalbnf16"], ), ( // `(f32, i32) -> f32` FloatTy::F32, - Signature { args: &[Ty::F32, Ty::I32], returns: &[Ty::F32] }, + Signature { + args: &[Ty::F32, Ty::I32], + returns: &[Ty::F32], + }, None, &["ldexpf", "scalbnf"], ), ( // `(f64, i64) -> f64` FloatTy::F64, - Signature { args: &[Ty::F64, Ty::I32], returns: &[Ty::F64] }, + Signature { + args: &[Ty::F64, Ty::I32], + returns: &[Ty::F64], + }, None, &["ldexp", "scalbn"], ), ( // `(f128, i32) -> f128` FloatTy::F128, - Signature { args: &[Ty::F128, Ty::I32], returns: &[Ty::F128] }, + Signature { + args: &[Ty::F128, Ty::I32], + returns: &[Ty::F128], + }, None, &["ldexpf128", "scalbnf128"], ), ( // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)` FloatTy::F32, - Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] }, - Some(Signature { args: &[Ty::F32, Ty::MutF32], returns: &[Ty::F32] }), + Signature { + args: &[Ty::F32], + returns: &[Ty::F32, Ty::F32], + }, + Some(Signature { + args: &[Ty::F32, Ty::MutF32], + returns: &[Ty::F32], + }), &["modff"], ), ( // `(f64, &mut f64) -> f64` as `(f64) -> (f64, f64)` FloatTy::F64, - Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] }, - Some(Signature { args: &[Ty::F64, Ty::MutF64], returns: &[Ty::F64] }), + Signature { + args: &[Ty::F64], + returns: &[Ty::F64, Ty::F64], + }, + Some(Signature { + args: &[Ty::F64, Ty::MutF64], + returns: &[Ty::F64], + }), &["modf"], ), ( // `(f32, &mut c_int) -> f32` as `(f32) -> (f32, i32)` FloatTy::F32, - Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::I32] }, 
- Some(Signature { args: &[Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }), + Signature { + args: &[Ty::F32], + returns: &[Ty::F32, Ty::I32], + }, + Some(Signature { + args: &[Ty::F32, Ty::MutCInt], + returns: &[Ty::F32], + }), &["frexpf", "lgammaf_r"], ), ( // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)` FloatTy::F64, - Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::I32] }, - Some(Signature { args: &[Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }), + Signature { + args: &[Ty::F64], + returns: &[Ty::F64, Ty::I32], + }, + Some(Signature { + args: &[Ty::F64, Ty::MutCInt], + returns: &[Ty::F64], + }), &["frexp", "lgamma_r"], ), ( // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)` FloatTy::F32, - Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32, Ty::I32] }, - Some(Signature { args: &[Ty::F32, Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }), + Signature { + args: &[Ty::F32, Ty::F32], + returns: &[Ty::F32, Ty::I32], + }, + Some(Signature { + args: &[Ty::F32, Ty::F32, Ty::MutCInt], + returns: &[Ty::F32], + }), &["remquof"], ), ( // `(f64, f64, &mut c_int) -> f64` as `(f64, f64) -> (f64, i32)` FloatTy::F64, - Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64, Ty::I32] }, - Some(Signature { args: &[Ty::F64, Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }), + Signature { + args: &[Ty::F64, Ty::F64], + returns: &[Ty::F64, Ty::I32], + }, + Some(Signature { + args: &[Ty::F64, Ty::F64, Ty::MutCInt], + returns: &[Ty::F64], + }), &["remquo"], ), ( // `(f32, &mut f32, &mut f32)` as `(f32) -> (f32, f32)` FloatTy::F32, - Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] }, - Some(Signature { args: &[Ty::F32, Ty::MutF32, Ty::MutF32], returns: &[] }), + Signature { + args: &[Ty::F32], + returns: &[Ty::F32, Ty::F32], + }, + Some(Signature { + args: &[Ty::F32, Ty::MutF32, Ty::MutF32], + returns: &[], + }), &["sincosf"], ), ( // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)` FloatTy::F64, - Signature { args: &[Ty::F64], returns: 
&[Ty::F64, Ty::F64] }, - Some(Signature { args: &[Ty::F64, Ty::MutF64, Ty::MutF64], returns: &[] }), + Signature { + args: &[Ty::F64], + returns: &[Ty::F64, Ty::F64], + }, + Some(Signature { + args: &[Ty::F64, Ty::MutF64, Ty::MutF64], + returns: &[], + }), &["sincos"], ), ]; diff --git a/crates/musl-math-sys/build.rs b/crates/musl-math-sys/build.rs index f06d84ee2..b00dbc73e 100644 --- a/crates/musl-math-sys/build.rs +++ b/crates/musl-math-sys/build.rs @@ -8,7 +8,10 @@ const LIB_NAME: &str = "musl_math_prefixed"; /// Files that have more than one symbol. Map of file names to the symbols defined in that file. const MULTIPLE_SYMBOLS: &[(&str, &[&str])] = &[ - ("__invtrigl", &["__invtrigl", "__invtrigl_R", "__pio2_hi", "__pio2_lo"]), + ( + "__invtrigl", + &["__invtrigl", "__invtrigl_R", "__pio2_hi", "__pio2_lo"], + ), ("__polevll", &["__polevll", "__p1evll"]), ("erf", &["erf", "erfc"]), ("erff", &["erff", "erfcf"]), @@ -82,9 +85,16 @@ impl Config { let musl_dir = manifest_dir.join("musl"); let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); - let musl_arch = if target_arch == "x86" { "i386".to_owned() } else { target_arch.clone() }; + let musl_arch = if target_arch == "x86" { + "i386".to_owned() + } else { + target_arch.clone() + }; - println!("cargo::rerun-if-changed={}/c_patches", manifest_dir.display()); + println!( + "cargo::rerun-if-changed={}/c_patches", + manifest_dir.display() + ); println!("cargo::rerun-if-changed={}", musl_dir.display()); Self { @@ -108,7 +118,10 @@ fn build_musl_math(cfg: &Config) { let musl_dir = &cfg.musl_dir; let math = musl_dir.join("src/math"); let arch_dir = musl_dir.join("arch").join(&cfg.musl_arch); - assert!(math.exists(), "musl source not found. Is the submodule up to date?"); + assert!( + math.exists(), + "musl source not found. Is the submodule up to date?" 
+ ); let source_map = find_math_source(&math, cfg); let out_path = cfg.out_dir.join(format!("lib{LIB_NAME}.a")); @@ -125,7 +138,11 @@ fn build_musl_math(cfg: &Config) { .stderr(Stdio::inherit()) .output() .unwrap(); - assert!(sed_stat.status.success(), "sed command failed: {:?}", sed_stat.status); + assert!( + sed_stat.status.success(), + "sed command failed: {:?}", + sed_stat.status + ); fs::write(obj_include.join("bits/alltypes.h"), sed_stat.stdout).unwrap(); @@ -163,8 +180,9 @@ fn build_musl_math(cfg: &Config) { // Trickery! Redefine the symbol names to have the prefix `musl_`, which allows us to // differentiate these symbols from whatever we provide. - if let Some((_names, syms)) = - MULTIPLE_SYMBOLS.iter().find(|(name, _syms)| *name == sym_name) + if let Some((_names, syms)) = MULTIPLE_SYMBOLS + .iter() + .find(|(name, _syms)| *name == sym_name) { // Handle the occasional file that defines multiple symbols for sym in *syms { @@ -291,21 +309,34 @@ fn validate_archive_symbols(out_path: &Path) { ]; // List global undefined symbols - let out = - Command::new("nm").arg("-guj").arg(out_path).stderr(Stdio::inherit()).output().unwrap(); + let out = Command::new("nm") + .arg("-guj") + .arg(out_path) + .stderr(Stdio::inherit()) + .output() + .unwrap(); let undef = str::from_utf8(&out.stdout).unwrap(); let mut undef = undef.lines().collect::>(); undef.retain(|sym| { // Account for file formats that add a leading `_` - !ALLOWED_UNDEF_PFX.iter().any(|pfx| sym.starts_with(pfx) || sym[1..].starts_with(pfx)) + !ALLOWED_UNDEF_PFX + .iter() + .any(|pfx| sym.starts_with(pfx) || sym[1..].starts_with(pfx)) }); - assert!(undef.is_empty(), "found disallowed undefined symbols: {undef:#?}"); + assert!( + undef.is_empty(), + "found disallowed undefined symbols: {undef:#?}" + ); // Find any symbols that are missing the `_musl_` prefix` - let out = - Command::new("nm").arg("-gUj").arg(out_path).stderr(Stdio::inherit()).output().unwrap(); + let out = Command::new("nm") + .arg("-gUj") + 
.arg(out_path) + .stderr(Stdio::inherit()) + .output() + .unwrap(); let defined = str::from_utf8(&out.stdout).unwrap(); let mut defined = defined.lines().collect::>(); diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs index ef70ec903..e70578699 100644 --- a/crates/util/src/main.rs +++ b/crates/util/src/main.rs @@ -221,7 +221,11 @@ macro_rules! impl_parse_tuple_via_rug { impl ParseTuple for ($ty, $ty, $ty) { fn parse(input: &[&str]) -> Self { assert_eq!(input.len(), 3, "expected three arguments, got {input:?}"); - (parse_rug(input, 0), parse_rug(input, 1), parse_rug(input, 2)) + ( + parse_rug(input, 0), + parse_rug(input, 1), + parse_rug(input, 2), + ) } } }; diff --git a/libm-test/benches/random.rs b/libm-test/benches/random.rs index 63d7e5c6d..81f58e3a6 100644 --- a/libm-test/benches/random.rs +++ b/libm-test/benches/random.rs @@ -7,7 +7,11 @@ use libm_test::generate::random::RandomInput; use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, TupleCall}; /// Benchmark with this many items to get a variety -const BENCH_ITER_ITEMS: usize = if cfg!(feature = "short-benchmarks") { 50 } else { 500 }; +const BENCH_ITER_ITEMS: usize = if cfg!(feature = "short-benchmarks") { + 50 +} else { + 500 +}; /// Extra parameters we only care about if we are benchmarking against musl. 
#[allow(dead_code)] @@ -53,8 +57,10 @@ where let name = Op::NAME; let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl, GeneratorKind::Random); - let benchvec: Vec<_> = - random::get_test_cases::(&ctx).0.take(BENCH_ITER_ITEMS).collect(); + let benchvec: Vec<_> = random::get_test_cases::(&ctx) + .0 + .take(BENCH_ITER_ITEMS) + .collect(); // Perform a sanity check that we are benchmarking the same thing // Don't test against musl if it is not available @@ -73,7 +79,10 @@ where let musl_res = input.call(musl_fn); let crate_res = input.call(Op::ROUTINE); - crate_res.validate(musl_res, input, &ctx).context(name).unwrap(); + crate_res + .validate(musl_res, input, &ctx) + .context(name) + .unwrap(); } #[cfg(not(feature = "build-musl"))] diff --git a/libm-test/examples/plot_domains.rs b/libm-test/examples/plot_domains.rs index 78524761e..3563103b8 100644 --- a/libm-test/examples/plot_domains.rs +++ b/libm-test/examples/plot_domains.rs @@ -56,7 +56,13 @@ where Op::RustArgs: SpacedInput, { let mut ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr, GeneratorKind::QuickSpaced); - plot_one_generator(out_dir, &ctx, "logspace", config, spaced::get_test_cases::(&ctx).0); + plot_one_generator( + out_dir, + &ctx, + "logspace", + config, + spaced::get_test_cases::(&ctx).0, + ); ctx.gen_kind = GeneratorKind::EdgeCases; plot_one_generator( out_dir, diff --git a/libm-test/src/domain.rs b/libm-test/src/domain.rs index 41e948461..94641be9b 100644 --- a/libm-test/src/domain.rs +++ b/libm-test/src/domain.rs @@ -67,16 +67,25 @@ impl EitherPrim { /// Convenience 1-dimensional float domains. 
impl Domain { /// x ∈ ℝ - const UNBOUNDED: Self = - Self { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None }; + const UNBOUNDED: Self = Self { + start: Bound::Unbounded, + end: Bound::Unbounded, + check_points: None, + }; /// x ∈ ℝ >= 0 - const POSITIVE: Self = - Self { start: Bound::Included(F::ZERO), end: Bound::Unbounded, check_points: None }; + const POSITIVE: Self = Self { + start: Bound::Included(F::ZERO), + end: Bound::Unbounded, + check_points: None, + }; /// x ∈ ℝ > 0 - const STRICTLY_POSITIVE: Self = - Self { start: Bound::Excluded(F::ZERO), end: Bound::Unbounded, check_points: None }; + const STRICTLY_POSITIVE: Self = Self { + start: Bound::Excluded(F::ZERO), + end: Bound::Unbounded, + check_points: None, + }; /// Wrap in the float variant of [`EitherPrim`]. const fn into_prim_float(self) -> EitherPrim> { @@ -87,8 +96,11 @@ impl Domain { /// Convenience 1-dimensional integer domains. impl Domain { /// x ∈ ℝ - const UNBOUNDED_INT: Self = - Self { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None }; + const UNBOUNDED_INT: Self = Self { + start: Bound::Unbounded, + end: Bound::Unbounded, + check_points: None, + }; /// Wrap in the int variant of [`EitherPrim`]. const fn into_prim_int(self) -> EitherPrim, Self> { @@ -99,13 +111,18 @@ impl Domain { /// Multidimensional domains, represented as an array of 1-D domains. 
impl EitherPrim, Domain> { /// x ∈ ℝ - const UNBOUNDED1: [Self; 1] = - [Domain { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None } - .into_prim_float()]; + const UNBOUNDED1: [Self; 1] = [Domain { + start: Bound::Unbounded, + end: Bound::Unbounded, + check_points: None, + } + .into_prim_float()]; /// {x1, x2} ∈ ℝ - const UNBOUNDED2: [Self; 2] = - [Domain::UNBOUNDED.into_prim_float(), Domain::UNBOUNDED.into_prim_float()]; + const UNBOUNDED2: [Self; 2] = [ + Domain::UNBOUNDED.into_prim_float(), + Domain::UNBOUNDED.into_prim_float(), + ]; /// {x1, x2, x3} ∈ ℝ const UNBOUNDED3: [Self; 3] = [ @@ -115,8 +132,10 @@ impl EitherPrim, Domain> { ]; /// {x1, x2} ∈ ℝ, one float and one int - const UNBOUNDED_F_I: [Self; 2] = - [Domain::UNBOUNDED.into_prim_float(), Domain::UNBOUNDED_INT.into_prim_int()]; + const UNBOUNDED_F_I: [Self; 2] = [ + Domain::UNBOUNDED.into_prim_float(), + Domain::UNBOUNDED_INT.into_prim_int(), + ]; /// x ∈ ℝ >= 0 const POSITIVE: [Self; 1] = [Domain::POSITIVE.into_prim_float()]; @@ -133,9 +152,12 @@ impl EitherPrim, Domain> { .into_prim_float()]; /// Domain for `acosh` - const ACOSH: [Self; 1] = - [Domain { start: Bound::Included(F::ONE), end: Bound::Unbounded, check_points: None } - .into_prim_float()]; + const ACOSH: [Self; 1] = [Domain { + start: Bound::Included(F::ONE), + end: Bound::Unbounded, + check_points: None, + } + .into_prim_float()]; /// Domain for `atanh` const ATANH: [Self; 1] = [Domain { @@ -157,9 +179,12 @@ impl EitherPrim, Domain> { const LOG: [Self; 1] = Self::STRICTLY_POSITIVE; /// Domain for `log1p` i.e. 
`log(1 + x)` - const LOG1P: [Self; 1] = - [Domain { start: Bound::Excluded(F::NEG_ONE), end: Bound::Unbounded, check_points: None } - .into_prim_float()]; + const LOG1P: [Self; 1] = [Domain { + start: Bound::Excluded(F::NEG_ONE), + end: Bound::Unbounded, + check_points: None, + } + .into_prim_float()]; /// Domain for `sqrt` const SQRT: [Self; 1] = Self::POSITIVE; @@ -187,8 +212,10 @@ impl EitherPrim, Domain> { /// Domain for `jn` and `yn`. // FIXME: the domain should provide some sort of "reasonable range" so we don't actually test // the entire system unbounded. - const BESSEL_N: [Self; 2] = - [Domain::UNBOUNDED_INT.into_prim_int(), Domain::UNBOUNDED.into_prim_float()]; + const BESSEL_N: [Self; 2] = [ + Domain::UNBOUNDED_INT.into_prim_int(), + Domain::UNBOUNDED.into_prim_float(), + ]; } /// Get the domain for a given function. diff --git a/libm-test/src/f8_impl.rs b/libm-test/src/f8_impl.rs index ddb7bf90e..905c7d7fd 100644 --- a/libm-test/src/f8_impl.rs +++ b/libm-test/src/f8_impl.rs @@ -498,6 +498,8 @@ impl fmt::LowerHex for f8 { } pub const fn hf8(s: &str) -> f8 { - let Ok(bits) = libm::support::hex_float::parse_hex_exact(s, 8, 3) else { panic!() }; + let Ok(bits) = libm::support::hex_float::parse_hex_exact(s, 8, 3) else { + panic!() + }; f8(bits as u8) } diff --git a/libm-test/src/generate.rs b/libm-test/src/generate.rs index 89ca09a7a..da080d23f 100644 --- a/libm-test/src/generate.rs +++ b/libm-test/src/generate.rs @@ -16,7 +16,11 @@ pub struct KnownSize { impl KnownSize { pub fn new(iter: I, total: u64) -> Self { - Self { total, current: 0, iter } + Self { + total, + current: 0, + iter, + } } } @@ -30,7 +34,10 @@ impl Iterator for KnownSize { return next; } - assert_eq!(self.current, self.total, "total items did not match expected"); + assert_eq!( + self.current, self.total, + "total items did not match expected" + ); None } diff --git a/libm-test/src/generate/case_list.rs b/libm-test/src/generate/case_list.rs index e3628d51c..f1e6fcec3 100644 --- 
a/libm-test/src/generate/case_list.rs +++ b/libm-test/src/generate/case_list.rs @@ -20,14 +20,21 @@ pub struct TestCase { impl TestCase { #[expect(dead_code)] fn append_inputs(v: &mut Vec, l: &[Op::RustArgs]) { - v.extend(l.iter().copied().map(|input| Self { input, output: None })); + v.extend(l.iter().copied().map(|input| Self { + input, + output: None, + })); } fn append_pairs(v: &mut Vec, l: &[(Op::RustArgs, Option)]) where Op::RustRet: Copy, { - v.extend(l.iter().copied().map(|(input, output)| Self { input, output })); + v.extend( + l.iter() + .copied() + .map(|(input, output)| Self { input, output }), + ); } } @@ -603,9 +610,15 @@ fn rint_cases() -> Vec> { &[ // Known failure on i586 #[cfg(not(x86_no_sse))] - ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff994000p+38"))), + ( + (hf64!("-0x1.e3f13ff995ffcp+38"),), + Some(hf64!("-0x1.e3f13ff994000p+38")), + ), #[cfg(x86_no_sse)] - ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff998000p+38"))), + ( + (hf64!("-0x1.e3f13ff995ffcp+38"),), + Some(hf64!("-0x1.e3f13ff998000p+38")), + ), ], ); v @@ -655,9 +668,15 @@ fn roundeven_cases() -> Vec> { &[ // Known failure on i586 #[cfg(not(x86_no_sse))] - ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff994000p+38"))), + ( + (hf64!("-0x1.e3f13ff995ffcp+38"),), + Some(hf64!("-0x1.e3f13ff994000p+38")), + ), #[cfg(x86_no_sse)] - ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff998000p+38"))), + ( + (hf64!("-0x1.e3f13ff995ffcp+38"),), + Some(hf64!("-0x1.e3f13ff998000p+38")), + ), ], ); v @@ -832,7 +851,9 @@ where { assert_eq!(ctx.basis, CheckBasis::None); assert_eq!(ctx.gen_kind, GeneratorKind::List); - Op::get_cases().into_iter().filter_map(|x| x.output.map(|o| (x.input, o))) + Op::get_cases() + .into_iter() + .filter_map(|x| x.output.map(|o| (x.input, o))) } /// Opposite of the above; extract only test cases that don't have a known output, to be run @@ -847,7 +868,18 @@ where assert_eq!(ctx.gen_kind, GeneratorKind::List); let 
cases = Op::get_cases(); - let count: u64 = cases.iter().filter(|case| case.output.is_none()).count().try_into().unwrap(); + let count: u64 = cases + .iter() + .filter(|case| case.output.is_none()) + .count() + .try_into() + .unwrap(); - (cases.into_iter().filter(|x| x.output.is_none()).map(|x| x.input), count) + ( + cases + .into_iter() + .filter(|x| x.output.is_none()) + .map(|x| x.input), + count, + ) } diff --git a/libm-test/src/generate/edge_cases.rs b/libm-test/src/generate/edge_cases.rs index 56cc9fa9a..2fb074638 100644 --- a/libm-test/src/generate/edge_cases.rs +++ b/libm-test/src/generate/edge_cases.rs @@ -249,7 +249,11 @@ macro_rules! impl_edge_case_input { .flat_map(move |(first, second)| { iter2.clone().map(move |third| (first, second, third)) }); - let count = steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap(); + let count = steps0 + .checked_mul(steps1) + .unwrap() + .checked_mul(steps2) + .unwrap(); (iter, count) } diff --git a/libm-test/src/generate/random.rs b/libm-test/src/generate/random.rs index e8a7ee905..4ee88946d 100644 --- a/libm-test/src/generate/random.rs +++ b/libm-test/src/generate/random.rs @@ -117,7 +117,10 @@ impl_random_input!(f128); /// Create a test case iterator. pub fn get_test_cases( ctx: &CheckCtx, -) -> (impl Iterator + Send + use<'_, RustArgs>, u64) { +) -> ( + impl Iterator + Send + use<'_, RustArgs>, + u64, +) { let (iter, count) = RustArgs::get_cases(ctx); // Wrap in `KnownSize` so we get an assertion if the cuunt is wrong. diff --git a/libm-test/src/generate/spaced.rs b/libm-test/src/generate/spaced.rs index bea3f4c7e..8e6b376eb 100644 --- a/libm-test/src/generate/spaced.rs +++ b/libm-test/src/generate/spaced.rs @@ -70,7 +70,9 @@ fn value_count() -> Option where u64: TryFrom, { - u64::try_from(F::Int::MAX).ok().and_then(|max| max.checked_add(1)) + u64::try_from(F::Int::MAX) + .ok() + .and_then(|max| max.checked_add(1)) } /// Returns an iterator of every possible value of type `F`. 
@@ -162,8 +164,11 @@ macro_rules! impl_spaced_input { .flat_map(move |(first, second)| { iter2.clone().map(move |third| (first, second, third)) }); - let count = - steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap(); + let count = steps0 + .checked_mul(steps1) + .unwrap() + .checked_mul(steps2) + .unwrap(); (EitherIter::B(iter), count) } diff --git a/libm-test/src/lib.rs b/libm-test/src/lib.rs index 485c01a47..730318abc 100644 --- a/libm-test/src/lib.rs +++ b/libm-test/src/lib.rs @@ -71,7 +71,12 @@ pub fn test_log(s: &str) { return None; }; - PathBuf::from(x).parent().unwrap().parent().unwrap().join("target") + PathBuf::from(x) + .parent() + .unwrap() + .parent() + .unwrap() + .join("target") } }; let outfile = target_dir.join("test-log.txt"); @@ -81,7 +86,9 @@ pub fn test_log(s: &str) { .append(true) .open(outfile) .expect("failed to open logfile"); - let now = SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap(); + let now = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap(); writeln!(f, "\n\nTest run at {}", now.as_secs()).unwrap(); writeln!(f, "arch: {}", env::consts::ARCH).unwrap(); diff --git a/libm-test/src/num.rs b/libm-test/src/num.rs index eed941423..3237c8503 100644 --- a/libm-test/src/num.rs +++ b/libm-test/src/num.rs @@ -180,8 +180,17 @@ impl Consts { neg_max_snan, } = self; - [pos_nan, neg_nan, max_qnan, min_snan, max_snan, neg_max_qnan, neg_min_snan, neg_max_snan] - .into_iter() + [ + pos_nan, + neg_nan, + max_qnan, + min_snan, + max_snan, + neg_max_qnan, + neg_min_snan, + neg_max_snan, + ] + .into_iter() } } @@ -229,7 +238,9 @@ where assert!(!end.is_nan()); assert!(end >= start); - let steps = steps.checked_sub(F::Int::ONE).expect("`steps` must be at least 2"); + let steps = steps + .checked_sub(F::Int::ONE) + .expect("`steps` must be at least 2"); let between = ulp_between(start, end).expect("`start` or `end` is NaN"); let spacing = (between / steps).max(F::Int::ONE); let steps = steps.min(between); // 
At maximum, one step per ULP @@ -283,15 +294,22 @@ mod tests { if i == 0 { assert_eq!(down, f8::NEG_INFINITY.to_bits(), "{i} next_down({v:#010b})"); } else { - let expected = - if v == f8::ZERO { 1 | f8::SIGN_MASK } else { f8::ALL[i - 1].to_bits() }; + let expected = if v == f8::ZERO { + 1 | f8::SIGN_MASK + } else { + f8::ALL[i - 1].to_bits() + }; assert_eq!(down, expected, "{i} next_down({v:#010b})"); } if i == f8::ALL_LEN - 1 { assert_eq!(up, f8::INFINITY.to_bits(), "{i} next_up({v:#010b})"); } else { - let expected = if v == f8::NEG_ZERO { 1 } else { f8::ALL[i + 1].to_bits() }; + let expected = if v == f8::NEG_ZERO { + 1 + } else { + f8::ALL[i + 1].to_bits() + }; assert_eq!(up, expected, "{i} next_up({v:#010b})"); } } @@ -300,8 +318,14 @@ mod tests { #[test] fn test_next_up_down_inf_nan() { assert_eq!(f8::NEG_INFINITY.next_up().to_bits(), f8::ALL[0].to_bits(),); - assert_eq!(f8::NEG_INFINITY.next_down().to_bits(), f8::NEG_INFINITY.to_bits(),); - assert_eq!(f8::INFINITY.next_down().to_bits(), f8::ALL[f8::ALL_LEN - 1].to_bits(),); + assert_eq!( + f8::NEG_INFINITY.next_down().to_bits(), + f8::NEG_INFINITY.to_bits(), + ); + assert_eq!( + f8::INFINITY.next_down().to_bits(), + f8::ALL[f8::ALL_LEN - 1].to_bits(), + ); assert_eq!(f8::INFINITY.next_up().to_bits(), f8::INFINITY.to_bits(),); assert_eq!(f8::NAN.next_up().to_bits(), f8::NAN.to_bits(),); assert_eq!(f8::NAN.next_down().to_bits(), f8::NAN.to_bits(),); @@ -321,7 +345,10 @@ mod tests { // Check across zero assert_eq!(f8::from_bits(0b1_0000_111).n_up(8).to_bits(), 0b0_0000_001); - assert_eq!(f8::from_bits(0b0_0000_111).n_down(8).to_bits(), 0b1_0000_001); + assert_eq!( + f8::from_bits(0b0_0000_111).n_down(8).to_bits(), + 0b1_0000_001 + ); } #[test] @@ -337,13 +364,25 @@ mod tests { #[test] fn test_n_up_down_inf_nan_zero() { assert_eq!(f8::NEG_INFINITY.n_up(1).to_bits(), f8::ALL[0].to_bits()); - assert_eq!(f8::NEG_INFINITY.n_up(239).to_bits(), f8::ALL[f8::ALL_LEN - 1].to_bits()); + assert_eq!( + 
f8::NEG_INFINITY.n_up(239).to_bits(), + f8::ALL[f8::ALL_LEN - 1].to_bits() + ); assert_eq!(f8::NEG_INFINITY.n_up(240).to_bits(), f8::INFINITY.to_bits()); - assert_eq!(f8::NEG_INFINITY.n_down(u8::MAX).to_bits(), f8::NEG_INFINITY.to_bits()); + assert_eq!( + f8::NEG_INFINITY.n_down(u8::MAX).to_bits(), + f8::NEG_INFINITY.to_bits() + ); - assert_eq!(f8::INFINITY.n_down(1).to_bits(), f8::ALL[f8::ALL_LEN - 1].to_bits()); + assert_eq!( + f8::INFINITY.n_down(1).to_bits(), + f8::ALL[f8::ALL_LEN - 1].to_bits() + ); assert_eq!(f8::INFINITY.n_down(239).to_bits(), f8::ALL[0].to_bits()); - assert_eq!(f8::INFINITY.n_down(240).to_bits(), f8::NEG_INFINITY.to_bits()); + assert_eq!( + f8::INFINITY.n_down(240).to_bits(), + f8::NEG_INFINITY.to_bits() + ); assert_eq!(f8::INFINITY.n_up(u8::MAX).to_bits(), f8::INFINITY.to_bits()); assert_eq!(f8::NAN.n_up(u8::MAX).to_bits(), f8::NAN.to_bits()); @@ -381,7 +420,11 @@ mod tests { assert_eq!(down, expected, "{i} {n} n_down({v:#010b})"); } else { // Overflow to -inf - assert_eq!(down, f8::NEG_INFINITY.to_bits(), "{i} {n} n_down({v:#010b})"); + assert_eq!( + down, + f8::NEG_INFINITY.to_bits(), + "{i} {n} n_down({v:#010b})" + ); } let mut up_exp_idx = i + n; @@ -438,13 +481,22 @@ mod tests { #[test] fn test_ulp_between_inf_nan_zero() { - assert_eq!(ulp_between(f8::NEG_INFINITY, f8::INFINITY).unwrap(), f8::ALL_LEN as u8); - assert_eq!(ulp_between(f8::INFINITY, f8::NEG_INFINITY).unwrap(), f8::ALL_LEN as u8); + assert_eq!( + ulp_between(f8::NEG_INFINITY, f8::INFINITY).unwrap(), + f8::ALL_LEN as u8 + ); + assert_eq!( + ulp_between(f8::INFINITY, f8::NEG_INFINITY).unwrap(), + f8::ALL_LEN as u8 + ); assert_eq!( ulp_between(f8::NEG_INFINITY, f8::ALL[f8::ALL_LEN - 1]).unwrap(), f8::ALL_LEN as u8 - 1 ); - assert_eq!(ulp_between(f8::INFINITY, f8::ALL[0]).unwrap(), f8::ALL_LEN as u8 - 1); + assert_eq!( + ulp_between(f8::INFINITY, f8::ALL[0]).unwrap(), + f8::ALL_LEN as u8 - 1 + ); assert_eq!(ulp_between(f8::ZERO, f8::NEG_ZERO).unwrap(), 0); 
assert_eq!(ulp_between(f8::NAN, f8::ZERO), None); @@ -469,7 +521,12 @@ mod tests { // of steps. let (ls, count) = logspace(f8::from_bits(0x0), f8::from_bits(0x3), 10); let ls: Vec<_> = ls.collect(); - let exp = [f8::from_bits(0x0), f8::from_bits(0x1), f8::from_bits(0x2), f8::from_bits(0x3)]; + let exp = [ + f8::from_bits(0x0), + f8::from_bits(0x1), + f8::from_bits(0x2), + f8::from_bits(0x3), + ]; assert_eq!(ls, exp); assert_eq!(ls.len(), usize::from(count)); } diff --git a/libm-test/src/op.rs b/libm-test/src/op.rs index 4f251f80d..bd17aad7d 100644 --- a/libm-test/src/op.rs +++ b/libm-test/src/op.rs @@ -16,7 +16,7 @@ use std::fmt; use std::panic::{RefUnwindSafe, UnwindSafe}; -pub use shared::{FloatTy, MathOpInfo, Ty, ALL_OPERATIONS}; +pub use shared::{ALL_OPERATIONS, FloatTy, MathOpInfo, Ty}; use crate::{CheckOutput, Float, TupleCall}; diff --git a/libm-test/src/run_cfg.rs b/libm-test/src/run_cfg.rs index b36164b00..3345a01d2 100644 --- a/libm-test/src/run_cfg.rs +++ b/libm-test/src/run_cfg.rs @@ -15,7 +15,9 @@ pub const EXTENSIVE_ITER_ENV: &str = "LIBM_EXTENSIVE_ITERATIONS"; /// The override value, if set by the above environment. 
static EXTENSIVE_ITER_OVERRIDE: LazyLock> = LazyLock::new(|| { - env::var(EXTENSIVE_ITER_ENV).map(|v| v.parse().expect("failed to parse iteration count")).ok() + env::var(EXTENSIVE_ITER_ENV) + .map(|v| v.parse().expect("failed to parse iteration count")) + .ok() }); /// Specific tests that need to have a reduced amount of iterations to complete in a reasonable @@ -115,7 +117,10 @@ static EXTENSIVE: LazyLock> = LazyLock::new(|| { let mut ret = Vec::new(); let append_ty_ops = |ret: &mut Vec<_>, fty: FloatTy| { - let iter = Identifier::ALL.iter().filter(move |id| id.math_op().float_ty == fty).copied(); + let iter = Identifier::ALL + .iter() + .filter(move |id| id.math_op().float_ty == fty) + .copied(); ret.extend(iter); }; @@ -276,7 +281,10 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { let seed_msg = match ctx.gen_kind { GeneratorKind::QuickSpaced | GeneratorKind::Extensive => String::new(), GeneratorKind::Random => { - format!(" using `{SEED_ENV}={}`", str::from_utf8(SEED.as_slice()).unwrap()) + format!( + " using `{SEED_ENV}={}`", + str::from_utf8(SEED.as_slice()).unwrap() + ) } GeneratorKind::EdgeCases | GeneratorKind::List => unimplemented!(), }; @@ -303,7 +311,10 @@ pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive { return i32::MIN..=i32::MAX; } - assert_eq!(argnum, 0, "For `jn`/`yn`, only the first argument takes an integer"); + assert_eq!( + argnum, 0, + "For `jn`/`yn`, only the first argument takes an integer" + ); // The integer argument to `jn` is an iteration count. Limit this to ensure tests can be // completed in a reasonable amount of time. @@ -331,7 +342,11 @@ pub fn check_point_count(ctx: &CheckCtx) -> usize { "check_point_count is intended for edge case tests" ); let t_env = TestEnv::from_env(ctx); - if t_env.slow_platform || !cfg!(optimizations_enabled) { 4 } else { 10 } + if t_env.slow_platform || !cfg!(optimizations_enabled) { + 4 + } else { + 10 + } } /// When validating points of interest (e.g. 
asymptotes, inflection points, extremes), also check diff --git a/libm-test/src/test_traits.rs b/libm-test/src/test_traits.rs index c560dade8..dbb970161 100644 --- a/libm-test/src/test_traits.rs +++ b/libm-test/src/test_traits.rs @@ -328,7 +328,10 @@ where // Check when both are NaNs if actual.is_nan() && expected.is_nan() { if require_biteq && ctx.basis == CheckBasis::None { - ensure!(actual.to_bits() == expected.to_bits(), "mismatched NaN bitpatterns"); + ensure!( + actual.to_bits() == expected.to_bits(), + "mismatched NaN bitpatterns" + ); } // By default, NaNs have nothing special to check. return Ok(()); @@ -340,7 +343,10 @@ where // Make sure that the signs are the same before checing ULP to avoid wraparound let act_sig = actual.signum(); let exp_sig = expected.signum(); - ensure!(act_sig == exp_sig, "mismatched signs {act_sig:?} {exp_sig:?}"); + ensure!( + act_sig == exp_sig, + "mismatched signs {act_sig:?} {exp_sig:?}" + ); if actual.is_infinite() ^ expected.is_infinite() { bail!("mismatched infinities"); diff --git a/libm-test/tests/u256.rs b/libm-test/tests/u256.rs index 4444036d0..8cbb3ad22 100644 --- a/libm-test/tests/u256.rs +++ b/libm-test/tests/u256.rs @@ -40,7 +40,10 @@ fn from_bigint(bx: &mut BigInt) -> u256 { let mut bres = [0u128, 0]; bx.write_digits(&mut bres, Order::Lsf); bx.assign(0); - u256 { lo: bres[0], hi: bres[1] } + u256 { + lo: bres[0], + hi: bres[1], + } } fn check_one( @@ -142,6 +145,11 @@ fn mp_u256_widen_mul() { by.assign(y); let actual = x.widen_mul(y); bx *= &by; - check_one(|| format!("{x:#034x}"), || Some(format!("{y:#034x}")), actual, &mut bx); + check_one( + || format!("{x:#034x}"), + || Some(format!("{y:#034x}")), + actual, + &mut bx, + ); } } diff --git a/libm-test/tests/z_extensive/run.rs b/libm-test/tests/z_extensive/run.rs index b10c231d1..59c806ce7 100644 --- a/libm-test/tests/z_extensive/run.rs +++ b/libm-test/tests/z_extensive/run.rs @@ -28,8 +28,15 @@ pub fn run() { // With default parallelism, the CPU doesn't 
saturate. We don't need to be nice to // other processes, so do 1.5x to make sure we use all available resources. - let threads = std::thread::available_parallelism().map(Into::into).unwrap_or(0) * 3 / 2; - rayon::ThreadPoolBuilder::new().num_threads(threads).build_global().unwrap(); + let threads = std::thread::available_parallelism() + .map(Into::into) + .unwrap_or(0) + * 3 + / 2; + rayon::ThreadPoolBuilder::new() + .num_threads(threads) + .build_global() + .unwrap(); libtest_mimic::run(&args, tests).exit(); } @@ -134,7 +141,9 @@ where }); // Run the actual tests - let res = chunks.par_bridge().try_for_each_init(Op::new_mp, test_single_chunk); + let res = chunks + .par_bridge() + .try_for_each_init(Op::new_mp, test_single_chunk); let real_total = completed.load(Ordering::Relaxed); pb.complete(real_total); @@ -179,7 +188,12 @@ impl Progress { let pb = ProgressBar::new(total); pb.set_style(initial_style); - Self { pb, final_style, name_padded, is_tty } + Self { + pb, + final_style, + name_padded, + is_tty, + } } fn update(&self, completed: u64, input: impl fmt::Debug) { diff --git a/libm/configure.rs b/libm/configure.rs index 8b8ba9815..2a497c7b1 100644 --- a/libm/configure.rs +++ b/libm/configure.rs @@ -107,9 +107,15 @@ fn emit_cfg_shorthands(cfg: &Config) { /// Reemit config that we make use of for test logging. fn emit_cfg_env(cfg: &Config) { - println!("cargo:rustc-env=CFG_CARGO_FEATURES={:?}", cfg.cargo_features); + println!( + "cargo:rustc-env=CFG_CARGO_FEATURES={:?}", + cfg.cargo_features + ); println!("cargo:rustc-env=CFG_OPT_LEVEL={}", cfg.opt_level); - println!("cargo:rustc-env=CFG_TARGET_FEATURES={:?}", cfg.target_features); + println!( + "cargo:rustc-env=CFG_TARGET_FEATURES={:?}", + cfg.target_features + ); } /// Configure whether or not `f16` and `f128` support should be enabled. 
diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 7e56bd079..7df84fe18 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -2,7 +2,10 @@ #![no_std] #![cfg_attr(intrinsics_enabled, allow(internal_features))] #![cfg_attr(intrinsics_enabled, feature(core_intrinsics))] -#![cfg_attr(all(intrinsics_enabled, target_family = "wasm"), feature(wasm_numeric_instr))] +#![cfg_attr( + all(intrinsics_enabled, target_family = "wasm"), + feature(wasm_numeric_instr) +)] #![cfg_attr(f128_enabled, feature(f128))] #![cfg_attr(f16_enabled, feature(f16))] #![allow(clippy::assign_op_pattern)] diff --git a/libm/src/math/atanf.rs b/libm/src/math/atanf.rs index eb3d401cd..da8daa41a 100644 --- a/libm/src/math/atanf.rs +++ b/libm/src/math/atanf.rs @@ -29,8 +29,13 @@ const ATAN_LO: [f32; 4] = [ 7.5497894159e-08, /* atan(inf)lo 0x33a22168 */ ]; -const A_T: [f32; 5] = - [3.3333328366e-01, -1.9999158382e-01, 1.4253635705e-01, -1.0648017377e-01, 6.1687607318e-02]; +const A_T: [f32; 5] = [ + 3.3333328366e-01, + -1.9999158382e-01, + 1.4253635705e-01, + -1.0648017377e-01, + 6.1687607318e-02, +]; /// Arctangent (f32) /// diff --git a/libm/src/math/cbrt.rs b/libm/src/math/cbrt.rs index 9d3311cd6..cf56f7a97 100644 --- a/libm/src/math/cbrt.rs +++ b/libm/src/math/cbrt.rs @@ -171,7 +171,11 @@ pub fn cbrt_round(x: f64, round: Round) -> FpResult { for (a, b) in wlist { if azz == a { - let tmp = if round as u64 + sign == 2 { hf64!("0x1p-52") } else { 0.0 }; + let tmp = if round as u64 + sign == 2 { + hf64!("0x1p-52") + } else { + 0.0 + }; y1 = (b + tmp).copysign(zz); } } diff --git a/libm/src/math/erf.rs b/libm/src/math/erf.rs index 1b634abec..5d82228a0 100644 --- a/libm/src/math/erf.rs +++ b/libm/src/math/erf.rs @@ -306,5 +306,9 @@ pub fn erfc(x: f64) -> f64 { } let x1p_1022 = f64::from_bits(0x0010000000000000); - if sign != 0 { 2.0 - x1p_1022 } else { x1p_1022 * x1p_1022 } + if sign != 0 { + 2.0 - x1p_1022 + } else { + x1p_1022 * x1p_1022 + } } diff --git a/libm/src/math/erff.rs b/libm/src/math/erff.rs 
index 2e41183bf..fe15f0108 100644 --- a/libm/src/math/erff.rs +++ b/libm/src/math/erff.rs @@ -218,5 +218,9 @@ pub fn erfcf(x: f32) -> f32 { } let x1p_120 = f32::from_bits(0x03800000); - if sign != 0 { 2.0 - x1p_120 } else { x1p_120 * x1p_120 } + if sign != 0 { + 2.0 - x1p_120 + } else { + x1p_120 * x1p_120 + } } diff --git a/libm/src/math/exp10f.rs b/libm/src/math/exp10f.rs index 0520a41f2..303045b33 100644 --- a/libm/src/math/exp10f.rs +++ b/libm/src/math/exp10f.rs @@ -2,8 +2,9 @@ use super::{exp2, exp2f, modff}; const LN10_F32: f32 = 3.32192809488736234787031942948939; const LN10_F64: f64 = 3.32192809488736234787031942948939; -const P10: &[f32] = - &[1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7]; +const P10: &[f32] = &[ + 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, +]; /// Calculates 10 raised to the power of `x` (f32). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] diff --git a/libm/src/math/expm1f.rs b/libm/src/math/expm1f.rs index 12c6f532b..63dc86e37 100644 --- a/libm/src/math/expm1f.rs +++ b/libm/src/math/expm1f.rs @@ -126,5 +126,9 @@ pub fn expm1f(mut x: f32) -> f32 { return y - 1.; } let uf = f32::from_bits(((0x7f - k) << 23) as u32); /* 2^-k */ - if k < 23 { (x - e + (1. - uf)) * twopk } else { (x - (e + uf) + 1.) * twopk } + if k < 23 { + (x - e + (1. - uf)) * twopk + } else { + (x - (e + uf) + 1.) 
* twopk + } } diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index e0b3347ac..8856e63f5 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -387,11 +387,17 @@ mod tests { #[test] fn fma_sbb() { - assert_eq!(fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), -3991680619069439e277); + assert_eq!( + fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), + -3991680619069439e277 + ); } #[test] fn fma_underflow() { - assert_eq!(fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), 0.0,); + assert_eq!( + fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), + 0.0, + ); } } diff --git a/libm/src/math/fma_wide.rs b/libm/src/math/fma_wide.rs index 08b78b022..f268c2f14 100644 --- a/libm/src/math/fma_wide.rs +++ b/libm/src/math/fma_wide.rs @@ -75,11 +75,18 @@ where } } - return FpResult { val: result.narrow(), status }; + return FpResult { + val: result.narrow(), + status, + }; } let neg = ui >> (B::BITS - 1) != IntTy::::ZERO; - let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy }; + let err = if neg == (zb > xy) { + xy - result + zb + } else { + zb - result + xy + }; if neg == (err < B::ZERO) { ui += one; } else { diff --git a/libm/src/math/generic/ceil.rs b/libm/src/math/generic/ceil.rs index 5c5bb4763..499770c0d 100644 --- a/libm/src/math/generic/ceil.rs +++ b/libm/src/math/generic/ceil.rs @@ -75,7 +75,14 @@ mod tests { /// Test against https://en.cppreference.com/w/cpp/numeric/math/ceil fn spec_test(cases: &[(F, F, Status)]) { - let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY]; + let roundtrip = [ + F::ZERO, + F::ONE, + F::NEG_ONE, + F::NEG_ZERO, + F::INFINITY, + F::NEG_INFINITY, + ]; for x in roundtrip { let FpResult { val, status } = ceil_status(x); diff --git a/libm/src/math/generic/floor.rs b/libm/src/math/generic/floor.rs index 243804625..58d1ee4c2 100644 --- a/libm/src/math/generic/floor.rs +++ b/libm/src/math/generic/floor.rs @@ -75,7 +75,14 @@ mod tests { /// Test 
against https://en.cppreference.com/w/cpp/numeric/math/floor fn spec_test(cases: &[(F, F, Status)]) { - let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY]; + let roundtrip = [ + F::ZERO, + F::ONE, + F::NEG_ONE, + F::NEG_ZERO, + F::INFINITY, + F::NEG_INFINITY, + ]; for x in roundtrip { let FpResult { val, status } = floor_status(x); diff --git a/libm/src/math/generic/rint.rs b/libm/src/math/generic/rint.rs index 9cdeb1185..7bf38e323 100644 --- a/libm/src/math/generic/rint.rs +++ b/libm/src/math/generic/rint.rs @@ -15,7 +15,11 @@ pub fn rint_round(x: F, _round: Round) -> FpResult { // On i386 `force_eval!` must be used to force rounding via storage to memory. Otherwise, // the excess precission from x87 would cause an incorrect final result. let force = |x| { - if cfg!(x86_no_sse) && (F::BITS == 32 || F::BITS == 64) { force_eval!(x) } else { x } + if cfg!(x86_no_sse) && (F::BITS == 32 || F::BITS == 64) { + force_eval!(x) + } else { + x + } }; let res = if e >= F::EXP_BIAS + F::SIG_BITS { @@ -47,7 +51,14 @@ mod tests { use crate::support::{Hexf, Status}; fn spec_test(cases: &[(F, F, Status)]) { - let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY]; + let roundtrip = [ + F::ZERO, + F::ONE, + F::NEG_ONE, + F::NEG_ZERO, + F::INFINITY, + F::NEG_INFINITY, + ]; for x in roundtrip { let FpResult { val, status } = rint_round(x, Round::Nearest); diff --git a/libm/src/math/generic/sqrt.rs b/libm/src/math/generic/sqrt.rs index ec9ff22df..c52560bdb 100644 --- a/libm/src/math/generic/sqrt.rs +++ b/libm/src/math/generic/sqrt.rs @@ -521,7 +521,10 @@ mod tests { f128::from_bits(0x400c3880000000000000000000000000), 0x40059000000000000000000000000000_u128, ), - (f128::from_bits(0x0000000f), 0x1fc9efbdeb14f4ed9b17ae807907e1e9_u128), + ( + f128::from_bits(0x0000000f), + 0x1fc9efbdeb14f4ed9b17ae807907e1e9_u128, + ), (f128::INFINITY, f128::INFINITY.to_bits()), ]; diff --git a/libm/src/math/generic/trunc.rs 
b/libm/src/math/generic/trunc.rs index 25414ecf4..29a28f47b 100644 --- a/libm/src/math/generic/trunc.rs +++ b/libm/src/math/generic/trunc.rs @@ -36,7 +36,11 @@ pub fn trunc_status(x: F) -> FpResult { // C5: Otherwise the result is inexact and we will truncate. Raise `FE_INEXACT`, mask the // result, and return. - let status = if xi & F::SIG_MASK == F::Int::ZERO { Status::OK } else { Status::INEXACT }; + let status = if xi & F::SIG_MASK == F::Int::ZERO { + Status::OK + } else { + Status::INEXACT + }; xi &= mask; FpResult::new(F::from_bits(xi), status) } @@ -47,7 +51,14 @@ mod tests { use crate::support::Hexf; fn spec_test(cases: &[(F, F, Status)]) { - let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY]; + let roundtrip = [ + F::ZERO, + F::ONE, + F::NEG_ONE, + F::NEG_ZERO, + F::INFINITY, + F::NEG_INFINITY, + ]; for x in roundtrip { let FpResult { val, status } = trunc_status(x); diff --git a/libm/src/math/ilogb.rs b/libm/src/math/ilogb.rs index ccc4914be..5b41f7b1d 100644 --- a/libm/src/math/ilogb.rs +++ b/libm/src/math/ilogb.rs @@ -21,7 +21,11 @@ pub fn ilogb(x: f64) -> i32 { e } else if e == 0x7ff { force_eval!(0.0 / 0.0); - if (i << 12) != 0 { FP_ILOGBNAN } else { i32::MAX } + if (i << 12) != 0 { + FP_ILOGBNAN + } else { + i32::MAX + } } else { e - 0x3ff } diff --git a/libm/src/math/k_sin.rs b/libm/src/math/k_sin.rs index 42441455f..9dd96c944 100644 --- a/libm/src/math/k_sin.rs +++ b/libm/src/math/k_sin.rs @@ -49,5 +49,9 @@ pub(crate) fn k_sin(x: f64, y: f64, iy: i32) -> f64 { let w = z * z; let r = S2 + z * (S3 + z * S4) + z * w * (S5 + z * S6); let v = z * x; - if iy == 0 { x + v * (S1 + z * r) } else { x - ((z * (0.5 * y - v * r) - y) - v * S1) } + if iy == 0 { + x + v * (S1 + z * r) + } else { + x - ((z * (0.5 * y - v * r) - y) - v * S1) + } } diff --git a/libm/src/math/log1p.rs b/libm/src/math/log1p.rs index b7f3fb09e..65142c0d6 100644 --- a/libm/src/math/log1p.rs +++ b/libm/src/math/log1p.rs @@ -118,7 +118,11 @@ pub fn 
log1p(x: f64) -> f64 { k = (hu >> 20) as i32 - 0x3ff; /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ if k < 54 { - c = if k >= 2 { 1. - (f64::from_bits(ui) - x) } else { x - (f64::from_bits(ui) - 1.) }; + c = if k >= 2 { + 1. - (f64::from_bits(ui) - x) + } else { + x - (f64::from_bits(ui) - 1.) + }; c /= f64::from_bits(ui); } else { c = 0.; diff --git a/libm/src/math/log1pf.rs b/libm/src/math/log1pf.rs index bba5b8a2f..23978e61c 100644 --- a/libm/src/math/log1pf.rs +++ b/libm/src/math/log1pf.rs @@ -73,7 +73,11 @@ pub fn log1pf(x: f32) -> f32 { k = (iu >> 23) as i32 - 0x7f; /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ if k < 25 { - c = if k >= 2 { 1. - (f32::from_bits(ui) - x) } else { x - (f32::from_bits(ui) - 1.) }; + c = if k >= 2 { + 1. - (f32::from_bits(ui) - x) + } else { + x - (f32::from_bits(ui) - 1.) + }; c /= f32::from_bits(ui); } else { c = 0.; diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 80b2a2499..7e7d049b9 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -239,10 +239,18 @@ pub fn pow(x: f64, y: f64) -> f64 { /* over/underflow if x is not close to one */ if ix < 0x3fefffff { - return if hy < 0 { s * HUGE * HUGE } else { s * TINY * TINY }; + return if hy < 0 { + s * HUGE * HUGE + } else { + s * TINY * TINY + }; } if ix > 0x3ff00000 { - return if hy > 0 { s * HUGE * HUGE } else { s * TINY * TINY }; + return if hy > 0 { + s * HUGE * HUGE + } else { + s * TINY * TINY + }; } /* now |1-x| is TINY <= 2**-20, suffice to compute @@ -439,7 +447,11 @@ mod tests { fn pow_test(base: f64, exponent: f64, expected: f64) { let res = pow(base, exponent); assert!( - if expected.is_nan() { res.is_nan() } else { pow(base, exponent) == expected }, + if expected.is_nan() { + res.is_nan() + } else { + pow(base, exponent) == expected + }, "{} ** {} was {} instead of {}", base, exponent, @@ -449,11 +461,13 @@ mod tests { } fn test_sets_as_base(sets: &[&[f64]], exponent: f64, expected: f64) { - 
sets.iter().for_each(|s| s.iter().for_each(|val| pow_test(*val, exponent, expected))); + sets.iter() + .for_each(|s| s.iter().for_each(|val| pow_test(*val, exponent, expected))); } fn test_sets_as_exponent(base: f64, sets: &[&[f64]], expected: f64) { - sets.iter().for_each(|s| s.iter().for_each(|val| pow_test(base, *val, expected))); + sets.iter() + .for_each(|s| s.iter().for_each(|val| pow_test(base, *val, expected))); } fn test_sets(sets: &[&[f64]], computed: &dyn Fn(f64) -> f64, expected: &dyn Fn(f64) -> f64) { @@ -467,7 +481,11 @@ mod tests { #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] let res = force_eval!(res); assert!( - if exp.is_nan() { res.is_nan() } else { exp == res }, + if exp.is_nan() { + res.is_nan() + } else { + exp == res + }, "test for {} was {} instead of {}", val, res, @@ -515,7 +533,9 @@ mod tests { // (-Infinity ^ anything but odd ints should be == -0 ^ (-anything)) // We can lump in pos/neg odd ints here because they don't seem to // cause panics (div by zero) in release mode (I think). 
- test_sets(ALL, &|v: f64| pow(f64::NEG_INFINITY, v), &|v: f64| pow(-0.0, -v)); + test_sets(ALL, &|v: f64| pow(f64::NEG_INFINITY, v), &|v: f64| { + pow(-0.0, -v) + }); } #[test] @@ -582,11 +602,15 @@ mod tests { // Factoring -1 out: // (negative anything ^ integer should be (-1 ^ integer) * (positive anything ^ integer)) - [POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS].iter().for_each(|int_set| { - int_set.iter().for_each(|int| { - test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| pow(-1.0, *int) * pow(v, *int)); - }) - }); + [POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS] + .iter() + .for_each(|int_set| { + int_set.iter().for_each(|int| { + test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| { + pow(-1.0, *int) * pow(v, *int) + }); + }) + }); // Negative base (imaginary results): // (-anything except 0 and Infinity ^ non-integer should be NAN) diff --git a/libm/src/math/powf.rs b/libm/src/math/powf.rs index 839c6c23d..11c7a7cbd 100644 --- a/libm/src/math/powf.rs +++ b/libm/src/math/powf.rs @@ -182,11 +182,19 @@ pub fn powf(x: f32, y: f32) -> f32 { /* if |y| > 2**27 */ /* over/underflow if x is not close to one */ if ix < 0x3f7ffff8 { - return if hy < 0 { sn * HUGE * HUGE } else { sn * TINY * TINY }; + return if hy < 0 { + sn * HUGE * HUGE + } else { + sn * TINY * TINY + }; } if ix > 0x3f800007 { - return if hy > 0 { sn * HUGE * HUGE } else { sn * TINY * TINY }; + return if hy > 0 { + sn * HUGE * HUGE + } else { + sn * TINY * TINY + }; } /* now |1-x| is TINY <= 2**-20, suffice to compute diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 917e90819..d677fd9dc 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -199,16 +199,28 @@ mod tests { fn test_near_pi() { let arg = 3.141592025756836; let arg = force_eval!(arg); - assert_eq!(rem_pio2(arg), (2, -6.278329573009626e-7, -2.1125998133974653e-23)); + assert_eq!( + rem_pio2(arg), + (2, -6.278329573009626e-7, -2.1125998133974653e-23) + ); let arg = 
3.141592033207416; let arg = force_eval!(arg); - assert_eq!(rem_pio2(arg), (2, -6.20382377148128e-7, -2.1125998133974653e-23)); + assert_eq!( + rem_pio2(arg), + (2, -6.20382377148128e-7, -2.1125998133974653e-23) + ); let arg = 3.141592144966125; let arg = force_eval!(arg); - assert_eq!(rem_pio2(arg), (2, -5.086236681942706e-7, -2.1125998133974653e-23)); + assert_eq!( + rem_pio2(arg), + (2, -5.086236681942706e-7, -2.1125998133974653e-23) + ); let arg = 3.141592979431152; let arg = force_eval!(arg); - assert_eq!(rem_pio2(arg), (2, 3.2584135866119817e-7, -2.1125998133974653e-23)); + assert_eq!( + rem_pio2(arg), + (2, 3.2584135866119817e-7, -2.1125998133974653e-23) + ); } #[test] diff --git a/libm/src/math/sinf.rs b/libm/src/math/sinf.rs index b8fae2c98..709b63fcf 100644 --- a/libm/src/math/sinf.rs +++ b/libm/src/math/sinf.rs @@ -42,7 +42,11 @@ pub fn sinf(x: f32) -> f32 { if ix < 0x39800000 { /* |x| < 2**-12 */ /* raise inexact if x!=0 and underflow if subnormal */ - force_eval!(if ix < 0x00800000 { x / x1p120 } else { x + x1p120 }); + force_eval!(if ix < 0x00800000 { + x / x1p120 + } else { + x + x1p120 + }); return x; } return k_sinf(x64); @@ -57,7 +61,11 @@ pub fn sinf(x: f32) -> f32 { return k_cosf(x64 - S1_PIO2); } } - return k_sinf(if sign { -(x64 + S2_PIO2) } else { -(x64 - S2_PIO2) }); + return k_sinf(if sign { + -(x64 + S2_PIO2) + } else { + -(x64 - S2_PIO2) + }); } if ix <= 0x40e231d5 { /* |x| ~<= 9*pi/4 */ diff --git a/libm/src/math/support/big.rs b/libm/src/math/support/big.rs index eae08238e..f24c063cd 100644 --- a/libm/src/math/support/big.rs +++ b/libm/src/math/support/big.rs @@ -19,11 +19,17 @@ pub struct u256 { impl u256 { #[cfg(any(test, feature = "unstable-public-internals"))] - pub const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX }; + pub const MAX: Self = Self { + lo: u128::MAX, + hi: u128::MAX, + }; /// Reinterpret as a signed integer pub fn signed(self) -> i256 { - i256 { lo: self.lo, hi: self.hi } + i256 { + lo: self.lo, + hi: self.hi, + } 
} } @@ -39,7 +45,10 @@ impl i256 { /// Reinterpret as an unsigned integer #[cfg(any(test, feature = "unstable-public-internals"))] pub fn unsigned(self) -> u256 { - u256 { lo: self.lo, hi: self.hi } + u256 { + lo: self.lo, + hi: self.hi, + } } } @@ -53,7 +62,10 @@ impl MinInt for u256 { const ZERO: Self = Self { lo: 0, hi: 0 }; const ONE: Self = Self { lo: 1, hi: 0 }; const MIN: Self = Self { lo: 0, hi: 0 }; - const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX }; + const MAX: Self = Self { + lo: u128::MAX, + hi: u128::MAX, + }; } impl MinInt for i256 { @@ -65,8 +77,14 @@ impl MinInt for i256 { const BITS: u32 = 256; const ZERO: Self = Self { lo: 0, hi: 0 }; const ONE: Self = Self { lo: 1, hi: 0 }; - const MIN: Self = Self { lo: 0, hi: 1 << 127 }; - const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX << 1 }; + const MIN: Self = Self { + lo: 0, + hi: 1 << 127, + }; + const MAX: Self = Self { + lo: u128::MAX, + hi: u128::MAX << 1, + }; } macro_rules! impl_common { diff --git a/libm/src/math/support/big/tests.rs b/libm/src/math/support/big/tests.rs index 2c71191ba..d2010f021 100644 --- a/libm/src/math/support/big/tests.rs +++ b/libm/src/math/support/big/tests.rs @@ -13,23 +13,62 @@ fn hexu(v: u256) -> String { #[test] fn widen_u128() { - assert_eq!(u128::MAX.widen(), u256 { lo: u128::MAX, hi: 0 }); - assert_eq!(LOHI_SPLIT.widen(), u256 { lo: LOHI_SPLIT, hi: 0 }); + assert_eq!( + u128::MAX.widen(), + u256 { + lo: u128::MAX, + hi: 0 + } + ); + assert_eq!( + LOHI_SPLIT.widen(), + u256 { + lo: LOHI_SPLIT, + hi: 0 + } + ); } #[test] fn widen_i128() { assert_eq!((-1i128).widen(), u256::MAX.signed()); - assert_eq!((LOHI_SPLIT as i128).widen(), i256 { lo: LOHI_SPLIT, hi: u128::MAX }); + assert_eq!( + (LOHI_SPLIT as i128).widen(), + i256 { + lo: LOHI_SPLIT, + hi: u128::MAX + } + ); assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen()); } #[test] fn widen_mul_u128() { let tests = [ - (u128::MAX / 2, 2_u128, u256 { lo: u128::MAX - 1, hi: 0 }), - (u128::MAX, 
2_u128, u256 { lo: u128::MAX - 1, hi: 1 }), - (u128::MAX, u128::MAX, u256 { lo: 1, hi: u128::MAX - 1 }), + ( + u128::MAX / 2, + 2_u128, + u256 { + lo: u128::MAX - 1, + hi: 0, + }, + ), + ( + u128::MAX, + 2_u128, + u256 { + lo: u128::MAX - 1, + hi: 1, + }, + ), + ( + u128::MAX, + u128::MAX, + u256 { + lo: 1, + hi: u128::MAX - 1, + }, + ), (0, 0, u256::ZERO), (1234u128, 0, u256::ZERO), (0, 1234, u256::ZERO), @@ -68,7 +107,13 @@ fn not_u256() { #[test] fn shr_u256() { - let only_low = [1, u16::MAX.into(), u32::MAX.into(), u64::MAX.into(), u128::MAX]; + let only_low = [ + 1, + u16::MAX.into(), + u32::MAX.into(), + u64::MAX.into(), + u128::MAX, + ]; let mut has_errors = false; let mut add_error = |a, b, expected, actual| { @@ -99,23 +144,106 @@ fn shr_u256() { } let check = [ - (u256::MAX, 1, u256 { lo: u128::MAX, hi: u128::MAX >> 1 }), - (u256::MAX, 5, u256 { lo: u128::MAX, hi: u128::MAX >> 5 }), - (u256::MAX, 63, u256 { lo: u128::MAX, hi: u64::MAX as u128 | (1 << 64) }), - (u256::MAX, 64, u256 { lo: u128::MAX, hi: u64::MAX as u128 }), - (u256::MAX, 65, u256 { lo: u128::MAX, hi: (u64::MAX >> 1) as u128 }), - (u256::MAX, 127, u256 { lo: u128::MAX, hi: 1 }), - (u256::MAX, 128, u256 { lo: u128::MAX, hi: 0 }), - (u256::MAX, 129, u256 { lo: u128::MAX >> 1, hi: 0 }), - (u256::MAX, 191, u256 { lo: u64::MAX as u128 | 1 << 64, hi: 0 }), - (u256::MAX, 192, u256 { lo: u64::MAX as u128, hi: 0 }), - (u256::MAX, 193, u256 { lo: u64::MAX as u128 >> 1, hi: 0 }), + ( + u256::MAX, + 1, + u256 { + lo: u128::MAX, + hi: u128::MAX >> 1, + }, + ), + ( + u256::MAX, + 5, + u256 { + lo: u128::MAX, + hi: u128::MAX >> 5, + }, + ), + ( + u256::MAX, + 63, + u256 { + lo: u128::MAX, + hi: u64::MAX as u128 | (1 << 64), + }, + ), + ( + u256::MAX, + 64, + u256 { + lo: u128::MAX, + hi: u64::MAX as u128, + }, + ), + ( + u256::MAX, + 65, + u256 { + lo: u128::MAX, + hi: (u64::MAX >> 1) as u128, + }, + ), + ( + u256::MAX, + 127, + u256 { + lo: u128::MAX, + hi: 1, + }, + ), + ( + u256::MAX, + 128, + u256 { + 
lo: u128::MAX, + hi: 0, + }, + ), + ( + u256::MAX, + 129, + u256 { + lo: u128::MAX >> 1, + hi: 0, + }, + ), + ( + u256::MAX, + 191, + u256 { + lo: u64::MAX as u128 | 1 << 64, + hi: 0, + }, + ), + ( + u256::MAX, + 192, + u256 { + lo: u64::MAX as u128, + hi: 0, + }, + ), + ( + u256::MAX, + 193, + u256 { + lo: u64::MAX as u128 >> 1, + hi: 0, + }, + ), (u256::MAX, 254, u256 { lo: 0b11, hi: 0 }), (u256::MAX, 255, u256 { lo: 1, hi: 0 }), ( - u256 { hi: LOHI_SPLIT, lo: 0 }, + u256 { + hi: LOHI_SPLIT, + lo: 0, + }, 64, - u256 { lo: 0xffffffffffffffff0000000000000000, hi: 0xaaaaaaaaaaaaaaaa }, + u256 { + lo: 0xffffffffffffffff0000000000000000, + hi: 0xaaaaaaaaaaaaaaaa, + }, ), ]; diff --git a/libm/src/math/support/env.rs b/libm/src/math/support/env.rs index 796309372..53ae32f65 100644 --- a/libm/src/math/support/env.rs +++ b/libm/src/math/support/env.rs @@ -25,7 +25,10 @@ impl FpResult { /// Return `val` with `Status::OK`. pub fn ok(val: T) -> Self { - Self { val, status: Status::OK } + Self { + val, + status: Status::OK, + } } } diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index fac104832..8094a7b84 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -105,7 +105,11 @@ pub trait Float: /// if `NaN` should not be treated separately. #[allow(dead_code)] fn eq_repr(self, rhs: Self) -> bool { - if self.is_nan() && rhs.is_nan() { true } else { self.biteq(rhs) } + if self.is_nan() && rhs.is_nan() { + true + } else { + self.biteq(rhs) + } } /// Returns true if the value is NaN. @@ -149,7 +153,11 @@ pub trait Float: /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. 
fn from_parts(negative: bool, exponent: u32, significand: Self::Int) -> Self { - let sign = if negative { Self::Int::ONE } else { Self::Int::ZERO }; + let sign = if negative { + Self::Int::ONE + } else { + Self::Int::ZERO + }; Self::from_bits( (sign << (Self::BITS - 1)) | (Self::Int::cast_from(exponent & Self::EXP_SAT) << Self::SIG_BITS) @@ -173,7 +181,11 @@ pub trait Float: /// Returns a number that represents the sign of self. #[allow(dead_code)] fn signum(self) -> Self { - if self.is_nan() { self } else { Self::ONE.copysign(self) } + if self.is_nan() { + self + } else { + Self::ONE.copysign(self) + } } } @@ -273,18 +285,61 @@ macro_rules! float_impl { } fn normalize(significand: Self::Int) -> (i32, Self::Int) { let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); - (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int) + ( + 1i32.wrapping_sub(shift as i32), + significand << shift as Self::Int, + ) } } }; } #[cfg(f16_enabled)] -float_impl!(f16, u16, i16, 16, 10, f16::from_bits, f16::to_bits, fmaf16, fmaf16); -float_impl!(f32, u32, i32, 32, 23, f32_from_bits, f32_to_bits, fmaf, fmaf32); -float_impl!(f64, u64, i64, 64, 52, f64_from_bits, f64_to_bits, fma, fmaf64); +float_impl!( + f16, + u16, + i16, + 16, + 10, + f16::from_bits, + f16::to_bits, + fmaf16, + fmaf16 +); +float_impl!( + f32, + u32, + i32, + 32, + 23, + f32_from_bits, + f32_to_bits, + fmaf, + fmaf32 +); +float_impl!( + f64, + u64, + i64, + 64, + 52, + f64_from_bits, + f64_to_bits, + fma, + fmaf64 +); #[cfg(f128_enabled)] -float_impl!(f128, u128, i128, 128, 112, f128::from_bits, f128::to_bits, fmaf128, fmaf128); +float_impl!( + f128, + u128, + i128, + 128, + 112, + f128::from_bits, + f128::to_bits, + fmaf128, + fmaf128 +); /* FIXME(msrv): vendor some things that are not const stable at our MSRV */ @@ -424,7 +479,10 @@ mod tests { // `from_parts` assert_biteq!(f32::from_parts(true, f32::EXP_BIAS, 0), -1.0f32); - assert_biteq!(f32::from_parts(false, 10 + f32::EXP_BIAS, 0), 
hf32!("0x1p10")); + assert_biteq!( + f32::from_parts(false, 10 + f32::EXP_BIAS, 0), + hf32!("0x1p10") + ); assert_biteq!(f32::from_parts(false, 0, 1), f32::from_bits(0x1)); } @@ -451,7 +509,10 @@ mod tests { // `from_parts` assert_biteq!(f64::from_parts(true, f64::EXP_BIAS, 0), -1.0f64); - assert_biteq!(f64::from_parts(false, 10 + f64::EXP_BIAS, 0), hf64!("0x1p10")); + assert_biteq!( + f64::from_parts(false, 10 + f64::EXP_BIAS, 0), + hf64!("0x1p10") + ); assert_biteq!(f64::from_parts(false, 0, 1), f64::from_bits(0x1)); } diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs index 819e2f56e..85569d98a 100644 --- a/libm/src/math/support/hex_float.rs +++ b/libm/src/math/support/hex_float.rs @@ -234,7 +234,9 @@ const fn parse_hex(mut b: &[u8]) -> Result { match c { b'.' => { if seen_point { - return Err(HexFloatParseError("unexpected '.' parsing fractional digits")); + return Err(HexFloatParseError( + "unexpected '.' parsing fractional digits", + )); } seen_point = true; continue; @@ -294,7 +296,9 @@ const fn parse_hex(mut b: &[u8]) -> Result { } if !some_digits { - return Err(HexFloatParseError("at least one exponent digit is required")); + return Err(HexFloatParseError( + "at least one exponent digit is required", + )); }; { @@ -542,7 +546,11 @@ mod parse_tests { for k in -149..=127 { let s = format!("0x1p{k}"); let x = hf32(&s); - let y = if k < 0 { 0.5f32.powi(-k) } else { 2.0f32.powi(k) }; + let y = if k < 0 { + 0.5f32.powi(-k) + } else { + 2.0f32.powi(k) + }; assert_eq!(x, y); } @@ -613,9 +621,14 @@ mod parse_tests { fn rounding_extreme_underflow() { for k in 1..1000 { let s = format!("0x1p{}", -149 - k); - let Ok((bits, status)) = parse_any(&s, 32, 23, Round::Nearest) else { unreachable!() }; + let Ok((bits, status)) = parse_any(&s, 32, 23, Round::Nearest) else { + unreachable!() + }; assert_eq!(bits, 0, "{s} should round to zero, got bits={bits}"); - assert!(status.underflow(), "should indicate underflow when parsing {s}"); + 
assert!( + status.underflow(), + "should indicate underflow when parsing {s}" + ); assert!(status.inexact(), "should indicate inexact when parsing {s}"); } } @@ -623,11 +636,15 @@ mod parse_tests { fn long_tail() { for k in 1..1000 { let s = format!("0x1.{}p0", "0".repeat(k)); - let Ok(bits) = parse_hex_exact(&s, 32, 23) else { panic!("parsing {s} failed") }; + let Ok(bits) = parse_hex_exact(&s, 32, 23) else { + panic!("parsing {s} failed") + }; assert_eq!(f32::from_bits(bits as u32), 1.0); let s = format!("0x1.{}1p0", "0".repeat(k)); - let Ok((bits, status)) = parse_any(&s, 32, 23, Round::Nearest) else { unreachable!() }; + let Ok((bits, status)) = parse_any(&s, 32, 23, Round::Nearest) else { + unreachable!() + }; if status.inexact() { assert!(1.0 == f32::from_bits(bits as u32)); } else { @@ -839,7 +856,10 @@ mod parse_tests { assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000_u32); assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000_u64); #[cfg(f128_enabled)] - assert_eq!(hf128!("0x1.ffep+8").to_bits(), 0x4007ffe0000000000000000000000000_u128); + assert_eq!( + hf128!("0x1.ffep+8").to_bits(), + 0x4007ffe0000000000000000000000000_u128 + ); } } @@ -1143,8 +1163,14 @@ mod print_tests { #[cfg(f128_enabled)] { - assert_eq!(Hexf(f128::MAX).to_string(), "0x1.ffffffffffffffffffffffffffffp+16383"); - assert_eq!(Hexf(f128::MIN).to_string(), "-0x1.ffffffffffffffffffffffffffffp+16383"); + assert_eq!( + Hexf(f128::MAX).to_string(), + "0x1.ffffffffffffffffffffffffffffp+16383" + ); + assert_eq!( + Hexf(f128::MIN).to_string(), + "-0x1.ffffffffffffffffffffffffffffp+16383" + ); assert_eq!(Hexf(f128::ZERO).to_string(), "0x0p+0"); assert_eq!(Hexf(f128::NEG_ZERO).to_string(), "-0x0p+0"); assert_eq!(Hexf(f128::NAN).to_string(), "NaN"); diff --git a/libm/src/math/tan.rs b/libm/src/math/tan.rs index a074ca554..a072bdec5 100644 --- a/libm/src/math/tan.rs +++ b/libm/src/math/tan.rs @@ -53,7 +53,11 @@ pub fn tan(x: f64) -> f64 { if ix < 0x3e400000 { /* |x| < 2**-27 */ /* raise 
inexact if x!=0 and underflow if subnormal */ - force_eval!(if ix < 0x00100000 { x / x1p120 as f64 } else { x + x1p120 as f64 }); + force_eval!(if ix < 0x00100000 { + x / x1p120 as f64 + } else { + x + x1p120 as f64 + }); return x; } return k_tan(x, 0.0, 0); diff --git a/libm/src/math/tanf.rs b/libm/src/math/tanf.rs index 7586aae4c..8bcf9581f 100644 --- a/libm/src/math/tanf.rs +++ b/libm/src/math/tanf.rs @@ -42,7 +42,11 @@ pub fn tanf(x: f32) -> f32 { if ix < 0x39800000 { /* |x| < 2**-12 */ /* raise inexact if x!=0 and underflow if subnormal */ - force_eval!(if ix < 0x00800000 { x / x1p120 } else { x + x1p120 }); + force_eval!(if ix < 0x00800000 { + x / x1p120 + } else { + x + x1p120 + }); return x; } return k_tanf(x64, false); From 239cd0ff0e41ef4487e4231487c3497016b8eb33 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 19 Apr 2025 23:30:12 +0000 Subject: [PATCH 1296/1459] Remove libm CI dockerfiles These are identical to what already exists in compiler-builtins except for some base image changes, so we can eliminate the duplicates. 
--- .../aarch64-unknown-linux-gnu/Dockerfile | 15 --------------- .../arm-unknown-linux-gnueabi/Dockerfile | 14 -------------- .../arm-unknown-linux-gnueabihf/Dockerfile | 14 -------------- .../armv7-unknown-linux-gnueabihf/Dockerfile | 14 -------------- .../docker/i586-unknown-linux-gnu/Dockerfile | 5 ----- .../docker/i686-unknown-linux-gnu/Dockerfile | 5 ----- .../loongarch64-unknown-linux-gnu/Dockerfile | 13 ------------- .../docker/mips-unknown-linux-gnu/Dockerfile | 15 --------------- .../mips64-unknown-linux-gnuabi64/Dockerfile | 19 ------------------- .../Dockerfile | 18 ------------------ .../mipsel-unknown-linux-gnu/Dockerfile | 15 --------------- .../powerpc-unknown-linux-gnu/Dockerfile | 15 --------------- .../powerpc64-unknown-linux-gnu/Dockerfile | 15 --------------- .../powerpc64le-unknown-linux-gnu/Dockerfile | 16 ---------------- .../riscv64gc-unknown-linux-gnu/Dockerfile | 15 --------------- .../ci/docker/thumbv6m-none-eabi/Dockerfile | 9 --------- .../ci/docker/thumbv7em-none-eabi/Dockerfile | 9 --------- .../docker/thumbv7em-none-eabihf/Dockerfile | 9 --------- .../ci/docker/thumbv7m-none-eabi/Dockerfile | 9 --------- .../x86_64-unknown-linux-gnu/Dockerfile | 5 ----- 20 files changed, 249 deletions(-) delete mode 100644 etc/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile delete mode 100644 etc/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile delete mode 100644 etc/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile delete mode 100644 etc/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile delete mode 100644 etc/libm/ci/docker/i586-unknown-linux-gnu/Dockerfile delete mode 100644 etc/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile delete mode 100644 etc/libm/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile delete mode 100644 etc/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile delete mode 100644 etc/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile delete mode 100644 
etc/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile delete mode 100644 etc/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile delete mode 100644 etc/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile delete mode 100644 etc/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile delete mode 100644 etc/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile delete mode 100644 etc/libm/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile delete mode 100644 etc/libm/ci/docker/thumbv6m-none-eabi/Dockerfile delete mode 100644 etc/libm/ci/docker/thumbv7em-none-eabi/Dockerfile delete mode 100644 etc/libm/ci/docker/thumbv7em-none-eabihf/Dockerfile delete mode 100644 etc/libm/ci/docker/thumbv7m-none-eabi/Dockerfile delete mode 100644 etc/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile diff --git a/etc/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile deleted file mode 100644 index 7fa06b286..000000000 --- a/etc/libm/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM ubuntu:24.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc libc6-dev ca-certificates \ - gcc-aarch64-linux-gnu m4 make libc6-dev-arm64-cross \ - qemu-user-static - -ENV TOOLCHAIN_PREFIX=aarch64-linux-gnu- -ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ - CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-aarch64-static \ - AR_aarch64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ - CC_aarch64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ - QEMU_LD_PREFIX=/usr/aarch64-linux-gnu \ - RUST_TEST_THREADS=1 diff --git a/etc/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/etc/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile deleted file mode 100644 index e070a7d93..000000000 --- a/etc/libm/ci/docker/arm-unknown-linux-gnueabi/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM ubuntu:24.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - 
gcc libc6-dev ca-certificates \ - gcc-arm-linux-gnueabi libc6-dev-armel-cross qemu-user-static - -ENV TOOLCHAIN_PREFIX=arm-linux-gnueabi- -ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER="$TOOLCHAIN_PREFIX"gcc \ - CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_RUNNER=qemu-arm-static \ - AR_arm_unknown_linux_gnueabi="$TOOLCHAIN_PREFIX"ar \ - CC_arm_unknown_linux_gnueabi="$TOOLCHAIN_PREFIX"gcc \ - QEMU_LD_PREFIX=/usr/arm-linux-gnueabi \ - RUST_TEST_THREADS=1 diff --git a/etc/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/etc/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile deleted file mode 100644 index 29f1e04a9..000000000 --- a/etc/libm/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM ubuntu:24.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc libc6-dev ca-certificates \ - gcc-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-user-static - -ENV TOOLCHAIN_PREFIX=arm-linux-gnueabihf- -ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER="$TOOLCHAIN_PREFIX"gcc \ - CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER=qemu-arm-static \ - AR_arm_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"ar \ - CC_arm_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"gcc \ - QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf \ - RUST_TEST_THREADS=1 diff --git a/etc/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/etc/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile deleted file mode 100644 index 0a30801b4..000000000 --- a/etc/libm/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM ubuntu:24.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc libc6-dev ca-certificates \ - gcc-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-user-static - -ENV TOOLCHAIN_PREFIX=arm-linux-gnueabihf- -ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER="$TOOLCHAIN_PREFIX"gcc \ - CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER=qemu-arm-static \ - 
AR_armv7_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"ar \ - CC_armv7_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"gcc \ - QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf \ - RUST_TEST_THREADS=1 diff --git a/etc/libm/ci/docker/i586-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/i586-unknown-linux-gnu/Dockerfile deleted file mode 100644 index 37e206a84..000000000 --- a/etc/libm/ci/docker/i586-unknown-linux-gnu/Dockerfile +++ /dev/null @@ -1,5 +0,0 @@ -FROM ubuntu:24.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc-multilib m4 make libc6-dev ca-certificates diff --git a/etc/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile deleted file mode 100644 index 37e206a84..000000000 --- a/etc/libm/ci/docker/i686-unknown-linux-gnu/Dockerfile +++ /dev/null @@ -1,5 +0,0 @@ -FROM ubuntu:24.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc-multilib m4 make libc6-dev ca-certificates diff --git a/etc/libm/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile deleted file mode 100644 index a9ce320e8..000000000 --- a/etc/libm/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile +++ /dev/null @@ -1,13 +0,0 @@ -FROM ubuntu:24.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc libc6-dev qemu-user-static ca-certificates \ - gcc-14-loongarch64-linux-gnu libc6-dev-loong64-cross - -ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc-14 \ - CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-loongarch64-static \ - AR_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-ar \ - CC_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-gcc-14 \ - QEMU_LD_PREFIX=/usr/loongarch64-linux-gnu \ - RUST_TEST_THREADS=1 diff --git a/etc/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile deleted file mode 100644 index 
298208c92..000000000 --- a/etc/libm/ci/docker/mips-unknown-linux-gnu/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM ubuntu:24.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc libc6-dev ca-certificates \ - gcc-mips-linux-gnu libc6-dev-mips-cross \ - binfmt-support qemu-user-static qemu-system-mips - -ENV TOOLCHAIN_PREFIX=mips-linux-gnu- -ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ - CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER=qemu-mips-static \ - AR_mips_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ - CC_mips_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ - QEMU_LD_PREFIX=/usr/mips-linux-gnu \ - RUST_TEST_THREADS=1 diff --git a/etc/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/etc/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile deleted file mode 100644 index 101b3853e..000000000 --- a/etc/libm/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -FROM ubuntu:24.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - ca-certificates \ - gcc \ - gcc-mips64-linux-gnuabi64 \ - libc6-dev \ - libc6-dev-mips64-cross \ - qemu-user-static \ - qemu-system-mips - -ENV TOOLCHAIN_PREFIX=mips64-linux-gnuabi64- -ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER="$TOOLCHAIN_PREFIX"gcc \ - CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER=qemu-mips64-static \ - AR_mips64_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"ar \ - CC_mips64_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"gcc \ - QEMU_LD_PREFIX=/usr/mips64-linux-gnuabi64 \ - RUST_TEST_THREADS=1 diff --git a/etc/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/etc/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile deleted file mode 100644 index 0eb14f9ac..000000000 --- a/etc/libm/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile +++ /dev/null @@ -1,18 +0,0 @@ -FROM ubuntu:24.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - ca-certificates \ - gcc \ - 
gcc-mips64el-linux-gnuabi64 \ - libc6-dev \ - libc6-dev-mips64el-cross \ - qemu-user-static - -ENV TOOLCHAIN_PREFIX=mips64el-linux-gnuabi64- -ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER="$TOOLCHAIN_PREFIX"gcc \ - CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER=qemu-mips64el-static \ - AR_mips64el_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"ar \ - CC_mips64el_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"gcc \ - QEMU_LD_PREFIX=/usr/mips64el-linux-gnuabi64 \ - RUST_TEST_THREADS=1 diff --git a/etc/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile deleted file mode 100644 index 1b9817cfe..000000000 --- a/etc/libm/ci/docker/mipsel-unknown-linux-gnu/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM ubuntu:24.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc libc6-dev ca-certificates \ - gcc-mipsel-linux-gnu libc6-dev-mipsel-cross \ - binfmt-support qemu-user-static - -ENV TOOLCHAIN_PREFIX=mipsel-linux-gnu- -ENV CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ - CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_GNU_RUNNER=qemu-mipsel-static \ - AR_mipsel_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ - CC_mipsel_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ - QEMU_LD_PREFIX=/usr/mipsel-linux-gnu \ - RUST_TEST_THREADS=1 diff --git a/etc/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile deleted file mode 100644 index 1ea2e30a2..000000000 --- a/etc/libm/ci/docker/powerpc-unknown-linux-gnu/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM ubuntu:24.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc libc6-dev qemu-user-static ca-certificates \ - gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \ - qemu-system-ppc - -ENV TOOLCHAIN_PREFIX=powerpc-linux-gnu- -ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ - CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc-static \ - 
AR_powerpc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ - CC_powerpc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ - QEMU_LD_PREFIX=/usr/powerpc-linux-gnu \ - RUST_TEST_THREADS=1 diff --git a/etc/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile deleted file mode 100644 index 373814bca..000000000 --- a/etc/libm/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM ubuntu:24.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc libc6-dev ca-certificates \ - gcc-powerpc64-linux-gnu libc6-dev-ppc64-cross \ - binfmt-support qemu-user-static qemu-system-ppc - -ENV TOOLCHAIN_PREFIX=powerpc64-linux-gnu- -ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ - CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64-static \ - AR_powerpc64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ - CC_powerpc64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ - QEMU_LD_PREFIX=/usr/powerpc64-linux-gnu \ - RUST_TEST_THREADS=1 diff --git a/etc/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile deleted file mode 100644 index 403bb1d95..000000000 --- a/etc/libm/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM ubuntu:24.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc libc6-dev qemu-user-static ca-certificates \ - gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \ - qemu-system-ppc - -ENV TOOLCHAIN_PREFIX=powerpc64le-linux-gnu- -ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ - CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64le-static \ - AR_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ - CC_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ - QEMU_CPU=POWER8 \ - QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu \ - RUST_TEST_THREADS=1 diff --git 
a/etc/libm/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile deleted file mode 100644 index 5f8a28924..000000000 --- a/etc/libm/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM ubuntu:24.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc libc6-dev qemu-user-static ca-certificates \ - gcc-riscv64-linux-gnu libc6-dev-riscv64-cross \ - qemu-system-riscv64 - -ENV TOOLCHAIN_PREFIX=riscv64-linux-gnu- -ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ - CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64-static \ - AR_riscv64gc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ - CC_riscv64gc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ - QEMU_LD_PREFIX=/usr/riscv64-linux-gnu \ - RUST_TEST_THREADS=1 diff --git a/etc/libm/ci/docker/thumbv6m-none-eabi/Dockerfile b/etc/libm/ci/docker/thumbv6m-none-eabi/Dockerfile deleted file mode 100644 index ad0d4351e..000000000 --- a/etc/libm/ci/docker/thumbv6m-none-eabi/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -ARG IMAGE=ubuntu:24.04 -FROM $IMAGE - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc libc6-dev ca-certificates \ - gcc-arm-none-eabi \ - libnewlib-arm-none-eabi -ENV BUILD_ONLY=1 diff --git a/etc/libm/ci/docker/thumbv7em-none-eabi/Dockerfile b/etc/libm/ci/docker/thumbv7em-none-eabi/Dockerfile deleted file mode 100644 index ad0d4351e..000000000 --- a/etc/libm/ci/docker/thumbv7em-none-eabi/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -ARG IMAGE=ubuntu:24.04 -FROM $IMAGE - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc libc6-dev ca-certificates \ - gcc-arm-none-eabi \ - libnewlib-arm-none-eabi -ENV BUILD_ONLY=1 diff --git a/etc/libm/ci/docker/thumbv7em-none-eabihf/Dockerfile b/etc/libm/ci/docker/thumbv7em-none-eabihf/Dockerfile deleted file mode 100644 index ad0d4351e..000000000 --- 
a/etc/libm/ci/docker/thumbv7em-none-eabihf/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -ARG IMAGE=ubuntu:24.04 -FROM $IMAGE - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc libc6-dev ca-certificates \ - gcc-arm-none-eabi \ - libnewlib-arm-none-eabi -ENV BUILD_ONLY=1 diff --git a/etc/libm/ci/docker/thumbv7m-none-eabi/Dockerfile b/etc/libm/ci/docker/thumbv7m-none-eabi/Dockerfile deleted file mode 100644 index ad0d4351e..000000000 --- a/etc/libm/ci/docker/thumbv7m-none-eabi/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -ARG IMAGE=ubuntu:24.04 -FROM $IMAGE - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc libc6-dev ca-certificates \ - gcc-arm-none-eabi \ - libnewlib-arm-none-eabi -ENV BUILD_ONLY=1 diff --git a/etc/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/etc/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile deleted file mode 100644 index c84a31c57..000000000 --- a/etc/libm/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ /dev/null @@ -1,5 +0,0 @@ -FROM ubuntu:24.04 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - gcc m4 make libc6-dev ca-certificates From 72089ec4c1207218e6d54ce8cffb156f77332d4b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 19 Apr 2025 23:34:41 +0000 Subject: [PATCH 1297/1459] Combine the libm .gitignore --- .gitignore | 13 ++++++++++++- etc/libm/.gitignore | 11 ----------- 2 files changed, 12 insertions(+), 12 deletions(-) delete mode 100644 etc/libm/.gitignore diff --git a/.gitignore b/.gitignore index 97df30ffa..5287a6c72 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,16 @@ -*.rs.bk +# Rust files Cargo.lock target + +# Sources for external files compiler-rt *.tar.gz + +# Benchmark cache +baseline-* +iai-home + +# Temporary files +*.bk +*.rs.bk +.#* diff --git a/etc/libm/.gitignore b/etc/libm/.gitignore deleted file mode 100644 index d5caba1a0..000000000 --- a/etc/libm/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -**.bk -.#* -/bin -/math/src 
-target -Cargo.lock -**.tar.gz - -# Benchmark cache -iai-home -baseline-* From adcb81107f66879e44c70f01adb230c387a24382 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 19 Apr 2025 23:35:50 +0000 Subject: [PATCH 1298/1459] Move the libm .editorconfig to root --- etc/libm/.editorconfig => .editorconfig | 5 ----- 1 file changed, 5 deletions(-) rename etc/libm/.editorconfig => .editorconfig (73%) diff --git a/etc/libm/.editorconfig b/.editorconfig similarity index 73% rename from etc/libm/.editorconfig rename to .editorconfig index ec6e107d5..f0735cedf 100644 --- a/etc/libm/.editorconfig +++ b/.editorconfig @@ -12,10 +12,5 @@ insert_final_newline = true indent_style = space indent_size = 4 -[*.md] -# double whitespace at end of line -# denotes a line break in Markdown -trim_trailing_whitespace = false - [*.yml] indent_size = 2 From a5ba95e8c0535b6c0fd1a40fdf0cfe5b2f2d7c2c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 01:18:08 +0000 Subject: [PATCH 1299/1459] Move builtins-test-intrinsics out of the workspace This crate doesn't need to be a default member since it requires the opposite settings from everything else. Exclude it from the workspace and run it only when explicitly requested. This also makes `cargo t --no-default-features` work without additional qualifiers. `--no-default-features` still needs to be passed to ensure `#![compiler_builtins]` does not get set. compiler-builtins needs doctests disabled in order for everything to work correctly, since this causes an error running rustdoc that is unrelated to features (our `compiler_builtins` is getting into the crate graph before that from the sysroot, but `#![compiler_builtins]` is not set). We can also remove `test = false` and `doctest = false` in `builtins-test` since these no longer cause issues. This is unlikely to be used but it is better to not quietly skip if anything ever gets added by accident. 
--- Cargo.toml | 11 +++++++---- builtins-test-intrinsics/Cargo.toml | 8 +++++++- builtins-test/Cargo.toml | 4 ---- ci/run-docker.sh | 9 ++++++--- ci/run.sh | 10 +++++----- compiler-builtins/Cargo.toml | 3 ++- 6 files changed, 27 insertions(+), 18 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 155fb00b1..feaeb9791 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,7 @@ [workspace] resolver = "3" members = [ - # Note that builtins-test-intrinsics cannot be a default member because it - # needs the `mangled-names` feature disabled, while `builtins-test` needs - # it enabled. "builtins-test", - "builtins-test-intrinsics", "compiler-builtins", "crates/libm-macros", "libm", @@ -24,6 +20,13 @@ default-members = [ "libm", ] +exclude = [ + # `builtins-test-intrinsics` needs the feature `compiler-builtins` enabled + # and `mangled-names` disabled, which is the opposite of what is needed for + # other tests, so it makes sense to keep it out of the workspace. + "builtins-test-intrinsics", +] + [profile.release] panic = "abort" diff --git a/builtins-test-intrinsics/Cargo.toml b/builtins-test-intrinsics/Cargo.toml index 9dbd3c32f..6d88cbec9 100644 --- a/builtins-test-intrinsics/Cargo.toml +++ b/builtins-test-intrinsics/Cargo.toml @@ -6,7 +6,13 @@ publish = false [dependencies] compiler_builtins = { path = "../compiler-builtins", features = ["compiler-builtins"]} -panic-handler = { path = '../crates/panic-handler' } +panic-handler = { path = "../crates/panic-handler" } [features] c = ["compiler_builtins/c"] + +[profile.release] +panic = "abort" + +[profile.dev] +panic = "abort" diff --git a/builtins-test/Cargo.toml b/builtins-test/Cargo.toml index 526e9b18a..18185d8fe 100644 --- a/builtins-test/Cargo.toml +++ b/builtins-test/Cargo.toml @@ -5,10 +5,6 @@ authors = ["Alex Crichton "] edition = "2024" publish = false -[lib] -test = false -doctest = false - [dependencies] # For fuzzing tests we want a deterministic seedable RNG. 
We also eliminate potential # problems with system RNGs on the variety of platforms this crate is tested on. diff --git a/ci/run-docker.sh b/ci/run-docker.sh index 2c27ab795..d07e7784c 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -19,9 +19,11 @@ run() { echo "target is emulated" fi - # This directory needs to exist before calling docker, otherwise docker will create it but it - # will be owned by root + # Directories and files that do not yet exist need to be created before + # calling docker, otherwise docker will create them but they will be owned + # by root. mkdir -p target + cargo generate-lockfile --manifest-path builtins-test-intrinsics/Cargo.toml run_cmd="HOME=/tmp" @@ -53,7 +55,8 @@ run() { # Use rustc provided by a docker image docker volume create compiler-builtins-cache build_args=( - "--build-arg" "IMAGE=${DOCKER_BASE_IMAGE:-rustlang/rust:nightly}" + "--build-arg" + "IMAGE=${DOCKER_BASE_IMAGE:-rustlang/rust:nightly}" ) run_args=(-v "compiler-builtins-cache:/builtins-target") run_cmd="$run_cmd HOME=/tmp" "USING_CONTAINER_RUSTC=1" diff --git a/ci/run.sh b/ci/run.sh index 49cc16286..96a6e92a9 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -122,7 +122,9 @@ done rm -f "${rlib_paths[@]}" build_intrinsics_test() { - cargo build --target "$target" -v --package builtins-test-intrinsics "$@" + cargo build \ + --target "$target" --verbose \ + --manifest-path builtins-test-intrinsics/Cargo.toml "$@" } # Verify that we haven't dropped any intrinsics/symbols @@ -133,10 +135,8 @@ build_intrinsics_test --features c --release # Verify that there are no undefined symbols to `panic` within our # implementations -CARGO_PROFILE_DEV_LTO=true \ - cargo build --target "$target" --package builtins-test-intrinsics -CARGO_PROFILE_RELEASE_LTO=true \ - cargo build --target "$target" --package builtins-test-intrinsics --release +CARGO_PROFILE_DEV_LTO=true build_intrinsics_test +CARGO_PROFILE_RELEASE_LTO=true build_intrinsics_test --release # Ensure no references to any 
symbols from core update_rlib_paths diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index 3151546ab..c9503bbce 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -25,8 +25,9 @@ include = [ links = 'compiler-rt' [lib] -test = false bench = false +doctest = false +test = false [dependencies] # For more information on this dependency see From 897e60a5c80e783939bc3db2ec48b11924754798 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 03:02:47 +0000 Subject: [PATCH 1300/1459] Add remaining `libm` crates to the workspace These are still not yet covered in CI since we always name explicit packages there, but all crates are now part of the workspace. --- Cargo.toml | 10 ++++------ ci/run.sh | 16 ++++++++-------- libm-test/tests/check_coverage.rs | 4 ++-- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index feaeb9791..194a2cdc2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,20 +4,18 @@ members = [ "builtins-test", "compiler-builtins", "crates/libm-macros", + "crates/musl-math-sys", + "crates/util", "libm", - # FIXME(libm): disabled until tests work in CI - # "libm-test", - # "crates/musl-math-sys", - # "crates/util", + "libm-test", ] default-members = [ "builtins-test", "compiler-builtins", "crates/libm-macros", - # FIXME(libm): disabled until tests work in CI - # "crates/libm-test" "libm", + "libm-test", ] exclude = [ diff --git a/ci/run.sh b/ci/run.sh index 96a6e92a9..bce90d48d 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -58,14 +58,14 @@ update_rlib_paths() { update_rlib_paths rm -f "${rlib_paths[@]}" -cargo build --target "$target" -cargo build --target "$target" --release -cargo build --target "$target" --features c -cargo build --target "$target" --release --features c -cargo build --target "$target" --features no-asm -cargo build --target "$target" --release --features no-asm -cargo build --target "$target" --features no-f16-f128 -cargo build --target "$target" 
--release --features no-f16-f128 +cargo build -p compiler_builtins --target "$target" +cargo build -p compiler_builtins --target "$target" --release +cargo build -p compiler_builtins --target "$target" --features c +cargo build -p compiler_builtins --target "$target" --release --features c +cargo build -p compiler_builtins --target "$target" --features no-asm +cargo build -p compiler_builtins --target "$target" --release --features no-asm +cargo build -p compiler_builtins --target "$target" --features no-f16-f128 +cargo build -p compiler_builtins --target "$target" --release --features no-f16-f128 PREFIX=${target//unknown-/}- case "$target" in diff --git a/libm-test/tests/check_coverage.rs b/libm-test/tests/check_coverage.rs index c23298686..3b445a3de 100644 --- a/libm-test/tests/check_coverage.rs +++ b/libm-test/tests/check_coverage.rs @@ -19,7 +19,7 @@ macro_rules! callback { #[test] fn test_for_each_function_all_included() { - let all_functions: HashSet<_> = include_str!("../../../etc/function-list.txt") + let all_functions: HashSet<_> = include_str!("../../etc/function-list.txt") .lines() .filter(|line| !line.starts_with("#")) .collect(); @@ -52,7 +52,7 @@ fn ensure_list_updated() { } let res = Command::new("python3") - .arg(Path::new(env!("CARGO_MANIFEST_DIR")).join("../../etc/update-api-list.py")) + .arg(Path::new(env!("CARGO_MANIFEST_DIR")).join("../etc/update-api-list.py")) .arg("--check") .status() .unwrap(); From c257f55d0f26b449b5eb77c95a3e37366cc058aa Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 03:28:17 +0000 Subject: [PATCH 1301/1459] Enable icount benchmarks in CI --- .github/workflows/main.yaml | 55 +++++++++++++++++++++++++++-- {etc/libm/ci => ci}/bench-icount.sh | 0 {etc/libm/ci => ci}/ci-util.py | 0 compiler-builtins/Cargo.toml | 3 +- 4 files changed, 55 insertions(+), 3 deletions(-) rename {etc/libm/ci => ci}/bench-icount.sh (100%) rename {etc/libm/ci => ci}/ci-util.py (100%) diff --git a/.github/workflows/main.yaml 
b/.github/workflows/main.yaml index eec747a24..54cfd506e 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -13,6 +13,7 @@ env: RUSTDOCFLAGS: -Dwarnings RUSTFLAGS: -Dwarnings RUST_BACKTRACE: full + BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducable results jobs: test: @@ -161,6 +162,55 @@ jobs: - uses: Swatinem/rust-cache@v2 - run: cargo clippy -- -D clippy::all + benchmarks: + name: Benchmarks + runs-on: ubuntu-24.04 + timeout-minutes: 20 + steps: + - uses: actions/checkout@master + with: + submodules: true + - uses: taiki-e/install-action@cargo-binstall + + - name: Set up dependencies + run: | + sudo apt-get update + sudo apt-get install -y valgrind gdb libc6-dbg # Needed for iai-callgrind + rustup update "$BENCHMARK_RUSTC" --no-self-update + rustup default "$BENCHMARK_RUSTC" + # Install the version of iai-callgrind-runner that is specified in Cargo.toml + iai_version="$(cargo metadata --format-version=1 --features icount | + jq -r '.packages[] | select(.name == "iai-callgrind").version')" + cargo binstall -y iai-callgrind-runner --version "$iai_version" + sudo apt-get install valgrind + - uses: Swatinem/rust-cache@v2 + + - name: Run icount benchmarks + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} + run: ./ci/bench-icount.sh + + - name: Upload the benchmark baseline + uses: actions/upload-artifact@v4 + with: + name: ${{ env.BASELINE_NAME }} + path: ${{ env.BASELINE_NAME }}.tar.xz + + - name: Run wall time benchmarks + run: | + # Always use the same seed for benchmarks. Ideally we should switch to a + # non-random generator. + export LIBM_SEED=benchesbenchesbenchesbencheswoo! 
+ cargo bench --package libm-test \ + --no-default-features \ + --features short-benchmarks,build-musl,libm/force-soft-floats + + - name: Print test logs if available + if: always() + run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi + shell: bash + miri: name: Miri runs-on: ubuntu-24.04 @@ -189,10 +239,11 @@ jobs: success: needs: - - test - - rustfmt + - benchmarks - clippy - miri + - rustfmt + - test runs-on: ubuntu-24.04 # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency # failed" as success. So we have to do some contortions to ensure the job fails if any of its diff --git a/etc/libm/ci/bench-icount.sh b/ci/bench-icount.sh similarity index 100% rename from etc/libm/ci/bench-icount.sh rename to ci/bench-icount.sh diff --git a/etc/libm/ci/ci-util.py b/ci/ci-util.py similarity index 100% rename from etc/libm/ci/ci-util.py rename to ci/ci-util.py diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index c9503bbce..a014baf04 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -73,4 +73,5 @@ rustc-dep-of-std = ['compiler-builtins', 'core'] public-test-deps = [] [lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(bootstrap)'] } +# The cygwin config can be dropped after our benchmark toolchain is bumped +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(bootstrap)', 'cfg(target_os, values("cygwin"))'] } From 98cdc8e8734a998517bb407184fe486b852f0a9a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 04:48:51 +0000 Subject: [PATCH 1302/1459] Fix the libm-test logfile path This was broken since the crate's location relative to the target directory had changed. 
--- libm-test/src/lib.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/libm-test/src/lib.rs b/libm-test/src/lib.rs index 730318abc..accb39654 100644 --- a/libm-test/src/lib.rs +++ b/libm-test/src/lib.rs @@ -71,12 +71,7 @@ pub fn test_log(s: &str) { return None; }; - PathBuf::from(x) - .parent() - .unwrap() - .parent() - .unwrap() - .join("target") + PathBuf::from(x).join("../target") } }; let outfile = target_dir.join("test-log.txt"); From 33ec3e8876509bcd36e64a44288ecb5d65b9a013 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 06:10:45 +0000 Subject: [PATCH 1303/1459] ci: Add a libm MSRV check Add the job from rust-lang/libm --- .github/workflows/main.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 54cfd506e..7ef0074de 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -226,6 +226,26 @@ jobs: - uses: Swatinem/rust-cache@v2 - run: ./ci/miri.sh + msrv: + name: Check libm MSRV + runs-on: ubuntu-24.04 + timeout-minutes: 10 + env: + RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings` + steps: + - uses: actions/checkout@master + - name: Install Rust + run: | + msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' libm/Cargo.toml)" + echo "MSRV: $msrv" + rustup update "$msrv" --no-self-update && rustup default "$msrv" + - uses: Swatinem/rust-cache@v2 + - run: | + # FIXME(msrv): Remove the workspace Cargo.toml so 1.63 cargo doesn't see + # `edition = "2024"` and get spooked. 
+ rm Cargo.toml + cargo build --manifest-path libm/Cargo.toml + rustfmt: name: Rustfmt runs-on: ubuntu-24.04 @@ -242,6 +262,7 @@ jobs: - benchmarks - clippy - miri + - msrv - rustfmt - test runs-on: ubuntu-24.04 From 4fa8a6877456f5981f79df75492f6aab8aff2a85 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 06:21:44 +0000 Subject: [PATCH 1304/1459] ci: Add extensive tests from rust-lang/libm --- .github/workflows/main.yaml | 68 +++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 7ef0074de..8f8ac033a 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -257,10 +257,78 @@ jobs: run: rustup set profile minimal && rustup default stable && rustup component add rustfmt - run: cargo fmt -- --check + # Determine which extensive tests should be run based on changed files. + calculate_extensive_matrix: + name: Calculate job matrix + runs-on: ubuntu-24.04 + timeout-minutes: 10 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} + outputs: + matrix: ${{ steps.script.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 100 + - name: Fetch pull request ref + run: git fetch origin "$GITHUB_REF:$GITHUB_REF" + if: github.event_name == 'pull_request' + - run: python3 ci/ci-util.py generate-matrix >> "$GITHUB_OUTPUT" + id: script + + extensive: + name: Extensive tests for ${{ matrix.ty }} + needs: + # Wait on `clippy` so we have some confidence that the crate will build + - clippy + - calculate_extensive_matrix + runs-on: ubuntu-24.04 + timeout-minutes: 240 # 4 hours + strategy: + matrix: + # Use the output from `calculate_extensive_matrix` to calculate the matrix + # FIXME: it would be better to run all jobs (i.e. all types) but mark those that + # didn't change as skipped, rather than completely excluding the job. 
However, + # this is not currently possible https://github.com/actions/runner/issues/1985. + include: ${{ fromJSON(needs.calculate_extensive_matrix.outputs.matrix).matrix }} + env: + TO_TEST: ${{ matrix.to_test }} + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install Rust + run: | + rustup update nightly --no-self-update + rustup default nightly + - uses: Swatinem/rust-cache@v2 + - name: Run extensive tests + run: | + echo "Tests to run: '$TO_TEST'" + if [ -z "$TO_TEST" ]; then + echo "No tests to run, exiting." + exit + fi + + set -x + + # Run the non-extensive tests first to catch any easy failures + cargo t --profile release-checked -- "$TO_TEST" + + LIBM_EXTENSIVE_TESTS="$TO_TEST" cargo test \ + --features build-mpfr,unstable,force-soft-floats \ + --profile release-checked \ + -- extensive + - name: Print test logs if available + run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi + shell: bash + success: needs: - benchmarks - clippy + - extensive - miri - msrv - rustfmt From f4a4e933120320b4f4ee1152abbf46791949b801 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 04:40:12 +0000 Subject: [PATCH 1305/1459] ci: Enable testing of `libm` crates Update `run.sh` to start testing `libm`. Currently this is somewhat inefficient because `builtins-test` gets run more than once on some targets; this can be cleaned up later. 
--- .github/workflows/main.yaml | 12 ++- Cargo.toml | 35 ++++---- ci/run.sh | 167 +++++++++++++++++++++++++++++++----- etc/libm/ci/run-docker.sh | 56 ------------ etc/libm/ci/run.sh | 130 ---------------------------- libm-test/Cargo.toml | 3 + 6 files changed, 176 insertions(+), 227 deletions(-) delete mode 100755 etc/libm/ci/run-docker.sh delete mode 100755 etc/libm/ci/run.sh diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 8f8ac033a..95ed6a855 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -97,6 +97,7 @@ jobs: rustup default "$channel" rustup target add "${{ matrix.target }}" rustup component add llvm-tools-preview + - uses: taiki-e/install-action@nextest - uses: Swatinem/rust-cache@v2 with: key: ${{ matrix.target }} @@ -107,7 +108,7 @@ jobs: path: /tmp/.buildx-cache key: ${{ matrix.target }}-buildx-${{ github.sha }} restore-keys: ${{ matrix.target }}-buildx- - + - name: Cache compiler-rt id: cache-compiler-rt uses: actions/cache@v4 @@ -121,6 +122,10 @@ jobs: - run: echo "RUST_COMPILER_RT_ROOT=$(realpath ./compiler-rt)" >> "$GITHUB_ENV" shell: bash + - name: Verify API list + if: matrix.os == 'ubuntu-24.04' + run: python3 etc/update-api-list.py --check + # Non-linux tests just use our raw script - name: Run locally if: matrix.os != 'ubuntu-24.04' @@ -136,6 +141,11 @@ jobs: if: matrix.os == 'ubuntu-24.04' run: cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} + - name: Print test logs if available + if: always() + run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi + shell: bash + # Workaround to keep Docker cache smaller # https://github.com/docker/build-push-action/issues/252 # https://github.com/moby/buildkit/issues/1896 diff --git a/Cargo.toml b/Cargo.toml index 194a2cdc2..c3737a042 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,22 +31,19 @@ panic = "abort" [profile.dev] panic = "abort" -# FIXME(libm): these profiles are needed for testing -# # The default release 
profile is unchanged. - -# # Release mode with debug assertions -# [profile.release-checked] -# inherits = "release" -# debug-assertions = true -# overflow-checks = true - -# # Release with maximum optimizations, which is very slow to build. This is also -# # what is needed to check `no-panic`. -# [profile.release-opt] -# inherits = "release" -# codegen-units = 1 -# lto = "fat" - -# [profile.bench] -# # Required for iai-callgrind -# debug = true +# Release mode with debug assertions +[profile.release-checked] +inherits = "release" +debug-assertions = true +overflow-checks = true + +# Release with maximum optimizations, which is very slow to build. This is also +# what is needed to check `no-panic`. +[profile.release-opt] +inherits = "release" +codegen-units = 1 +lto = "fat" + +[profile.bench] +# Required for iai-callgrind +debug = true diff --git a/ci/run.sh b/ci/run.sh index bce90d48d..730079be7 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -13,6 +13,11 @@ if [ -z "$target" ]; then target="$host_target" fi +if [[ "$target" = *"wasm"* ]]; then + # Enable the random backend + export RUSTFLAGS="${RUSTFLAGS:-} --cfg getrandom_backend=\"wasm_js\"" +fi + if [ "${USING_CONTAINER_RUSTC:-}" = 1 ]; then # Install nonstandard components if we have control of the environment rustup target list --installed | @@ -22,27 +27,27 @@ fi # Test our implementation if [ "${BUILD_ONLY:-}" = "1" ]; then - echo "nothing to do for no_std" + echo "no tests to run for build-only targets" else - run="cargo test --package builtins-test --no-fail-fast --target $target" - $run - $run --release - $run --features c - $run --features c --release - $run --features no-asm - $run --features no-asm --release - $run --features no-f16-f128 - $run --features no-f16-f128 --release - $run --benches - $run --benches --release -fi + test_builtins=(cargo test --package builtins-test --no-fail-fast --target "$target") + "${test_builtins[@]}" + "${test_builtins[@]}" --release + "${test_builtins[@]}" --features c + 
"${test_builtins[@]}" --features c --release + "${test_builtins[@]}" --features no-asm + "${test_builtins[@]}" --features no-asm --release + "${test_builtins[@]}" --features no-f16-f128 + "${test_builtins[@]}" --features no-f16-f128 --release + "${test_builtins[@]}" --benches + "${test_builtins[@]}" --benches --release -if [ "${TEST_VERBATIM:-}" = "1" ]; then - verb_path=$(cmd.exe //C echo \\\\?\\%cd%\\builtins-test\\target2) - cargo build --package builtins-test \ - --target "$target" --target-dir "$verb_path" --features c + if [ "${TEST_VERBATIM:-}" = "1" ]; then + verb_path=$(cmd.exe //C echo \\\\?\\%cd%\\builtins-test\\target2) + "${test_builtins[@]}" --target-dir "$verb_path" --features c + fi fi + declare -a rlib_paths # Set the `rlib_paths` global array to a list of all compiler-builtins rlibs @@ -61,11 +66,11 @@ rm -f "${rlib_paths[@]}" cargo build -p compiler_builtins --target "$target" cargo build -p compiler_builtins --target "$target" --release cargo build -p compiler_builtins --target "$target" --features c -cargo build -p compiler_builtins --target "$target" --release --features c +cargo build -p compiler_builtins --target "$target" --features c --release cargo build -p compiler_builtins --target "$target" --features no-asm -cargo build -p compiler_builtins --target "$target" --release --features no-asm +cargo build -p compiler_builtins --target "$target" --features no-asm --release cargo build -p compiler_builtins --target "$target" --features no-f16-f128 -cargo build -p compiler_builtins --target "$target" --release --features no-f16-f128 +cargo build -p compiler_builtins --target "$target" --features no-f16-f128 --release PREFIX=${target//unknown-/}- case "$target" in @@ -167,4 +172,124 @@ for rlib in "${rlib_paths[@]}"; do fi done -true +# Test libm + +mflags=() + +# We enumerate features manually. +mflags+=(--no-default-features) + +# Enable arch-specific routines when available. 
+mflags+=(--features arch) + +# Always enable `unstable-float` since it expands available API but does not +# change any implementations. +mflags+=(--features unstable-float) + +# We need to specifically skip tests for musl-math-sys on systems that can't +# build musl since otherwise `--all` will activate it. +case "$target" in + # Can't build at all on MSVC, WASM, or thumb + *windows-msvc*) mflags+=(--exclude musl-math-sys) ;; + *wasm*) mflags+=(--exclude musl-math-sys) ;; + *thumb*) mflags+=(--exclude musl-math-sys) ;; + + # We can build musl on MinGW but running tests gets a stack overflow + *windows-gnu*) ;; + # FIXME(#309): LE PPC crashes calling the musl version of some functions. It + # seems like a qemu bug but should be investigated further at some point. + # See . + *powerpc64le*) ;; + + # Everything else gets musl enabled + *) mflags+=(--features libm-test/build-musl) ;; +esac + + +# Configure which targets test against MPFR +case "$target" in + # MSVC cannot link MPFR + *windows-msvc*) ;; + # FIXME: MinGW should be able to build MPFR, but setup in CI is nontrivial. + *windows-gnu*) ;; + # Targets that aren't cross compiled in CI work fine + aarch64*apple*) mflags+=(--features libm-test/build-mpfr) ;; + aarch64*linux*) mflags+=(--features libm-test/build-mpfr) ;; + i586*) mflags+=(--features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross) ;; + i686*) mflags+=(--features libm-test/build-mpfr) ;; + x86_64*) mflags+=(--features libm-test/build-mpfr) ;; +esac + +# FIXME: `STATUS_DLL_NOT_FOUND` testing macros on CI. +# +case "$target" in + *windows-gnu) mflags+=(--exclude libm-macros) ;; +esac + +# Make sure a simple build works +cargo check -p libm --no-default-features --target "$target" + + +if [ "${BUILD_ONLY:-}" = "1" ]; then + # If we are on targets that can't run tests, verify that we can build. 
+ cmd=(cargo build --target "$target" --package libm) + "${cmd[@]}" + "${cmd[@]}" --features unstable-intrinsics + + echo "can't run tests on $target; skipping" +else + mflags+=(--workspace --target "$target") + cmd=(cargo test "${mflags[@]}") + profile_flag="--profile" + + # If nextest is available, use that + command -v cargo-nextest && nextest=1 || nextest=0 + if [ "$nextest" = "1" ]; then + cmd=(cargo nextest run --max-fail=10) + + # Workaround for https://github.com/nextest-rs/nextest/issues/2066 + if [ -f /.dockerenv ]; then + cfg_file="/tmp/nextest-config.toml" + echo "[store]" >> "$cfg_file" + echo "dir = \"$CARGO_TARGET_DIR/nextest\"" >> "$cfg_file" + cmd+=(--config-file "$cfg_file") + fi + + cmd+=("${mflags[@]}") + profile_flag="--cargo-profile" + fi + + # Test once without intrinsics + "${cmd[@]}" + + # Run doctests if they were excluded by nextest + [ "$nextest" = "1" ] && cargo test --doc --exclude compiler_builtins "${mflags[@]}" + + # Exclude the macros and utile crates from the rest of the tests to save CI + # runtime, they shouldn't have anything feature- or opt-level-dependent. + cmd+=(--exclude util --exclude libm-macros) + + # Test once with intrinsics enabled + "${cmd[@]}" --features unstable-intrinsics + "${cmd[@]}" --features unstable-intrinsics --benches + + # Test the same in release mode, which also increases coverage. Also ensure + # the soft float routines are checked. + "${cmd[@]}" "$profile_flag" release-checked + "${cmd[@]}" "$profile_flag" release-checked --features force-soft-floats + "${cmd[@]}" "$profile_flag" release-checked --features unstable-intrinsics + "${cmd[@]}" "$profile_flag" release-checked --features unstable-intrinsics --benches + + # Ensure that the routines do not panic. + # + # `--tests` must be passed because no-panic is only enabled as a dev + # dependency. The `release-opt` profile must be used to enable LTO and a + # single CGU. 
+ ENSURE_NO_PANIC=1 cargo build \ + -p libm \ + --target "$target" \ + --no-default-features \ + --features unstable-float \ + --tests \ + --profile release-opt +fi diff --git a/etc/libm/ci/run-docker.sh b/etc/libm/ci/run-docker.sh deleted file mode 100755 index 6626e7226..000000000 --- a/etc/libm/ci/run-docker.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash - -# Small script to run tests for a target (or all targets) inside all the -# respective docker images. - -set -euxo pipefail - -host_arch="$(uname -m | sed 's/arm64/aarch64/')" - -run() { - local target=$1 - - echo "testing target: $target" - - target_arch="$(echo "$target" | cut -d'-' -f1)" - - emulated="" - if [ "$target_arch" != "$host_arch" ]; then - emulated=1 - echo "target is emulated" - fi - - # This directory needs to exist before calling docker, otherwise docker will create it but it - # will be owned by root - mkdir -p target - - set_env="HOME=/tmp PATH=\$PATH:/rust/bin:/cargo/bin" - docker build -t "libm-$target" "ci/docker/$target" - docker run \ - --rm \ - --user "$(id -u):$(id -g)" \ - -e CI \ - -e RUSTFLAGS \ - -e CARGO_TERM_COLOR \ - -e CARGO_HOME=/cargo \ - -e CARGO_TARGET_DIR=/target \ - -e "EMULATED=$emulated" \ - -v "${HOME}/.cargo:/cargo" \ - -v "$(pwd)/target:/target" \ - -v "$(pwd):/checkout:ro" \ - -v "$(rustc --print sysroot):/rust:ro" \ - --init \ - -w /checkout \ - "libm-$target" \ - sh -c "$set_env exec ci/run.sh $target" -} - -if [ -z "$1" ]; then - echo "running tests for all targets" - - for d in ci/docker/*; do - run $d - done -else - run $1 -fi diff --git a/etc/libm/ci/run.sh b/etc/libm/ci/run.sh deleted file mode 100755 index a946d325e..000000000 --- a/etc/libm/ci/run.sh +++ /dev/null @@ -1,130 +0,0 @@ -#!/bin/sh - -set -eux - -export RUST_BACKTRACE="${RUST_BACKTRACE:-full}" -export NEXTEST_STATUS_LEVEL=all - -target="${1:-}" -flags="" - -if [ -z "$target" ]; then - host_target=$(rustc -vV | awk '/^host/ { print $2 }') - echo "Defaulted to host target $host_target" - 
target="$host_target" -fi - -# We enumerate features manually. -flags="$flags --no-default-features" - -# Enable arch-specific routines when available. -flags="$flags --features arch" - -# Always enable `unstable-float` since it expands available API but does not -# change any implementations. -flags="$flags --features unstable-float" - -# We need to specifically skip tests for musl-math-sys on systems that can't -# build musl since otherwise `--all` will activate it. -case "$target" in - # Can't build at all on MSVC, WASM, or thumb - *windows-msvc*) flags="$flags --exclude musl-math-sys" ;; - *wasm*) flags="$flags --exclude musl-math-sys" ;; - *thumb*) flags="$flags --exclude musl-math-sys" ;; - - # We can build musl on MinGW but running tests gets a stack overflow - *windows-gnu*) ;; - # FIXME(#309): LE PPC crashes calling the musl version of some functions. It - # seems like a qemu bug but should be investigated further at some point. - # See . - *powerpc64le*) ;; - - # Everything else gets musl enabled - *) flags="$flags --features libm-test/build-musl" ;; -esac - -# Configure which targets test against MPFR -case "$target" in - # MSVC cannot link MPFR - *windows-msvc*) ;; - # FIXME: MinGW should be able to build MPFR, but setup in CI is nontrivial. - *windows-gnu*) ;; - # Targets that aren't cross compiled in CI work fine - aarch64*apple*) flags="$flags --features libm-test/build-mpfr" ;; - aarch64*linux*) flags="$flags --features libm-test/build-mpfr" ;; - i586*) flags="$flags --features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross" ;; - i686*) flags="$flags --features libm-test/build-mpfr" ;; - x86_64*) flags="$flags --features libm-test/build-mpfr" ;; -esac - -# FIXME: `STATUS_DLL_NOT_FOUND` testing macros on CI. -# -case "$target" in - *windows-gnu) flags="$flags --exclude libm-macros" ;; -esac - -# Make sure we can build with overriding features. 
-cargo check -p libm --no-default-features - -if [ "${BUILD_ONLY:-}" = "1" ]; then - # If we are on targets that can't run tests, verify that we can build. - cmd="cargo build --target $target --package libm" - $cmd - $cmd --features unstable-intrinsics - - echo "can't run tests on $target; skipping" - exit -fi - -flags="$flags --all --target $target" -cmd="cargo test $flags" -profile="--profile" - -# If nextest is available, use that -command -v cargo-nextest && nextest=1 || nextest=0 -if [ "$nextest" = "1" ]; then - # Workaround for https://github.com/nextest-rs/nextest/issues/2066 - if [ -f /.dockerenv ]; then - cfg_file="/tmp/nextest-config.toml" - echo "[store]" >> "$cfg_file" - echo "dir = \"$CARGO_TARGET_DIR/nextest\"" >> "$cfg_file" - cfg_flag="--config-file $cfg_file" - fi - - cmd="cargo nextest run ${cfg_flag:-} --max-fail=10 $flags" - profile="--cargo-profile" -fi - -# Test once without intrinsics -$cmd - -# Run doctests if they were excluded by nextest -[ "$nextest" = "1" ] && cargo test --doc $flags - -# Exclude the macros and utile crates from the rest of the tests to save CI -# runtime, they shouldn't have anything feature- or opt-level-dependent. -cmd="$cmd --exclude util --exclude libm-macros" - -# Test once with intrinsics enabled -$cmd --features unstable-intrinsics -$cmd --features unstable-intrinsics --benches - -# Test the same in release mode, which also increases coverage. Also ensure -# the soft float routines are checked. -$cmd "$profile" release-checked -$cmd "$profile" release-checked --features force-soft-floats -$cmd "$profile" release-checked --features unstable-intrinsics -$cmd "$profile" release-checked --features unstable-intrinsics --benches - -# Ensure that the routines do not panic. -# -# `--tests` must be passed because no-panic is only enabled as a dev -# dependency. The `release-opt` profile must be used to enable LTO and a -# single CGU. 
-ENSURE_NO_PANIC=1 cargo build \ - -p libm \ - --target "$target" \ - --no-default-features \ - --features unstable-float \ - --tests \ - --profile release-opt diff --git a/libm-test/Cargo.toml b/libm-test/Cargo.toml index c9b3b7958..6fd49774e 100644 --- a/libm-test/Cargo.toml +++ b/libm-test/Cargo.toml @@ -41,6 +41,9 @@ rand_chacha = "0.9.0" rayon = "1.10.0" rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "integer", "std"] } +[target.'cfg(target_family = "wasm")'.dependencies] +getrandom = { version = "0.3.2", features = ["wasm_js"] } + [build-dependencies] rand = { version = "0.9.0", optional = true } From f878724027c7dd88f37bdf15c1aa4232045f190d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 05:37:08 +0000 Subject: [PATCH 1306/1459] Set the musl submodule to 61399d4b ("loongarch64: add TLSDESC support") Set the submodule to the same version we had been using in rust-lang/libm. This is a downgrade from the current version but it avoids some new deviations that show up, which can be corrected later. --- crates/musl-math-sys/musl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/musl-math-sys/musl b/crates/musl-math-sys/musl index 0784374d5..61399d4bd 160000 --- a/crates/musl-math-sys/musl +++ b/crates/musl-math-sys/musl @@ -1 +1 @@ -Subproject commit 0784374d561435f7c787a555aeab8ede699ed298 +Subproject commit 61399d4bd02ae1ec03068445aa7ffe9174466bfd From bddbd7dbd3558b1c732751f468398eab75dc9c11 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 07:26:43 +0000 Subject: [PATCH 1307/1459] ci: Add `/cargo/bin` to the path in Docker This makes it possible to use nextest within the container. 
--- ci/run-docker.sh | 2 +- ci/run.sh | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/run-docker.sh b/ci/run-docker.sh index d07e7784c..c4d3af55d 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -50,7 +50,7 @@ run() { -v "$(pwd)/target:/builtins-target" -v "$(rustc --print sysroot):/rust:ro" ) - run_cmd="$run_cmd PATH=\$PATH:/rust/bin" + run_cmd="$run_cmd PATH=\$PATH:/rust/bin:/cargo/bin" else # Use rustc provided by a docker image docker volume create compiler-builtins-cache diff --git a/ci/run.sh b/ci/run.sh index 730079be7..0ad99da19 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -229,7 +229,6 @@ esac # Make sure a simple build works cargo check -p libm --no-default-features --target "$target" - if [ "${BUILD_ONLY:-}" = "1" ]; then # If we are on targets that can't run tests, verify that we can build. cmd=(cargo build --target "$target" --package libm) @@ -255,6 +254,9 @@ else cmd+=(--config-file "$cfg_file") fi + # Not all configurations have tests to run on wasm + [[ "$target" = *"wasm"* ]] && cmd+=(--no-tests=warn) + cmd+=("${mflags[@]}") profile_flag="--cargo-profile" fi From 40582678ca7b70be54b2d56428e11f0cc0899842 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 07:05:32 +0000 Subject: [PATCH 1308/1459] ci: Skip testing libm in PRs if it did not change Many contributions to compiler-builtins don't have any need to touch libm, and could get by with the few minutes of CI for compiler-builtins rather than the ~30 minutes for libm. We already have some scripts that handle changed file detection, so expand its use to skip libm CI if it doesn't need to run. 
--- .github/workflows/main.yaml | 49 ++++++++++++++++++++----------------- ci/ci-util.py | 42 ++++++++++++++++++++++--------- ci/run-docker.sh | 1 + ci/run.sh | 11 ++++++--- 4 files changed, 66 insertions(+), 37 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 95ed6a855..690cdc89b 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -16,6 +16,27 @@ env: BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducable results jobs: + # Determine which tests should be run based on changed files. + calculate_vars: + name: Calculate workflow variables + runs-on: ubuntu-24.04 + timeout-minutes: 10 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} + outputs: + extensive_matrix: ${{ steps.script.outputs.extensive_matrix }} + may_skip_libm_ci: ${{ steps.script.outputs.may_skip_libm_ci }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 500 + - name: Fetch pull request ref + run: git fetch origin "$GITHUB_REF:$GITHUB_REF" + if: github.event_name == 'pull_request' + - run: python3 ci/ci-util.py generate-matrix >> "$GITHUB_OUTPUT" + id: script + test: name: Build and test timeout-minutes: 60 @@ -78,9 +99,11 @@ jobs: os: windows-2025 channel: nightly-x86_64-gnu runs-on: ${{ matrix.os }} + needs: [calculate_vars] env: BUILD_ONLY: ${{ matrix.build_only }} TEST_VERBATIM: ${{ matrix.test_verbatim }} + MAY_SKIP_LIBM_CI: ${{ needs.calculate_vars.outputs.may_skip_libm_ci }} steps: - name: Print runner information run: uname -a @@ -267,41 +290,21 @@ jobs: run: rustup set profile minimal && rustup default stable && rustup component add rustfmt - run: cargo fmt -- --check - # Determine which extensive tests should be run based on changed files. 
- calculate_extensive_matrix: - name: Calculate job matrix - runs-on: ubuntu-24.04 - timeout-minutes: 10 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - PR_NUMBER: ${{ github.event.pull_request.number }} - outputs: - matrix: ${{ steps.script.outputs.matrix }} - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 100 - - name: Fetch pull request ref - run: git fetch origin "$GITHUB_REF:$GITHUB_REF" - if: github.event_name == 'pull_request' - - run: python3 ci/ci-util.py generate-matrix >> "$GITHUB_OUTPUT" - id: script - extensive: name: Extensive tests for ${{ matrix.ty }} needs: # Wait on `clippy` so we have some confidence that the crate will build - clippy - - calculate_extensive_matrix + - calculate_vars runs-on: ubuntu-24.04 timeout-minutes: 240 # 4 hours strategy: matrix: - # Use the output from `calculate_extensive_matrix` to calculate the matrix + # Use the output from `calculate_vars` to create the matrix # FIXME: it would be better to run all jobs (i.e. all types) but mark those that # didn't change as skipped, rather than completely excluding the job. However, # this is not currently possible https://github.com/actions/runner/issues/1985. - include: ${{ fromJSON(needs.calculate_extensive_matrix.outputs.matrix).matrix }} + include: ${{ fromJSON(needs.calculate_vars.outputs.extensive_matrix).extensive_matrix }} env: TO_TEST: ${{ matrix.to_test }} steps: diff --git a/ci/ci-util.py b/ci/ci-util.py index d9e402d6b..7486d6b41 100755 --- a/ci/ci-util.py +++ b/ci/ci-util.py @@ -7,6 +7,7 @@ import json import os +import re import subprocess as sp import sys from dataclasses import dataclass @@ -68,6 +69,10 @@ "libm/src/math/arch/intrinsics.rs", ] +# libm PR CI takes a long time and doesn't need to run unless relevant files have been +# changed. Anything matching this regex pattern will trigger a run. 
+TRIGGER_LIBM_PR_CI = ".*(libm|musl).*" + TYPES = ["f16", "f32", "f64", "f128"] @@ -116,7 +121,6 @@ class FunctionDef(TypedDict): type: str -@dataclass class Context: gh_ref: str | None changed: list[Path] @@ -142,7 +146,7 @@ def _init_change_list(self): # the PR number), and sets this as `GITHUB_REF`. ref = self.gh_ref eprint(f"using ref `{ref}`") - if ref is None or "merge" not in ref: + if not self.is_pr(): # If the ref is not for `merge` then we are not in PR CI eprint("No diff available for ref") return @@ -170,6 +174,10 @@ def _init_change_list(self): ) self.changed = [Path(p) for p in textlist.splitlines()] + def is_pr(self) -> bool: + """Check if we are looking at a PR rather than a push.""" + return self.gh_ref is not None and "merge" in self.gh_ref + @staticmethod def _ignore_file(fname: str) -> bool: return any(fname.startswith(pfx) for pfx in IGNORE_FILES) @@ -196,7 +204,16 @@ def changed_routines(self) -> dict[str, list[str]]: return ret - def make_workflow_output(self) -> str: + def may_skip_libm_ci(self) -> bool: + """If this is a PR and no libm files were changed, allow skipping libm + jobs.""" + + if self.is_pr(): + return all(not re.match(TRIGGER_LIBM_PR_CI, str(f)) for f in self.changed) + + return False + + def emit_workflow_output(self): """Create a JSON object a list items for each type's changed files, if any did change, and the routines that were affected by the change. 
""" @@ -216,9 +233,10 @@ def make_workflow_output(self) -> str: eprint("Skipping all extensive tests") changed = self.changed_routines() - ret = [] + matrix = [] total_to_test = 0 + # Figure out which extensive tests need to run for ty in TYPES: ty_changed = changed.get(ty, []) ty_to_test = [] if skip_tests else ty_changed @@ -230,9 +248,14 @@ def make_workflow_output(self) -> str: "to_test": ",".join(ty_to_test), } - ret.append(item) - output = json.dumps({"matrix": ret}, separators=(",", ":")) - eprint(f"output: {output}") + matrix.append(item) + + ext_matrix = json.dumps({"extensive_matrix": matrix}, separators=(",", ":")) + may_skip = str(self.may_skip_libm_ci()).lower() + print(f"extensive_matrix={ext_matrix}") + print(f"may_skip_libm_ci={may_skip}") + eprint(f"extensive_matrix={ext_matrix}") + eprint(f"may_skip_libm_ci={may_skip}") eprint(f"total extensive tests: {total_to_test}") if error_on_many_tests and total_to_test > MANY_EXTENSIVE_THRESHOLD: @@ -242,8 +265,6 @@ def make_workflow_output(self) -> str: ) exit(1) - return output - def locate_baseline(flags: list[str]) -> None: """Find the most recent baseline from CI, download it if specified. 
@@ -398,8 +419,7 @@ def main(): match sys.argv[1:]: case ["generate-matrix"]: ctx = Context() - output = ctx.make_workflow_output() - print(f"matrix={output}") + ctx.emit_workflow_output() case ["locate-baseline", *flags]: locate_baseline(flags) case ["check-regressions", *args]: diff --git a/ci/run-docker.sh b/ci/run-docker.sh index c4d3af55d..58838fadf 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -77,6 +77,7 @@ run() { -e CI \ -e CARGO_TARGET_DIR=/builtins-target \ -e CARGO_TERM_COLOR \ + -e MAY_SKIP_LIBM_CI \ -e RUSTFLAGS \ -e RUST_BACKTRACE \ -e RUST_COMPILER_RT_ROOT \ diff --git a/ci/run.sh b/ci/run.sh index 0ad99da19..68d13c130 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -174,6 +174,14 @@ done # Test libm +# Make sure a simple build works +cargo check -p libm --no-default-features --target "$target" + +if [ "${MAY_SKIP_LIBM_CI:-}" = "true" ]; then + echo "skipping libm PR CI" + exit +fi + mflags=() # We enumerate features manually. @@ -226,9 +234,6 @@ case "$target" in *windows-gnu) mflags+=(--exclude libm-macros) ;; esac -# Make sure a simple build works -cargo check -p libm --no-default-features --target "$target" - if [ "${BUILD_ONLY:-}" = "1" ]; then # If we are on targets that can't run tests, verify that we can build. 
cmd=(cargo build --target "$target" --package libm) From d045d7428165cec1ac7d19ecad46f0602cb28fc8 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 08:26:08 +0000 Subject: [PATCH 1309/1459] ci: Add a timeout for all jobs --- .github/workflows/main.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 690cdc89b..2b57d7eb3 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -247,6 +247,7 @@ jobs: miri: name: Miri runs-on: ubuntu-24.04 + timeout-minutes: 10 steps: - uses: actions/checkout@v4 with: @@ -282,6 +283,7 @@ jobs: rustfmt: name: Rustfmt runs-on: ubuntu-24.04 + timeout-minutes: 10 steps: - uses: actions/checkout@v4 with: @@ -347,6 +349,7 @@ jobs: - rustfmt - test runs-on: ubuntu-24.04 + timeout-minutes: 10 # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency # failed" as success. So we have to do some contortions to ensure the job fails if any of its # dependencies fails. From c907a32ec0ec7cb0312bb1edfde0a013f185c607 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 08:34:47 +0000 Subject: [PATCH 1310/1459] ci: Use lowercase for bash locals, fix shellcheck --- ci/miri.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/ci/miri.sh b/ci/miri.sh index 79e660bab..7b0ea44c6 100755 --- a/ci/miri.sh +++ b/ci/miri.sh @@ -1,5 +1,5 @@ #!/bin/bash -set -ex +set -eux # We need Tree Borrows as some of our raw pointer patterns are not # compatible with Stacked Borrows. @@ -7,10 +7,12 @@ export MIRIFLAGS="-Zmiri-tree-borrows" # One target that sets `mem-unaligned` and one that does not, # and a big-endian target. 
-TARGETS=(x86_64-unknown-linux-gnu +targets=( + x86_64-unknown-linux-gnu armv7-unknown-linux-gnueabihf - s390x-unknown-linux-gnu) -for TARGET in "${TARGETS[@]}"; do + s390x-unknown-linux-gnu +) +for target in "${targets[@]}"; do # Only run the `mem` tests to avoid this taking too long. - cargo miri test --manifest-path builtins-test/Cargo.toml --features no-asm --target $TARGET -- mem + cargo miri test --manifest-path builtins-test/Cargo.toml --features no-asm --target "$target" -- mem done From 727df697aec9bbbb845c3f115e10db0ded6d15bd Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 08:42:12 +0000 Subject: [PATCH 1311/1459] ci: Clean up workflow file and docker script --- .github/workflows/main.yaml | 11 +++++------ ci/run-docker.sh | 13 +++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 2b57d7eb3..f7522a53d 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -131,6 +131,9 @@ jobs: path: /tmp/.buildx-cache key: ${{ matrix.target }}-buildx-${{ github.sha }} restore-keys: ${{ matrix.target }}-buildx- + # Configure buildx to use Docker layer caching + - uses: docker/setup-buildx-action@v3 + if: matrix.os == 'ubuntu-24.04' - name: Cache compiler-rt id: cache-compiler-rt @@ -155,14 +158,10 @@ jobs: shell: bash run: ./ci/run.sh ${{ matrix.target }} - # Configure buildx to use Docker layer caching - - uses: docker/setup-buildx-action@v3 - if: matrix.os == 'ubuntu-24.04' - # Otherwise we use our docker containers to run builds - name: Run in Docker if: matrix.os == 'ubuntu-24.04' - run: cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} + run: ./ci/run-docker.sh ${{ matrix.target }} - name: Print test logs if available if: always() @@ -193,7 +192,7 @@ jobs: rustup default nightly rustup component add clippy - uses: Swatinem/rust-cache@v2 - - run: cargo clippy -- -D clippy::all + - run: cargo clippy --workspace --all-targets 
benchmarks: name: Benchmarks diff --git a/ci/run-docker.sh b/ci/run-docker.sh index 58838fadf..d0122dee5 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -7,6 +7,13 @@ set -euxo pipefail host_arch="$(uname -m | sed 's/arm64/aarch64/')" +# Directories and files that do not yet exist need to be created before +# calling docker, otherwise docker will create them but they will be owned +# by root. +mkdir -p target +cargo generate-lockfile +cargo generate-lockfile --manifest-path builtins-test-intrinsics/Cargo.toml + run() { local target="$1" @@ -19,12 +26,6 @@ run() { echo "target is emulated" fi - # Directories and files that do not yet exist need to be created before - # calling docker, otherwise docker will create them but they will be owned - # by root. - mkdir -p target - cargo generate-lockfile --manifest-path builtins-test-intrinsics/Cargo.toml - run_cmd="HOME=/tmp" if [ "${GITHUB_ACTIONS:-}" = "true" ]; then From 9d0d854318af26fd73714747bcf61c29734fea52 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 08:22:36 +0000 Subject: [PATCH 1312/1459] ci: Configure release-plz to run semver checks on `libm` --- .release-plz.toml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.release-plz.toml b/.release-plz.toml index e32f67610..95d7517fe 100644 --- a/.release-plz.toml +++ b/.release-plz.toml @@ -1,7 +1,10 @@ -[workspace] +[[package]] +name = "compiler_builtins" semver_check = false - # As part of the release process, we delete `libm/Cargo.toml`. Since # this is only run in CI, we shouldn't need to worry about it. allow_dirty = true publish_allow_dirty = true + +[[package]] +name = "libm" From 6e9e3d855b405b91bb85bc50b3d78619630a61d8 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 08:47:02 +0000 Subject: [PATCH 1313/1459] ci: Remove the old libm workflow file All jobs are now run as part of `compiler-builtins`. 
--- etc/libm/.github/workflows/main.yaml | 324 --------------------------- 1 file changed, 324 deletions(-) delete mode 100644 etc/libm/.github/workflows/main.yaml diff --git a/etc/libm/.github/workflows/main.yaml b/etc/libm/.github/workflows/main.yaml deleted file mode 100644 index 5ce0dbc26..000000000 --- a/etc/libm/.github/workflows/main.yaml +++ /dev/null @@ -1,324 +0,0 @@ -name: CI -on: - push: - branches: - - master - pull_request: - -concurrency: - # Make sure that new pushes cancel running jobs - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -env: - CARGO_TERM_COLOR: always - RUSTDOCFLAGS: -Dwarnings - RUSTFLAGS: -Dwarnings - RUST_BACKTRACE: full - BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducable results - -jobs: - test: - name: Build and test - timeout-minutes: 60 - strategy: - fail-fast: false - matrix: - include: - - target: aarch64-apple-darwin - os: macos-15 - # FIXME: pinned due to https://github.com/llvm/llvm-project/issues/127804 - channel: nightly-2025-02-07 - - target: aarch64-unknown-linux-gnu - os: ubuntu-24.04-arm - # FIXME: pinned due to https://github.com/llvm/llvm-project/issues/127804 - channel: nightly-2025-02-07 - - target: aarch64-pc-windows-msvc - os: windows-2025 - build_only: 1 # Can't run on x86 hosts - - target: arm-unknown-linux-gnueabi - os: ubuntu-24.04 - - target: arm-unknown-linux-gnueabihf - os: ubuntu-24.04 - - target: armv7-unknown-linux-gnueabihf - os: ubuntu-24.04 - - target: i586-unknown-linux-gnu - os: ubuntu-24.04 - - target: i686-unknown-linux-gnu - os: ubuntu-24.04 - - target: loongarch64-unknown-linux-gnu - os: ubuntu-24.04 - - target: powerpc-unknown-linux-gnu - os: ubuntu-24.04 - - target: powerpc64-unknown-linux-gnu - os: ubuntu-24.04 - - target: powerpc64le-unknown-linux-gnu - os: ubuntu-24.04 - - target: riscv64gc-unknown-linux-gnu - os: ubuntu-24.04 - - target: thumbv6m-none-eabi - os: ubuntu-24.04 - - target: 
thumbv7em-none-eabi - os: ubuntu-24.04 - - target: thumbv7em-none-eabihf - os: ubuntu-24.04 - - target: thumbv7m-none-eabi - os: ubuntu-24.04 - - target: x86_64-unknown-linux-gnu - os: ubuntu-24.04 - - target: x86_64-apple-darwin - os: macos-13 - - target: wasm32-unknown-unknown - os: ubuntu-24.04 - build_only: 1 - - target: i686-pc-windows-msvc - os: windows-2025 - - target: x86_64-pc-windows-msvc - os: windows-2025 - - target: i686-pc-windows-gnu - os: windows-2025 - # FIXME: pinned due to https://github.com/rust-lang/rust/issues/136795 - channel: nightly-2025-02-07-i686-gnu - - target: x86_64-pc-windows-gnu - os: windows-2025 - channel: nightly-x86_64-gnu - runs-on: ${{ matrix.os }} - env: - BUILD_ONLY: ${{ matrix.build_only }} - steps: - - name: Print runner information - run: uname -a - - uses: actions/checkout@v4 - with: - submodules: true - - name: Install Rust (rustup) - shell: bash - run: | - channel="nightly" - # Account for channels that have required components (MinGW) - [ -n "${{ matrix.channel }}" ] && channel="${{ matrix.channel }}" - rustup update "$channel" --no-self-update - rustup default "$channel" - rustup target add "${{ matrix.target }}" - rustup component add clippy llvm-tools-preview - - uses: taiki-e/install-action@nextest - - uses: Swatinem/rust-cache@v2 - with: - key: ${{ matrix.target }} - - - name: Verify API list - if: matrix.os == 'ubuntu-24.04' - run: python3 etc/update-api-list.py --check - - # Non-linux tests just use our raw script - - name: Run locally - if: matrix.os != 'ubuntu-24.04' || contains(matrix.target, 'wasm') - shell: bash - run: ./ci/run.sh ${{ matrix.target }} - - # Otherwise we use our docker containers to run builds - - name: Run in Docker - if: matrix.os == 'ubuntu-24.04' && !contains(matrix.target, 'wasm') - run: | - rustup target add x86_64-unknown-linux-musl - cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} - - - name: Print test logs if available - if: always() - run: if [ -f 
"target/test-log.txt" ]; then cat target/test-log.txt; fi - shell: bash - - clippy: - name: Clippy - runs-on: ubuntu-24.04 - timeout-minutes: 10 - steps: - - uses: actions/checkout@master - with: - submodules: true - - name: Install Rust - run: | - rustup update nightly --no-self-update - rustup default nightly - rustup component add clippy - - uses: Swatinem/rust-cache@v2 - - run: cargo clippy --all --all-features --all-targets - - builtins: - name: Check use with compiler-builtins - runs-on: ubuntu-24.04 - timeout-minutes: 10 - steps: - - uses: actions/checkout@master - - name: Install Rust - run: rustup update nightly --no-self-update && rustup default nightly - - uses: Swatinem/rust-cache@v2 - - run: cargo check --manifest-path crates/compiler-builtins-smoke-test/Cargo.toml - - run: cargo test --manifest-path crates/compiler-builtins-smoke-test/Cargo.toml - - benchmarks: - name: Benchmarks - runs-on: ubuntu-24.04 - timeout-minutes: 20 - steps: - - uses: actions/checkout@master - with: - submodules: true - - uses: taiki-e/install-action@cargo-binstall - - - name: Set up dependencies - run: | - sudo apt update - sudo apt install -y valgrind gdb libc6-dbg # Needed for iai-callgrind - rustup update "$BENCHMARK_RUSTC" --no-self-update - rustup default "$BENCHMARK_RUSTC" - # Install the version of iai-callgrind-runner that is specified in Cargo.toml - iai_version="$(cargo metadata --format-version=1 --features icount | - jq -r '.packages[] | select(.name == "iai-callgrind").version')" - cargo binstall -y iai-callgrind-runner --version "$iai_version" - sudo apt-get install valgrind - - - uses: Swatinem/rust-cache@v2 - - - name: Run icount benchmarks - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - PR_NUMBER: ${{ github.event.pull_request.number }} - run: ./ci/bench-icount.sh - - - name: Upload the benchmark baseline - uses: actions/upload-artifact@v4 - with: - name: ${{ env.BASELINE_NAME }} - path: ${{ env.BASELINE_NAME }}.tar.xz - - - name: Run wall time 
benchmarks - run: | - # Always use the same seed for benchmarks. Ideally we should switch to a - # non-random generator. - export LIBM_SEED=benchesbenchesbenchesbencheswoo! - cargo bench --all --features short-benchmarks,build-musl,force-soft-floats - - - name: Print test logs if available - if: always() - run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi - shell: bash - - msrv: - name: Check MSRV - runs-on: ubuntu-24.04 - timeout-minutes: 10 - env: - RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings` - steps: - - uses: actions/checkout@master - - name: Install Rust - run: | - msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' libm/Cargo.toml)" - echo "MSRV: $msrv" - rustup update "$msrv" --no-self-update && rustup default "$msrv" - - uses: Swatinem/rust-cache@v2 - - run: | - # FIXME(msrv): Remove the workspace Cargo.toml so 1.63 cargo doesn't see - # `edition = "2024"` and get spooked. - rm Cargo.toml - cargo build --manifest-path libm/Cargo.toml - - rustfmt: - name: Rustfmt - runs-on: ubuntu-24.04 - timeout-minutes: 10 - steps: - - uses: actions/checkout@master - - name: Install Rust - run: | - rustup update nightly --no-self-update - rustup default nightly - rustup component add rustfmt - - run: cargo fmt -- --check - - # Determine which extensive tests should be run based on changed files. 
- calculate_extensive_matrix: - name: Calculate job matrix - runs-on: ubuntu-24.04 - timeout-minutes: 10 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - PR_NUMBER: ${{ github.event.pull_request.number }} - outputs: - matrix: ${{ steps.script.outputs.matrix }} - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 100 - - name: Fetch pull request ref - run: git fetch origin "$GITHUB_REF:$GITHUB_REF" - if: github.event_name == 'pull_request' - - run: python3 ci/ci-util.py generate-matrix >> "$GITHUB_OUTPUT" - id: script - - extensive: - name: Extensive tests for ${{ matrix.ty }} - needs: - # Wait on `clippy` so we have some confidence that the crate will build - - clippy - - calculate_extensive_matrix - runs-on: ubuntu-24.04 - timeout-minutes: 240 # 4 hours - strategy: - matrix: - # Use the output from `calculate_extensive_matrix` to calculate the matrix - # FIXME: it would be better to run all jobs (i.e. all types) but mark those that - # didn't change as skipped, rather than completely excluding the job. However, - # this is not currently possible https://github.com/actions/runner/issues/1985. - include: ${{ fromJSON(needs.calculate_extensive_matrix.outputs.matrix).matrix }} - env: - TO_TEST: ${{ matrix.to_test }} - steps: - - uses: actions/checkout@v4 - with: - submodules: true - - name: Install Rust - run: | - rustup update nightly --no-self-update - rustup default nightly - - uses: Swatinem/rust-cache@v2 - - name: Run extensive tests - run: | - echo "Tests to run: '$TO_TEST'" - if [ -z "$TO_TEST" ]; then - echo "No tests to run, exiting." 
- exit - fi - - set -x - - # Run the non-extensive tests first to catch any easy failures - cargo t --profile release-checked -- "$TO_TEST" - - LIBM_EXTENSIVE_TESTS="$TO_TEST" cargo t \ - --features build-mpfr,unstable,force-soft-floats \ - --profile release-checked \ - -- extensive - - name: Print test logs if available - run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi - shell: bash - - success: - needs: - - test - - builtins - - benchmarks - - msrv - - rustfmt - - extensive - runs-on: ubuntu-24.04 - timeout-minutes: 10 - # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency - # failed" as success. So we have to do some contortions to ensure the job fails if any of its - # dependencies fails. - if: always() # make sure this is never "skipped" - steps: - # Manually check the status of all dependencies. `if: failure()` does not work. - - name: check if any dependency failed - run: jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}' From 09584ed44889420eb9f45094399004d226b1d975 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 08:54:11 +0000 Subject: [PATCH 1314/1459] Add panic-handler to the workspace This crate does not interact with features so there isn't any reason it can't become part of the workspace. 
--- Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.toml b/Cargo.toml index c3737a042..75bb81ec1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ members = [ "compiler-builtins", "crates/libm-macros", "crates/musl-math-sys", + "crates/panic-handler", "crates/util", "libm", "libm-test", From 6675cd13603b7831c2401a929ce0fcc7ef66a54a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 20 Apr 2025 18:39:53 +0000 Subject: [PATCH 1315/1459] ci: Fix release-plz configuration --- .release-plz.toml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.release-plz.toml b/.release-plz.toml index 95d7517fe..95e91a46c 100644 --- a/.release-plz.toml +++ b/.release-plz.toml @@ -1,10 +1,12 @@ -[[package]] -name = "compiler_builtins" -semver_check = false +[workspace] # As part of the release process, we delete `libm/Cargo.toml`. Since # this is only run in CI, we shouldn't need to worry about it. allow_dirty = true publish_allow_dirty = true +[[package]] +name = "compiler_builtins" +semver_check = false + [[package]] name = "libm" From 74d2c455beb409921f96a2e8e0ae2ad9d3e015d6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 21 Apr 2025 06:35:12 +0000 Subject: [PATCH 1316/1459] Remove `unsafe` from `naked_asm!` blocks This was changed in a recent nightly so the unsafety is only in the attribute, `#[unsafe(naked)]`. --- compiler-builtins/src/aarch64_linux.rs | 16 ++++++++-------- compiler-builtins/src/macros.rs | 3 ++- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/compiler-builtins/src/aarch64_linux.rs b/compiler-builtins/src/aarch64_linux.rs index 5515dbfc4..e238d0237 100644 --- a/compiler-builtins/src/aarch64_linux.rs +++ b/compiler-builtins/src/aarch64_linux.rs @@ -136,7 +136,7 @@ macro_rules! compare_and_swap { expected: int_ty!($bytes), desired: int_ty!($bytes), ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { // We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap. 
- unsafe { core::arch::naked_asm! { + core::arch::naked_asm! { // UXT s(tmp0), s(0) concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)), "0:", @@ -150,7 +150,7 @@ macro_rules! compare_and_swap { "cbnz w17, 0b", "1:", "ret", - } } + } } } }; @@ -165,7 +165,7 @@ macro_rules! compare_and_swap_i128 { pub unsafe extern "C" fn $name ( expected: i128, desired: i128, ptr: *mut i128 ) -> i128 { - unsafe { core::arch::naked_asm! { + core::arch::naked_asm! { "mov x16, x0", "mov x17, x1", "0:", @@ -179,7 +179,7 @@ macro_rules! compare_and_swap_i128 { "cbnz w15, 0b", "1:", "ret", - } } + } } } }; @@ -194,7 +194,7 @@ macro_rules! swap { pub unsafe extern "C" fn $name ( left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { - unsafe { core::arch::naked_asm! { + core::arch::naked_asm! { // mov s(tmp0), s(0) concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)), "0:", @@ -204,7 +204,7 @@ macro_rules! swap { concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"), "cbnz w17, 0b", "ret", - } } + } } } }; @@ -219,7 +219,7 @@ macro_rules! fetch_op { pub unsafe extern "C" fn $name ( val: int_ty!($bytes), ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { - unsafe { core::arch::naked_asm! { + core::arch::naked_asm! { // mov s(tmp0), s(0) concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)), "0:", @@ -231,7 +231,7 @@ macro_rules! fetch_op { concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"), "cbnz w15, 0b", "ret", - } } + } } } } diff --git a/compiler-builtins/src/macros.rs b/compiler-builtins/src/macros.rs index b83414ce2..4fa53656e 100644 --- a/compiler-builtins/src/macros.rs +++ b/compiler-builtins/src/macros.rs @@ -436,11 +436,12 @@ macro_rules! 
intrinsics { // FIXME: when bootstrap supports `#[unsafe(naked)]` this duplication can be removed #[cfg(bootstrap)] #[naked] + #[allow(unused_unsafe)] $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { - $($body)* + unsafe { $($body)* } } #[cfg(not(bootstrap))] From e823bb7004056919f6cc602c6d0763e8542ee80f Mon Sep 17 00:00:00 2001 From: Patryk Wychowaniec Date: Sun, 20 Apr 2025 09:05:10 +0200 Subject: [PATCH 1317/1459] avr: Provide `abort()` --- compiler-builtins/src/avr.rs | 23 +++++++++++++++++++++++ compiler-builtins/src/lib.rs | 3 +++ 2 files changed, 26 insertions(+) create mode 100644 compiler-builtins/src/avr.rs diff --git a/compiler-builtins/src/avr.rs b/compiler-builtins/src/avr.rs new file mode 100644 index 000000000..359a1d1ac --- /dev/null +++ b/compiler-builtins/src/avr.rs @@ -0,0 +1,23 @@ +intrinsics! { + pub unsafe extern "C" fn abort() -> ! { + // On AVRs, an architecture that doesn't support traps, unreachable code + // paths get lowered into calls to `abort`: + // + // https://github.com/llvm/llvm-project/blob/cbe8f3ad7621e402b050e768f400ff0d19c3aedd/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp#L4462 + // + // When control gets here, it means that either core::intrinsics::abort() + // was called or an undefined behavior has occurred, so there's not that + // much we can do to recover - we can't `panic!()`, because for all we + // know the environment is gone now, so panicking might end up with us + // getting back to this very function. + // + // So let's do the next best thing, loop. + // + // Alternatively we could (try to) restart the program, but since + // undefined behavior is undefined, there's really no obligation for us + // to do anything here - for all we care, we could just set the chip on + // fire; but that'd be bad for the environment. 
+ + loop {} + } +} diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs index 16de96b4d..067855603 100644 --- a/compiler-builtins/src/lib.rs +++ b/compiler-builtins/src/lib.rs @@ -63,6 +63,9 @@ pub mod aarch64_linux; ))] pub mod arm_linux; +#[cfg(target_arch = "avr")] +pub mod avr; + #[cfg(target_arch = "hexagon")] pub mod hexagon; From 2691b35123b60d259d42d5608ae0fc6919443b3b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 21 Apr 2025 07:15:48 +0000 Subject: [PATCH 1318/1459] Update licensing information after repository refactoring In order to disambiguate things now that libm is part of the compiler-builtins repository, do the following: * Mention libm in LICENSE.txt * Clarify the default license for crates other than libm and compiler-builtins * Add an explicit license field to Cargo.toml for all other crates --- LICENSE.txt | 17 +++++++++++------ builtins-test-intrinsics/Cargo.toml | 1 + builtins-test/Cargo.toml | 1 + crates/libm-macros/Cargo.toml | 1 + crates/musl-math-sys/Cargo.toml | 1 + crates/util/Cargo.toml | 1 + libm-test/Cargo.toml | 1 + 7 files changed, 17 insertions(+), 6 deletions(-) diff --git a/LICENSE.txt b/LICENSE.txt index 367e3538d..00ae6140b 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,10 +1,14 @@ -compiler-builtins as a whole is available for use under both the MIT license +The compiler-builtins crate is available for use under both the MIT license and the Apache-2.0 license with the LLVM exception (MIT AND Apache-2.0 WITH LLVM-exception). -As a contributor, you agree that your code can be used under either the MIT -license, or the Apache-2.0 license, or the Apache-2.0 license with the LLVM -exception. +The libm crate is available for use under the MIT license. + +As a contributor, you agree that your code may be used under any of the +following: the MIT license, the Apache-2.0 license, or the Apache-2.0 license +with the LLVM exception. 
In other words, original (non-derivative) work is +licensed under MIT OR Apache-2.0 OR Apache-2.0 WITH LLVM-exception. This is +the default license for all other source in this repository. Text of the relevant licenses is provided below: @@ -263,8 +267,9 @@ license: * Work derived from compiler-rt after 2019-01-19 is usable under the Apache-2.0 license with the LLVM exception. -* The bundled `math` module is from rust-lang/libm, usable under the MIT - license. See https://github.com/rust-lang/libm for details. +* The bundled `math` module is from the libm crate, usable under the MIT + license. For further details and copyrights, see libm/LICENSE.txt at + https://github.com/rust-lang/compiler-builtins. Additionally, some source files may contain comments with specific copyrights or licenses. diff --git a/builtins-test-intrinsics/Cargo.toml b/builtins-test-intrinsics/Cargo.toml index 6d88cbec9..6e10628a4 100644 --- a/builtins-test-intrinsics/Cargo.toml +++ b/builtins-test-intrinsics/Cargo.toml @@ -3,6 +3,7 @@ name = "builtins-test-intrinsics" version = "0.1.0" edition = "2021" publish = false +license = "MIT OR Apache-2.0" [dependencies] compiler_builtins = { path = "../compiler-builtins", features = ["compiler-builtins"]} diff --git a/builtins-test/Cargo.toml b/builtins-test/Cargo.toml index 18185d8fe..f7bcb52b4 100644 --- a/builtins-test/Cargo.toml +++ b/builtins-test/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" authors = ["Alex Crichton "] edition = "2024" publish = false +license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" [dependencies] # For fuzzing tests we want a deterministic seedable RNG. 
We also eliminate potential diff --git a/crates/libm-macros/Cargo.toml b/crates/libm-macros/Cargo.toml index 50c869db7..3929854f0 100644 --- a/crates/libm-macros/Cargo.toml +++ b/crates/libm-macros/Cargo.toml @@ -3,6 +3,7 @@ name = "libm-macros" version = "0.1.0" edition = "2024" publish = false +license = "MIT OR Apache-2.0" [lib] proc-macro = true diff --git a/crates/musl-math-sys/Cargo.toml b/crates/musl-math-sys/Cargo.toml index 9e866a970..d3fb147e5 100644 --- a/crates/musl-math-sys/Cargo.toml +++ b/crates/musl-math-sys/Cargo.toml @@ -3,6 +3,7 @@ name = "musl-math-sys" version = "0.1.0" edition = "2024" publish = false +license = "MIT OR Apache-2.0" [dependencies] diff --git a/crates/util/Cargo.toml b/crates/util/Cargo.toml index ae37a7238..614c54bd8 100644 --- a/crates/util/Cargo.toml +++ b/crates/util/Cargo.toml @@ -3,6 +3,7 @@ name = "util" version = "0.1.0" edition = "2024" publish = false +license = "MIT OR Apache-2.0" [features] default = ["build-musl", "build-mpfr", "unstable-float"] diff --git a/libm-test/Cargo.toml b/libm-test/Cargo.toml index 6fd49774e..7a306e735 100644 --- a/libm-test/Cargo.toml +++ b/libm-test/Cargo.toml @@ -3,6 +3,7 @@ name = "libm-test" version = "0.1.0" edition = "2024" publish = false +license = "MIT OR Apache-2.0" [features] default = ["build-mpfr", "build-musl", "unstable-float"] From 07521a632e78cb040e079d567e50664123396d18 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 21 Apr 2025 07:22:24 +0000 Subject: [PATCH 1319/1459] Move README.md to compiler-builtins, CONTRIBUTING.md to root Do this in a commit so git tracks the move; a new README will be introduced in the root, and CONTRIBUTING will be updated to apply to both crates. 
--- libm/CONTRIBUTING.md => CONTRIBUTING.md | 0 compiler-builtins/Cargo.toml | 2 +- README.md => compiler-builtins/README.md | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename libm/CONTRIBUTING.md => CONTRIBUTING.md (100%) rename README.md => compiler-builtins/README.md (100%) diff --git a/libm/CONTRIBUTING.md b/CONTRIBUTING.md similarity index 100% rename from libm/CONTRIBUTING.md rename to CONTRIBUTING.md diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index a014baf04..e2a6c0a74 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -18,8 +18,8 @@ include = [ '/configure.rs', '/src/*', '../LICENSE.txt', - '../README.md', '../compiler-rt/*', + 'README.md', 'libm/src/math/*', ] links = 'compiler-rt' diff --git a/README.md b/compiler-builtins/README.md similarity index 100% rename from README.md rename to compiler-builtins/README.md From 33dedd29d5a3f76b81bdc817e7d6499e0dd5c362 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 21 Apr 2025 08:46:19 +0000 Subject: [PATCH 1320/1459] Update README and CONTRIBUTING for the new repository layout --- CONTRIBUTING.md | 189 +++++++++++++++++++++++++---------- README.md | 27 +++++ compiler-builtins/Cargo.toml | 23 ++--- compiler-builtins/README.md | 76 ++++---------- libm/Cargo.toml | 2 +- libm/README.md | 54 ++++------ 6 files changed, 213 insertions(+), 158 deletions(-) create mode 100644 README.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dc4006035..9f67cfc31 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,68 +1,115 @@ # How to contribute -- Pick your favorite math function from the [issue tracker]. -- Look for the C implementation of the function in the [MUSL source code][src]. -- Copy paste the C code into a Rust file in the `src/math` directory and adjust - `src/math/mod.rs` accordingly. Also, uncomment the corresponding trait method - in `src/lib.rs`. 
-- Write some simple tests in your module (using `#[test]`) -- Run `cargo test` to make sure it works. Full tests are only run when enabling - features, see [Testing](#testing) below. -- Send us a pull request! Make sure to run `cargo fmt` on your code before - sending the PR. Also include "closes #42" in the PR description to close the - corresponding issue. -- :tada: - -[issue tracker]: https://github.com/rust-lang/libm/issues -[src]: https://git.musl-libc.org/cgit/musl/tree/src/math -[`src/math/truncf.rs`]: https://github.com/rust-lang/libm/blob/master/src/math/truncf.rs - -Check [PR #65] for an example. - -[PR #65]: https://github.com/rust-lang/libm/pull/65 +## compiler-builtins + +1. From the [pending list](compiler-builtins/README.md#progress), pick one or + more intrinsics. +2. Port the version from [`compiler-rt`] and, if applicable, their + [tests][rt-tests]. Note that this crate has generic implementations for a lot + of routines, which may be usable without porting the entire implementation. +3. Add a test to `builtins-test`, comparing the behavior of the ported + intrinsic(s) with their implementation on the testing host. +4. Add the intrinsic to `builtins-test-intrinsics/src/main.rs` to verify it can + be linked on all targets. +5. Send a Pull Request (PR) :tada:. + +[`compiler-rt`]: https://github.com/llvm/llvm-project/tree/b6820c35c59a4da3e59c11f657093ffbd79ae1db/compiler-rt/lib/builtins +[rt-tests]: https://github.com/llvm/llvm-project/tree/b6820c35c59a4da3e59c11f657093ffbd79ae1db/compiler-rt/test/builtins + +## Porting Reminders + +1. [Rust][prec-rust] and [C][prec-c] have slightly different operator + precedence. C evaluates comparisons (`== !=`) before bitwise operations + (`& | ^`), while Rust evaluates the other way. +2. C assumes wrapping operations everywhere. Rust panics on overflow when in + debug mode. Consider using the [Wrapping][wrap-ty] type or the explicit + [wrapping_*][wrap-fn] functions where applicable. +3. 
Note [C implicit casts][casts], especially integer promotion. Rust is much + more explicit about casting, so be sure that any cast which affects the + output is ported to the Rust implementation. +4. Rust has [many functions][i32] for integer or floating point manipulation in + the standard library. Consider using one of these functions rather than + porting a new one. + +[prec-rust]: https://doc.rust-lang.org/reference/expressions.html#expression-precedence +[prec-c]: http://en.cppreference.com/w/c/language/operator_precedence +[wrap-ty]: https://doc.rust-lang.org/core/num/struct.Wrapping.html +[wrap-fn]: https://doc.rust-lang.org/std/primitive.i32.html#method.wrapping_add +[casts]: http://en.cppreference.com/w/cpp/language/implicit_conversion +[i32]: https://doc.rust-lang.org/std/primitive.i32.html ## Tips and tricks -- *IMPORTANT* The code in this crate will end up being used in the `core` crate so it can **not** - have any external dependencies (other than `core` itself). - -- Only use relative imports within the `math` directory / module, e.g. `use self::fabs::fabs` or -`use super::k_cos`. Absolute imports from core are OK, e.g. `use core::u64`. - -- To reinterpret a float as an integer use the `to_bits` method. The MUSL code uses the - `GET_FLOAT_WORD` macro, or a union, to do this operation. - -- To reinterpret an integer as a float use the `f32::from_bits` constructor. The MUSL code uses the - `SET_FLOAT_WORD` macro, or a union, to do this operation. - +- _IMPORTANT_ The code in this crate will end up being used in the `core` crate + so it can **not** have any external dependencies (other than a subset of + `core` itself). +- Only use relative imports within the `math` directory / module, e.g. + `use self::fabs::fabs` or `use super::k_cos`. Absolute imports from core are + OK, e.g. `use core::u64`. +- To reinterpret a float as an integer use the `to_bits` method. The MUSL code + uses the `GET_FLOAT_WORD` macro, or a union, to do this operation. 
+- To reinterpret an integer as a float use the `f32::from_bits` constructor. The + MUSL code uses the `SET_FLOAT_WORD` macro, or a union, to do this operation. - You may use other methods from core like `f64::is_nan`, etc. as appropriate. - -- If you're implementing one of the private double-underscore functions, take a look at the - "source" name in the comment at the top for an idea for alternate naming. For example, `__sin` - was renamed to `k_sin` after the FreeBSD source code naming. Do `use` these private functions in - `mod.rs`. - -- You may encounter weird literals like `0x1p127f` in the MUSL code. These are hexadecimal floating - point literals. Rust (the language) doesn't support these kind of literals. This crate provides - two macros, `hf32!` and `hf64!`, which convert string literals to floats at compile time. +- Rust does not have hex float literals. This crate provides the `hf16!`, + `hf32!`, `hf64!`, and `hf128!` macros, which convert string literals to floats at + compile time. ```rust assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000); assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000); ``` -- Rust code panics on arithmetic overflows when not optimized. You may need to use the [`Wrapping`] - newtype to avoid this problem, or individual methods like [`wrapping_add`]. +- Rust code panics on arithmetic overflows when not optimized. You may need to + use the [`Wrapping`] newtype to avoid this problem, or individual methods like + [`wrapping_add`]. [`Wrapping`]: https://doc.rust-lang.org/std/num/struct.Wrapping.html [`wrapping_add`]: https://doc.rust-lang.org/std/primitive.u32.html#method.wrapping_add ## Testing -Normal tests can be executed with: +Testing for these crates can be somewhat complex, so feel free to rely on CI. + +The easiest way to replicate CI testing is using Docker. This can be done by +running `./ci/run-docker.sh [target]`. If no target is specified, all targets +will be run. 
+ +Tests can also be run without Docker: ```sh -# Tests against musl require that the submodule is up to date. +# Run basic tests +# +# --no-default-features always needs to be passed, an unfortunate limitation +# since the `#![compiler_builtins]` feature is enabled by default. +cargo test --workspace --no-default-features + +# Test with all interesting features +cargo test --workspace --no-default-features \ + --features arch,unstable-float,unstable-intrinsics,mem + +# Run with more detailed tests for libm +cargo test --workspace --no-default-features \ + --features arch,unstable-float,unstable-intrinsics,mem \ + --features build-mpfr,build-musl \ + --profile release-checked +``` + +The multiprecision tests use the [`rug`] crate for bindings to MPFR. MPFR can be +difficult to build on non-Unix systems, refer to [`gmp_mpfr_sys`] for help. + +`build-musl` does not build with MSVC, Wasm, or Thumb. + +[`rug`]: https://docs.rs/rug/latest/rug/ +[`gmp_mpfr_sys`]: https://docs.rs/gmp-mpfr-sys/1.6.4/gmp_mpfr_sys/ + +In order to run all tests, some dependencies may be required: + +```sh +# Allow testing compiler-builtins +./ci/download-compiler-rt.sh + +# Optional, initialize musl for `--features build-musl` git submodule init git submodule update @@ -70,13 +117,51 @@ git submodule update cargo test --release ``` -If you are on a system that cannot build musl or MPFR, passing -`--no-default-features` will run some limited tests. +### Extensive tests -The multiprecision tests use the [`rug`] crate for bindings to MPFR. MPFR can -be difficult to build on non-Unix systems, refer to [`gmp_mpfr_sys`] for help. +Libm also has tests that are exhaustive (for single-argument `f32` and 1- or 2- +argument `f16`) or extensive (for all other float and argument combinations). +These take quite a long time to run, but are launched in CI when relevant files +are changed. -`build-musl` does not build with MSVC, Wasm, or Thumb. 
+Exhaustive tests can be selected by passing an environment variable: -[`rug`]: https://docs.rs/rug/latest/rug/ -[`gmp_mpfr_sys`]: https://docs.rs/gmp-mpfr-sys/1.6.4/gmp_mpfr_sys/ +```sh +LIBM_EXTENSIVE_TESTS=sqrt,sqrtf cargo test --features build-mpfr \ + --test z_extensive \ + --profile release-checked + +# Run all tests for one type +LIBM_EXTENSIVE_TESTS=all_f16 cargo test ... + +# Ensure `f64` tests can run exhaustively. Estimated completion time for a +# single test is 57306 years on my machine so this may be worth skipping. +LIBM_EXTENSIVE_TESTS=all LIBM_EXTENSIVE_ITERATIONS=18446744073709551615 cargo test ... +``` + +## Benchmarking + +Regular walltime benchmarks can be run with `cargo bench`: + +```sh +cargo bench --no-default-features \ + --features arch,unstable-float,unstable-intrinsics,mem \ + --features benchmarking-reports +``` + +There are also benchmarks that check instruction count behind the `icount` +feature. These require [`iai-callgrind-runner`] (via Cargo) and [Valgrind] +to be installed, which means these only run on limited platforms. + +Instruction count benchmarks are run as part of CI to flag performance +regressions. + +```sh +cargo bench --no-default-features \ + --features arch,unstable-float,unstable-intrinsics,mem \ + --features icount \ + --bench icount --bench mem_icount +``` + +[`iai-callgrind-runner`]: https://crates.io/crates/iai-callgrind-runner +[Valgrind]: https://valgrind.org/ diff --git a/README.md b/README.md new file mode 100644 index 000000000..3130ff7b7 --- /dev/null +++ b/README.md @@ -0,0 +1,27 @@ +# `compiler-builtins` and `libm` + +This repository contains two main crates: + +* `compiler-builtins`: symbols that the compiler expects to be available at + link time +* `libm`: a Rust implementation of C math libraries, used to provide + implementations in `core`. + +More details are at [compiler-builtins/README.md](compiler-builtins/README.md) +and [libm/README.md](libm/README.md). 
+ +For instructions on contributing, see [CONTRIBUTING.md](CONTRIBUTING.md). + +## License + +* `libm` may be used under the [MIT License] +* `compiler-builtins` may be used under the [MIT License] and the + [Apache License, Version 2.0] with the LLVM exception. +* All original contributions must be under all of: the MIT license, the + Apache-2.0 license, and the Apache-2.0 license with the LLVM exception. + +More details are in [LICENSE.txt](LICENSE.txt) and +[libm/LICENSE.txt](libm/LICENSE.txt). + +[MIT License]: https://opensource.org/license/mit +[Apache License, Version 2.0]: https://www.apache.org/licenses/LICENSE-2.0 diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index e2a6c0a74..9e23c75a8 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -3,24 +3,21 @@ authors = ["Jorge Aparicio "] name = "compiler_builtins" version = "0.1.155" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" -readme = "../README.md" +readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" homepage = "https://github.com/rust-lang/compiler-builtins" documentation = "https://docs.rs/compiler_builtins" edition = "2021" -description = """ -Compiler intrinsics used by the Rust compiler. Also available for other targets -if necessary! -""" +description = "Compiler intrinsics used by the Rust compiler." 
include = [ - '/Cargo.toml', - '/build.rs', - '/configure.rs', - '/src/*', - '../LICENSE.txt', - '../compiler-rt/*', - 'README.md', - 'libm/src/math/*', + "../LICENSE.txt", + "../compiler-rt/*", + "/Cargo.toml", + "/build.rs", + "/configure.rs", + "/src/*", + "README.md", + "libm/src/math/*", ] links = 'compiler-rt' diff --git a/compiler-builtins/README.md b/compiler-builtins/README.md index e5350d58c..387b70c04 100644 --- a/compiler-builtins/README.md +++ b/compiler-builtins/README.md @@ -1,64 +1,18 @@ # `compiler-builtins` -This crate provides external symbols that the compiler expects to be available when -building Rust projects, typically software routines for basic operations that do not -have hardware support. It is largely a port of LLVM's [`compiler-rt`]. +This crate provides external symbols that the compiler expects to be available +when building Rust projects, typically software routines for basic operations +that do not have hardware support. It is largely a port of LLVM's +[`compiler-rt`]. -It is distributed as part of Rust's sysroot. +It is distributed as part of Rust's sysroot. `compiler-builtins` does not need +to be added as an explicit dependency in `Cargo.toml`. [`compiler-rt`]: https://github.com/llvm/llvm-project/tree/1b1dc505057322f4fa1110ef4f53c44347f52986/compiler-rt ## Contributing -1. Pick one or more intrinsics from the [pending list](#progress). -2. Fork this repository. -3. Port the intrinsic(s) and their corresponding [unit tests][1] from their - [C implementation][2] to Rust. -4. Add a test to compare the behavior of the ported intrinsic(s) with their - implementation on the testing host. -5. Add the intrinsic to `builtins-test-intrinsics/src/main.rs` to verify it - can be linked on all targets. -6. Send a Pull Request (PR). -7. Once the PR passes our extensive testing infrastructure, we'll merge it! -8. 
Celebrate :tada: - -[1]: https://github.com/rust-lang/llvm-project/tree/9e3de9490ff580cd484fbfa2908292b4838d56e7/compiler-rt/test/builtins/Unit -[2]: https://github.com/rust-lang/llvm-project/tree/9e3de9490ff580cd484fbfa2908292b4838d56e7/compiler-rt/lib/builtins -[3]: https://github.com/rust-lang/compiler-builtins/actions - -### Porting Reminders - -1. [Rust][5a] and [C][5b] have slightly different operator precedence. C evaluates comparisons (`== !=`) before bitwise operations (`& | ^`), while Rust evaluates the other way. -2. C assumes wrapping operations everywhere. Rust panics on overflow when in debug mode. Consider using the [Wrapping][6] type or the explicit [wrapping_*][7] functions where applicable. -3. Note [C implicit casts][8], especially integer promotion. Rust is much more explicit about casting, so be sure that any cast which affects the output is ported to the Rust implementation. -4. Rust has [many functions][9] for integer or floating point manipulation in the standard library. Consider using one of these functions rather than porting a new one. - -[5a]: https://doc.rust-lang.org/reference/expressions.html#expression-precedence -[5b]: http://en.cppreference.com/w/c/language/operator_precedence -[6]: https://doc.rust-lang.org/core/num/struct.Wrapping.html -[7]: https://doc.rust-lang.org/std/primitive.i32.html#method.wrapping_add -[8]: http://en.cppreference.com/w/cpp/language/implicit_conversion -[9]: https://doc.rust-lang.org/std/primitive.i32.html - -## Testing - -The easiest way to test locally is using Docker. This can be done by running -`./ci/run-docker.sh [target]`. If no target is specified, all targets will be -run. - -In order to run the full test suite, you will also need the C compiler runtime -to test against, located in a directory called `compiler-rt`. 
This can be -obtained with the following: - -```sh -curl -L -o rustc-llvm-20.1.tar.gz https://github.com/rust-lang/llvm-project/archive/rustc/20.1-2025-02-13.tar.gz -tar xzf rustc-llvm-20.1.tar.gz --strip-components 1 llvm-project-rustc-20.1-2025-02-13/compiler-rt -``` - -Local targets may also be tested with `./ci/run.sh [target]`. - -Note that testing may not work on all hosts, in which cases it is acceptable to -rely on CI. +See [CONTRIBUTING.md](../CONTRIBUTING.md). ## Progress @@ -468,9 +422,15 @@ Unsupported in any current target: used on old versions of 32-bit iOS with ARMv5 ## License -The compiler-builtins crate is dual licensed under both the University of -Illinois "BSD-Like" license and the MIT license. As a user of this code you may -choose to use it under either license. As a contributor, you agree to allow -your code to be used under both. +Usage is allowed under the [MIT License] and the [Apache License, Version 2.0] +with the LLVM exception. + +[MIT License]: https://opensource.org/license/mit +[Apache License, Version 2.0]: https://www.apache.org/licenses/LICENSE-2.0 + +### Contribution + +Contributions are licensed under the MIT License, the Apache License, +Version 2.0, and the Apache-2.0 license with the LLVM exception. -Full text of the relevant licenses is in LICENSE.TXT. +See [LICENSE.txt](../LICENSE.txt) for full details. diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 44154c1a8..4e3850bbf 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -7,7 +7,7 @@ keywords = ["libm", "math"] license = "MIT" name = "libm" readme = "README.md" -repository = "https://github.com/rust-lang/libm" +repository = "https://github.com/rust-lang/compiler-builtins" version = "0.2.11" edition = "2021" rust-version = "1.63" diff --git a/libm/README.md b/libm/README.md index 52d760a4f..349e892df 100644 --- a/libm/README.md +++ b/libm/README.md @@ -1,38 +1,26 @@ # `libm` -A port of [MUSL]'s libm to Rust. +A Rust implementation of the C math library. 
-[MUSL]: https://musl.libc.org/ +## Usage -## Goals +`libm` provides fallback implementations for Rust's [float math functions] in +`core`, and the [`core_float_math`] feature. If what is available suits your +needs, there is no need to add `libm` as a dependency. -The short term goal of this library is to [enable math support (e.g. `sin`, `atan2`) for the -`wasm32-unknown-unknown` target][wasm] (cf. [rust-lang/compiler-builtins][pr]). The longer -term goal is to enable [math support in the `core` crate][core]. +If more functionality is needed, this crate can also be used directly: -[wasm]: https://github.com/rust-lang/libm/milestone/1 -[pr]: https://github.com/rust-lang/compiler-builtins/pull/248 -[core]: https://github.com/rust-lang/libm/milestone/2 +```toml +[dependencies] +libm = "0.2.11" +``` -## Already usable - -This crate is [on crates.io] and can be used today in stable `#![no_std]` programs. - -The API documentation can be found [here](https://docs.rs/libm). - -[on crates.io]: https://crates.io/crates/libm - -## Benchmark -[benchmark]: #benchmark - -The benchmarks are located in `crates/libm-bench` and require a nightly Rust toolchain. -To run all benchmarks: - -> cargo +nightly bench --all +[float math functions]: https://doc.rust-lang.org/std/primitive.f32.html +[`core_float_math`]: https://github.com/rust-lang/rust/issues/137578 ## Contributing -Please check [CONTRIBUTING.md](CONTRIBUTING.md) +Please check [CONTRIBUTING.md](../CONTRIBUTING.md) ## Minimum Rust version policy @@ -40,17 +28,15 @@ This crate supports rustc 1.63 and newer. ## License -Usage is licensed under the MIT license ([LICENSE-MIT](LICENSE-MIT) or -https://opensource.org/licenses/MIT). - +Usage is under the MIT license, available at +<https://opensource.org/license/mit>. ### Contribution Contributions are licensed under both the MIT license and the Apache License, -Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or -https://www.apache.org/licenses/LICENSE-2.0). 
Unless you explicitly state -otherwise, any contribution intentionally submitted for inclusion in the work -by you, as defined in the Apache-2.0 license, shall be dual licensed as -mentioned, without any additional terms or conditions. +Version 2.0, available at <https://www.apache.org/licenses/LICENSE-2.0>. Unless +you explicitly state otherwise, any contribution intentionally submitted for +inclusion in the work by you, as defined in the Apache-2.0 license, shall be +dual licensed as mentioned, without any additional terms or conditions. -See `LICENSE.txt` for full details. +See [LICENSE.txt](LICENSE.txt) for full details. From b5f8003acc61241a05b631428aab9ea74cc55fe7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 21 Apr 2025 09:20:37 +0000 Subject: [PATCH 1321/1459] Fix compiler-builtins publish compiler-builtins currently wouldn't publish correctly because of a relative path to `libm` that doesn't get included in the package. Fix this by symlinking `libm` to within the `compiler-builtins` directory. Also symlink LICENSE.txt which lets us drop the `include` array in Cargo.toml. LICENSE.txt and compiler-rt were not being included anyway, since Cargo silently drops items that are not within the crate directory. 
--- compiler-builtins/Cargo.toml | 10 ---------- compiler-builtins/LICENSE.txt | 1 + compiler-builtins/src/lib.rs | 2 +- compiler-builtins/src/math/libm_math | 1 + compiler-builtins/src/{math.rs => math/mod.rs} | 9 ++++----- 5 files changed, 7 insertions(+), 16 deletions(-) create mode 120000 compiler-builtins/LICENSE.txt create mode 120000 compiler-builtins/src/math/libm_math rename compiler-builtins/src/{math.rs => math/mod.rs} (96%) diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index 9e23c75a8..1de37bd86 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -9,16 +9,6 @@ homepage = "https://github.com/rust-lang/compiler-builtins" documentation = "https://docs.rs/compiler_builtins" edition = "2021" description = "Compiler intrinsics used by the Rust compiler." -include = [ - "../LICENSE.txt", - "../compiler-rt/*", - "/Cargo.toml", - "/build.rs", - "/configure.rs", - "/src/*", - "README.md", - "libm/src/math/*", -] links = 'compiler-rt' [lib] diff --git a/compiler-builtins/LICENSE.txt b/compiler-builtins/LICENSE.txt new file mode 120000 index 000000000..4ab43736a --- /dev/null +++ b/compiler-builtins/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs index 067855603..7523a00cf 100644 --- a/compiler-builtins/src/lib.rs +++ b/compiler-builtins/src/lib.rs @@ -45,7 +45,7 @@ pub mod math; pub mod mem; // `libm` expects its `support` module to be available in the crate root. 
-use math::libm::support; +use math::libm_math::support; #[cfg(target_arch = "arm")] pub mod arm; diff --git a/compiler-builtins/src/math/libm_math b/compiler-builtins/src/math/libm_math new file mode 120000 index 000000000..4d65313c2 --- /dev/null +++ b/compiler-builtins/src/math/libm_math @@ -0,0 +1 @@ +../../../libm/src/math \ No newline at end of file diff --git a/compiler-builtins/src/math.rs b/compiler-builtins/src/math/mod.rs similarity index 96% rename from compiler-builtins/src/math.rs rename to compiler-builtins/src/math/mod.rs index 722374f8e..078feb9ff 100644 --- a/compiler-builtins/src/math.rs +++ b/compiler-builtins/src/math/mod.rs @@ -2,15 +2,14 @@ #[allow(dead_code)] #[allow(unused_imports)] #[allow(clippy::all)] -#[path = "../../libm/src/math/mod.rs"] -pub(crate) mod libm; +pub(crate) mod libm_math; macro_rules! libm_intrinsics { ($(fn $fun:ident($($iid:ident : $ity:ty),+) -> $oty:ty;)+) => { intrinsics! { $( pub extern "C" fn $fun($($iid: $ity),+) -> $oty { - $crate::math::libm::$fun($($iid),+) + $crate::math::libm_math::$fun($($iid),+) } )+ } @@ -185,13 +184,13 @@ pub mod partial_availability { // allow for windows (and other targets) intrinsics! { pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 { - let r = super::libm::lgamma_r(x); + let r = super::libm_math::lgamma_r(x); *s = r.1; r.0 } pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 { - let r = super::libm::lgammaf_r(x); + let r = super::libm_math::lgammaf_r(x); *s = r.1; r.0 } From 767597f061709a1570f034708ad10746c8db920f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 22 Apr 2025 02:23:44 +0000 Subject: [PATCH 1322/1459] ci: Fix extensive tests Move this to a script and ensure only `libm-test` gets built to avoid default feature issues with `compiler-builtins`. 
--- .github/workflows/main.yaml | 17 +---------------- ci/run-extensive.sh | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 16 deletions(-) create mode 100755 ci/run-extensive.sh diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index f7522a53d..d13dd6b0f 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -318,22 +318,7 @@ jobs: rustup default nightly - uses: Swatinem/rust-cache@v2 - name: Run extensive tests - run: | - echo "Tests to run: '$TO_TEST'" - if [ -z "$TO_TEST" ]; then - echo "No tests to run, exiting." - exit - fi - - set -x - - # Run the non-extensive tests first to catch any easy failures - cargo t --profile release-checked -- "$TO_TEST" - - LIBM_EXTENSIVE_TESTS="$TO_TEST" cargo test \ - --features build-mpfr,unstable,force-soft-floats \ - --profile release-checked \ - -- extensive + run: ./ci/run-extensive.sh - name: Print test logs if available run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi shell: bash diff --git a/ci/run-extensive.sh b/ci/run-extensive.sh new file mode 100755 index 000000000..4ba41a026 --- /dev/null +++ b/ci/run-extensive.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +set -euo pipefail + +echo "Tests to run: '$TO_TEST'" + +if [ -z "$TO_TEST" ]; then + echo "No tests to run, exiting." + exit +fi + +set -x + +test_cmd=( + cargo test + --package libm-test + --features "build-mpfr,libm/unstable,libm/force-soft-floats" + --profile release-checked +) + +# Run the non-extensive tests first to catch any easy failures +"${test_cmd[@]}" -- "$TO_TEST" + +LIBM_EXTENSIVE_TESTS="$TO_TEST" "${test_cmd[@]}" -- extensive From 4f4dbe8081bd4cc11211a61722bc017f4237ef86 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 22 Apr 2025 03:10:38 +0000 Subject: [PATCH 1323/1459] Rename the `public-test-deps` feature to `unstable-public-internals` `compiler-builtins` uses `public-test-deps`, `libm` uses `unstable-public-internals`. Consolidate these under the `libm` name. 
Once compiler-builtins is no longer published, this feature can probably be dropped. Also switch to `dep:` syntax for features that enable dependencies. --- builtins-test/Cargo.toml | 2 +- compiler-builtins/Cargo.toml | 12 ++++++------ compiler-builtins/src/float/mod.rs | 4 ++-- compiler-builtins/src/int/leading_zeros.rs | 4 ++-- compiler-builtins/src/int/mod.rs | 4 ++-- compiler-builtins/src/int/specialized_div_rem/mod.rs | 4 ++-- compiler-builtins/src/int/trailing_zeros.rs | 4 ++-- compiler-builtins/src/int/udiv.rs | 4 ++-- 8 files changed, 19 insertions(+), 19 deletions(-) diff --git a/builtins-test/Cargo.toml b/builtins-test/Cargo.toml index f7bcb52b4..10978c0bb 100644 --- a/builtins-test/Cargo.toml +++ b/builtins-test/Cargo.toml @@ -19,7 +19,7 @@ iai-callgrind = { version = "0.14.0", optional = true } [dependencies.compiler_builtins] path = "../compiler-builtins" default-features = false -features = ["public-test-deps"] +features = ["unstable-public-internals"] [dev-dependencies] criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] } diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index 1de37bd86..1ea32c10c 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -9,7 +9,7 @@ homepage = "https://github.com/rust-lang/compiler-builtins" documentation = "https://docs.rs/compiler_builtins" edition = "2021" description = "Compiler intrinsics used by the Rust compiler." 
-links = 'compiler-rt' +links = "compiler-rt" [lib] bench = false @@ -19,20 +19,20 @@ test = false [dependencies] # For more information on this dependency see # https://github.com/rust-lang/rust/tree/master/library/rustc-std-workspace-core -core = { version = "1.0.0", optional = true, package = 'rustc-std-workspace-core' } +core = { version = "1.0.0", optional = true, package = "rustc-std-workspace-core" } [build-dependencies] cc = { optional = true, version = "1.0" } [dev-dependencies] -panic-handler = { path = '../crates/panic-handler' } +panic-handler = { path = "../crates/panic-handler" } [features] default = ["compiler-builtins"] # Enable compilation of C code in compiler-rt, filling in some more optimized # implementations and also filling in unimplemented intrinsics -c = ["cc"] +c = ["dep:cc"] # Workaround for the Cranelift codegen backend. Disables any implementations # which use inline assembly and fall back to pure Rust versions (if avalible). @@ -53,11 +53,11 @@ mem = [] mangled-names = [] # Only used in the compiler's build system -rustc-dep-of-std = ['compiler-builtins', 'core'] +rustc-dep-of-std = ["compiler-builtins", "dep:core"] # This makes certain traits and function specializations public that # are not normally public but are required by the `builtins-test` -public-test-deps = [] +unstable-public-internals = [] [lints.rust] # The cygwin config can be dropped after our benchmark toolchain is bumped diff --git a/compiler-builtins/src/float/mod.rs b/compiler-builtins/src/float/mod.rs index f2c543bd2..4a379d0d3 100644 --- a/compiler-builtins/src/float/mod.rs +++ b/compiler-builtins/src/float/mod.rs @@ -9,7 +9,7 @@ pub mod sub; pub(crate) mod traits; pub mod trunc; -#[cfg(not(feature = "public-test-deps"))] +#[cfg(not(feature = "unstable-public-internals"))] pub(crate) use traits::{Float, HalfRep}; -#[cfg(feature = "public-test-deps")] +#[cfg(feature = "unstable-public-internals")] pub use traits::{Float, HalfRep}; diff --git 
a/compiler-builtins/src/int/leading_zeros.rs b/compiler-builtins/src/int/leading_zeros.rs index a57f88184..112f4d036 100644 --- a/compiler-builtins/src/int/leading_zeros.rs +++ b/compiler-builtins/src/int/leading_zeros.rs @@ -3,9 +3,9 @@ // adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`. // Compilers will insert the check for zero in cases where it is needed. -#[cfg(feature = "public-test-deps")] +#[cfg(feature = "unstable-public-internals")] pub use implementation::{leading_zeros_default, leading_zeros_riscv}; -#[cfg(not(feature = "public-test-deps"))] +#[cfg(not(feature = "unstable-public-internals"))] pub(crate) use implementation::{leading_zeros_default, leading_zeros_riscv}; mod implementation { diff --git a/compiler-builtins/src/int/mod.rs b/compiler-builtins/src/int/mod.rs index 5633510d3..518ccb23f 100644 --- a/compiler-builtins/src/int/mod.rs +++ b/compiler-builtins/src/int/mod.rs @@ -12,7 +12,7 @@ mod traits; pub mod udiv; pub use big::{i256, u256}; -#[cfg(not(feature = "public-test-deps"))] +#[cfg(not(feature = "unstable-public-internals"))] pub(crate) use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; -#[cfg(feature = "public-test-deps")] +#[cfg(feature = "unstable-public-internals")] pub use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; diff --git a/compiler-builtins/src/int/specialized_div_rem/mod.rs b/compiler-builtins/src/int/specialized_div_rem/mod.rs index b81f04698..43f466e75 100644 --- a/compiler-builtins/src/int/specialized_div_rem/mod.rs +++ b/compiler-builtins/src/int/specialized_div_rem/mod.rs @@ -56,9 +56,9 @@ mod delegate; // used on SPARC #[allow(unused_imports)] -#[cfg(not(feature = "public-test-deps"))] +#[cfg(not(feature = "unstable-public-internals"))] pub(crate) use self::delegate::u128_divide_sparc; -#[cfg(feature = "public-test-deps")] +#[cfg(feature = "unstable-public-internals")] pub use self::delegate::u128_divide_sparc; #[macro_use] diff --git 
a/compiler-builtins/src/int/trailing_zeros.rs b/compiler-builtins/src/int/trailing_zeros.rs index dbc0cce9f..c45d6b1cf 100644 --- a/compiler-builtins/src/int/trailing_zeros.rs +++ b/compiler-builtins/src/int/trailing_zeros.rs @@ -1,6 +1,6 @@ -#[cfg(feature = "public-test-deps")] +#[cfg(feature = "unstable-public-internals")] pub use implementation::trailing_zeros; -#[cfg(not(feature = "public-test-deps"))] +#[cfg(not(feature = "unstable-public-internals"))] pub(crate) use implementation::trailing_zeros; mod implementation { diff --git a/compiler-builtins/src/int/udiv.rs b/compiler-builtins/src/int/udiv.rs index 4e985ba47..b9dee63c4 100644 --- a/compiler-builtins/src/int/udiv.rs +++ b/compiler-builtins/src/int/udiv.rs @@ -1,6 +1,6 @@ -#[cfg(not(feature = "public-test-deps"))] +#[cfg(not(feature = "unstable-public-internals"))] pub(crate) use crate::int::specialized_div_rem::*; -#[cfg(feature = "public-test-deps")] +#[cfg(feature = "unstable-public-internals")] pub use crate::int::specialized_div_rem::*; intrinsics! 
{ From 2dc292bd1b38760682295444b3f90706a403f368 Mon Sep 17 00:00:00 2001 From: quaternic <57393910+quaternic@users.noreply.github.com> Date: Tue, 22 Apr 2025 03:56:51 +0300 Subject: [PATCH 1324/1459] Reimplement the generic fmod --- libm/src/lib.rs | 1 + libm/src/math/generic/fmod.rs | 116 ++++++++++++---------------- libm/src/math/support/int_traits.rs | 4 + 3 files changed, 55 insertions(+), 66 deletions(-) diff --git a/libm/src/lib.rs b/libm/src/lib.rs index 7df84fe18..31b122353 100644 --- a/libm/src/lib.rs +++ b/libm/src/lib.rs @@ -14,6 +14,7 @@ #![allow(clippy::excessive_precision)] #![allow(clippy::float_cmp)] #![allow(clippy::int_plus_one)] +#![allow(clippy::just_underscores_and_digits)] #![allow(clippy::many_single_char_names)] #![allow(clippy::mixed_case_hex_literals)] #![allow(clippy::needless_late_init)] diff --git a/libm/src/math/generic/fmod.rs b/libm/src/math/generic/fmod.rs index 6414bbd25..e9898012f 100644 --- a/libm/src/math/generic/fmod.rs +++ b/libm/src/math/generic/fmod.rs @@ -1,84 +1,68 @@ -/* SPDX-License-Identifier: MIT */ -/* origin: musl src/math/fmod.c. Ported to generic Rust algorithm in 2025, TG. 
*/ - +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ use super::super::{CastFrom, Float, Int, MinInt}; #[inline] pub fn fmod<F: Float>(x: F, y: F) -> F { - let zero = F::Int::ZERO; - let one = F::Int::ONE; - let mut ix = x.to_bits(); - let mut iy = y.to_bits(); - let mut ex = x.ex().signed(); - let mut ey = y.ex().signed(); - let sx = ix & F::SIGN_MASK; + let _1 = F::Int::ONE; + let sx = x.to_bits() & F::SIGN_MASK; + let ux = x.to_bits() & !F::SIGN_MASK; + let uy = y.to_bits() & !F::SIGN_MASK; - if iy << 1 == zero || y.is_nan() || ex == F::EXP_SAT as i32 { + // Cases that return NaN: + // NaN % _ + // Inf % _ + // _ % NaN + // _ % 0 + let x_nan_or_inf = ux & F::EXP_MASK == F::EXP_MASK; + let y_nan_or_zero = uy.wrapping_sub(_1) & F::EXP_MASK == F::EXP_MASK; + if x_nan_or_inf | y_nan_or_zero { return (x * y) / (x * y); } - if ix << 1 <= iy << 1 { - if ix << 1 == iy << 1 { - return F::ZERO * x; - } + if ux < uy { + // |x| < |y| return x; } - /* normalize x and y */ - if ex == 0 { - let i = ix << (F::EXP_BITS + 1); - ex -= i.leading_zeros() as i32; - ix <<= -ex + 1; - } else { - ix &= F::Int::MAX >> F::EXP_BITS; - ix |= one << F::SIG_BITS; - } - - if ey == 0 { - let i = iy << (F::EXP_BITS + 1); - ey -= i.leading_zeros() as i32; - iy <<= -ey + 1; - } else { - iy &= F::Int::MAX >> F::EXP_BITS; - iy |= one << F::SIG_BITS; - } - - /* x mod y */ - while ex > ey { - let i = ix.wrapping_sub(iy); - if i >> (F::BITS - 1) == zero { - if i == zero { - return F::ZERO * x; - } - ix = i; - } + let (num, ex) = into_sig_exp::<F>(ux); + let (div, ey) = into_sig_exp::<F>(uy); - ix <<= 1; - ex -= 1; - } + // To compute `(num << ex) % (div << ey)`, first + // evaluate `rem = (num << (ex - ey)) % div` ... + let rem = reduction(num, ex - ey, div); + // ...
so the result will be `rem << ey` - let i = ix.wrapping_sub(iy); - if i >> (F::BITS - 1) == zero { - if i == zero { - return F::ZERO * x; - } + if rem.is_zero() { + // Return zero with the sign of `x` + return F::from_bits(sx); + }; - ix = i; - } + // We would shift `rem` up by `ey`, but have to stop at `F::SIG_BITS` + let shift = ey.min(F::SIG_BITS - rem.ilog2()); + // Anything past that is added to the exponent field + let bits = (rem << shift) + (F::Int::cast_from(ey - shift) << F::SIG_BITS); + F::from_bits(sx + bits) +} - let shift = ix.leading_zeros().saturating_sub(F::EXP_BITS); - ix <<= shift; - ex -= shift as i32; +/// Given the bits of a finite float, return a tuple of +/// - the mantissa with the implicit bit (0 if subnormal, 1 otherwise) +/// - the additional exponent past 1, (0 for subnormal, 0 or more otherwise) +fn into_sig_exp<F: Float>(mut bits: F::Int) -> (F::Int, u32) { + bits &= !F::SIGN_MASK; + // Subtract 1 from the exponent, clamping at 0 + let sat = bits.checked_sub(F::IMPLICIT_BIT).unwrap_or(F::Int::ZERO); + ( + bits - (sat & F::EXP_MASK), + u32::cast_from(sat >> F::SIG_BITS), + ) +} - /* scale result */ - if ex > 0 { - ix -= one << F::SIG_BITS; - ix |= F::Int::cast_from(ex) << F::SIG_BITS; - } else { - ix >>= -ex + 1; +/// Compute the remainder `(x * 2.pow(e)) % y` without overflow.
+fn reduction<I: Int>(mut x: I, e: u32, y: I) -> I { + x %= y; + for _ in 0..e { + x <<= 1; + x = x.checked_sub(y).unwrap_or(x); } - - ix |= sx; - - F::from_bits(ix) + x } diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index 491adb1f2..3ec1faba1 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -40,6 +40,9 @@ pub trait Int: + PartialOrd + ops::AddAssign + ops::SubAssign + + ops::MulAssign + + ops::DivAssign + + ops::RemAssign + ops::BitAndAssign + ops::BitOrAssign + ops::BitXorAssign @@ -51,6 +54,7 @@ pub trait Int: + ops::Sub<Output = Self> + ops::Mul<Output = Self> + ops::Div<Output = Self> + + ops::Rem<Output = Self> + ops::Shl<i32, Output = Self> + ops::Shl<u32, Output = Self> + ops::Shr<i32, Output = Self> From fe23e618fc55a4373da452875d61e9787525462c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 21 Apr 2025 07:03:13 +0000 Subject: [PATCH 1325/1459] chore: release builtins 0.1.156 and libm 0.2.12 --- compiler-builtins/CHANGELOG.md | 12 ++++++++++++ compiler-builtins/Cargo.toml | 2 +- libm/CHANGELOG.md | 36 ++++++++++++++++++++++++++++++++++ libm/Cargo.toml | 2 +- 4 files changed, 50 insertions(+), 2 deletions(-) diff --git a/compiler-builtins/CHANGELOG.md b/compiler-builtins/CHANGELOG.md index 9916f2986..3450da992 100644 --- a/compiler-builtins/CHANGELOG.md +++ b/compiler-builtins/CHANGELOG.md @@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.156](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.155...compiler_builtins-v0.1.156) - 2025-04-21 + +### Other + +- avr: Provide `abort()` +- Remove `unsafe` from `naked_asm!` blocks +- Enable icount benchmarks in CI +- Move builtins-test-intrinsics out of the workspace +- Run `cargo fmt` on all projects +- Flatten the `libm/libm` directory +- Update path to libm after the merge + ##
[0.1.155](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.154...compiler_builtins-v0.1.155) - 2025-04-17 ### Other diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index 1ea32c10c..acbace687 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.155" +version = "0.1.156" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index 4e5acb899..c507608dd 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -8,6 +8,42 @@ and this project adheres to ## [Unreleased] +## [0.2.12](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.11...libm-v0.2.12) - 2025-04-21 + +- Mark generic functions `#[inline]` +- Combine the source files for `fmod` +- Ensure all public functions are marked `no_panic` +- Add assembly version of simple operations on aarch64 +- Add `roundeven{,f,f16,f128}` +- Add `fminimum`, `fmaximum`, `fminimum_num`, and `fmaximum_num` +- Eliminate the use of `force_eval!` in `ceil`, `floor`, and `trunc` +- Port the CORE-MATH version of `cbrt` +- Add `fmaf128` +- fma: Ensure zero has the correct sign +- Add `scalbnf16`, `scalbnf128`, `ldexpf16`, and `ldexpf128` +- Specify license as just MIT +- Add `fmodf128` +- Add `fmodf16` using the generic implementation +- Add `fminf16`, `fmaxf16`, `fminf128`, and `fmaxf128` +- Add `roundf16` and `roundf128` +- Add `rintf16` and `rintf128` +- Add `floorf16` and `floorf128` +- Add `ceilf16` and `ceilf128` +- Add `sqrtf16` and `sqrtf128` +- Simplify and optimize `fdim` ([#442](https://github.com/rust-lang/libm/pull/442)) +- Add `fdimf16` and `fdimf128` +- Add `truncf16` and `truncf128` +- Add `fabsf16`, `fabsf128`, `copysignf16`, and `copysignf128` +- Move some numeric trait logic to 
default implementations +- Add some more basic docstrings ([#352](https://github.com/rust-lang/libm/pull/352)) +- Add support for loongarch64-unknown-linux-gnu +- Add an "arch" Cargo feature that is on by default +- Rename the `special_case` module to `precision` and move default ULP +- Move the existing "unstable" feature to "unstable-intrinsics" + +There are a number of things that changed internally, see the git log for a full +list of changes. + ## [0.2.11](https://github.com/rust-lang/libm/compare/libm-v0.2.10...libm-v0.2.11) - 2024-10-28 ### Fixed diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 4e3850bbf..dc553ca4a 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT" name = "libm" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" -version = "0.2.11" +version = "0.2.12" edition = "2021" rust-version = "1.63" From 313e3eef4e23728664f5cd1f9bfb1b102c152c86 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 22 Apr 2025 03:43:35 +0000 Subject: [PATCH 1326/1459] chore: Combine CHANGELOG files for compiler-builtins This unintentionally got split when compiler-builtins was moved to a subdirectory. --- CHANGELOG.md | 108 --------------------------------- compiler-builtins/CHANGELOG.md | 100 ++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 108 deletions(-) delete mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index 763b0e10e..000000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,108 +0,0 @@ -# Changelog - -All notable changes to this project will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
- -## [Unreleased] - -## [0.1.151](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.150...compiler_builtins-v0.1.151) - 2025-03-05 - -### Other - -- Add cygwin support -- Enable `f16` for LoongArch ([#770](https://github.com/rust-lang/compiler-builtins/pull/770)) -- Add __extendhfdf2 and add __truncdfhf2 test -- Remove outdated information from the readme - -## [0.1.150](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.149...compiler_builtins-v0.1.150) - 2025-03-01 - -### Other - -- Disable `f16` on AArch64 without the `neon` feature -- Update LLVM downloads to 20.1-2025-02-13 - -## [0.1.149](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.148...compiler_builtins-v0.1.149) - 2025-02-25 - -### Other - -- Make a subset of `libm` symbols weakly available on all platforms - -## [0.1.148](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.147...compiler_builtins-v0.1.148) - 2025-02-24 - -### Other - -- Update the `libm` submodule -- Enable `f16` for MIPS -- Eliminate the use of `public_test_dep!` for a third time - -## [0.1.147](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.146...compiler_builtins-v0.1.147) - 2025-02-19 - -### Other - -- remove win64_128bit_abi_hack - -## [0.1.146](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.145...compiler_builtins-v0.1.146) - 2025-02-06 - -### Other - -- Expose erf{,c}{,f} from libm - -## [0.1.145](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.144...compiler_builtins-v0.1.145) - 2025-02-04 - -### Other - -- Revert "Eliminate the use of `public_test_dep!`" -- Indentation fix to please clippy -- Don't build out of line atomics support code for uefi -- Add a version to some FIXMEs that will be resolved in LLVM 20 -- Remove use of the `start` feature - -## 
[0.1.144](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.143...compiler_builtins-v0.1.144) - 2025-01-15 - -### Other - -- Eliminate the use of `public_test_dep!` - -## [0.1.143](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.142...compiler_builtins-v0.1.143) - 2025-01-15 - -### Other - -- Use a C-safe return type for `__rust_[ui]128_*` overflowing intrinsics - -## [0.1.142](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.141...compiler_builtins-v0.1.142) - 2025-01-07 - -### Other - -- Account for optimization levels other than numbers - -## [0.1.141](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.140...compiler_builtins-v0.1.141) - 2025-01-07 - -### Other - -- Update the `libm` submodule -- Fix new `clippy::precedence` errors -- Rename `EXP_MAX` to `EXP_SAT` -- Shorten prefixes for float constants - -## [0.1.140](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.139...compiler_builtins-v0.1.140) - 2024-12-26 - -### Other - -- Disable f128 for amdgpu ([#737](https://github.com/rust-lang/compiler-builtins/pull/737)) -- Fix a bug in `abs_diff` -- Disable `f16` on platforms that have recursion problems - -## [0.1.139](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.138...compiler_builtins-v0.1.139) - 2024-11-03 - -### Other - -- Remove incorrect `sparcv9` match pattern from `configure_f16_f128` - -## [0.1.138](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.137...compiler_builtins-v0.1.138) - 2024-11-01 - -### Other - -- Use `f16_enabled`/`f128_enabled` in `examples/intrinsics.rs` ([#724](https://github.com/rust-lang/compiler-builtins/pull/724)) -- Disable `f16` for LoongArch64 ([#722](https://github.com/rust-lang/compiler-builtins/pull/722)) diff --git a/compiler-builtins/CHANGELOG.md b/compiler-builtins/CHANGELOG.md index 3450da992..34b413a86 100644 --- 
a/compiler-builtins/CHANGELOG.md +++ b/compiler-builtins/CHANGELOG.md @@ -48,3 +48,103 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Remove use of `atomic_load_unordered` and undefined behaviour from `arm_linux.rs` - Switch repository layout to use a virtual manifest + +## [0.1.151](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.150...compiler_builtins-v0.1.151) - 2025-03-05 + +### Other + +- Add cygwin support +- Enable `f16` for LoongArch ([#770](https://github.com/rust-lang/compiler-builtins/pull/770)) +- Add __extendhfdf2 and add __truncdfhf2 test +- Remove outdated information from the readme + +## [0.1.150](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.149...compiler_builtins-v0.1.150) - 2025-03-01 + +### Other + +- Disable `f16` on AArch64 without the `neon` feature +- Update LLVM downloads to 20.1-2025-02-13 + +## [0.1.149](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.148...compiler_builtins-v0.1.149) - 2025-02-25 + +### Other + +- Make a subset of `libm` symbols weakly available on all platforms + +## [0.1.148](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.147...compiler_builtins-v0.1.148) - 2025-02-24 + +### Other + +- Update the `libm` submodule +- Enable `f16` for MIPS +- Eliminate the use of `public_test_dep!` for a third time + +## [0.1.147](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.146...compiler_builtins-v0.1.147) - 2025-02-19 + +### Other + +- remove win64_128bit_abi_hack + +## [0.1.146](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.145...compiler_builtins-v0.1.146) - 2025-02-06 + +### Other + +- Expose erf{,c}{,f} from libm + +## [0.1.145](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.144...compiler_builtins-v0.1.145) - 2025-02-04 + +### Other + +- Revert "Eliminate the use of 
`public_test_dep!`" +- Indentation fix to please clippy +- Don't build out of line atomics support code for uefi +- Add a version to some FIXMEs that will be resolved in LLVM 20 +- Remove use of the `start` feature + +## [0.1.144](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.143...compiler_builtins-v0.1.144) - 2025-01-15 + +### Other + +- Eliminate the use of `public_test_dep!` + +## [0.1.143](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.142...compiler_builtins-v0.1.143) - 2025-01-15 + +### Other + +- Use a C-safe return type for `__rust_[ui]128_*` overflowing intrinsics + +## [0.1.142](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.141...compiler_builtins-v0.1.142) - 2025-01-07 + +### Other + +- Account for optimization levels other than numbers + +## [0.1.141](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.140...compiler_builtins-v0.1.141) - 2025-01-07 + +### Other + +- Update the `libm` submodule +- Fix new `clippy::precedence` errors +- Rename `EXP_MAX` to `EXP_SAT` +- Shorten prefixes for float constants + +## [0.1.140](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.139...compiler_builtins-v0.1.140) - 2024-12-26 + +### Other + +- Disable f128 for amdgpu ([#737](https://github.com/rust-lang/compiler-builtins/pull/737)) +- Fix a bug in `abs_diff` +- Disable `f16` on platforms that have recursion problems + +## [0.1.139](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.138...compiler_builtins-v0.1.139) - 2024-11-03 + +### Other + +- Remove incorrect `sparcv9` match pattern from `configure_f16_f128` + +## [0.1.138](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.137...compiler_builtins-v0.1.138) - 2024-11-01 + +### Other + +- Use `f16_enabled`/`f128_enabled` in `examples/intrinsics.rs` ([#724](https://github.com/rust-lang/compiler-builtins/pull/724)) 
+- Disable `f16` for LoongArch64 ([#722](https://github.com/rust-lang/compiler-builtins/pull/722)) From dfd553af25c649366343bd966a79b72df861fd49 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 22 Apr 2025 07:46:09 +0000 Subject: [PATCH 1327/1459] fix: Switch to resolver v2 The published crates fail to build with an edition less than 2024 because they are packaged with `resolver = "3"`, which is a 2024-only option. Revert back to resolver v2 to drop this requirement. Fixes: https://github.com/rust-lang/compiler-builtins/issues/883 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 75bb81ec1..b39ec8a25 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -resolver = "3" +resolver = "2" members = [ "builtins-test", "compiler-builtins", From 0bdef053a00a5a17722733c550606ad15d62cea6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 22 Apr 2025 08:05:23 +0000 Subject: [PATCH 1328/1459] chore: Release libm v0.2.13 --- libm/CHANGELOG.md | 6 ++++++ libm/Cargo.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index c507608dd..292561f86 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -8,6 +8,12 @@ and this project adheres to ## [Unreleased] +## [0.2.13](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.12...libm-v0.2.13) - 2025-04-21 + +### Fixed + +- Switch back to workspace resolver v2 to unbreak builds without the 2024 edition + ## [0.2.12](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.11...libm-v0.2.12) - 2025-04-21 - Mark generic functions `#[inline]` diff --git a/libm/Cargo.toml b/libm/Cargo.toml index dc553ca4a..f80715ff6 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT" name = "libm" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" -version = "0.2.12" +version = "0.2.13" edition = "2021" rust-version = "1.63" 
From 667ba286c7d27d124fe4b4f0d0c933212313105f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 22 Apr 2025 16:30:17 -0400 Subject: [PATCH 1329/1459] musl: Update submodule Update the musl submodule to c47ad25ea3 ("iconv: harden UTF-8 output code path against input decoder bugs"). --- crates/musl-math-sys/musl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/musl-math-sys/musl b/crates/musl-math-sys/musl index 61399d4bd..c47ad25ea 160000 --- a/crates/musl-math-sys/musl +++ b/crates/musl-math-sys/musl @@ -1 +1 @@ -Subproject commit 61399d4bd02ae1ec03068445aa7ffe9174466bfd +Subproject commit c47ad25ea3b484e10326f933e927c0bc8cded3da From 672ba576cb97c4d89e45d30509c6a989ff57e4aa Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 23 Apr 2025 06:46:31 +0000 Subject: [PATCH 1330/1459] libm-macros: Start tracking which functions are public It would be nice to reuse some of the macro structure for internal functions, like `rem_pio2`. To facilitate this, add a `public` field and make it available in the macro's API. --- crates/libm-macros/src/lib.rs | 6 +- crates/libm-macros/src/shared.rs | 383 +++++++++++++++++------------- crates/libm-macros/tests/basic.rs | 2 + libm-test/src/op.rs | 10 +- 4 files changed, 226 insertions(+), 175 deletions(-) diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs index 3cdd364e8..144676c12 100644 --- a/crates/libm-macros/src/lib.rs +++ b/crates/libm-macros/src/lib.rs @@ -12,7 +12,7 @@ use syn::visit_mut::VisitMut; use syn::{Ident, ItemEnum}; const KNOWN_TYPES: &[&str] = &[ - "FTy", "CFn", "CArgs", "CRet", "RustFn", "RustArgs", "RustRet", + "FTy", "CFn", "CArgs", "CRet", "RustFn", "RustArgs", "RustRet", "public", ]; /// Populate an enum with a variant representing function. Names are in upper camel case. @@ -80,6 +80,8 @@ pub fn base_name_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> p /// RustArgs: $RustArgs:ty, /// // The Rust version's return type (e.g. 
`(f32, f32)`) /// RustRet: $RustRet:ty, +/// // True if this is part of `libm`'s public API +/// public: $public:expr, /// // Attributes for the current function, if any /// attrs: [$($attr:meta),*], /// // Extra tokens passed directly (if any) @@ -329,6 +331,7 @@ fn expand(input: StructuredInput, fn_list: &[&MathOpInfo]) -> syn::Result syn::Result quote! { RustFn: fn( #(#rust_args),* ,) -> ( #(#rust_ret),* ), }, "RustArgs" => quote! { RustArgs: ( #(#rust_args),* ,), }, "RustRet" => quote! { RustRet: ( #(#rust_ret),* ), }, + "public" => quote! { public: #public, }, _ => unreachable!("checked in validation"), }; ty_fields.push(field); diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs index 750ed1afb..1cefe4e8c 100644 --- a/crates/libm-macros/src/shared.rs +++ b/crates/libm-macros/src/shared.rs @@ -3,16 +3,26 @@ use std::fmt; use std::sync::LazyLock; -const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] = &[ - ( +struct NestedOp { + float_ty: FloatTy, + rust_sig: Signature, + c_sig: Option, + fn_list: &'static [&'static str], + public: bool, +} + +/// We need a flat list to work with most of the time, but define things as a more convenient +/// nested list. 
+const ALL_OPERATIONS_NESTED: &[NestedOp] = &[ + NestedOp { // `fn(f16) -> f16` - FloatTy::F16, - Signature { + float_ty: FloatTy::F16, + rust_sig: Signature { args: &[Ty::F16], returns: &[Ty::F16], }, - None, - &[ + c_sig: None, + fn_list: &[ "ceilf16", "fabsf16", "floorf16", @@ -22,16 +32,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] "sqrtf16", "truncf16", ], - ), - ( + public: true, + }, + NestedOp { // `fn(f32) -> f32` - FloatTy::F32, - Signature { + float_ty: FloatTy::F32, + rust_sig: Signature { args: &[Ty::F32], returns: &[Ty::F32], }, - None, - &[ + c_sig: None, + fn_list: &[ "acosf", "acoshf", "asinf", @@ -70,16 +81,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] "y0f", "y1f", ], - ), - ( + public: true, + }, + NestedOp { // `(f64) -> f64` - FloatTy::F64, - Signature { + float_ty: FloatTy::F64, + rust_sig: Signature { args: &[Ty::F64], returns: &[Ty::F64], }, - None, - &[ + c_sig: None, + fn_list: &[ "acos", "acosh", "asin", @@ -118,16 +130,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] "y0", "y1", ], - ), - ( + public: true, + }, + NestedOp { // `fn(f128) -> f128` - FloatTy::F128, - Signature { + float_ty: FloatTy::F128, + rust_sig: Signature { args: &[Ty::F128], returns: &[Ty::F128], }, - None, - &[ + c_sig: None, + fn_list: &[ "ceilf128", "fabsf128", "floorf128", @@ -137,16 +150,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] "sqrtf128", "truncf128", ], - ), - ( + public: true, + }, + NestedOp { // `(f16, f16) -> f16` - FloatTy::F16, - Signature { + float_ty: FloatTy::F16, + rust_sig: Signature { args: &[Ty::F16, Ty::F16], returns: &[Ty::F16], }, - None, - &[ + c_sig: None, + fn_list: &[ "copysignf16", "fdimf16", "fmaxf16", @@ -157,16 +171,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] "fminimumf16", "fmodf16", ], - ), - ( + public: true, + }, + NestedOp { // `(f32, f32) -> f32` - FloatTy::F32, - Signature { + 
float_ty: FloatTy::F32, + rust_sig: Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32], }, - None, - &[ + c_sig: None, + fn_list: &[ "atan2f", "copysignf", "fdimf", @@ -182,16 +197,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] "powf", "remainderf", ], - ), - ( + public: true, + }, + NestedOp { // `(f64, f64) -> f64` - FloatTy::F64, - Signature { + float_ty: FloatTy::F64, + rust_sig: Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64], }, - None, - &[ + c_sig: None, + fn_list: &[ "atan2", "copysign", "fdim", @@ -207,16 +223,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] "pow", "remainder", ], - ), - ( + public: true, + }, + NestedOp { // `(f128, f128) -> f128` - FloatTy::F128, - Signature { + float_ty: FloatTy::F128, + rust_sig: Signature { args: &[Ty::F128, Ty::F128], returns: &[Ty::F128], }, - None, - &[ + c_sig: None, + fn_list: &[ "copysignf128", "fdimf128", "fmaxf128", @@ -227,221 +244,241 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] "fminimumf128", "fmodf128", ], - ), - ( + public: true, + }, + NestedOp { // `(f32, f32, f32) -> f32` - FloatTy::F32, - Signature { + float_ty: FloatTy::F32, + rust_sig: Signature { args: &[Ty::F32, Ty::F32, Ty::F32], returns: &[Ty::F32], }, - None, - &["fmaf"], - ), - ( + c_sig: None, + fn_list: &["fmaf"], + public: true, + }, + NestedOp { // `(f64, f64, f64) -> f64` - FloatTy::F64, - Signature { + float_ty: FloatTy::F64, + rust_sig: Signature { args: &[Ty::F64, Ty::F64, Ty::F64], returns: &[Ty::F64], }, - None, - &["fma"], - ), - ( + c_sig: None, + fn_list: &["fma"], + public: true, + }, + NestedOp { // `(f128, f128, f128) -> f128` - FloatTy::F128, - Signature { + float_ty: FloatTy::F128, + rust_sig: Signature { args: &[Ty::F128, Ty::F128, Ty::F128], returns: &[Ty::F128], }, - None, - &["fmaf128"], - ), - ( + c_sig: None, + fn_list: &["fmaf128"], + public: true, + }, + NestedOp { // `(f32) -> i32` - FloatTy::F32, - Signature { + 
float_ty: FloatTy::F32, + rust_sig: Signature { args: &[Ty::F32], returns: &[Ty::I32], }, - None, - &["ilogbf"], - ), - ( + c_sig: None, + fn_list: &["ilogbf"], + public: true, + }, + NestedOp { // `(f64) -> i32` - FloatTy::F64, - Signature { + float_ty: FloatTy::F64, + rust_sig: Signature { args: &[Ty::F64], returns: &[Ty::I32], }, - None, - &["ilogb"], - ), - ( + c_sig: None, + fn_list: &["ilogb"], + public: true, + }, + NestedOp { // `(i32, f32) -> f32` - FloatTy::F32, - Signature { + float_ty: FloatTy::F32, + rust_sig: Signature { args: &[Ty::I32, Ty::F32], returns: &[Ty::F32], }, - None, - &["jnf", "ynf"], - ), - ( + c_sig: None, + fn_list: &["jnf", "ynf"], + public: true, + }, + NestedOp { // `(i32, f64) -> f64` - FloatTy::F64, - Signature { + float_ty: FloatTy::F64, + rust_sig: Signature { args: &[Ty::I32, Ty::F64], returns: &[Ty::F64], }, - None, - &["jn", "yn"], - ), - ( + c_sig: None, + fn_list: &["jn", "yn"], + public: true, + }, + NestedOp { // `(f16, i32) -> f16` - FloatTy::F16, - Signature { + float_ty: FloatTy::F16, + rust_sig: Signature { args: &[Ty::F16, Ty::I32], returns: &[Ty::F16], }, - None, - &["ldexpf16", "scalbnf16"], - ), - ( + c_sig: None, + fn_list: &["ldexpf16", "scalbnf16"], + public: true, + }, + NestedOp { // `(f32, i32) -> f32` - FloatTy::F32, - Signature { + float_ty: FloatTy::F32, + rust_sig: Signature { args: &[Ty::F32, Ty::I32], returns: &[Ty::F32], }, - None, - &["ldexpf", "scalbnf"], - ), - ( + c_sig: None, + fn_list: &["ldexpf", "scalbnf"], + public: true, + }, + NestedOp { // `(f64, i64) -> f64` - FloatTy::F64, - Signature { + float_ty: FloatTy::F64, + rust_sig: Signature { args: &[Ty::F64, Ty::I32], returns: &[Ty::F64], }, - None, - &["ldexp", "scalbn"], - ), - ( + c_sig: None, + fn_list: &["ldexp", "scalbn"], + public: true, + }, + NestedOp { // `(f128, i32) -> f128` - FloatTy::F128, - Signature { + float_ty: FloatTy::F128, + rust_sig: Signature { args: &[Ty::F128, Ty::I32], returns: &[Ty::F128], }, - None, - &["ldexpf128", 
"scalbnf128"], - ), - ( + c_sig: None, + fn_list: &["ldexpf128", "scalbnf128"], + public: true, + }, + NestedOp { // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)` - FloatTy::F32, - Signature { + float_ty: FloatTy::F32, + rust_sig: Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32], }, - Some(Signature { + c_sig: Some(Signature { args: &[Ty::F32, Ty::MutF32], returns: &[Ty::F32], }), - &["modff"], - ), - ( + fn_list: &["modff"], + public: true, + }, + NestedOp { // `(f64, &mut f64) -> f64` as `(f64) -> (f64, f64)` - FloatTy::F64, - Signature { + float_ty: FloatTy::F64, + rust_sig: Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64], }, - Some(Signature { + c_sig: Some(Signature { args: &[Ty::F64, Ty::MutF64], returns: &[Ty::F64], }), - &["modf"], - ), - ( + fn_list: &["modf"], + public: true, + }, + NestedOp { // `(f32, &mut c_int) -> f32` as `(f32) -> (f32, i32)` - FloatTy::F32, - Signature { + float_ty: FloatTy::F32, + rust_sig: Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::I32], }, - Some(Signature { + c_sig: Some(Signature { args: &[Ty::F32, Ty::MutCInt], returns: &[Ty::F32], }), - &["frexpf", "lgammaf_r"], - ), - ( + fn_list: &["frexpf", "lgammaf_r"], + public: true, + }, + NestedOp { // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)` - FloatTy::F64, - Signature { + float_ty: FloatTy::F64, + rust_sig: Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::I32], }, - Some(Signature { + c_sig: Some(Signature { args: &[Ty::F64, Ty::MutCInt], returns: &[Ty::F64], }), - &["frexp", "lgamma_r"], - ), - ( + fn_list: &["frexp", "lgamma_r"], + public: true, + }, + NestedOp { // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)` - FloatTy::F32, - Signature { + float_ty: FloatTy::F32, + rust_sig: Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32, Ty::I32], }, - Some(Signature { + c_sig: Some(Signature { args: &[Ty::F32, Ty::F32, Ty::MutCInt], returns: &[Ty::F32], }), - &["remquof"], - ), - ( + fn_list: &["remquof"], + 
public: true, + }, + NestedOp { // `(f64, f64, &mut c_int) -> f64` as `(f64, f64) -> (f64, i32)` - FloatTy::F64, - Signature { + float_ty: FloatTy::F64, + rust_sig: Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64, Ty::I32], }, - Some(Signature { + c_sig: Some(Signature { args: &[Ty::F64, Ty::F64, Ty::MutCInt], returns: &[Ty::F64], }), - &["remquo"], - ), - ( + fn_list: &["remquo"], + public: true, + }, + NestedOp { // `(f32, &mut f32, &mut f32)` as `(f32) -> (f32, f32)` - FloatTy::F32, - Signature { + float_ty: FloatTy::F32, + rust_sig: Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32], }, - Some(Signature { + c_sig: Some(Signature { args: &[Ty::F32, Ty::MutF32, Ty::MutF32], returns: &[], }), - &["sincosf"], - ), - ( + fn_list: &["sincosf"], + public: true, + }, + NestedOp { // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)` - FloatTy::F64, - Signature { + float_ty: FloatTy::F64, + rust_sig: Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64], }, - Some(Signature { + c_sig: Some(Signature { args: &[Ty::F64, Ty::MutF64, Ty::MutF64], returns: &[], }), - &["sincos"], - ), + fn_list: &["sincos"], + public: true, + }, ]; /// A type used in a function signature. @@ -520,27 +557,31 @@ pub struct MathOpInfo { pub c_sig: Signature, /// Function signature for Rust implementations pub rust_sig: Signature, + /// True if part of libm's public API + pub public: bool, } /// A flat representation of `ALL_FUNCTIONS`. 
pub static ALL_OPERATIONS: LazyLock> = LazyLock::new(|| { let mut ret = Vec::new(); - for (base_fty, rust_sig, c_sig, names) in ALL_OPERATIONS_NESTED { - for name in *names { + for op in ALL_OPERATIONS_NESTED { + let fn_names = op.fn_list; + for name in fn_names { let api = MathOpInfo { name, - float_ty: *base_fty, - rust_sig: rust_sig.clone(), - c_sig: c_sig.clone().unwrap_or_else(|| rust_sig.clone()), + float_ty: op.float_ty, + rust_sig: op.rust_sig.clone(), + c_sig: op.c_sig.clone().unwrap_or_else(|| op.rust_sig.clone()), + public: op.public, }; ret.push(api); } - if !names.is_sorted() { - let mut sorted = (*names).to_owned(); + if !fn_names.is_sorted() { + let mut sorted = (*fn_names).to_owned(); sorted.sort_unstable(); - panic!("names list is not sorted: {names:?}\nExpected: {sorted:?}"); + panic!("names list is not sorted: {fn_names:?}\nExpected: {sorted:?}"); } } diff --git a/crates/libm-macros/tests/basic.rs b/crates/libm-macros/tests/basic.rs index 5314e84bb..260350ef2 100644 --- a/crates/libm-macros/tests/basic.rs +++ b/crates/libm-macros/tests/basic.rs @@ -13,6 +13,7 @@ macro_rules! basic { RustFn: $RustFn:ty, RustArgs: $RustArgs:ty, RustRet: $RustRet:ty, + public: $public:expr, attrs: [$($attr:meta),*], extra: [$($extra_tt:tt)*], fn_extra: $fn_extra:expr, @@ -25,6 +26,7 @@ macro_rules! basic { type RustFnTy = $RustFn; type RustArgsTy = $RustArgs; type RustRetTy = $RustRet; + const PUBLIC: bool = $public; const A: &[&str] = &[$($extra_tt)*]; fn foo(a: f32) -> f32 { $fn_extra(a) diff --git a/libm-test/src/op.rs b/libm-test/src/op.rs index bd17aad7d..afd445ff9 100644 --- a/libm-test/src/op.rs +++ b/libm-test/src/op.rs @@ -90,6 +90,9 @@ pub trait MathOp { /// The function in `libm` which can be called. const ROUTINE: Self::RustFn; + + /// Whether or not the function is part of libm public API. + const PUBLIC: bool; } /// Access the associated `FTy` type from an op (helper to avoid ambiguous associated types). 
@@ -107,7 +110,7 @@ pub type OpRustArgs = ::RustArgs; /// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types). pub type OpRustRet = ::RustRet; -macro_rules! do_thing { +macro_rules! create_op_modules { // Matcher for unary functions ( fn_name: $fn_name:ident, @@ -118,8 +121,8 @@ macro_rules! do_thing { RustFn: $RustFn:ty, RustArgs: $RustArgs:ty, RustRet: $RustRet:ty, + public: $public:expr, attrs: [$($attr:meta),*], - ) => { paste::paste! { $(#[$attr])* @@ -138,6 +141,7 @@ macro_rules! do_thing { const IDENTIFIER: Identifier = Identifier::[< $fn_name:camel >]; const ROUTINE: Self::RustFn = libm::$fn_name; + const PUBLIC: bool = $public; } } @@ -146,6 +150,6 @@ macro_rules! do_thing { } libm_macros::for_each_function! { - callback: do_thing, + callback: create_op_modules, emit_types: all, } From bf792806487d314c52660edeb2a0557eeadc728a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 23 Apr 2025 07:47:48 +0000 Subject: [PATCH 1331/1459] libm-macros: Allow a way to bulk match f16 and f128 functions These are never available in musl, so introduce easier ways to skip them rather than needing to exclude f16/f128 functions in three different places. --- crates/libm-macros/src/lib.rs | 30 +++++++++++ crates/libm-macros/src/parse.rs | 16 +++++- crates/libm-macros/tests/basic.rs | 72 +++++++++++++++++++++++++++ crates/util/src/main.rs | 44 ++-------------- libm-test/benches/random.rs | 45 ++--------------- libm-test/tests/compare_built_musl.rs | 43 ++-------------- 6 files changed, 128 insertions(+), 122 deletions(-) diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs index 144676c12..e8afe3aad 100644 --- a/crates/libm-macros/src/lib.rs +++ b/crates/libm-macros/src/lib.rs @@ -116,6 +116,9 @@ pub fn base_name_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> p /// // a simplified match-like syntax. 
/// fn_extra: match MACRO_FN_NAME { /// hypot | hypotf => |x| x.hypot(), +/// // `ALL_*` magic matchers also work to extract specific types +/// ALL_F64 => |x| x, +/// // The default pattern gets applied to everything that did not match /// _ => |x| x, /// }, /// } @@ -138,6 +141,27 @@ pub fn for_each_function(tokens: pm::TokenStream) -> pm::TokenStream { /// /// Returns the list of function names that we should expand for. fn validate(input: &mut StructuredInput) -> syn::Result> { + // Replace magic mappers with a list of relevant functions. + if let Some(map) = &mut input.fn_extra { + for (name, ty) in [ + ("ALL_F16", FloatTy::F16), + ("ALL_F32", FloatTy::F32), + ("ALL_F64", FloatTy::F64), + ("ALL_F128", FloatTy::F128), + ] { + let Some(k) = map.keys().find(|key| *key == name) else { + continue; + }; + + let key = k.clone(); + let val = map.remove(&key).unwrap(); + + for op in ALL_OPERATIONS.iter().filter(|op| op.float_ty == ty) { + map.insert(Ident::new(op.name, key.span()), val.clone()); + } + } + } + // Collect lists of all functions that are provied as macro inputs in various fields (only, // skip, attributes). 
let attr_mentions = input @@ -195,6 +219,12 @@ fn validate(input: &mut StructuredInput) -> syn::Result continue; } + // Omit f16 and f128 functions if requested + if input.skip_f16_f128 && (func.float_ty == FloatTy::F16 || func.float_ty == FloatTy::F128) + { + continue; + } + // Run everything else fn_list.push(func); } diff --git a/crates/libm-macros/src/parse.rs b/crates/libm-macros/src/parse.rs index d60d1247a..4876f3ef7 100644 --- a/crates/libm-macros/src/parse.rs +++ b/crates/libm-macros/src/parse.rs @@ -6,7 +6,7 @@ use syn::parse::{Parse, ParseStream, Parser}; use syn::punctuated::Punctuated; use syn::spanned::Spanned; use syn::token::{self, Comma}; -use syn::{Arm, Attribute, Expr, ExprMatch, Ident, Meta, Token, bracketed}; +use syn::{Arm, Attribute, Expr, ExprMatch, Ident, LitBool, Meta, Token, bracketed}; /// The input to our macro; just a list of `field: value` items. #[derive(Debug)] @@ -50,6 +50,8 @@ pub struct StructuredInput { pub emit_types: Vec, /// Skip these functions pub skip: Vec, + /// If true, omit f16 and f128 functions that aren't present in other libraries. 
+ pub skip_f16_f128: bool, /// Invoke only for these functions pub only: Option>, /// Attributes that get applied to specific functions @@ -70,6 +72,7 @@ impl StructuredInput { let cb_expr = expect_field(&mut map, "callback")?; let emit_types_expr = expect_field(&mut map, "emit_types").ok(); let skip_expr = expect_field(&mut map, "skip").ok(); + let skip_f16_f128 = expect_field(&mut map, "skip_f16_f128").ok(); let only_expr = expect_field(&mut map, "only").ok(); let attr_expr = expect_field(&mut map, "attributes").ok(); let extra = expect_field(&mut map, "extra").ok(); @@ -93,6 +96,11 @@ impl StructuredInput { None => Vec::new(), }; + let skip_f16_f128 = match skip_f16_f128 { + Some(expr) => expect_litbool(expr)?.value, + None => false, + }; + let only_span = only_expr.as_ref().map(|expr| expr.span()); let only = match only_expr { Some(expr) => Some(Parser::parse2(parse_ident_array, expr.into_token_stream())?), @@ -122,6 +130,7 @@ impl StructuredInput { callback: expect_ident(cb_expr)?, emit_types, skip, + skip_f16_f128, only, only_span, attributes, @@ -220,6 +229,11 @@ fn expect_ident(expr: Expr) -> syn::Result { syn::parse2(expr.into_token_stream()) } +/// Coerce an expression into a simple keyword. +fn expect_litbool(expr: Expr) -> syn::Result { + syn::parse2(expr.into_token_stream()) +} + /// Parse either a single identifier (`foo`) or an array of identifiers (`[foo, bar, baz]`). fn parse_ident_or_array(input: ParseStream) -> syn::Result> { if !input.peek(token::Bracket) { diff --git a/crates/libm-macros/tests/basic.rs b/crates/libm-macros/tests/basic.rs index 260350ef2..b42762622 100644 --- a/crates/libm-macros/tests/basic.rs +++ b/crates/libm-macros/tests/basic.rs @@ -103,3 +103,75 @@ mod test_emit_types { emit_types: [RustFn, RustArgs], } } + +#[test] +fn test_skip_f16_f128() { + macro_rules! 
skip_f16_f128 { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + extra: $vec:ident, + ) => { + $vec.push(stringify!($fn_name)); + }; + } + + let mut v = Vec::new(); + // Test with no extra, no skip, and no attributes + libm_macros::for_each_function! { + callback: skip_f16_f128, + skip_f16_f128: true, + extra: v, + } + + for name in v { + assert!(!name.contains("f16"), "{name}"); + assert!(!name.contains("f128"), "{name}"); + } +} + +#[test] +fn test_fn_extra_expansion() { + macro_rules! fn_extra_expansion { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + fn_extra: $vec:expr, + ) => { + $vec.push(stringify!($fn_name)); + }; + } + + let mut vf16 = Vec::new(); + let mut vf32 = Vec::new(); + let mut vf64 = Vec::new(); + let mut vf128 = Vec::new(); + + // Test with no extra, no skip, and no attributes + libm_macros::for_each_function! { + callback: fn_extra_expansion, + fn_extra: match MACRO_FN_NAME { + ALL_F16 => vf16, + ALL_F32 => vf32, + ALL_F64 => vf64, + ALL_F128 => vf128, + } + } + + // Skip functions with a suffix after the type spec + vf16.retain(|name| !name.ends_with("_r")); + vf32.retain(|name| !name.ends_with("_r")); + vf64.retain(|name| !name.ends_with("_r")); + vf128.retain(|name| !name.ends_with("_r")); + + for name in vf16 { + assert!(name.ends_with("f16"), "{name}"); + } + for name in vf32 { + assert!(name.ends_with("f"), "{name}"); + } + let _ = vf64; + for name in vf128 { + assert!(name.ends_with("f128"), "{name}"); + } +} diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs index e70578699..597218153 100644 --- a/crates/util/src/main.rs +++ b/crates/util/src/main.rs @@ -86,55 +86,19 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { emit_types: [CFn, RustFn, RustArgs], extra: (basis, op, inputs), fn_extra: match MACRO_FN_NAME { - ceilf128 - | ceilf16 - | copysignf128 - | copysignf16 - | fabsf128 - | fabsf16 - | fdimf128 - | fdimf16 - | floorf128 - | floorf16 - | fmaf128 - | fmaxf128 - | fmaxf16 - | fmaximum + 
// Not provided by musl + fmaximum | fmaximum_num | fmaximum_numf - | fmaximum_numf128 - | fmaximum_numf16 | fmaximumf - | fmaximumf128 - | fmaximumf16 - | fminf128 - | fminf16 | fminimum | fminimum_num | fminimum_numf - | fminimum_numf128 - | fminimum_numf16 | fminimumf - | fminimumf128 - | fminimumf16 - | fmodf128 - | fmodf16 - | ldexpf128 - | ldexpf16 - | rintf128 - | rintf16 | roundeven | roundevenf - | roundevenf128 - | roundevenf16 - | roundf128 - | roundf16 - | scalbnf128 - | scalbnf16 - | sqrtf128 - | sqrtf16 - | truncf128 - | truncf16 => None, + | ALL_F16 + | ALL_F128 => None, _ => Some(musl_math_sys::MACRO_FN_NAME) } } diff --git a/libm-test/benches/random.rs b/libm-test/benches/random.rs index 81f58e3a6..1b17f049e 100644 --- a/libm-test/benches/random.rs +++ b/libm-test/benches/random.rs @@ -125,56 +125,19 @@ libm_macros::for_each_function! { // FIXME(correctness): exp functions have the wrong result on i586 exp10 | exp10f | exp2 | exp2f => (true, Some(musl_math_sys::MACRO_FN_NAME)), - // Musl does not provide `f16` and `f128` functions - ceilf128 - | ceilf16 - | copysignf128 - | copysignf16 - | fabsf128 - | fabsf16 - | fdimf128 - | fdimf16 - | floorf128 - | floorf16 - | fmaf128 - | fmaxf128 - | fmaxf16 - | fmaximum + // Musl does not provide `f16` and `f128` functions, as well as a handful of others + fmaximum | fmaximum_num | fmaximum_numf - | fmaximum_numf128 - | fmaximum_numf16 | fmaximumf - | fmaximumf128 - | fmaximumf16 - | fminf128 - | fminf16 | fminimum | fminimum_num | fminimum_numf - | fminimum_numf128 - | fminimum_numf16 | fminimumf - | fminimumf128 - | fminimumf16 - | fmodf128 - | fmodf16 - | ldexpf128 - | ldexpf16 - | rintf128 - | rintf16 | roundeven | roundevenf - | roundevenf128 - | roundevenf16 - | roundf128 - | roundf16 - | scalbnf128 - | scalbnf16 - | sqrtf128 - | sqrtf16 - | truncf128 - | truncf16 => (false, None), + | ALL_F16 + | ALL_F128 => (false, None), // By default we never skip (false) and always have a musl function available _ 
=> (false, Some(musl_math_sys::MACRO_FN_NAME)) diff --git a/libm-test/tests/compare_built_musl.rs b/libm-test/tests/compare_built_musl.rs index cbb4bd49b..6ccbb6f4c 100644 --- a/libm-test/tests/compare_built_musl.rs +++ b/libm-test/tests/compare_built_musl.rs @@ -76,6 +76,8 @@ macro_rules! musl_tests { libm_macros::for_each_function! { callback: musl_tests, attributes: [], + // Not provided by musl + skip_f16_f128: true, skip: [ // TODO integer inputs jn, @@ -89,55 +91,16 @@ libm_macros::for_each_function! { // Not provided by musl // verify-sorted-start - ceilf128, - ceilf16, - copysignf128, - copysignf16, - fabsf128, - fabsf16, - fdimf128, - fdimf16, - floorf128, - floorf16, - fmaf128, - fmaxf128, - fmaxf16, fmaximum, fmaximum_num, fmaximum_numf, - fmaximum_numf128, - fmaximum_numf16, fmaximumf, - fmaximumf128, - fmaximumf16, - fminf128, - fminf16, fminimum, fminimum_num, fminimum_numf, - fminimum_numf128, - fminimum_numf16, fminimumf, - fminimumf128, - fminimumf16, - fmodf128, - fmodf16, - ldexpf128, - ldexpf16, - rintf128, - rintf16, roundeven, roundevenf, - roundevenf128, - roundevenf16, - roundf128, - roundf16, - scalbnf128, - scalbnf16, - sqrtf128, - sqrtf16, - truncf128, - truncf16, - // verify-sorted-end + // // verify-sorted-end ], } From fdbefb39d5bb0b95b29b821247044c8aaf436160 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 19 Mar 2025 00:19:04 +0000 Subject: [PATCH 1332/1459] Warn on `unsafe_op_in_unsafe_fn` by default Edition 2024 requires that we avoid this. There is a lot of code that will need to be adjusted, so start the process here with a warning that will show up in CI. 
--- builtins-test-intrinsics/src/main.rs | 20 +-- compiler-builtins/src/arm.rs | 181 ++++++++++++++++++++++----- compiler-builtins/src/lib.rs | 3 + compiler-builtins/src/macros.rs | 15 +-- compiler-builtins/src/mem/mod.rs | 2 + compiler-builtins/src/x86_64.rs | 2 +- 6 files changed, 172 insertions(+), 51 deletions(-) diff --git a/builtins-test-intrinsics/src/main.rs b/builtins-test-intrinsics/src/main.rs index c4c026368..18f943eff 100644 --- a/builtins-test-intrinsics/src/main.rs +++ b/builtins-test-intrinsics/src/main.rs @@ -649,14 +649,14 @@ fn something_with_a_dtor(f: &dyn Fn()) { f(); } -#[no_mangle] +#[unsafe(no_mangle)] #[cfg(not(thumb))] fn main(_argc: core::ffi::c_int, _argv: *const *const u8) -> core::ffi::c_int { run(); 0 } -#[no_mangle] +#[unsafe(no_mangle)] #[cfg(thumb)] pub fn _start() -> ! { run(); @@ -669,30 +669,30 @@ pub fn _start() -> ! { extern "C" {} // ARM targets need these symbols -#[no_mangle] +#[unsafe(no_mangle)] pub fn __aeabi_unwind_cpp_pr0() {} -#[no_mangle] +#[unsafe(no_mangle)] pub fn __aeabi_unwind_cpp_pr1() {} #[cfg(not(any(windows, target_os = "cygwin")))] #[allow(non_snake_case)] -#[no_mangle] +#[unsafe(no_mangle)] pub fn _Unwind_Resume() {} #[cfg(not(any(windows, target_os = "cygwin")))] #[lang = "eh_personality"] -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn eh_personality() {} #[cfg(any(all(windows, target_env = "gnu"), target_os = "cygwin"))] mod mingw_unwinding { - #[no_mangle] + #[unsafe(no_mangle)] pub fn rust_eh_personality() {} - #[no_mangle] + #[unsafe(no_mangle)] pub fn rust_eh_unwind_resume() {} - #[no_mangle] + #[unsafe(no_mangle)] pub fn rust_eh_register_frames() {} - #[no_mangle] + #[unsafe(no_mangle)] pub fn rust_eh_unregister_frames() {} } diff --git a/compiler-builtins/src/arm.rs b/compiler-builtins/src/arm.rs index 7859b5120..a9107e3cd 100644 --- a/compiler-builtins/src/arm.rs +++ b/compiler-builtins/src/arm.rs @@ -76,90 +76,205 @@ intrinsics! 
{ ); } - // FIXME: The `*4` and `*8` variants should be defined as aliases. + // FIXME(arm): The `*4` and `*8` variants should be defined as aliases. + /// `memcpy` provided with the `aapcs` ABI. + /// + /// # Safety + /// + /// Usual `memcpy` requirements apply. #[cfg(not(target_vendor = "apple"))] - pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) { - crate::mem::memcpy(dest, src, n); + pub unsafe extern "aapcs" fn __aeabi_memcpy(dst: *mut u8, src: *const u8, n: usize) { + // SAFETY: memcpy preconditions apply. + unsafe { crate::mem::memcpy(dst, src, n) }; } + /// `memcpy` for 4-byte alignment. + /// + /// # Safety + /// + /// Usual `memcpy` requirements apply. Additionally, `dest` and `src` must be aligned to + /// four bytes. #[cfg(not(target_vendor = "apple"))] - pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) { + pub unsafe extern "aapcs" fn __aeabi_memcpy4(dst: *mut u8, src: *const u8, n: usize) { // We are guaranteed 4-alignment, so accessing at u32 is okay. - let mut dest = dest as *mut u32; - let mut src = src as *mut u32; + let mut dst = dst.cast::(); + let mut src = src.cast::(); + debug_assert!(dst.is_aligned()); + debug_assert!(src.is_aligned()); let mut n = n; while n >= 4 { - *dest = *src; - dest = dest.offset(1); - src = src.offset(1); + // SAFETY: `dst` and `src` are both valid for at least 4 bytes, from + // `memcpy` preconditions and the loop guard. + unsafe { *dst = *src }; + + // FIXME(addr): if we can make this end-of-address-space safe without losing + // performance, we may want to consider that. + // SAFETY: memcpy is not expected to work at the end of the address space + unsafe { + dst = dst.offset(1); + src = src.offset(1); + } + n -= 4; } - __aeabi_memcpy(dest as *mut u8, src as *const u8, n); + // SAFETY: `dst` and `src` will still be valid for `n` bytes + unsafe { __aeabi_memcpy(dst.cast::(), src.cast::(), n) }; } + /// `memcpy` for 8-byte alignment. 
+ /// + /// # Safety + /// + /// Usual `memcpy` requirements apply. Additionally, `dst` and `src` must be aligned to + /// eight bytes. #[cfg(not(target_vendor = "apple"))] - pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) { - __aeabi_memcpy4(dest, src, n); + pub unsafe extern "aapcs" fn __aeabi_memcpy8(dst: *mut u8, src: *const u8, n: usize) { + debug_assert!(dst.addr() & 7 == 0); + debug_assert!(src.addr() & 7 == 0); + + // SAFETY: memcpy preconditions apply, less strict alignment. + unsafe { __aeabi_memcpy4(dst, src, n) }; } + /// `memmove` provided with the `aapcs` ABI. + /// + /// # Safety + /// + /// Usual `memmove` requirements apply. #[cfg(not(target_vendor = "apple"))] - pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) { - crate::mem::memmove(dest, src, n); + pub unsafe extern "aapcs" fn __aeabi_memmove(dst: *mut u8, src: *const u8, n: usize) { + // SAFETY: memmove preconditions apply. + unsafe { crate::mem::memmove(dst, src, n) }; } + /// `memmove` for 4-byte alignment. + /// + /// # Safety + /// + /// Usual `memmove` requirements apply. Additionally, `dst` and `src` must be aligned to + /// four bytes. #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))] - pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) { - __aeabi_memmove(dest, src, n); + pub unsafe extern "aapcs" fn __aeabi_memmove4(dst: *mut u8, src: *const u8, n: usize) { + debug_assert!(dst.addr() & 3 == 0); + debug_assert!(src.addr() & 3 == 0); + + // SAFETY: same preconditions, less strict alignment. + unsafe { __aeabi_memmove(dst, src, n) }; } + /// `memmove` for 8-byte alignment. + /// + /// # Safety + /// + /// Usual `memmove` requirements apply. Additionally, `dst` and `src` must be aligned to + /// eight bytes.
#[cfg(not(any(target_vendor = "apple", target_env = "msvc")))] - pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) { - __aeabi_memmove(dest, src, n); + pub unsafe extern "aapcs" fn __aeabi_memmove8(dst: *mut u8, src: *const u8, n: usize) { + debug_assert!(dst.addr() & 7 == 0); + debug_assert!(src.addr() & 7 == 0); + + // SAFETY: memmove preconditions apply, less strict alignment. + unsafe { __aeabi_memmove(dst, src, n) }; } + /// `memset` provided with the `aapcs` ABI. + /// + /// # Safety + /// + /// Usual `memset` requirements apply. #[cfg(not(target_vendor = "apple"))] - pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { + pub unsafe extern "aapcs" fn __aeabi_memset(dst: *mut u8, n: usize, c: i32) { // Note the different argument order - crate::mem::memset(dest, c, n); + // SAFETY: memset preconditions apply. + unsafe { crate::mem::memset(dst, c, n) }; } + /// `memset` for 4-byte alignment. + /// + /// # Safety + /// + /// Usual `memset` requirements apply. Additionally, `dst` must be aligned to + /// four bytes. #[cfg(not(target_vendor = "apple"))] - pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) { - let mut dest = dest as *mut u32; + pub unsafe extern "aapcs" fn __aeabi_memset4(dst: *mut u8, n: usize, c: i32) { + let mut dst = dst.cast::(); + debug_assert!(dst.is_aligned()); let mut n = n; let byte = (c as u32) & 0xff; let c = (byte << 24) | (byte << 16) | (byte << 8) | byte; while n >= 4 { - *dest = c; - dest = dest.offset(1); + // SAFETY: `dst` is valid for at least 4 bytes, from `memset` preconditions and + // the loop guard. + unsafe { *dst = c }; + + // FIXME(addr): if we can make this end-of-address-space safe without losing + // performance, we may want to consider that.
+ // SAFETY: memset is not expected to work at the end of the address space + unsafe { + dst = dst.offset(1); + } n -= 4; } - __aeabi_memset(dest as *mut u8, n, byte as i32); + // SAFETY: `dst` will still be valid for `n` bytes + unsafe { __aeabi_memset(dst.cast::(), n, byte as i32) }; } + /// `memset` for 8-byte alignment. + /// + /// # Safety + /// + /// Usual `memset` requirements apply. Additionally, `dst` must be aligned to + /// eight bytes. #[cfg(not(target_vendor = "apple"))] - pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) { - __aeabi_memset4(dest, n, c); + pub unsafe extern "aapcs" fn __aeabi_memset8(dst: *mut u8, n: usize, c: i32) { + debug_assert!(dst.addr() & 7 == 0); + + // SAFETY: memset preconditions apply, less strict alignment. + unsafe { __aeabi_memset4(dst, n, c) }; } + /// `memclr` provided with the `aapcs` ABI. + /// + /// # Safety + /// + /// Usual `memclr` requirements apply. #[cfg(not(target_vendor = "apple"))] - pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) { - __aeabi_memset(dest, n, 0); + pub unsafe extern "aapcs" fn __aeabi_memclr(dst: *mut u8, n: usize) { + // SAFETY: memclr preconditions apply. + unsafe { __aeabi_memset(dst, n, 0) }; } + /// `memclr` for 4-byte alignment. + /// + /// # Safety + /// + /// Usual `memclr` requirements apply. Additionally, `dst` must be aligned to + /// four bytes. #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))] - pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) { - __aeabi_memset4(dest, n, 0); + pub unsafe extern "aapcs" fn __aeabi_memclr4(dst: *mut u8, n: usize) { + debug_assert!(dst.addr() & 3 == 0); + + // SAFETY: memclr preconditions apply, less strict alignment. + unsafe { __aeabi_memset4(dst, n, 0) }; } + /// `memclr` for 8-byte alignment. + /// + /// # Safety + /// + /// Usual `memclr` requirements apply.
Additionally, `dst` and `src` must be aligned to + /// eight bytes. #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))] - pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { - __aeabi_memset4(dest, n, 0); + pub unsafe extern "aapcs" fn __aeabi_memclr8(dst: *mut u8, n: usize) { + debug_assert!(dst.addr() & 7 == 0); + + // SAFETY: memclr preconditions apply, less strict alignment. + unsafe { __aeabi_memset4(dst, n, 0) }; } } diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs index 7523a00cf..6a6b28067 100644 --- a/compiler-builtins/src/lib.rs +++ b/compiler-builtins/src/lib.rs @@ -22,6 +22,9 @@ #![allow(clippy::manual_swap)] // Support compiling on both stage0 and stage1 which may differ in supported stable features. #![allow(stable_features)] +// By default, disallow this as it is forbidden in edition 2024. There is a lot of unsafe code to +// be migrated, however, so exceptions exist. +#![warn(unsafe_op_in_unsafe_fn)] // We disable #[no_mangle] for tests so that we can verify the test results // against the native compiler-rt implementations of the builtins. diff --git a/compiler-builtins/src/macros.rs b/compiler-builtins/src/macros.rs index 4fa53656e..dbf715534 100644 --- a/compiler-builtins/src/macros.rs +++ b/compiler-builtins/src/macros.rs @@ -256,7 +256,7 @@ macro_rules! intrinsics { #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"), not(feature = "mangled-names")))] mod $name { - #[no_mangle] + #[unsafe(no_mangle)] #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] $(#[$($attr)*])* extern $abi fn $name( $($argname: u16),* ) $(-> $ret)? { @@ -292,7 +292,7 @@ macro_rules! 
intrinsics { #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"), not(feature = "mangled-names")))] mod $name { - #[no_mangle] + #[unsafe(no_mangle)] #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] $(#[$($attr)*])* extern $abi fn $name( $($argname: $ty),* ) -> u16 { @@ -333,7 +333,7 @@ macro_rules! intrinsics { #[cfg(all(target_arch = "arm", not(feature = "mangled-names")))] mod $name { - #[no_mangle] + #[unsafe(no_mangle)] #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] $(#[$($attr)*])* extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { @@ -343,7 +343,7 @@ macro_rules! intrinsics { #[cfg(all(target_arch = "arm", not(feature = "mangled-names")))] mod $alias { - #[no_mangle] + #[unsafe(no_mangle)] #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] $(#[$($attr)*])* extern "aapcs" fn $alias( $($argname: $ty),* ) $(-> $ret)? { @@ -410,7 +410,7 @@ macro_rules! intrinsics { #[cfg(all(feature = "mem", not(feature = "mangled-names")))] mod $name { $(#[$($attr)*])* - #[no_mangle] + #[unsafe(no_mangle)] #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) @@ -485,10 +485,11 @@ macro_rules! intrinsics { #[cfg(not(feature = "mangled-names"))] mod $name { $(#[$($attr)*])* - #[no_mangle] + #[unsafe(no_mangle)] #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] $(unsafe $($empty)?)? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { - super::$name($($argname),*) + // SAFETY: same preconditions. + $(unsafe $($empty)?)? 
{ super::$name($($argname),*) } } } diff --git a/compiler-builtins/src/mem/mod.rs b/compiler-builtins/src/mem/mod.rs index ec160039d..6828f3804 100644 --- a/compiler-builtins/src/mem/mod.rs +++ b/compiler-builtins/src/mem/mod.rs @@ -1,5 +1,7 @@ // Trying to satisfy clippy here is hopeless #![allow(clippy::style)] +// FIXME(e2024): this eventually needs to be removed. +#![allow(unsafe_op_in_unsafe_fn)] #[allow(warnings)] #[cfg(target_pointer_width = "16")] diff --git a/compiler-builtins/src/x86_64.rs b/compiler-builtins/src/x86_64.rs index af67e66e2..fc1190f79 100644 --- a/compiler-builtins/src/x86_64.rs +++ b/compiler-builtins/src/x86_64.rs @@ -44,7 +44,7 @@ intrinsics! { // HACK(https://github.com/rust-lang/rust/issues/62785): x86_64-unknown-uefi needs special LLVM // support unless we emit the _fltused mod _fltused { - #[no_mangle] + #[unsafe(no_mangle)] #[used] #[cfg(target_os = "uefi")] static _fltused: i32 = 0; From 99b4c195918149b632a061cf1ef56c313b13b396 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 29 Apr 2025 21:04:30 +0000 Subject: [PATCH 1333/1459] Resolve `unnecessary_transmutes` lints These appeared in a later nightly. In compiler-builtins we can apply the suggestion, but in `libm` we need to ignore them since `fx::from_bits` is not `const` at the MSRV. `clippy::uninlined_format_args` also seems to have gotten stricter, so fix those here. 
--- builtins-test/tests/misc.rs | 30 +++++++++++---------------- builtins-test/tests/mul.rs | 22 +++++++------------- compiler-builtins/build.rs | 9 +++----- compiler-builtins/src/mem/impls.rs | 2 +- libm/src/math/pow.rs | 11 ++-------- libm/src/math/support/float_traits.rs | 6 ++++++ 6 files changed, 32 insertions(+), 48 deletions(-) diff --git a/builtins-test/tests/misc.rs b/builtins-test/tests/misc.rs index b8c75c026..64a9d56f3 100644 --- a/builtins-test/tests/misc.rs +++ b/builtins-test/tests/misc.rs @@ -77,16 +77,13 @@ fn leading_zeros() { let lz1 = leading_zeros_default(x); let lz2 = leading_zeros_riscv(x); if lz0 != lz { - panic!("__clzsi2({}): std: {}, builtins: {}", x, lz, lz0); + panic!("__clzsi2({x}): std: {lz}, builtins: {lz0}"); } if lz1 != lz { - panic!( - "leading_zeros_default({}): std: {}, builtins: {}", - x, lz, lz1 - ); + panic!("leading_zeros_default({x}): std: {lz}, builtins: {lz1}"); } if lz2 != lz { - panic!("leading_zeros_riscv({}): std: {}, builtins: {}", x, lz, lz2); + panic!("leading_zeros_riscv({x}): std: {lz}, builtins: {lz2}"); } }); } @@ -102,16 +99,13 @@ fn leading_zeros() { let lz1 = leading_zeros_default(x); let lz2 = leading_zeros_riscv(x); if lz0 != lz { - panic!("__clzdi2({}): std: {}, builtins: {}", x, lz, lz0); + panic!("__clzdi2({x}): std: {lz}, builtins: {lz0}"); } if lz1 != lz { - panic!( - "leading_zeros_default({}): std: {}, builtins: {}", - x, lz, lz1 - ); + panic!("leading_zeros_default({x}): std: {lz}, builtins: {lz1}"); } if lz2 != lz { - panic!("leading_zeros_riscv({}): std: {}, builtins: {}", x, lz, lz2); + panic!("leading_zeros_riscv({x}): std: {lz}, builtins: {lz2}"); } }); } @@ -125,7 +119,7 @@ fn leading_zeros() { let lz = x.leading_zeros() as usize; let lz0 = __clzti2(x); if lz0 != lz { - panic!("__clzti2({}): std: {}, builtins: {}", x, lz, lz0); + panic!("__clzti2({x}): std: {lz}, builtins: {lz0}"); } }); } @@ -142,10 +136,10 @@ fn trailing_zeros() { let tz0 = __ctzsi2(x); let tz1 = trailing_zeros(x); if tz0 
!= tz { - panic!("__ctzsi2({}): std: {}, builtins: {}", x, tz, tz0); + panic!("__ctzsi2({x}): std: {tz}, builtins: {tz0}"); } if tz1 != tz { - panic!("trailing_zeros({}): std: {}, builtins: {}", x, tz, tz1); + panic!("trailing_zeros({x}): std: {tz}, builtins: {tz1}"); } }); fuzz(N, |x: u64| { @@ -156,10 +150,10 @@ fn trailing_zeros() { let tz0 = __ctzdi2(x); let tz1 = trailing_zeros(x); if tz0 != tz { - panic!("__ctzdi2({}): std: {}, builtins: {}", x, tz, tz0); + panic!("__ctzdi2({x}): std: {tz}, builtins: {tz0}"); } if tz1 != tz { - panic!("trailing_zeros({}): std: {}, builtins: {}", x, tz, tz1); + panic!("trailing_zeros({x}): std: {tz}, builtins: {tz1}"); } }); fuzz(N, |x: u128| { @@ -169,7 +163,7 @@ fn trailing_zeros() { let tz = x.trailing_zeros() as usize; let tz0 = __ctzti2(x); if tz0 != tz { - panic!("__ctzti2({}): std: {}, builtins: {}", x, tz, tz0); + panic!("__ctzti2({x}): std: {tz}, builtins: {tz0}"); } }); } diff --git a/builtins-test/tests/mul.rs b/builtins-test/tests/mul.rs index 198cacb34..58bc9ab4a 100644 --- a/builtins-test/tests/mul.rs +++ b/builtins-test/tests/mul.rs @@ -18,8 +18,8 @@ mod int_mul { let mul1: $i = $fn(x, y); if mul0 != mul1 { panic!( - "{}({}, {}): std: {}, builtins: {}", - stringify!($fn), x, y, mul0, mul1 + "{func}({x}, {y}): std: {mul0}, builtins: {mul1}", + func = stringify!($fn), ); } }); @@ -52,8 +52,8 @@ mod int_overflowing_mul { let o1 = o1 != 0; if mul0 != mul1 || o0 != o1 { panic!( - "{}({}, {}): std: ({}, {}), builtins: ({}, {})", - stringify!($fn), x, y, mul0, o0, mul1, o1 + "{func}({x}, {y}): std: ({mul0}, {o0}), builtins: ({mul1}, {o1})", + func = stringify!($fn), ); } }); @@ -77,20 +77,14 @@ mod int_overflowing_mul { let (mul0, o0) = x.overflowing_mul(y); let mul1 = __rust_u128_mulo(x, y, &mut o1); if mul0 != mul1 || i32::from(o0) != o1 { - panic!( - "__rust_u128_mulo({}, {}): std: ({}, {}), builtins: ({}, {})", - x, y, mul0, o0, mul1, o1 - ); + panic!("__rust_u128_mulo({x}, {y}): std: ({mul0}, {o0}), builtins: 
({mul1}, {o1})",); } let x = x as i128; let y = y as i128; let (mul0, o0) = x.overflowing_mul(y); let mul1 = __rust_i128_mulo(x, y, &mut o1); if mul0 != mul1 || i32::from(o0) != o1 { - panic!( - "__rust_i128_mulo({}, {}): std: ({}, {}), builtins: ({}, {})", - x, y, mul0, o0, mul1, o1 - ); + panic!("__rust_i128_mulo({x}, {y}): std: ({mul0}, {o0}), builtins: ({mul1}, {o1})",); } }); } @@ -109,8 +103,8 @@ macro_rules! float_mul { let mul1: $f = $fn(x, y); if !Float::eq_repr(mul0, mul1) { panic!( - "{}({:?}, {:?}): std: {:?}, builtins: {:?}", - stringify!($fn), x, y, mul0, mul1 + "{func}({x:?}, {y:?}): std: {mul0:?}, builtins: {mul1:?}", + func = stringify!($fn), ); } }); diff --git a/compiler-builtins/build.rs b/compiler-builtins/build.rs index 04369a4aa..90d98ec7c 100644 --- a/compiler-builtins/build.rs +++ b/compiler-builtins/build.rs @@ -137,7 +137,7 @@ fn aarch64_symbol(ordering: Ordering) -> &'static str { Ordering::Acquire => "acq", Ordering::Release => "rel", Ordering::AcqRel => "acq_rel", - _ => panic!("unknown symbol for {:?}", ordering), + _ => panic!("unknown symbol for {ordering:?}"), } } @@ -229,7 +229,7 @@ fn configure_check_cfg() { for op_size in op_sizes { for ordering in ["relax", "acq", "rel", "acq_rel"] { - aarch_atomic.push(format!("__aarch64_{}{}_{}", aarch_op, op_size, ordering)); + aarch_atomic.push(format!("__aarch64_{aarch_op}{op_size}_{ordering}")); } } } @@ -239,10 +239,7 @@ fn configure_check_cfg() { .copied() .chain(aarch_atomic.iter().map(|s| s.as_str())) { - println!( - "cargo::rustc-check-cfg=cfg({}, values(\"optimized-c\"))", - fn_name - ); + println!("cargo::rustc-check-cfg=cfg({fn_name}, values(\"optimized-c\"))",); } // Rustc is unaware of sparc target features, but this does show up from diff --git a/compiler-builtins/src/mem/impls.rs b/compiler-builtins/src/mem/impls.rs index dc12d6996..14a478748 100644 --- a/compiler-builtins/src/mem/impls.rs +++ b/compiler-builtins/src/mem/impls.rs @@ -38,7 +38,7 @@ unsafe fn 
read_usize_unaligned(x: *const usize) -> usize { // Do not use `core::ptr::read_unaligned` here, since it calls `copy_nonoverlapping` which // is translated to memcpy in LLVM. let x_read = (x as *const [u8; core::mem::size_of::()]).read(); - core::mem::transmute(x_read) + usize::from_ne_bytes(x_read) } /// Loads a `T`-sized chunk from `src` into `dst` at offset `offset`, if that does not exceed diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 7e7d049b9..94ae31cf0 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -452,11 +452,7 @@ mod tests { } else { pow(base, exponent) == expected }, - "{} ** {} was {} instead of {}", - base, - exponent, - res, - expected + "{base} ** {exponent} was {res} instead of {expected}", ); } @@ -486,10 +482,7 @@ mod tests { } else { exp == res }, - "test for {} was {} instead of {}", - val, - res, - exp + "test for {val} was {res} instead of {exp}", ); }) }); diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 8094a7b84..4c866ef10 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -1,3 +1,5 @@ +#![allow(unknown_lints)] // FIXME(msrv) we shouldn't need this + use core::{fmt, mem, ops}; use super::int_traits::{CastFrom, Int, MinInt}; @@ -344,24 +346,28 @@ float_impl!( /* FIXME(msrv): vendor some things that are not const stable at our MSRV */ /// `f32::from_bits` +#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust pub const fn f32_from_bits(bits: u32) -> f32 { // SAFETY: POD cast with no preconditions unsafe { mem::transmute::(bits) } } /// `f32::to_bits` +#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust pub const fn f32_to_bits(x: f32) -> u32 { // SAFETY: POD cast with no preconditions unsafe { mem::transmute::(x) } } /// `f64::from_bits` +#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust pub const fn f64_from_bits(bits: u64) -> f64 { // SAFETY: 
POD cast with no preconditions unsafe { mem::transmute::(bits) } } /// `f64::to_bits` +#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust pub const fn f64_to_bits(x: f64) -> u64 { // SAFETY: POD cast with no preconditions unsafe { mem::transmute::(x) } From 91963f59b3d2b03e947707da15ee4aa19e3d214d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 29 Apr 2025 20:50:58 +0000 Subject: [PATCH 1334/1459] Move `fma` implementations to `mod generic` This will not build correctly, the move is done as a separate step from the rest of refactoring so git's history is cleaner. --- libm/src/math/{ => generic}/fma.rs | 0 libm/src/math/{ => generic}/fma_wide.rs | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename libm/src/math/{ => generic}/fma.rs (100%) rename libm/src/math/{ => generic}/fma_wide.rs (100%) diff --git a/libm/src/math/fma.rs b/libm/src/math/generic/fma.rs similarity index 100% rename from libm/src/math/fma.rs rename to libm/src/math/generic/fma.rs diff --git a/libm/src/math/fma_wide.rs b/libm/src/math/generic/fma_wide.rs similarity index 100% rename from libm/src/math/fma_wide.rs rename to libm/src/math/generic/fma_wide.rs From f456aa8baf0b108208332dc4bed63b6e70639b67 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 29 Apr 2025 20:52:54 +0000 Subject: [PATCH 1335/1459] Refactor the fma modules Move implementations to `generic/` like the other functions. This also allows us to combine the `fma` and `fma_wide` modules. 
--- etc/function-definitions.json | 2 +- libm/src/math/fma.rs | 165 ++++++++++++++++++++++++++++++ libm/src/math/generic/fma.rs | 133 +----------------------- libm/src/math/generic/fma_wide.rs | 44 +------- libm/src/math/generic/mod.rs | 4 + libm/src/math/mod.rs | 6 +- 6 files changed, 179 insertions(+), 175 deletions(-) create mode 100644 libm/src/math/fma.rs diff --git a/etc/function-definitions.json b/etc/function-definitions.json index 3e33343c4..9e5774eaf 100644 --- a/etc/function-definitions.json +++ b/etc/function-definitions.json @@ -350,7 +350,7 @@ "fmaf": { "sources": [ "libm/src/math/arch/aarch64.rs", - "libm/src/math/fma_wide.rs" + "libm/src/math/fma.rs" ], "type": "f32" }, diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs new file mode 100644 index 000000000..78f0f8992 --- /dev/null +++ b/libm/src/math/fma.rs @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/fma.c, fmaf.c Ported to generic Rust algorithm in 2025, TG. */ + +use super::generic; +use crate::support::Round; + +// Placeholder so we can have `fmaf16` in the `Float` trait. +#[allow(unused)] +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 { + unimplemented!() +} + +/// Floating multiply add (f32) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { + select_implementation! { + name: fmaf, + use_arch: all(target_arch = "aarch64", target_feature = "neon"), + args: x, y, z, + } + + generic::fma_wide_round(x, y, z, Round::Nearest).val +} + +/// Fused multiply add (f64) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fma(x: f64, y: f64, z: f64) -> f64 { + select_implementation! 
{ + name: fma, + use_arch: all(target_arch = "aarch64", target_feature = "neon"), + args: x, y, z, + } + + generic::fma_round(x, y, z, Round::Nearest).val +} + +/// Fused multiply add (f128) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 { + generic::fma_round(x, y, z, Round::Nearest).val +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{CastFrom, CastInto, Float, FpResult, HInt, MinInt, Round, Status}; + + /// Test the generic `fma_round` algorithm for a given float. + fn spec_test(f: impl Fn(F, F, F) -> F) + where + F: Float, + F: CastFrom, + F: CastFrom, + F::Int: HInt, + u32: CastInto, + { + let x = F::from_bits(F::Int::ONE); + let y = F::from_bits(F::Int::ONE); + let z = F::ZERO; + + // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of + // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the + // exact result" + assert_biteq!(f(x, y, z), F::ZERO); + assert_biteq!(f(x, -y, z), F::NEG_ZERO); + assert_biteq!(f(-x, y, z), F::NEG_ZERO); + assert_biteq!(f(-x, -y, z), F::ZERO); + } + + #[test] + fn spec_test_f32() { + spec_test::(fmaf); + + // Also do a small check that the non-widening version works for f32 (this should ideally + // get tested some more). + spec_test::(|x, y, z| generic::fma_round(x, y, z, Round::Nearest).val); + } + + #[test] + fn spec_test_f64() { + spec_test::(fma); + + let expect_underflow = [ + ( + hf64!("0x1.0p-1070"), + hf64!("0x1.0p-1070"), + hf64!("0x1.ffffffffffffp-1023"), + hf64!("0x0.ffffffffffff8p-1022"), + ), + ( + // FIXME: we raise underflow but this should only be inexact (based on C and + // `rustc_apfloat`). 
+ hf64!("0x1.0p-1070"), + hf64!("0x1.0p-1070"), + hf64!("-0x1.0p-1022"), + hf64!("-0x1.0p-1022"), + ), + ]; + + for (x, y, z, res) in expect_underflow { + let FpResult { val, status } = generic::fma_round(x, y, z, Round::Nearest); + assert_biteq!(val, res); + assert_eq!(status, Status::UNDERFLOW); + } + } + + #[test] + #[cfg(f128_enabled)] + fn spec_test_f128() { + spec_test::(fmaf128); + } + + #[test] + fn issue_263() { + let a = f32::from_bits(1266679807); + let b = f32::from_bits(1300234242); + let c = f32::from_bits(1115553792); + let expected = f32::from_bits(1501560833); + assert_eq!(fmaf(a, b, c), expected); + } + + #[test] + fn fma_segfault() { + // These two inputs cause fma to segfault on release due to overflow: + assert_eq!( + fma( + -0.0000000000000002220446049250313, + -0.0000000000000002220446049250313, + -0.0000000000000002220446049250313 + ), + -0.00000000000000022204460492503126, + ); + + let result = fma(-0.992, -0.992, -0.992); + //force rounding to storage format on x87 to prevent superious errors. + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let result = force_eval!(result); + assert_eq!(result, -0.007936000000000007,); + } + + #[test] + fn fma_sbb() { + assert_eq!( + fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), + -3991680619069439e277 + ); + } + + #[test] + fn fma_underflow() { + assert_eq!( + fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), + 0.0, + ); + } +} diff --git a/libm/src/math/generic/fma.rs b/libm/src/math/generic/fma.rs index 8856e63f5..aaf459d1b 100644 --- a/libm/src/math/generic/fma.rs +++ b/libm/src/math/generic/fma.rs @@ -1,31 +1,9 @@ /* SPDX-License-Identifier: MIT */ /* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */ -use super::support::{DInt, FpResult, HInt, IntTy, Round, Status}; -use super::{CastFrom, CastInto, Float, Int, MinInt}; - -/// Fused multiply add (f64) -/// -/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. 
calculated with infinite precision). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fma(x: f64, y: f64, z: f64) -> f64 { - select_implementation! { - name: fma, - use_arch: all(target_arch = "aarch64", target_feature = "neon"), - args: x, y, z, - } - - fma_round(x, y, z, Round::Nearest).val -} - -/// Fused multiply add (f128) -/// -/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). -#[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 { - fma_round(x, y, z, Round::Nearest).val -} +use crate::support::{ + CastFrom, CastInto, DInt, Float, FpResult, HInt, Int, IntTy, MinInt, Round, Status, +}; /// Fused multiply-add that works when there is not a larger float size available. Computes /// `(x * y) + z`. @@ -234,7 +212,7 @@ where } // Use our exponent to scale the final value. - FpResult::new(super::generic::scalbn(r, e), status) + FpResult::new(super::scalbn(r, e), status) } /// Representation of `F` that has handled subnormals. @@ -298,106 +276,3 @@ impl Norm { self.e > Self::ZERO_INF_NAN as i32 } } - -#[cfg(test)] -mod tests { - use super::*; - - /// Test the generic `fma_round` algorithm for a given float. 
- fn spec_test() - where - F: Float, - F: CastFrom, - F: CastFrom, - F::Int: HInt, - u32: CastInto, - { - let x = F::from_bits(F::Int::ONE); - let y = F::from_bits(F::Int::ONE); - let z = F::ZERO; - - let fma = |x, y, z| fma_round(x, y, z, Round::Nearest).val; - - // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of - // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the - // exact result" - assert_biteq!(fma(x, y, z), F::ZERO); - assert_biteq!(fma(x, -y, z), F::NEG_ZERO); - assert_biteq!(fma(-x, y, z), F::NEG_ZERO); - assert_biteq!(fma(-x, -y, z), F::ZERO); - } - - #[test] - fn spec_test_f32() { - spec_test::(); - } - - #[test] - fn spec_test_f64() { - spec_test::(); - - let expect_underflow = [ - ( - hf64!("0x1.0p-1070"), - hf64!("0x1.0p-1070"), - hf64!("0x1.ffffffffffffp-1023"), - hf64!("0x0.ffffffffffff8p-1022"), - ), - ( - // FIXME: we raise underflow but this should only be inexact (based on C and - // `rustc_apfloat`). - hf64!("0x1.0p-1070"), - hf64!("0x1.0p-1070"), - hf64!("-0x1.0p-1022"), - hf64!("-0x1.0p-1022"), - ), - ]; - - for (x, y, z, res) in expect_underflow { - let FpResult { val, status } = fma_round(x, y, z, Round::Nearest); - assert_biteq!(val, res); - assert_eq!(status, Status::UNDERFLOW); - } - } - - #[test] - #[cfg(f128_enabled)] - fn spec_test_f128() { - spec_test::(); - } - - #[test] - fn fma_segfault() { - // These two inputs cause fma to segfault on release due to overflow: - assert_eq!( - fma( - -0.0000000000000002220446049250313, - -0.0000000000000002220446049250313, - -0.0000000000000002220446049250313 - ), - -0.00000000000000022204460492503126, - ); - - let result = fma(-0.992, -0.992, -0.992); - //force rounding to storage format on x87 to prevent superious errors. 
- #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let result = force_eval!(result); - assert_eq!(result, -0.007936000000000007,); - } - - #[test] - fn fma_sbb() { - assert_eq!( - fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), - -3991680619069439e277 - ); - } - - #[test] - fn fma_underflow() { - assert_eq!( - fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), - 0.0, - ); - } -} diff --git a/libm/src/math/generic/fma_wide.rs b/libm/src/math/generic/fma_wide.rs index f268c2f14..a2ef59d3e 100644 --- a/libm/src/math/generic/fma_wide.rs +++ b/libm/src/math/generic/fma_wide.rs @@ -1,30 +1,6 @@ -/* SPDX-License-Identifier: MIT */ -/* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. */ - -use super::support::{FpResult, IntTy, Round, Status}; -use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt}; - -// Placeholder so we can have `fmaf16` in the `Float` trait. -#[allow(unused)] -#[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 { - unimplemented!() -} - -/// Floating multiply add (f32) -/// -/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { - select_implementation! { - name: fmaf, - use_arch: all(target_arch = "aarch64", target_feature = "neon"), - args: x, y, z, - } - - fma_wide_round(x, y, z, Round::Nearest).val -} +use crate::support::{ + CastFrom, CastInto, DFloat, Float, FpResult, HFloat, IntTy, MinInt, Round, Status, +}; /// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`, /// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding. 
@@ -95,17 +71,3 @@ where FpResult::ok(B::from_bits(ui).narrow()) } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn issue_263() { - let a = f32::from_bits(1266679807); - let b = f32::from_bits(1300234242); - let c = f32::from_bits(1115553792); - let expected = f32::from_bits(1501560833); - assert_eq!(fmaf(a, b, c), expected); - } -} diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index 35846351a..9d497a03f 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -6,6 +6,8 @@ mod copysign; mod fabs; mod fdim; mod floor; +mod fma; +mod fma_wide; mod fmax; mod fmaximum; mod fmaximum_num; @@ -24,6 +26,8 @@ pub use copysign::copysign; pub use fabs::fabs; pub use fdim::fdim; pub use floor::floor; +pub use fma::fma_round; +pub use fma_wide::fma_wide_round; pub use fmax::fmax; pub use fmaximum::fmaximum; pub use fmaximum_num::fmaximum_num; diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 949c18b40..ce9b8fc58 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -159,7 +159,6 @@ mod fabs; mod fdim; mod floor; mod fma; -mod fma_wide; mod fmin_fmax; mod fminimum_fmaximum; mod fminimum_fmaximum_num; @@ -254,8 +253,7 @@ pub use self::expm1f::expm1f; pub use self::fabs::{fabs, fabsf}; pub use self::fdim::{fdim, fdimf}; pub use self::floor::{floor, floorf}; -pub use self::fma::fma; -pub use self::fma_wide::fmaf; +pub use self::fma::{fma, fmaf}; pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf}; pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf}; pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf}; @@ -336,7 +334,7 @@ cfg_if! 
{ // verify-sorted-end #[allow(unused_imports)] - pub(crate) use self::fma_wide::fmaf16; + pub(crate) use self::fma::fmaf16; } } From 7ccb126f88de3330eb6472036e269c8b73c0b94e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 1 May 2025 18:52:12 +0000 Subject: [PATCH 1336/1459] builtins-test: Remove `no_mangle` from `eh_personality` Rustc now mangles these symbols on its own, so `no_mangle` is rejected as an error. --- builtins-test-intrinsics/src/main.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/builtins-test-intrinsics/src/main.rs b/builtins-test-intrinsics/src/main.rs index 18f943eff..1fa7b0091 100644 --- a/builtins-test-intrinsics/src/main.rs +++ b/builtins-test-intrinsics/src/main.rs @@ -682,7 +682,6 @@ pub fn _Unwind_Resume() {} #[cfg(not(any(windows, target_os = "cygwin")))] #[lang = "eh_personality"] -#[unsafe(no_mangle)] pub extern "C" fn eh_personality() {} #[cfg(any(all(windows, target_env = "gnu"), target_os = "cygwin"))] From 725484ee6758e7a647df30c4661e356504e222e6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 2 May 2025 00:46:21 +0000 Subject: [PATCH 1337/1459] fmaf: Add a test case from a MinGW failure This is a known problem in the MinGW fmaf implementation, identified at [1]. Make sure our implementation passes this edge case. 
[1]: https://github.com/rust-lang/rust/issues/140515 --- libm-test/src/generate/case_list.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/libm-test/src/generate/case_list.rs b/libm-test/src/generate/case_list.rs index f1e6fcec3..43b28722f 100644 --- a/libm-test/src/generate/case_list.rs +++ b/libm-test/src/generate/case_list.rs @@ -269,7 +269,18 @@ fn fma_cases() -> Vec> { } fn fmaf_cases() -> Vec> { - vec![] + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Known rounding error for some implementations (notably MinGW) + ( + (-1.9369631e13f32, 2.1513551e-7, -1.7354427e-24), + Some(-4167095.8), + ), + ], + ); + v } #[cfg(f128_enabled)] From f83962ed9a1f850876860e04d552c5e43ed888f8 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 1 May 2025 18:41:05 +0000 Subject: [PATCH 1338/1459] update-api-list: Match subdirectories within arch --- etc/update-api-list.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/etc/update-api-list.py b/etc/update-api-list.py index 0770a8b20..28ff22f4c 100755 --- a/etc/update-api-list.py +++ b/etc/update-api-list.py @@ -123,7 +123,9 @@ def _init_defs(self, index: IndexTy) -> None: # A lot of the `arch` module is often configured out so doesn't show up in docs. Use # string matching as a fallback. - for fname in glob("libm/src/math/arch/**.rs", root_dir=ROOT_DIR): + for fname in glob( + "libm/src/math/arch/**/*.rs", root_dir=ROOT_DIR, recursive=True + ): contents = (ROOT_DIR.joinpath(fname)).read_text() for name in self.public_functions: From 6e4255a71258534d70167b8942c7bd8a5c0005dc Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 1 May 2025 17:49:56 +0000 Subject: [PATCH 1339/1459] Rename the i686 module to x86 This module is used for both i686 and x86-64. 
--- etc/function-definitions.json | 4 ++-- libm/src/math/arch/mod.rs | 4 ++-- libm/src/math/arch/{i686.rs => x86.rs} | 0 3 files changed, 4 insertions(+), 4 deletions(-) rename libm/src/math/arch/{i686.rs => x86.rs} (100%) diff --git a/etc/function-definitions.json b/etc/function-definitions.json index 9e5774eaf..691205ddf 100644 --- a/etc/function-definitions.json +++ b/etc/function-definitions.json @@ -932,8 +932,8 @@ "sqrt": { "sources": [ "libm/src/math/arch/aarch64.rs", - "libm/src/math/arch/i686.rs", "libm/src/math/arch/wasm32.rs", + "libm/src/math/arch/x86.rs", "libm/src/math/generic/sqrt.rs", "libm/src/math/sqrt.rs" ], @@ -942,8 +942,8 @@ "sqrtf": { "sources": [ "libm/src/math/arch/aarch64.rs", - "libm/src/math/arch/i686.rs", "libm/src/math/arch/wasm32.rs", + "libm/src/math/arch/x86.rs", "libm/src/math/generic/sqrt.rs", "libm/src/math/sqrt.rs" ], diff --git a/libm/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs index d9f2aad66..67352f90c 100644 --- a/libm/src/math/arch/mod.rs +++ b/libm/src/math/arch/mod.rs @@ -15,8 +15,8 @@ cfg_if! { ceil, ceilf, fabs, fabsf, floor, floorf, rint, rintf, sqrt, sqrtf, trunc, truncf, }; } else if #[cfg(target_feature = "sse2")] { - mod i686; - pub use i686::{sqrt, sqrtf}; + mod x86; + pub use x86::{sqrt, sqrtf}; } else if #[cfg(all( any(target_arch = "aarch64", target_arch = "arm64ec"), target_feature = "neon" diff --git a/libm/src/math/arch/i686.rs b/libm/src/math/arch/x86.rs similarity index 100% rename from libm/src/math/arch/i686.rs rename to libm/src/math/arch/x86.rs From a2f64407618e95bc8767d6dbabfa7ac0348f5bc8 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 29 Apr 2025 22:16:41 +0000 Subject: [PATCH 1340/1459] Use runtime feature detection for fma routines on x86 Get performance closer to the glibc implementations by adding assembly fma routines, with runtime feature detection so they are used even if not compiled with `+fma` (as the distributed standard library is often not). 
Glibc uses ifuncs, this implementation stores a function pointer in an atomic. Results of CPU flags are also cached in order to avoid repeating the startup time in calls to different functions. The feature detection code is a slightly simplified version of `std-detect`. Musl sources were used as a reference [1]. Fixes: https://github.com/rust-lang/rust/issues/140452 once synced [1]: https://github.com/bminor/musl/blob/c47ad25ea3b484e10326f933e927c0bc8cded3da/src/math/x32/fma.c --- etc/function-definitions.json | 2 + libm/src/math/arch/mod.rs | 2 +- libm/src/math/arch/x86.rs | 5 + libm/src/math/arch/x86/detect.rs | 229 ++++++++++++++++++++++++ libm/src/math/arch/x86/fma.rs | 134 ++++++++++++++ libm/src/math/fma.rs | 10 +- libm/src/math/support/feature_detect.rs | 206 +++++++++++++++++++++ libm/src/math/support/mod.rs | 3 + 8 files changed, 588 insertions(+), 3 deletions(-) create mode 100644 libm/src/math/arch/x86/detect.rs create mode 100644 libm/src/math/arch/x86/fma.rs create mode 100644 libm/src/math/support/feature_detect.rs diff --git a/etc/function-definitions.json b/etc/function-definitions.json index 691205ddf..4f796905b 100644 --- a/etc/function-definitions.json +++ b/etc/function-definitions.json @@ -343,6 +343,7 @@ "fma": { "sources": [ "libm/src/math/arch/aarch64.rs", + "libm/src/math/arch/x86/fma.rs", "libm/src/math/fma.rs" ], "type": "f64" @@ -350,6 +351,7 @@ "fmaf": { "sources": [ "libm/src/math/arch/aarch64.rs", + "libm/src/math/arch/x86/fma.rs", "libm/src/math/fma.rs" ], "type": "f32" diff --git a/libm/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs index 67352f90c..984ae7f31 100644 --- a/libm/src/math/arch/mod.rs +++ b/libm/src/math/arch/mod.rs @@ -16,7 +16,7 @@ cfg_if! 
{ }; } else if #[cfg(target_feature = "sse2")] { mod x86; - pub use x86::{sqrt, sqrtf}; + pub use x86::{sqrt, sqrtf, fma, fmaf}; } else if #[cfg(all( any(target_arch = "aarch64", target_arch = "arm64ec"), target_feature = "neon" diff --git a/libm/src/math/arch/x86.rs b/libm/src/math/arch/x86.rs index 3e1d19bfa..454aa2850 100644 --- a/libm/src/math/arch/x86.rs +++ b/libm/src/math/arch/x86.rs @@ -1,5 +1,10 @@ //! Architecture-specific support for x86-32 and x86-64 with SSE2 +mod detect; +mod fma; + +pub use fma::{fma, fmaf}; + pub fn sqrtf(mut x: f32) -> f32 { // SAFETY: `sqrtss` is part of `sse2`, which this module is gated behind. It has no memory // access or side effects. diff --git a/libm/src/math/arch/x86/detect.rs b/libm/src/math/arch/x86/detect.rs new file mode 100644 index 000000000..71c3281dc --- /dev/null +++ b/libm/src/math/arch/x86/detect.rs @@ -0,0 +1,229 @@ +#[cfg(target_arch = "x86")] +use core::arch::x86::{__cpuid, __cpuid_count, _xgetbv, CpuidResult}; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::{__cpuid, __cpuid_count, _xgetbv, CpuidResult}; + +use crate::support::{Flags, get_or_init_flags_cache}; + +/// CPU features that get cached (doesn't correlate to anything on the CPU). +pub mod cpu_flags { + use crate::support::unique_masks; + + unique_masks! { + u32, + SSE3, + F16C, + SSE, + SSE2, + ERMSB, + MOVRS, + FMA, + FMA4, + AVX512FP16, + AVX512BF16, + } +} + +/// Get CPU features, loading from a cache if available. +pub fn get_cpu_features() -> Flags { + use core::sync::atomic::AtomicU32; + static CACHE: AtomicU32 = AtomicU32::new(0); + get_or_init_flags_cache(&CACHE, load_x86_features) +} + +/// Read from cpuid and translate to a `Flags` instance, using `cpu_flags`. +/// +/// Implementation is taken from [std-detect][std-detect]. 
+/// +/// [std-detect]: https://github.com/rust-lang/stdarch/blob/690b3a6334d482874163bd6fcef408e0518febe9/crates/std_detect/src/detect/os/x86.rs#L142 +fn load_x86_features() -> Flags { + let mut value = Flags::empty(); + + if cfg!(target_env = "sgx") { + // doesn't support this because it is untrusted data + return Flags::empty(); + } + + // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU + // has `cpuid` support. + + // 0. EAX = 0: Basic Information: + // - EAX returns the "Highest Function Parameter", that is, the maximum leaf + // value for subsequent calls of `cpuinfo` in range [0, 0x8000_0000]. + // - The vendor ID is stored in 12 u8 ascii chars, returned in EBX, EDX, and ECX + // (in that order) + let mut vendor_id = [0u8; 12]; + let max_basic_leaf; + unsafe { + let CpuidResult { eax, ebx, ecx, edx } = __cpuid(0); + max_basic_leaf = eax; + vendor_id[0..4].copy_from_slice(&ebx.to_ne_bytes()); + vendor_id[4..8].copy_from_slice(&edx.to_ne_bytes()); + vendor_id[8..12].copy_from_slice(&ecx.to_ne_bytes()); + } + + if max_basic_leaf < 1 { + // Earlier Intel 486, CPUID not implemented + return value; + } + + // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits"; + // Contains information about most x86 features. + let CpuidResult { ecx, edx, .. } = unsafe { __cpuid(0x0000_0001_u32) }; + let proc_info_ecx = Flags::from_bits(ecx); + let proc_info_edx = Flags::from_bits(edx); + + // EAX = 7: Queries "Extended Features"; + // Contains information about bmi,bmi2, and avx2 support. + let mut extended_features_ebx = Flags::empty(); + let mut extended_features_edx = Flags::empty(); + let mut extended_features_eax_leaf_1 = Flags::empty(); + if max_basic_leaf >= 7 { + let CpuidResult { ebx, edx, .. } = unsafe { __cpuid(0x0000_0007_u32) }; + extended_features_ebx = Flags::from_bits(ebx); + extended_features_edx = Flags::from_bits(edx); + + let CpuidResult { eax, .. 
} = unsafe { __cpuid_count(0x0000_0007_u32, 0x0000_0001_u32) }; + extended_features_eax_leaf_1 = Flags::from_bits(eax) + } + + // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported + // - EAX returns the max leaf value for extended information, that is, + // `cpuid` calls in range [0x8000_0000; u32::MAX]: + let extended_max_basic_leaf = unsafe { __cpuid(0x8000_0000_u32) }.eax; + + // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature Bits" + let mut extended_proc_info_ecx = Flags::empty(); + if extended_max_basic_leaf >= 1 { + let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) }; + extended_proc_info_ecx = Flags::from_bits(ecx); + } + + let mut enable = |regflags: Flags, regbit, flag| { + if regflags.test_nth(regbit) { + value.insert(flag); + } + }; + + enable(proc_info_ecx, 0, cpu_flags::SSE3); + enable(proc_info_ecx, 29, cpu_flags::F16C); + enable(proc_info_edx, 25, cpu_flags::SSE); + enable(proc_info_edx, 26, cpu_flags::SSE2); + enable(extended_features_ebx, 9, cpu_flags::ERMSB); + enable(extended_features_eax_leaf_1, 31, cpu_flags::MOVRS); + + // `XSAVE` and `AVX` support: + let cpu_xsave = proc_info_ecx.test_nth(26); + if cpu_xsave { + // 0. Here the CPU supports `XSAVE`. + + // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and + // supports saving the state of the AVX/AVX2 vector registers on + // context-switches, see: + // + // - [intel: is avx enabled?][is_avx_enabled], + // - [mozilla: sse.cpp][mozilla_sse_cpp]. + // + // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled + // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190 + let cpu_osxsave = proc_info_ecx.test_nth(27); + + if cpu_osxsave { + // 2. The OS must have signaled the CPU that it supports saving and + // restoring the: + // + // * SSE -> `XCR0.SSE[1]` + // * AVX -> `XCR0.AVX[2]` + // * AVX-512 -> `XCR0.AVX-512[7:5]`. 
+ // * AMX -> `XCR0.AMX[18:17]` + // + // by setting the corresponding bits of `XCR0` to `1`. + // + // This is safe because the CPU supports `xsave` and the OS has set `osxsave`. + let xcr0 = unsafe { _xgetbv(0) }; + // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`: + let os_avx_support = xcr0 & 6 == 6; + // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 0xe0`: + let os_avx512_support = xcr0 & 0xe0 == 0xe0; + + // Only if the OS and the CPU support saving/restoring the AVX + // registers we enable `xsave` support: + if os_avx_support { + // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED + // FEATURES" in the "Intel® 64 and IA-32 Architectures Software + // Developer’s Manual, Volume 1: Basic Architecture": + // + // "Software enables the XSAVE feature set by setting + // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4 + // instruction). If this bit is 0, execution of any of XGETBV, + // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV + // causes an invalid-opcode exception (#UD)" + + // FMA (uses 256-bit wide registers): + enable(proc_info_ecx, 12, cpu_flags::FMA); + + // For AVX-512 the OS also needs to support saving/restoring + // the extended state, only then we enable AVX-512 support: + if os_avx512_support { + enable(extended_features_edx, 23, cpu_flags::AVX512FP16); + enable(extended_features_eax_leaf_1, 5, cpu_flags::AVX512BF16); + } + } + } + } + + // As Hygon Dhyana originates from AMD technology and shares most of the architecture with + // AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series number + // (Family 18h). + // + // For CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD + // family 17h. + // + // Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf + // (AMD64 Architecture Programmer's Manual, Appendix E). 
+ // Related Hygon kernel patch can be found on + // http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn + if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" { + // These features are available on AMD arch CPUs: + enable(extended_proc_info_ecx, 16, cpu_flags::FMA4); + } + + value +} + +#[cfg(test)] +mod tests { + extern crate std; + use std::is_x86_feature_detected; + + use super::*; + + #[test] + fn check_matches_std() { + let features = get_cpu_features(); + for i in 0..cpu_flags::ALL.len() { + let flag = cpu_flags::ALL[i]; + let name = cpu_flags::NAMES[i]; + + let std_detected = match flag { + cpu_flags::SSE3 => is_x86_feature_detected!("sse3"), + cpu_flags::F16C => is_x86_feature_detected!("f16c"), + cpu_flags::SSE => is_x86_feature_detected!("sse"), + cpu_flags::SSE2 => is_x86_feature_detected!("sse2"), + cpu_flags::ERMSB => is_x86_feature_detected!("ermsb"), + cpu_flags::MOVRS => continue, // only very recent support in std + cpu_flags::FMA => is_x86_feature_detected!("fma"), + cpu_flags::FMA4 => continue, // not yet supported in std + cpu_flags::AVX512FP16 => is_x86_feature_detected!("avx512fp16"), + cpu_flags::AVX512BF16 => is_x86_feature_detected!("avx512bf16"), + _ => panic!("untested CPU flag {name}"), + }; + + assert_eq!( + std_detected, + features.contains(flag), + "different flag {name}. flags: {features:?}" + ); + } + } +} diff --git a/libm/src/math/arch/x86/fma.rs b/libm/src/math/arch/x86/fma.rs new file mode 100644 index 000000000..eb43f4696 --- /dev/null +++ b/libm/src/math/arch/x86/fma.rs @@ -0,0 +1,134 @@ +//! Use assembly fma if the `fma` or `fma4` feature is detected at runtime. + +use core::arch::asm; + +use super::super::super::generic; +use super::detect::{cpu_flags, get_cpu_features}; +use crate::support::{Round, select_once}; + +pub fn fma(x: f64, y: f64, z: f64) -> f64 { + select_once! 
{ + sig: fn(x: f64, y: f64, z: f64) -> f64, + init: || { + let features = get_cpu_features(); + if features.contains(cpu_flags::FMA) { + fma_with_fma + } else if features.contains(cpu_flags::FMA4) { + fma_with_fma4 + } else { + fma_fallback as Func + } + }, + // SAFETY: `fn_ptr` is the result of `init`, preconditions have been checked. + call: |fn_ptr: Func| unsafe { fn_ptr(x, y, z) }, + } +} + +pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { + select_once! { + sig: fn(x: f32, y: f32, z: f32) -> f32, + init: || { + let features = get_cpu_features(); + if features.contains(cpu_flags::FMA) { + fmaf_with_fma + } else if features.contains(cpu_flags::FMA4) { + fmaf_with_fma4 + } else { + fmaf_fallback as Func + } + }, + // SAFETY: `fn_ptr` is the result of `init`, preconditions have been checked. + call: |fn_ptr: Func| unsafe { fn_ptr(x, y, z) }, + } +} + +/// # Safety +/// +/// Must have +fma available. +unsafe fn fma_with_fma(mut x: f64, y: f64, z: f64) -> f64 { + debug_assert!(get_cpu_features().contains(cpu_flags::FMA)); + + // SAFETY: fma is asserted available by precondition, which provides the instruction. No + // memory access or side effects. + unsafe { + asm!( + "vfmadd213sd {x}, {y}, {z}", + x = inout(xmm_reg) x, + y = in(xmm_reg) y, + z = in(xmm_reg) z, + options(nostack, nomem, pure), + ); + } + x +} + +/// # Safety +/// +/// Must have +fma available. +unsafe fn fmaf_with_fma(mut x: f32, y: f32, z: f32) -> f32 { + debug_assert!(get_cpu_features().contains(cpu_flags::FMA)); + + // SAFETY: fma is asserted available by precondition, which provides the instruction. No + // memory access or side effects. + unsafe { + asm!( + "vfmadd213ss {x}, {y}, {z}", + x = inout(xmm_reg) x, + y = in(xmm_reg) y, + z = in(xmm_reg) z, + options(nostack, nomem, pure), + ); + } + x +} + +/// # Safety +/// +/// Must have +fma4 available. 
+unsafe fn fma_with_fma4(mut x: f64, y: f64, z: f64) -> f64 { + debug_assert!(get_cpu_features().contains(cpu_flags::FMA4)); + + // SAFETY: fma4 is asserted available by precondition, which provides the instruction. No + // memory access or side effects. + unsafe { + asm!( + "vfmaddsd {x}, {x}, {y}, {z}", + x = inout(xmm_reg) x, + y = in(xmm_reg) y, + z = in(xmm_reg) z, + options(nostack, nomem, pure), + ); + } + x +} + +/// # Safety +/// +/// Must have +fma4 available. +unsafe fn fmaf_with_fma4(mut x: f32, y: f32, z: f32) -> f32 { + debug_assert!(get_cpu_features().contains(cpu_flags::FMA4)); + + // SAFETY: fma4 is asserted available by precondition, which provides the instruction. No + // memory access or side effects. + unsafe { + asm!( + "vfmaddss {x}, {x}, {y}, {z}", + x = inout(xmm_reg) x, + y = in(xmm_reg) y, + z = in(xmm_reg) z, + options(nostack, nomem, pure), + ); + } + x +} + +// FIXME: the `select_implementation` macro should handle arch implementations that want +// to use the fallback, so we don't need to recreate the body. + +fn fma_fallback(x: f64, y: f64, z: f64) -> f64 { + generic::fma_round(x, y, z, Round::Nearest).val +} + +fn fmaf_fallback(x: f32, y: f32, z: f32) -> f32 { + generic::fma_wide_round(x, y, z, Round::Nearest).val +} diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index 78f0f8992..5bf473cfe 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -19,7 +19,10 @@ pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 { pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { select_implementation! { name: fmaf, - use_arch: all(target_arch = "aarch64", target_feature = "neon"), + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + target_feature = "sse2", + ), args: x, y, z, } @@ -33,7 +36,10 @@ pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { pub fn fma(x: f64, y: f64, z: f64) -> f64 { select_implementation! 
{ name: fma, - use_arch: all(target_arch = "aarch64", target_feature = "neon"), + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + target_feature = "sse2", + ), args: x, y, z, } diff --git a/libm/src/math/support/feature_detect.rs b/libm/src/math/support/feature_detect.rs new file mode 100644 index 000000000..cb669b073 --- /dev/null +++ b/libm/src/math/support/feature_detect.rs @@ -0,0 +1,206 @@ +//! Helpers for runtime target feature detection that are shared across architectures. + +use core::sync::atomic::{AtomicU32, Ordering}; + +/// Given a list of identifiers, assign each one a unique sequential single-bit mask. +#[allow(unused_macros)] +macro_rules! unique_masks { + ($ty:ty, $($name:ident,)+) => { + #[cfg(test)] + pub const ALL: &[$ty] = &[$($name),+]; + #[cfg(test)] + pub const NAMES: &[&str] = &[$(stringify!($name)),+]; + + unique_masks!(@one; $ty; 0; $($name,)+); + }; + // Matcher for a single value + (@one; $_ty:ty; $_idx:expr;) => {}; + (@one; $ty:ty; $shift:expr; $name:ident, $($tail:tt)*) => { + pub const $name: $ty = 1 << $shift; + // Ensure the top bit is not used since it stores initialized state. + const _: () = assert!($name != (1 << (<$ty>::BITS - 1))); + // Increment the shift and invoke the next + unique_masks!(@one; $ty; $shift + 1; $($tail)*); + }; +} + +/// Call `init` once to choose an implementation, then use it for the rest of the program. +/// +/// - `sig` is the function type. +/// - `init` is an expression called at startup that chooses an implementation and returns a +/// function pointer. +/// - `call` is an expression to call a function returned by `init`, encapsulating any safety +/// preconditions. +/// +/// The type `Func` is available in `init` and `call`. +/// +/// This is effectively our version of an ifunc without linker support. Note that `init` may be +/// called more than once until one completes. +#[allow(unused_macros)] // only used on some architectures +macro_rules! 
select_once { + ( + sig: fn($($arg:ident: $ArgTy:ty),*) -> $RetTy:ty, + init: $init:expr, + call: $call:expr, + ) => {{ + use core::mem; + use core::sync::atomic::{AtomicPtr, Ordering}; + + type Func = unsafe fn($($arg: $ArgTy),*) -> $RetTy; + + /// Stores a pointer that is immediately jumped to. By default it is an init function + /// that sets FUNC to something else. + static FUNC: AtomicPtr<()> = AtomicPtr::new((initializer as Func) as *mut ()); + + /// Run once to set the function that will be used for all subsequent calls. + fn initializer($($arg: $ArgTy),*) -> $RetTy { + // Select an implementation, ensuring a 'static lifetime. + let fn_ptr: Func = $init(); + FUNC.store(fn_ptr as *mut (), Ordering::Relaxed); + + // Forward the call to the selected function. + $call(fn_ptr) + } + + let raw: *mut () = FUNC.load(Ordering::Relaxed); + + // SAFETY: will only ever be `initializer` or another function pointer that has the + // 'static lifetime. + let fn_ptr: Func = unsafe { mem::transmute::<*mut (), Func>(raw) }; + + $call(fn_ptr) + }} +} + +pub(crate) use {select_once, unique_masks}; + +use crate::support::cold_path; + +/// Helper for working with bit flags, based on `bitflags`. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Flags(u32); + +#[allow(dead_code)] // only used on some architectures +impl Flags { + /// No bits set. + pub const fn empty() -> Self { + Self(0) + } + + /// Create with bits already set. + pub const fn from_bits(val: u32) -> Self { + Self(val) + } + + /// Get the integer representation. + pub fn bits(&self) -> u32 { + self.0 + } + + /// Set any bits in `mask`. + pub fn insert(&mut self, mask: u32) { + self.0 |= mask; + } + + /// Check whether the mask is set. + pub fn contains(&self, mask: u32) -> bool { + self.0 & mask == mask + } + + /// Check whether the nth bit is set. 
+ pub fn test_nth(&self, bit: u32) -> bool { + debug_assert!(bit < u32::BITS, "bit index out-of-bounds"); + self.0 & (1 << bit) != 0 + } +} + +/// Load flags from an atomic value. If the flags have not yet been initialized, call `init` +/// to do so. +/// +/// Note that `init` may run more than once. +#[allow(dead_code)] // only used on some architectures +pub fn get_or_init_flags_cache(cache: &AtomicU32, init: impl FnOnce() -> Flags) -> Flags { + // The top bit is used to indicate that the values have already been set once. + const INITIALIZED: u32 = 1 << 31; + + // Relaxed ops are sufficient since the result should always be the same. + let mut flags = Flags::from_bits(cache.load(Ordering::Relaxed)); + + if !flags.contains(INITIALIZED) { + // Without this, `init` is inlined and the bit check gets wrapped in `init`'s lengthy + // prologue/epilogue. Cold pathing gives a preferable load->test->?jmp->ret. + cold_path(); + + flags = init(); + debug_assert!( + !flags.contains(INITIALIZED), + "initialized bit shouldn't be set" + ); + flags.insert(INITIALIZED); + cache.store(flags.bits(), Ordering::Relaxed); + } + + flags +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn unique_masks() { + unique_masks! 
{ + u32, + V0, + V1, + V2, + } + assert_eq!(V0, 1u32 << 0); + assert_eq!(V1, 1u32 << 1); + assert_eq!(V2, 1u32 << 2); + assert_eq!(ALL, [V0, V1, V2]); + assert_eq!(NAMES, ["V0", "V1", "V2"]); + } + + #[test] + fn flag_cache_is_used() { + // Sanity check that flags are only ever set once + static CACHE: AtomicU32 = AtomicU32::new(0); + + let mut f1 = Flags::from_bits(0x1); + let f2 = Flags::from_bits(0x2); + + let r1 = get_or_init_flags_cache(&CACHE, || f1); + let r2 = get_or_init_flags_cache(&CACHE, || f2); + + f1.insert(1 << 31); // init bit + + assert_eq!(r1, f1); + assert_eq!(r2, f1); + } + + #[test] + fn select_cache_is_used() { + // Sanity check that cache is used + static CALLED: AtomicU32 = AtomicU32::new(0); + + fn inner() { + fn nop() {} + + select_once! { + sig: fn() -> (), + init: || { + CALLED.fetch_add(1, Ordering::Relaxed); + nop + }, + call: |fn_ptr: Func| unsafe { fn_ptr() }, + } + } + + // `init` should only have been called once. + inner(); + assert_eq!(CALLED.load(Ordering::Relaxed), 1); + inner(); + assert_eq!(CALLED.load(Ordering::Relaxed), 1); + } +} diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index ee3f2bbdf..727b9a360 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -2,6 +2,7 @@ pub mod macros; mod big; mod env; +mod feature_detect; mod float_traits; pub mod hex_float; mod int_traits; @@ -10,6 +11,8 @@ mod int_traits; pub use big::{i256, u256}; pub use env::{FpResult, Round, Status}; #[allow(unused_imports)] +pub(crate) use feature_detect::{Flags, get_or_init_flags_cache, select_once, unique_masks}; +#[allow(unused_imports)] pub use float_traits::{DFloat, Float, HFloat, IntTy}; pub(crate) use float_traits::{f32_from_bits, f64_from_bits}; #[cfg(f16_enabled)] From 257dd4808950ec85ed9ecffb8116c550079684f7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 3 May 2025 18:18:20 +0000 Subject: [PATCH 1341/1459] chore: 
release --- compiler-builtins/CHANGELOG.md | 6 ++++++ compiler-builtins/Cargo.toml | 2 +- libm/CHANGELOG.md | 6 ++++++ libm/Cargo.toml | 2 +- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/compiler-builtins/CHANGELOG.md b/compiler-builtins/CHANGELOG.md index 34b413a86..f152c2c2c 100644 --- a/compiler-builtins/CHANGELOG.md +++ b/compiler-builtins/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.157](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.156...compiler_builtins-v0.1.157) - 2025-05-03 + +### Other + +- Use runtime feature detection for fma routines on x86 + ## [0.1.156](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.155...compiler_builtins-v0.1.156) - 2025-04-21 ### Other diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index acbace687..784563777 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.156" +version = "0.1.157" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index 292561f86..a0217af09 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -8,6 +8,12 @@ and this project adheres to ## [Unreleased] +## [0.2.14](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.13...libm-v0.2.14) - 2025-05-03 + +### Other + +- Use runtime feature detection for fma routines on x86 + ## [0.2.13](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.12...libm-v0.2.13) - 2025-04-21 ### Fixed diff --git a/libm/Cargo.toml b/libm/Cargo.toml index f80715ff6..76c9a73bc 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT" name = "libm" readme = 
"README.md" repository = "https://github.com/rust-lang/compiler-builtins" -version = "0.2.13" +version = "0.2.14" edition = "2021" rust-version = "1.63" From 8e78411a4fdb83640ea6091aefb3e8e99c9320cd Mon Sep 17 00:00:00 2001 From: ELginas Date: Mon, 28 Apr 2025 17:17:11 +0300 Subject: [PATCH 1342/1459] docs: fix typo in Cargo.toml Initially introduced in 63ccaf11f08fb5d0b39cc33884c5a1a63f547ace Signed-off-by: ELginas --- compiler-builtins/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index 784563777..d9eebcfc8 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -35,7 +35,7 @@ default = ["compiler-builtins"] c = ["dep:cc"] # Workaround for the Cranelift codegen backend. Disables any implementations -# which use inline assembly and fall back to pure Rust versions (if avalible). +# which use inline assembly and fall back to pure Rust versions (if available). no-asm = [] # Workaround for codegen backends which haven't yet implemented `f16` and From 6d78c1acc995b9093365588e094a5defacd611e4 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 5 May 2025 05:40:54 +0000 Subject: [PATCH 1343/1459] Replace `super::super` with `crate::support` where possible Since `crate::support` now works in both `compiler-builtins` and `libm`, we can get rid of some of these unusual paths. 
--- libm/src/math/generic/ceil.rs | 3 +-- libm/src/math/generic/copysign.rs | 2 +- libm/src/math/generic/fabs.rs | 2 +- libm/src/math/generic/fdim.rs | 2 +- libm/src/math/generic/floor.rs | 3 +-- libm/src/math/generic/fmax.rs | 2 +- libm/src/math/generic/fmaximum.rs | 2 +- libm/src/math/generic/fmaximum_num.rs | 2 +- libm/src/math/generic/fmin.rs | 2 +- libm/src/math/generic/fminimum.rs | 2 +- libm/src/math/generic/fminimum_num.rs | 2 +- libm/src/math/generic/fmod.rs | 2 +- libm/src/math/generic/rint.rs | 3 +-- libm/src/math/generic/round.rs | 2 +- libm/src/math/generic/scalbn.rs | 2 +- libm/src/math/generic/sqrt.rs | 5 +++-- libm/src/math/generic/trunc.rs | 3 +-- 17 files changed, 19 insertions(+), 22 deletions(-) diff --git a/libm/src/math/generic/ceil.rs b/libm/src/math/generic/ceil.rs index 499770c0d..1072ba7c2 100644 --- a/libm/src/math/generic/ceil.rs +++ b/libm/src/math/generic/ceil.rs @@ -7,8 +7,7 @@ //! performance seems to be better (based on icount) and it does not seem to experience rounding //! errors on i386. -use super::super::support::{FpResult, Status}; -use super::super::{Float, Int, IntTy, MinInt}; +use crate::support::{Float, FpResult, Int, IntTy, MinInt, Status}; #[inline] pub fn ceil(x: F) -> F { diff --git a/libm/src/math/generic/copysign.rs b/libm/src/math/generic/copysign.rs index a61af22f0..da9ce3878 100644 --- a/libm/src/math/generic/copysign.rs +++ b/libm/src/math/generic/copysign.rs @@ -1,4 +1,4 @@ -use super::super::Float; +use crate::support::Float; /// Copy the sign of `y` to `x`. #[inline] diff --git a/libm/src/math/generic/fabs.rs b/libm/src/math/generic/fabs.rs index 0fa0edf9b..0adfa57d9 100644 --- a/libm/src/math/generic/fabs.rs +++ b/libm/src/math/generic/fabs.rs @@ -1,4 +1,4 @@ -use super::super::Float; +use crate::support::Float; /// Absolute value. 
#[inline] diff --git a/libm/src/math/generic/fdim.rs b/libm/src/math/generic/fdim.rs index a63007b19..289e5fd96 100644 --- a/libm/src/math/generic/fdim.rs +++ b/libm/src/math/generic/fdim.rs @@ -1,4 +1,4 @@ -use super::super::Float; +use crate::support::Float; #[inline] pub fn fdim(x: F, y: F) -> F { diff --git a/libm/src/math/generic/floor.rs b/libm/src/math/generic/floor.rs index 58d1ee4c2..e6dfd8866 100644 --- a/libm/src/math/generic/floor.rs +++ b/libm/src/math/generic/floor.rs @@ -7,8 +7,7 @@ //! performance seems to be better (based on icount) and it does not seem to experience rounding //! errors on i386. -use super::super::support::{FpResult, Status}; -use super::super::{Float, Int, IntTy, MinInt}; +use crate::support::{Float, FpResult, Int, IntTy, MinInt, Status}; #[inline] pub fn floor(x: F) -> F { diff --git a/libm/src/math/generic/fmax.rs b/libm/src/math/generic/fmax.rs index bf3f847e8..54207e4b3 100644 --- a/libm/src/math/generic/fmax.rs +++ b/libm/src/math/generic/fmax.rs @@ -14,7 +14,7 @@ //! //! [link]: https://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf -use super::super::Float; +use crate::support::Float; #[inline] pub fn fmax(x: F, y: F) -> F { diff --git a/libm/src/math/generic/fmaximum.rs b/libm/src/math/generic/fmaximum.rs index 387055af2..4b6295bc0 100644 --- a/libm/src/math/generic/fmaximum.rs +++ b/libm/src/math/generic/fmaximum.rs @@ -9,7 +9,7 @@ //! //! Excluded from our implementation is sNaN handling. -use super::super::Float; +use crate::support::Float; #[inline] pub fn fmaximum(x: F, y: F) -> F { diff --git a/libm/src/math/generic/fmaximum_num.rs b/libm/src/math/generic/fmaximum_num.rs index f7efdde80..2e97ff6d3 100644 --- a/libm/src/math/generic/fmaximum_num.rs +++ b/libm/src/math/generic/fmaximum_num.rs @@ -11,7 +11,7 @@ //! //! Excluded from our implementation is sNaN handling. 
-use super::super::Float; +use crate::support::Float; #[inline] pub fn fmaximum_num(x: F, y: F) -> F { diff --git a/libm/src/math/generic/fmin.rs b/libm/src/math/generic/fmin.rs index cd3caeee4..0f86364d2 100644 --- a/libm/src/math/generic/fmin.rs +++ b/libm/src/math/generic/fmin.rs @@ -14,7 +14,7 @@ //! //! [link]: https://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf -use super::super::Float; +use crate::support::Float; #[inline] pub fn fmin(x: F, y: F) -> F { diff --git a/libm/src/math/generic/fminimum.rs b/libm/src/math/generic/fminimum.rs index 4ddb36455..9dc0b64be 100644 --- a/libm/src/math/generic/fminimum.rs +++ b/libm/src/math/generic/fminimum.rs @@ -9,7 +9,7 @@ //! //! Excluded from our implementation is sNaN handling. -use super::super::Float; +use crate::support::Float; #[inline] pub fn fminimum(x: F, y: F) -> F { diff --git a/libm/src/math/generic/fminimum_num.rs b/libm/src/math/generic/fminimum_num.rs index 441c204a9..40db8b189 100644 --- a/libm/src/math/generic/fminimum_num.rs +++ b/libm/src/math/generic/fminimum_num.rs @@ -11,7 +11,7 @@ //! //! Excluded from our implementation is sNaN handling. 
-use super::super::Float; +use crate::support::Float; #[inline] pub fn fminimum_num(x: F, y: F) -> F { diff --git a/libm/src/math/generic/fmod.rs b/libm/src/math/generic/fmod.rs index e9898012f..29acc8a4d 100644 --- a/libm/src/math/generic/fmod.rs +++ b/libm/src/math/generic/fmod.rs @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: MIT OR Apache-2.0 */ -use super::super::{CastFrom, Float, Int, MinInt}; +use crate::support::{CastFrom, Float, Int, MinInt}; #[inline] pub fn fmod(x: F, y: F) -> F { diff --git a/libm/src/math/generic/rint.rs b/libm/src/math/generic/rint.rs index 7bf38e323..c5bc27d3d 100644 --- a/libm/src/math/generic/rint.rs +++ b/libm/src/math/generic/rint.rs @@ -1,8 +1,7 @@ /* SPDX-License-Identifier: MIT */ /* origin: musl src/math/rint.c */ -use super::super::Float; -use super::super::support::{FpResult, Round}; +use crate::support::{Float, FpResult, Round}; /// IEEE 754-2019 `roundToIntegralExact`, which respects rounding mode and raises inexact if /// applicable. diff --git a/libm/src/math/generic/round.rs b/libm/src/math/generic/round.rs index 01314ac70..16739f01d 100644 --- a/libm/src/math/generic/round.rs +++ b/libm/src/math/generic/round.rs @@ -1,5 +1,5 @@ -use super::super::{Float, MinInt}; use super::{copysign, trunc}; +use crate::support::{Float, MinInt}; #[inline] pub fn round(x: F) -> F { diff --git a/libm/src/math/generic/scalbn.rs b/libm/src/math/generic/scalbn.rs index a45db1b4a..6dd9b1a9b 100644 --- a/libm/src/math/generic/scalbn.rs +++ b/libm/src/math/generic/scalbn.rs @@ -1,4 +1,4 @@ -use super::super::{CastFrom, CastInto, Float, IntTy, MinInt}; +use crate::support::{CastFrom, CastInto, Float, IntTy, MinInt}; /// Scale the exponent. /// diff --git a/libm/src/math/generic/sqrt.rs b/libm/src/math/generic/sqrt.rs index c52560bdb..9481c4cdb 100644 --- a/libm/src/math/generic/sqrt.rs +++ b/libm/src/math/generic/sqrt.rs @@ -41,8 +41,9 @@ //! Goldschmidt has the advantage over Newton-Raphson that `sqrt(x)` and `1/sqrt(x)` are //! 
computed at the same time, i.e. there is no need to calculate `1/sqrt(x)` and invert it. -use super::super::support::{FpResult, IntTy, Round, Status, cold_path}; -use super::super::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt}; +use crate::support::{ + CastFrom, CastInto, DInt, Float, FpResult, HInt, Int, IntTy, MinInt, Round, Status, cold_path, +}; #[inline] pub fn sqrt(x: F) -> F diff --git a/libm/src/math/generic/trunc.rs b/libm/src/math/generic/trunc.rs index 29a28f47b..d5b444d15 100644 --- a/libm/src/math/generic/trunc.rs +++ b/libm/src/math/generic/trunc.rs @@ -1,8 +1,7 @@ /* SPDX-License-Identifier: MIT * origin: musl src/math/trunc.c */ -use super::super::support::{FpResult, Status}; -use super::super::{Float, Int, IntTy, MinInt}; +use crate::support::{Float, FpResult, Int, IntTy, MinInt, Status}; #[inline] pub fn trunc(x: F) -> F { From 1b1b2ed16eac475b6e974ce0cb16e76a9d9c10fa Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 5 May 2025 05:43:58 +0000 Subject: [PATCH 1344/1459] ci: Mention `ci: skip-extensive` in the error message --- ci/ci-util.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/ci-util.py b/ci/ci-util.py index 7486d6b41..d785b2e9e 100755 --- a/ci/ci-util.py +++ b/ci/ci-util.py @@ -261,7 +261,9 @@ def emit_workflow_output(self): if error_on_many_tests and total_to_test > MANY_EXTENSIVE_THRESHOLD: eprint( f"More than {MANY_EXTENSIVE_THRESHOLD} tests would be run; add" - f" `{ALLOW_MANY_EXTENSIVE_DIRECTIVE}` to the PR body if this is intentional" + f" `{ALLOW_MANY_EXTENSIVE_DIRECTIVE}` to the PR body if this is" + " intentional. If this is refactoring that happens to touch a lot of" + f" files, `{SKIP_EXTENSIVE_DIRECTIVE}` can be used instead." 
) exit(1) From da8b5829f44f9fe04dfac7cafde8310e0ed0a429 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 6 May 2025 20:11:48 +0000 Subject: [PATCH 1345/1459] Require `target_has_atomic = "ptr"` for runtime feature detection The `feature_detect` module is currently being built on all targets, but the use of `AtomicU32` causes a problem if atomics are not available (such as with `bpfel-unknown-none`). Gate this module behind `target_has_atomic = "ptr"`. The below now completes successfully: cargo build -p compiler_builtins --target=bpfel-unknown-none -Z build-std=core Fixes: https://github.com/rust-lang/compiler-builtins/issues/908 --- libm/src/math/arch/x86/detect.rs | 7 +++++-- libm/src/math/arch/x86/fma.rs | 3 ++- libm/src/math/support/feature_detect.rs | 5 +++++ libm/src/math/support/mod.rs | 6 +++--- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/libm/src/math/arch/x86/detect.rs b/libm/src/math/arch/x86/detect.rs index 71c3281dc..e6d9b040b 100644 --- a/libm/src/math/arch/x86/detect.rs +++ b/libm/src/math/arch/x86/detect.rs @@ -1,13 +1,16 @@ +// Using runtime feature detection requires atomics. Currently there are no x86 targets +// that support sse but not `AtomicPtr`. + #[cfg(target_arch = "x86")] use core::arch::x86::{__cpuid, __cpuid_count, _xgetbv, CpuidResult}; #[cfg(target_arch = "x86_64")] use core::arch::x86_64::{__cpuid, __cpuid_count, _xgetbv, CpuidResult}; -use crate::support::{Flags, get_or_init_flags_cache}; +use crate::support::feature_detect::{Flags, get_or_init_flags_cache, unique_masks}; /// CPU features that get cached (doesn't correlate to anything on the CPU). pub mod cpu_flags { - use crate::support::unique_masks; + use super::unique_masks; unique_masks! 
{ u32, diff --git a/libm/src/math/arch/x86/fma.rs b/libm/src/math/arch/x86/fma.rs index eb43f4696..43ac18779 100644 --- a/libm/src/math/arch/x86/fma.rs +++ b/libm/src/math/arch/x86/fma.rs @@ -4,7 +4,8 @@ use core::arch::asm; use super::super::super::generic; use super::detect::{cpu_flags, get_cpu_features}; -use crate::support::{Round, select_once}; +use crate::support::Round; +use crate::support::feature_detect::select_once; pub fn fma(x: f64, y: f64, z: f64) -> f64 { select_once! { diff --git a/libm/src/math/support/feature_detect.rs b/libm/src/math/support/feature_detect.rs index cb669b073..9ebd434a5 100644 --- a/libm/src/math/support/feature_detect.rs +++ b/libm/src/math/support/feature_detect.rs @@ -1,5 +1,9 @@ //! Helpers for runtime target feature detection that are shared across architectures. +// `AtomicU32` is preferred for a consistent size across targets. +#[cfg(all(target_has_atomic = "ptr", not(target_has_atomic = "32")))] +compile_error!("currently all targets that support `AtomicPtr` also support `AtomicU32`"); + use core::sync::atomic::{AtomicU32, Ordering}; /// Given a list of identifiers, assign each one a unique sequential single-bit mask. @@ -72,6 +76,7 @@ macro_rules! select_once { }} } +#[allow(unused_imports)] pub(crate) use {select_once, unique_masks}; use crate::support::cold_path; diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index 727b9a360..a4f596ab8 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -2,7 +2,9 @@ pub mod macros; mod big; mod env; -mod feature_detect; +// Runtime feature detection requires atomics. 
+#[cfg(target_has_atomic = "ptr")] +pub(crate) mod feature_detect; mod float_traits; pub mod hex_float; mod int_traits; @@ -11,8 +13,6 @@ mod int_traits; pub use big::{i256, u256}; pub use env::{FpResult, Round, Status}; #[allow(unused_imports)] -pub(crate) use feature_detect::{Flags, get_or_init_flags_cache, select_once, unique_masks}; -#[allow(unused_imports)] pub use float_traits::{DFloat, Float, HFloat, IntTy}; pub(crate) use float_traits::{f32_from_bits, f64_from_bits}; #[cfg(f16_enabled)] From cf0094106471e100f79000dba1926705f5f7f392 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 6 May 2025 21:59:33 +0000 Subject: [PATCH 1346/1459] chore: release --- compiler-builtins/CHANGELOG.md | 6 ++++++ compiler-builtins/Cargo.toml | 2 +- libm/CHANGELOG.md | 6 ++++++ libm/Cargo.toml | 2 +- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/compiler-builtins/CHANGELOG.md b/compiler-builtins/CHANGELOG.md index f152c2c2c..f0af37ba0 100644 --- a/compiler-builtins/CHANGELOG.md +++ b/compiler-builtins/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.158](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.157...compiler_builtins-v0.1.158) - 2025-05-06 + +### Other + +- Require `target_has_atomic = "ptr"` for runtime feature detection + ## [0.1.157](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.156...compiler_builtins-v0.1.157) - 2025-05-03 ### Other diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index d9eebcfc8..81f708c48 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.157" +version = "0.1.158" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository 
= "https://github.com/rust-lang/compiler-builtins" diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md index a0217af09..33fec06aa 100644 --- a/libm/CHANGELOG.md +++ b/libm/CHANGELOG.md @@ -8,6 +8,12 @@ and this project adheres to ## [Unreleased] +## [0.2.15](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.14...libm-v0.2.15) - 2025-05-06 + +### Other + +- Require `target_has_atomic = "ptr"` for runtime feature detection + ## [0.2.14](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.13...libm-v0.2.14) - 2025-05-03 ### Other diff --git a/libm/Cargo.toml b/libm/Cargo.toml index 76c9a73bc..b6fb5efcf 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT" name = "libm" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" -version = "0.2.14" +version = "0.2.15" edition = "2021" rust-version = "1.63" From a4c748f72a1dce652cc3e41c3a8425731bd1519a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 6 May 2025 23:00:46 +0000 Subject: [PATCH 1347/1459] release-plz: Include the libm changelog in compiler-builtins --- .release-plz.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/.release-plz.toml b/.release-plz.toml index 95e91a46c..8023ade9b 100644 --- a/.release-plz.toml +++ b/.release-plz.toml @@ -7,6 +7,7 @@ publish_allow_dirty = true [[package]] name = "compiler_builtins" semver_check = false +changelog_include = ["libm"] # libm is included as part of builtins [[package]] name = "libm" From ab01e290b8fbaf334e934d75e27516829e000b3a Mon Sep 17 00:00:00 2001 From: nora <48135649+Noratrieb@users.noreply.github.com> Date: Mon, 12 May 2025 16:15:24 +0200 Subject: [PATCH 1348/1459] Remove cfg(bootstrap) For the bootstrap bump --- compiler-builtins/src/macros.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/compiler-builtins/src/macros.rs b/compiler-builtins/src/macros.rs index dbf715534..22e0dd27f 100644 --- a/compiler-builtins/src/macros.rs +++
b/compiler-builtins/src/macros.rs @@ -433,18 +433,6 @@ macro_rules! intrinsics { ) => ( // `#[naked]` definitions are referenced by other places, so we can't use `cfg` like the others pub mod $name { - // FIXME: when bootstrap supports `#[unsafe(naked)]` this duplication can be removed - #[cfg(bootstrap)] - #[naked] - #[allow(unused_unsafe)] - $(#[$($attr)*])* - #[cfg_attr(not(feature = "mangled-names"), no_mangle)] - #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] - pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { - unsafe { $($body)* } - } - - #[cfg(not(bootstrap))] #[unsafe(naked)] $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] From 87a6afb37fad14cee50c498d4dcd6c5a09930750 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 14:26:59 +0000 Subject: [PATCH 1349/1459] chore(compiler_builtins): release v0.1.159 --- compiler-builtins/CHANGELOG.md | 6 ++++++ compiler-builtins/Cargo.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/compiler-builtins/CHANGELOG.md b/compiler-builtins/CHANGELOG.md index f0af37ba0..a7c01c463 100644 --- a/compiler-builtins/CHANGELOG.md +++ b/compiler-builtins/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.159](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.158...compiler_builtins-v0.1.159) - 2025-05-12 + +### Other + +- Remove cfg(bootstrap) + ## [0.1.158](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.157...compiler_builtins-v0.1.158) - 2025-05-06 ### Other diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index 81f708c48..d65a22152 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = 
"compiler_builtins" -version = "0.1.158" +version = "0.1.159" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" From f2918cd0f4d23a6ff038a7a9a5ea2695598aeaaa Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Thu, 8 May 2025 15:00:59 +0200 Subject: [PATCH 1350/1459] Fix `i256::MAX` --- compiler-builtins/src/int/big.rs | 2 +- libm/src/math/support/big.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-builtins/src/int/big.rs b/compiler-builtins/src/int/big.rs index 61f1349d9..1402efb8e 100644 --- a/compiler-builtins/src/int/big.rs +++ b/compiler-builtins/src/int/big.rs @@ -65,7 +65,7 @@ impl MinInt for i256 { const ZERO: Self = Self([0u64; 4]); const ONE: Self = Self([1, 0, 0, 0]); const MIN: Self = Self([0, 0, 0, 1 << 63]); - const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX << 1]); + const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1]); } macro_rules! 
impl_common { diff --git a/libm/src/math/support/big.rs b/libm/src/math/support/big.rs index f24c063cd..8a52d86cc 100644 --- a/libm/src/math/support/big.rs +++ b/libm/src/math/support/big.rs @@ -83,7 +83,7 @@ impl MinInt for i256 { }; const MAX: Self = Self { lo: u128::MAX, - hi: u128::MAX << 1, + hi: u128::MAX >> 1, }; } From 233434412fe7eced8f1ddbfeddabef1d55e493bd Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sun, 18 May 2025 10:14:22 +0200 Subject: [PATCH 1351/1459] fix an if statement that can be collapsed --- crates/libm-macros/src/lib.rs | 40 ++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs index e8afe3aad..482da974c 100644 --- a/crates/libm-macros/src/lib.rs +++ b/crates/libm-macros/src/lib.rs @@ -1,3 +1,5 @@ +#![feature(let_chains)] + mod enums; mod parse; mod shared; @@ -266,27 +268,27 @@ fn validate(input: &mut StructuredInput) -> syn::Result } } - if let Some(map) = &input.fn_extra { - if !map.keys().any(|key| key == "_") { - // No default provided; make sure every expected function is covered - let mut fns_not_covered = Vec::new(); - for func in &fn_list { - if !map.keys().any(|key| key == func.name) { - // `name` was not mentioned in the `match` statement - fns_not_covered.push(func); - } + if let Some(map) = &input.fn_extra + && !map.keys().any(|key| key == "_") + { + // No default provided; make sure every expected function is covered + let mut fns_not_covered = Vec::new(); + for func in &fn_list { + if !map.keys().any(|key| key == func.name) { + // `name` was not mentioned in the `match` statement + fns_not_covered.push(func); } + } - if !fns_not_covered.is_empty() { - let e = syn::Error::new( - input.fn_extra_span.unwrap(), - format!( - "`fn_extra`: no default `_` pattern specified and the following \ - patterns are not covered: {fns_not_covered:#?}" - ), - ); - return Err(e); - } + if !fns_not_covered.is_empty() { + let e = 
syn::Error::new( + input.fn_extra_span.unwrap(), + format!( + "`fn_extra`: no default `_` pattern specified and the following \ + patterns are not covered: {fns_not_covered:#?}" + ), + ); + return Err(e); } }; From da5f72d8f3d550648f53f7a5d8ec4ac9d886e01c Mon Sep 17 00:00:00 2001 From: beetrees Date: Wed, 21 May 2025 18:11:11 +0100 Subject: [PATCH 1352/1459] Enable `__powitf2` on MSVC --- builtins-test/tests/float_pow.rs | 2 -- compiler-builtins/src/float/pow.rs | 2 -- 2 files changed, 4 deletions(-) diff --git a/builtins-test/tests/float_pow.rs b/builtins-test/tests/float_pow.rs index 8209543e6..0e8ae88e8 100644 --- a/builtins-test/tests/float_pow.rs +++ b/builtins-test/tests/float_pow.rs @@ -58,8 +58,6 @@ pow! { } #[cfg(f128_enabled)] -// FIXME(f16_f128): MSVC cannot build these until `__divtf3` is available in nightly. -#[cfg(not(target_env = "msvc"))] #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] pow! { f128, 1e-36, __powitf2, not(feature = "no-sys-f128"); diff --git a/compiler-builtins/src/float/pow.rs b/compiler-builtins/src/float/pow.rs index 45a4ad904..6997a9c21 100644 --- a/compiler-builtins/src/float/pow.rs +++ b/compiler-builtins/src/float/pow.rs @@ -32,8 +32,6 @@ intrinsics! { #[ppc_alias = __powikf2] #[cfg(f128_enabled)] - // FIXME(f16_f128): MSVC cannot build these until `__divtf3` is available in nightly. 
- #[cfg(not(target_env = "msvc"))] pub extern "C" fn __powitf2(a: f128, b: i32) -> f128 { pow(a, b) } From 3f0959fa9967030775bc7f47eff63a8174f03acc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1lyi=20L=C5=91rinc?= Date: Sat, 10 May 2025 08:36:28 +0000 Subject: [PATCH 1353/1459] fixed typo in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3130ff7b7..177bce624 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ This repository contains two main crates: * `compiler-builtins`: symbols that the compiler expects to be available at link time * `libm`: a Rust implementation of C math libraries, used to provide - implementations in `ocre`. + implementations in `core`. More details are at [compiler-builtins/README.md](compiler-builtins/README.md) and [libm/README.md](libm/README.md). From 157a0b7df5a612173f9a8139e2066725bf049bc8 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 22 Apr 2025 19:35:21 -0400 Subject: [PATCH 1354/1459] libm: Clean up unused files These were deleted during refactoring in 0a2dc5d9 ("Combine the source files for more generic implementations") but got added back by accident in 54bac411 ("refactor: Move the libm crate to a subdirectory"). Remove them again here. 
--- libm/src/math/copysignf.rs | 8 ------- libm/src/math/copysignf128.rs | 8 ------- libm/src/math/copysignf16.rs | 8 ------- libm/src/math/fabsf.rs | 39 ----------------------------------- libm/src/math/fabsf128.rs | 31 ---------------------------- libm/src/math/fabsf16.rs | 31 ---------------------------- libm/src/math/fdimf.rs | 12 ----------- libm/src/math/fdimf128.rs | 12 ----------- libm/src/math/fdimf16.rs | 12 ----------- libm/src/math/floorf.rs | 13 ------------ libm/src/math/floorf128.rs | 7 ------- libm/src/math/floorf16.rs | 7 ------- libm/src/math/fmodf.rs | 5 ----- libm/src/math/fmodf128.rs | 5 ----- libm/src/math/fmodf16.rs | 5 ----- libm/src/math/ldexpf.rs | 4 ---- libm/src/math/ldexpf128.rs | 4 ---- libm/src/math/ldexpf16.rs | 4 ---- libm/src/math/roundf.rs | 5 ----- libm/src/math/roundf128.rs | 5 ----- libm/src/math/roundf16.rs | 5 ----- libm/src/math/scalbnf.rs | 4 ---- libm/src/math/scalbnf128.rs | 4 ---- libm/src/math/scalbnf16.rs | 4 ---- libm/src/math/sqrtf.rs | 15 -------------- libm/src/math/sqrtf128.rs | 5 ----- libm/src/math/sqrtf16.rs | 11 ---------- libm/src/math/truncf.rs | 23 --------------------- libm/src/math/truncf128.rs | 7 ------- libm/src/math/truncf16.rs | 7 ------- 30 files changed, 310 deletions(-) delete mode 100644 libm/src/math/copysignf.rs delete mode 100644 libm/src/math/copysignf128.rs delete mode 100644 libm/src/math/copysignf16.rs delete mode 100644 libm/src/math/fabsf.rs delete mode 100644 libm/src/math/fabsf128.rs delete mode 100644 libm/src/math/fabsf16.rs delete mode 100644 libm/src/math/fdimf.rs delete mode 100644 libm/src/math/fdimf128.rs delete mode 100644 libm/src/math/fdimf16.rs delete mode 100644 libm/src/math/floorf.rs delete mode 100644 libm/src/math/floorf128.rs delete mode 100644 libm/src/math/floorf16.rs delete mode 100644 libm/src/math/fmodf.rs delete mode 100644 libm/src/math/fmodf128.rs delete mode 100644 libm/src/math/fmodf16.rs delete mode 100644 libm/src/math/ldexpf.rs delete mode 100644 
libm/src/math/ldexpf128.rs delete mode 100644 libm/src/math/ldexpf16.rs delete mode 100644 libm/src/math/roundf.rs delete mode 100644 libm/src/math/roundf128.rs delete mode 100644 libm/src/math/roundf16.rs delete mode 100644 libm/src/math/scalbnf.rs delete mode 100644 libm/src/math/scalbnf128.rs delete mode 100644 libm/src/math/scalbnf16.rs delete mode 100644 libm/src/math/sqrtf.rs delete mode 100644 libm/src/math/sqrtf128.rs delete mode 100644 libm/src/math/sqrtf16.rs delete mode 100644 libm/src/math/truncf.rs delete mode 100644 libm/src/math/truncf128.rs delete mode 100644 libm/src/math/truncf16.rs diff --git a/libm/src/math/copysignf.rs b/libm/src/math/copysignf.rs deleted file mode 100644 index 8b9bed4c0..000000000 --- a/libm/src/math/copysignf.rs +++ /dev/null @@ -1,8 +0,0 @@ -/// Sign of Y, magnitude of X (f32) -/// -/// Constructs a number with the magnitude (absolute value) of its -/// first argument, `x`, and the sign of its second argument, `y`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn copysignf(x: f32, y: f32) -> f32 { - super::generic::copysign(x, y) -} diff --git a/libm/src/math/copysignf128.rs b/libm/src/math/copysignf128.rs deleted file mode 100644 index 7bd81d42b..000000000 --- a/libm/src/math/copysignf128.rs +++ /dev/null @@ -1,8 +0,0 @@ -/// Sign of Y, magnitude of X (f128) -/// -/// Constructs a number with the magnitude (absolute value) of its -/// first argument, `x`, and the sign of its second argument, `y`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn copysignf128(x: f128, y: f128) -> f128 { - super::generic::copysign(x, y) -} diff --git a/libm/src/math/copysignf16.rs b/libm/src/math/copysignf16.rs deleted file mode 100644 index 820658686..000000000 --- a/libm/src/math/copysignf16.rs +++ /dev/null @@ -1,8 +0,0 @@ -/// Sign of Y, magnitude of X (f16) -/// -/// Constructs a number with the magnitude (absolute value) of its -/// first argument, `x`, and the sign of its second argument, `y`. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn copysignf16(x: f16, y: f16) -> f16 { - super::generic::copysign(x, y) -} diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs deleted file mode 100644 index e5820a26c..000000000 --- a/libm/src/math/fabsf.rs +++ /dev/null @@ -1,39 +0,0 @@ -/// Absolute value (magnitude) (f32) -/// -/// Calculates the absolute value (magnitude) of the argument `x`, -/// by direct manipulation of the bit representation of `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fabsf(x: f32) -> f32 { - select_implementation! { - name: fabsf, - use_arch: all(target_arch = "wasm32", intrinsics_enabled), - args: x, - } - - super::generic::fabs(x) -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(fabsf(-1.0), 1.0); - assert_eq!(fabsf(2.8), 2.8); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs - #[test] - fn spec_tests() { - assert!(fabsf(f32::NAN).is_nan()); - for f in [0.0, -0.0].iter().copied() { - assert_eq!(fabsf(f), 0.0); - } - for f in [f32::INFINITY, f32::NEG_INFINITY].iter().copied() { - assert_eq!(fabsf(f), f32::INFINITY); - } - } -} diff --git a/libm/src/math/fabsf128.rs b/libm/src/math/fabsf128.rs deleted file mode 100644 index 46429ca49..000000000 --- a/libm/src/math/fabsf128.rs +++ /dev/null @@ -1,31 +0,0 @@ -/// Absolute value (magnitude) (f128) -/// -/// Calculates the absolute value (magnitude) of the argument `x`, -/// by direct manipulation of the bit representation of `x`. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fabsf128(x: f128) -> f128 { - super::generic::fabs(x) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(fabsf128(-1.0), 1.0); - assert_eq!(fabsf128(2.8), 2.8); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs - #[test] - fn spec_tests() { - assert!(fabsf128(f128::NAN).is_nan()); - for f in [0.0, -0.0].iter().copied() { - assert_eq!(fabsf128(f), 0.0); - } - for f in [f128::INFINITY, f128::NEG_INFINITY].iter().copied() { - assert_eq!(fabsf128(f), f128::INFINITY); - } - } -} diff --git a/libm/src/math/fabsf16.rs b/libm/src/math/fabsf16.rs deleted file mode 100644 index eee42ac6a..000000000 --- a/libm/src/math/fabsf16.rs +++ /dev/null @@ -1,31 +0,0 @@ -/// Absolute value (magnitude) (f16) -/// -/// Calculates the absolute value (magnitude) of the argument `x`, -/// by direct manipulation of the bit representation of `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fabsf16(x: f16) -> f16 { - super::generic::fabs(x) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(fabsf16(-1.0), 1.0); - assert_eq!(fabsf16(2.8), 2.8); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs - #[test] - fn spec_tests() { - assert!(fabsf16(f16::NAN).is_nan()); - for f in [0.0, -0.0].iter().copied() { - assert_eq!(fabsf16(f), 0.0); - } - for f in [f16::INFINITY, f16::NEG_INFINITY].iter().copied() { - assert_eq!(fabsf16(f), f16::INFINITY); - } - } -} diff --git a/libm/src/math/fdimf.rs b/libm/src/math/fdimf.rs deleted file mode 100644 index 367ef517c..000000000 --- a/libm/src/math/fdimf.rs +++ /dev/null @@ -1,12 +0,0 @@ -/// Positive difference (f32) -/// -/// Determines the positive difference between arguments, returning: -/// * x - y if x > y, or -/// * +0 if x <= y, or -/// * NAN if either argument is NAN. -/// -/// A range error may occur. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fdimf(x: f32, y: f32) -> f32 { - super::generic::fdim(x, y) -} diff --git a/libm/src/math/fdimf128.rs b/libm/src/math/fdimf128.rs deleted file mode 100644 index 6f3d1d0ff..000000000 --- a/libm/src/math/fdimf128.rs +++ /dev/null @@ -1,12 +0,0 @@ -/// Positive difference (f128) -/// -/// Determines the positive difference between arguments, returning: -/// * x - y if x > y, or -/// * +0 if x <= y, or -/// * NAN if either argument is NAN. -/// -/// A range error may occur. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fdimf128(x: f128, y: f128) -> f128 { - super::generic::fdim(x, y) -} diff --git a/libm/src/math/fdimf16.rs b/libm/src/math/fdimf16.rs deleted file mode 100644 index 37bd68858..000000000 --- a/libm/src/math/fdimf16.rs +++ /dev/null @@ -1,12 +0,0 @@ -/// Positive difference (f16) -/// -/// Determines the positive difference between arguments, returning: -/// * x - y if x > y, or -/// * +0 if x <= y, or -/// * NAN if either argument is NAN. -/// -/// A range error may occur. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fdimf16(x: f16, y: f16) -> f16 { - super::generic::fdim(x, y) -} diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs deleted file mode 100644 index 16957b7f3..000000000 --- a/libm/src/math/floorf.rs +++ /dev/null @@ -1,13 +0,0 @@ -/// Floor (f32) -/// -/// Finds the nearest integer less than or equal to `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn floorf(x: f32) -> f32 { - select_implementation! { - name: floorf, - use_arch: all(target_arch = "wasm32", intrinsics_enabled), - args: x, - } - - return super::generic::floor(x); -} diff --git a/libm/src/math/floorf128.rs b/libm/src/math/floorf128.rs deleted file mode 100644 index 9a9fe4151..000000000 --- a/libm/src/math/floorf128.rs +++ /dev/null @@ -1,7 +0,0 @@ -/// Floor (f128) -/// -/// Finds the nearest integer less than or equal to `x`. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn floorf128(x: f128) -> f128 { - return super::generic::floor(x); -} diff --git a/libm/src/math/floorf16.rs b/libm/src/math/floorf16.rs deleted file mode 100644 index f9b868e04..000000000 --- a/libm/src/math/floorf16.rs +++ /dev/null @@ -1,7 +0,0 @@ -/// Floor (f16) -/// -/// Finds the nearest integer less than or equal to `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn floorf16(x: f16) -> f16 { - return super::generic::floor(x); -} diff --git a/libm/src/math/fmodf.rs b/libm/src/math/fmodf.rs deleted file mode 100644 index 4e95696e2..000000000 --- a/libm/src/math/fmodf.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmodf(x: f32, y: f32) -> f32 { - super::generic::fmod(x, y) -} diff --git a/libm/src/math/fmodf128.rs b/libm/src/math/fmodf128.rs deleted file mode 100644 index ff0e0493e..000000000 --- a/libm/src/math/fmodf128.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmodf128(x: f128, y: f128) -> f128 { - super::generic::fmod(x, y) -} diff --git a/libm/src/math/fmodf16.rs b/libm/src/math/fmodf16.rs deleted file mode 100644 index 11972a7de..000000000 --- a/libm/src/math/fmodf16.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmodf16(x: f16, y: f16) -> f16 { - super::generic::fmod(x, y) -} diff --git a/libm/src/math/ldexpf.rs b/libm/src/math/ldexpf.rs deleted file mode 100644 index 95b27fc49..000000000 --- a/libm/src/math/ldexpf.rs +++ /dev/null @@ -1,4 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn ldexpf(x: f32, n: i32) -> f32 { - super::scalbnf(x, n) -} diff --git a/libm/src/math/ldexpf128.rs b/libm/src/math/ldexpf128.rs deleted file mode 100644 index b35277d15..000000000 --- a/libm/src/math/ldexpf128.rs +++ /dev/null @@ -1,4 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn ldexpf128(x: f128, n: i32) -> f128 { - super::scalbnf128(x, n) -} diff --git a/libm/src/math/ldexpf16.rs b/libm/src/math/ldexpf16.rs deleted file mode 100644 index 8de6cffd6..000000000 --- a/libm/src/math/ldexpf16.rs +++ /dev/null @@ -1,4 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn ldexpf16(x: f16, n: i32) -> f16 { - super::scalbnf16(x, n) -} diff --git a/libm/src/math/roundf.rs b/libm/src/math/roundf.rs deleted file mode 100644 index b5d7c9d69..000000000 --- a/libm/src/math/roundf.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Round `x` to the nearest integer, breaking ties away from zero. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn roundf(x: f32) -> f32 { - super::generic::round(x) -} diff --git a/libm/src/math/roundf128.rs b/libm/src/math/roundf128.rs deleted file mode 100644 index fc3164929..000000000 --- a/libm/src/math/roundf128.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Round `x` to the nearest integer, breaking ties away from zero. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn roundf128(x: f128) -> f128 { - super::generic::round(x) -} diff --git a/libm/src/math/roundf16.rs b/libm/src/math/roundf16.rs deleted file mode 100644 index 8b356eaab..000000000 --- a/libm/src/math/roundf16.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Round `x` to the nearest integer, breaking ties away from zero. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn roundf16(x: f16) -> f16 { - super::generic::round(x) -} diff --git a/libm/src/math/scalbnf.rs b/libm/src/math/scalbnf.rs deleted file mode 100644 index 57e7ba76f..000000000 --- a/libm/src/math/scalbnf.rs +++ /dev/null @@ -1,4 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn scalbnf(x: f32, n: i32) -> f32 { - super::generic::scalbn(x, n) -} diff --git a/libm/src/math/scalbnf128.rs b/libm/src/math/scalbnf128.rs deleted file mode 100644 index c1d2b4855..000000000 --- a/libm/src/math/scalbnf128.rs +++ /dev/null @@ -1,4 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn scalbnf128(x: f128, n: i32) -> f128 { - super::generic::scalbn(x, n) -} diff --git a/libm/src/math/scalbnf16.rs b/libm/src/math/scalbnf16.rs deleted file mode 100644 index 2209e1a17..000000000 --- a/libm/src/math/scalbnf16.rs +++ /dev/null @@ -1,4 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn scalbnf16(x: f16, n: i32) -> f16 { - super::generic::scalbn(x, n) -} diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs deleted file mode 100644 index c28a705e3..000000000 --- a/libm/src/math/sqrtf.rs +++ /dev/null @@ -1,15 +0,0 @@ -/// The square root of `x` (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn sqrtf(x: f32) -> f32 { - select_implementation! 
{ - name: sqrtf, - use_arch: any( - all(target_arch = "aarch64", target_feature = "neon"), - all(target_arch = "wasm32", intrinsics_enabled), - target_feature = "sse2" - ), - args: x, - } - - super::generic::sqrt(x) -} diff --git a/libm/src/math/sqrtf128.rs b/libm/src/math/sqrtf128.rs deleted file mode 100644 index eaef6ae0c..000000000 --- a/libm/src/math/sqrtf128.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// The square root of `x` (f128). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn sqrtf128(x: f128) -> f128 { - return super::generic::sqrt(x); -} diff --git a/libm/src/math/sqrtf16.rs b/libm/src/math/sqrtf16.rs deleted file mode 100644 index 7bedb7f8b..000000000 --- a/libm/src/math/sqrtf16.rs +++ /dev/null @@ -1,11 +0,0 @@ -/// The square root of `x` (f16). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn sqrtf16(x: f16) -> f16 { - select_implementation! { - name: sqrtf16, - use_arch: all(target_arch = "aarch64", target_feature = "fp16"), - args: x, - } - - return super::generic::sqrt(x); -} diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs deleted file mode 100644 index 14533a267..000000000 --- a/libm/src/math/truncf.rs +++ /dev/null @@ -1,23 +0,0 @@ -/// Rounds the number toward 0 to the closest integral value (f32). -/// -/// This effectively removes the decimal part of the number, leaving the integral part. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn truncf(x: f32) -> f32 { - select_implementation! 
{ - name: truncf, - use_arch: all(target_arch = "wasm32", intrinsics_enabled), - args: x, - } - - super::generic::trunc(x) -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - #[test] - fn sanity_check() { - assert_eq!(super::truncf(1.1), 1.0); - } -} diff --git a/libm/src/math/truncf128.rs b/libm/src/math/truncf128.rs deleted file mode 100644 index 9dccc0d0e..000000000 --- a/libm/src/math/truncf128.rs +++ /dev/null @@ -1,7 +0,0 @@ -/// Rounds the number toward 0 to the closest integral value (f128). -/// -/// This effectively removes the decimal part of the number, leaving the integral part. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn truncf128(x: f128) -> f128 { - super::generic::trunc(x) -} diff --git a/libm/src/math/truncf16.rs b/libm/src/math/truncf16.rs deleted file mode 100644 index d7c3d225c..000000000 --- a/libm/src/math/truncf16.rs +++ /dev/null @@ -1,7 +0,0 @@ -/// Rounds the number toward 0 to the closest integral value (f16). -/// -/// This effectively removes the decimal part of the number, leaving the integral part. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn truncf16(x: f16) -> f16 { - super::generic::trunc(x) -} From 7365ea4b0645879ab6520c77bebf01f1cd6ead35 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 23 May 2025 17:26:39 +0000 Subject: [PATCH 1355/1459] Update `CmpResult` to use a pointer-sized return type As seen at [1], LLVM uses `long long` on LLP64 (to get a 64-bit integer matching pointer size) and `long` on everything else, with exceptions for AArch64 and AVR. Our current logic always uses an `i32`. This happens to work because LLVM uses 32-bit instructions to check the output on x86-64, but the GCC checks the full 64-bit register so garbage in the upper half leads to incorrect results. Update our return type to be `isize`, with exceptions for AArch64 and AVR. 
Fixes: https://github.com/rust-lang/compiler-builtins/issues/919 [1]: https://github.com/llvm/llvm-project/blob/0cf3c437c18ed27d9663d87804a9a15ff6874af2/compiler-rt/lib/builtins/fp_compare_impl.inc#L11-L27 --- builtins-test/benches/float_cmp.rs | 43 +++++++++++++++++++----------- builtins-test/src/bench.rs | 4 +-- compiler-builtins/src/float/cmp.rs | 25 +++++++++++------ libm/src/math/support/mod.rs | 2 ++ 4 files changed, 48 insertions(+), 26 deletions(-) diff --git a/builtins-test/benches/float_cmp.rs b/builtins-test/benches/float_cmp.rs index 42d665239..87a89efb5 100644 --- a/builtins-test/benches/float_cmp.rs +++ b/builtins-test/benches/float_cmp.rs @@ -1,12 +1,23 @@ #![cfg_attr(f128_enabled, feature(f128))] use builtins_test::float_bench; -use compiler_builtins::float::cmp; +use compiler_builtins::float::cmp::{self, CmpResult}; use criterion::{Criterion, criterion_main}; /// `gt` symbols are allowed to return differing results, they just get compared /// to 0. -fn gt_res_eq(a: i32, b: i32) -> bool { +fn gt_res_eq(mut a: CmpResult, mut b: CmpResult) -> bool { + // FIXME: Our CmpResult used to be `i32`, but GCC/LLVM expect `isize`. on 64-bit platforms, + // this means the top half of the word may be garbage if built with an old version of + // `compiler-builtins`, so add a hack around this. + // + // This can be removed once a version of `compiler-builtins` with the return type fix makes + // it upstream. + if size_of::() == 8 { + a = a as i32 as CmpResult; + b = b as i32 as CmpResult; + } + let a_lt_0 = a <= 0; let b_lt_0 = b <= 0; (a_lt_0 && b_lt_0) || (!a_lt_0 && !b_lt_0) @@ -14,14 +25,14 @@ fn gt_res_eq(a: i32, b: i32) -> bool { float_bench! 
{ name: cmp_f32_gt, - sig: (a: f32, b: f32) -> i32, + sig: (a: f32, b: f32) -> CmpResult, crate_fn: cmp::__gtsf2, sys_fn: __gtsf2, sys_available: all(), output_eq: gt_res_eq, asm: [ #[cfg(target_arch = "x86_64")] { - let ret: i32; + let ret: CmpResult; asm!( "xor {ret:e}, {ret:e}", "ucomiss {a}, {b}", @@ -36,7 +47,7 @@ float_bench! { }; #[cfg(target_arch = "aarch64")] { - let ret: i32; + let ret: CmpResult; asm!( "fcmp {a:s}, {b:s}", "cset {ret:w}, gt", @@ -53,13 +64,13 @@ float_bench! { float_bench! { name: cmp_f32_unord, - sig: (a: f32, b: f32) -> i32, + sig: (a: f32, b: f32) -> CmpResult, crate_fn: cmp::__unordsf2, sys_fn: __unordsf2, sys_available: all(), asm: [ #[cfg(target_arch = "x86_64")] { - let ret: i32; + let ret: CmpResult; asm!( "xor {ret:e}, {ret:e}", "ucomiss {a}, {b}", @@ -74,7 +85,7 @@ float_bench! { }; #[cfg(target_arch = "aarch64")] { - let ret: i32; + let ret: CmpResult; asm!( "fcmp {a:s}, {b:s}", "cset {ret:w}, vs", @@ -91,14 +102,14 @@ float_bench! { float_bench! { name: cmp_f64_gt, - sig: (a: f64, b: f64) -> i32, + sig: (a: f64, b: f64) -> CmpResult, crate_fn: cmp::__gtdf2, sys_fn: __gtdf2, sys_available: all(), output_eq: gt_res_eq, asm: [ #[cfg(target_arch = "x86_64")] { - let ret: i32; + let ret: CmpResult; asm!( "xor {ret:e}, {ret:e}", "ucomisd {a}, {b}", @@ -113,7 +124,7 @@ float_bench! { }; #[cfg(target_arch = "aarch64")] { - let ret: i32; + let ret: CmpResult; asm!( "fcmp {a:d}, {b:d}", "cset {ret:w}, gt", @@ -130,13 +141,13 @@ float_bench! { float_bench! { name: cmp_f64_unord, - sig: (a: f64, b: f64) -> i32, + sig: (a: f64, b: f64) -> CmpResult, crate_fn: cmp::__unorddf2, sys_fn: __unorddf2, sys_available: all(), asm: [ #[cfg(target_arch = "x86_64")] { - let ret: i32; + let ret: CmpResult; asm!( "xor {ret:e}, {ret:e}", "ucomisd {a}, {b}", @@ -151,7 +162,7 @@ float_bench! { }; #[cfg(target_arch = "aarch64")] { - let ret: i32; + let ret: CmpResult; asm!( "fcmp {a:d}, {b:d}", "cset {ret:w}, vs", @@ -168,7 +179,7 @@ float_bench! 
{ float_bench! { name: cmp_f128_gt, - sig: (a: f128, b: f128) -> i32, + sig: (a: f128, b: f128) -> CmpResult, crate_fn: cmp::__gttf2, crate_fn_ppc: cmp::__gtkf2, sys_fn: __gttf2, @@ -180,7 +191,7 @@ float_bench! { float_bench! { name: cmp_f128_unord, - sig: (a: f128, b: f128) -> i32, + sig: (a: f128, b: f128) -> CmpResult, crate_fn: cmp::__unordtf2, crate_fn_ppc: cmp::__unordkf2, sys_fn: __unordtf2, diff --git a/builtins-test/src/bench.rs b/builtins-test/src/bench.rs index 2348f6bc9..098718567 100644 --- a/builtins-test/src/bench.rs +++ b/builtins-test/src/bench.rs @@ -358,8 +358,8 @@ impl_testio!(float f16); impl_testio!(float f32, f64); #[cfg(f128_enabled)] impl_testio!(float f128); -impl_testio!(int i16, i32, i64, i128); -impl_testio!(int u16, u32, u64, u128); +impl_testio!(int i8, i16, i32, i64, i128, isize); +impl_testio!(int u8, u16, u32, u64, u128, usize); impl_testio!((float, int)(f32, i32)); impl_testio!((float, int)(f64, i32)); #[cfg(f128_enabled)] diff --git a/compiler-builtins/src/float/cmp.rs b/compiler-builtins/src/float/cmp.rs index 296952821..f1e54dc1c 100644 --- a/compiler-builtins/src/float/cmp.rs +++ b/compiler-builtins/src/float/cmp.rs @@ -2,14 +2,23 @@ use crate::float::Float; use crate::int::MinInt; - -// https://github.com/llvm/llvm-project/blob/1e6ba3cd2fe96be00b6ed6ba28b3d9f9271d784d/compiler-rt/lib/builtins/fp_compare_impl.inc#L22 -#[cfg(target_arch = "avr")] -pub type CmpResult = i8; - -// https://github.com/llvm/llvm-project/blob/1e6ba3cd2fe96be00b6ed6ba28b3d9f9271d784d/compiler-rt/lib/builtins/fp_compare_impl.inc#L25 -#[cfg(not(target_arch = "avr"))] -pub type CmpResult = i32; +use crate::support::cfg_if; + +// Taken from LLVM config: +// https://github.com/llvm/llvm-project/blob/0cf3c437c18ed27d9663d87804a9a15ff6874af2/compiler-rt/lib/builtins/fp_compare_impl.inc#L11-L27 +cfg_if! { + if #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] { + // Aarch64 uses `int` rather than a pointer-sized value. 
+ pub type CmpResult = i32; + } else if #[cfg(target_arch = "avr")] { + // AVR uses a single byte. + pub type CmpResult = i8; + } else { + // In compiler-rt, LLP64 ABIs use `long long` and everything else uses `long`. In effect, + // this means the return value is always pointer-sized. + pub type CmpResult = isize; + } +} #[derive(Clone, Copy)] enum Result { diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index a4f596ab8..2771cfd32 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -11,6 +11,8 @@ mod int_traits; #[allow(unused_imports)] pub use big::{i256, u256}; +#[allow(unused_imports)] +pub(crate) use cfg_if; pub use env::{FpResult, Round, Status}; #[allow(unused_imports)] pub use float_traits::{DFloat, Float, HFloat, IntTy}; From 347adad2a3907e7e5bee51b7582fbc5a54a8e51b Mon Sep 17 00:00:00 2001 From: Dario Damiani <154735680+D-Dario0@users.noreply.github.com> Date: Wed, 28 May 2025 20:48:05 +0200 Subject: [PATCH 1356/1459] Typo in README.md Link to Apache License changed from htps:// to https:// --- libm/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libm/README.md b/libm/README.md index 349e892df..77608db3d 100644 --- a/libm/README.md +++ b/libm/README.md @@ -34,7 +34,7 @@ Usage is under the MIT license, available at ### Contribution Contributions are licensed under both the MIT license and the Apache License, -Version 2.0, available at <htps://www.apache.org/licenses/LICENSE-2.0>. Unless +Version 2.0, available at <https://www.apache.org/licenses/LICENSE-2.0>. Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as mentioned, without any additional terms or conditions.
From fc34c3edad294c2035c24968d43e3d2ce8fbc471 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 28 May 2025 21:08:41 +0000 Subject: [PATCH 1357/1459] aarch64: Add a note saying why we use `frintn` rather than `frintx` --- libm/src/math/arch/aarch64.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libm/src/math/arch/aarch64.rs b/libm/src/math/arch/aarch64.rs index 020bb731c..8896804b5 100644 --- a/libm/src/math/arch/aarch64.rs +++ b/libm/src/math/arch/aarch64.rs @@ -30,6 +30,12 @@ pub fn fmaf(mut x: f32, y: f32, z: f32) -> f32 { x } +// NB: `frintx` is technically the correct instruction for C's `rint`. However, in Rust (and LLVM +// by default), `rint` is identical to `roundeven` (no fpenv interaction) so we use the +// side-effect-free `frintn`. +// +// In general, C code that calls Rust's libm should assume that fpenv is ignored. + pub fn rint(mut x: f64) -> f64 { // SAFETY: `frintn` is available with neon and has no side effects. // From 0608b45a1d68f91481fc943072f01d08ceb3accb Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 21 Apr 2025 09:35:55 +0000 Subject: [PATCH 1358/1459] cleanup: Reuse `MinInt` and `Int` from `libm` in `compiler-builtins` Since the two crates are now in the same repo, it is easier to share code. Begin some deduplication with the integer traits.
--- builtins-test/src/lib.rs | 78 +++++- compiler-builtins/src/float/add.rs | 22 +- compiler-builtins/src/float/conv.rs | 24 +- compiler-builtins/src/float/div.rs | 2 +- compiler-builtins/src/float/mul.rs | 2 +- compiler-builtins/src/float/traits.rs | 4 +- compiler-builtins/src/int/addsub.rs | 6 +- compiler-builtins/src/int/big.rs | 4 +- compiler-builtins/src/int/leading_zeros.rs | 64 +++-- compiler-builtins/src/int/trailing_zeros.rs | 25 +- compiler-builtins/src/int/traits.rs | 273 +------------------- libm/src/math/support/int_traits.rs | 9 + 12 files changed, 168 insertions(+), 345 deletions(-) diff --git a/builtins-test/src/lib.rs b/builtins-test/src/lib.rs index c596ac213..f1673133b 100644 --- a/builtins-test/src/lib.rs +++ b/builtins-test/src/lib.rs @@ -40,6 +40,75 @@ pub const N: u32 = if cfg!(target_arch = "x86_64") && !cfg!(debug_assertions) { 10_000 }; +/// Additional constants that determine how the integer gets fuzzed. +trait FuzzInt: MinInt { + /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing + /// in `builtins-test`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96, + /// 111,112,119,120,125,126,127]. + const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(Self::BITS); + + /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. + const FUZZ_NUM: usize = { + let log2 = Self::BITS.ilog2() as usize; + if log2 == 3 { + // case for u8 + 6 + } else { + // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate + // boundaries. 
+ 8 + (4 * (log2 - 4)) + } + }; +} + +impl FuzzInt for I where I: MinInt {} + +const fn make_fuzz_lengths(bits: u32) -> [u8; 20] { + let mut v = [0u8; 20]; + v[0] = 0; + v[1] = 1; + v[2] = 2; // important for parity and the iX::MIN case when reversed + let mut i = 3; + + // No need for any more until the byte boundary, because there should be no algorithms + // that are sensitive to anything not next to byte boundaries after 2. We also scale + // in powers of two, which is important to prevent u128 corner tests from getting too + // big. + let mut l = 8; + loop { + if l >= ((bits / 2) as u8) { + break; + } + // get both sides of the byte boundary + v[i] = l - 1; + i += 1; + v[i] = l; + i += 1; + l *= 2; + } + + if bits != 8 { + // add the lower side of the middle boundary + v[i] = ((bits / 2) - 1) as u8; + i += 1; + } + + // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS + // boundary because of algorithms that split the high part up. We reverse the scaling + // as we go to Self::BITS. + let mid = i; + let mut j = 1; + loop { + v[i] = (bits as u8) - (v[mid - j]) - 1; + if j == mid { + break; + } + i += 1; + j += 1; + } + v +} + /// Random fuzzing step. When run several times, it results in excellent fuzzing entropy such as: /// 11110101010101011110111110011111 /// 10110101010100001011101011001010 @@ -92,10 +161,9 @@ fn fuzz_step(rng: &mut Xoshiro128StarStar, x: &mut I) { macro_rules! edge_cases { ($I:ident, $case:ident, $inner:block) => { for i0 in 0..$I::FUZZ_NUM { - let mask_lo = (!$I::UnsignedInt::ZERO).wrapping_shr($I::FUZZ_LENGTHS[i0] as u32); + let mask_lo = (!$I::Unsigned::ZERO).wrapping_shr($I::FUZZ_LENGTHS[i0] as u32); for i1 in i0..I::FUZZ_NUM { - let mask_hi = - (!$I::UnsignedInt::ZERO).wrapping_shl($I::FUZZ_LENGTHS[i1 - i0] as u32); + let mask_hi = (!$I::Unsigned::ZERO).wrapping_shl($I::FUZZ_LENGTHS[i1 - i0] as u32); let $case = I::from_unsigned(mask_lo & mask_hi); $inner } @@ -107,7 +175,7 @@ macro_rules! 
edge_cases { /// edge cases, followed by a more random fuzzer that runs `n` times. pub fn fuzz(n: u32, mut f: F) where - ::UnsignedInt: Int, + ::Unsigned: Int, { // edge case tester. Calls `f` 210 times for u128. // zero gets skipped by the loop @@ -128,7 +196,7 @@ where /// The same as `fuzz`, except `f` has two inputs. pub fn fuzz_2(n: u32, f: F) where - ::UnsignedInt: Int, + ::Unsigned: Int, { // Check cases where the first and second inputs are zero. Both call `f` 210 times for `u128`. edge_cases!(I, case, { diff --git a/compiler-builtins/src/float/add.rs b/compiler-builtins/src/float/add.rs index 0426c9cc4..43e3ae931 100644 --- a/compiler-builtins/src/float/add.rs +++ b/compiler-builtins/src/float/add.rs @@ -1,5 +1,5 @@ use crate::float::Float; -use crate::int::{CastInto, Int, MinInt}; +use crate::int::{CastFrom, CastInto, Int, MinInt}; /// Returns `a + b` fn add(a: F, b: F) -> F @@ -12,7 +12,7 @@ where let one = F::Int::ONE; let zero = F::Int::ZERO; - let bits = F::BITS.cast(); + let bits: F::Int = F::BITS.cast(); let significand_bits = F::SIG_BITS; let max_exponent = F::EXP_SAT; @@ -115,9 +115,10 @@ where let align = a_exponent.wrapping_sub(b_exponent).cast(); if align != MinInt::ZERO { if align < bits { - let sticky = - F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != MinInt::ZERO); - b_significand = (b_significand >> align.cast()) | sticky; + let sticky = F::Int::from_bool( + b_significand << u32::cast_from(bits.wrapping_sub(align)) != MinInt::ZERO, + ); + b_significand = (b_significand >> u32::cast_from(align)) | sticky; } else { b_significand = one; // sticky; b is known to be non-zero. 
} @@ -132,8 +133,8 @@ where // If partial cancellation occured, we need to left-shift the result // and adjust the exponent: if a_significand < implicit_bit << 3 { - let shift = - a_significand.leading_zeros() as i32 - (implicit_bit << 3).leading_zeros() as i32; + let shift = a_significand.leading_zeros() as i32 + - (implicit_bit << 3u32).leading_zeros() as i32; a_significand <<= shift; a_exponent -= shift; } @@ -159,9 +160,10 @@ where // Result is denormal before rounding; the exponent is zero and we // need to shift the significand. let shift = (1 - a_exponent).cast(); - let sticky = - F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != MinInt::ZERO); - a_significand = (a_significand >> shift.cast()) | sticky; + let sticky = F::Int::from_bool( + (a_significand << u32::cast_from(bits.wrapping_sub(shift))) != MinInt::ZERO, + ); + a_significand = (a_significand >> u32::cast_from(shift)) | sticky; a_exponent = 0; } diff --git a/compiler-builtins/src/float/conv.rs b/compiler-builtins/src/float/conv.rs index f5427a113..9d732f2cd 100644 --- a/compiler-builtins/src/float/conv.rs +++ b/compiler-builtins/src/float/conv.rs @@ -72,7 +72,7 @@ mod int_to_float { F: Float, I: Int, F::Int: CastFrom, - Conv: Fn(I::UnsignedInt) -> F::Int, + Conv: Fn(I::Unsigned) -> F::Int, { let sign_bit = F::Int::cast_from(i >> (I::BITS - 1)) << (F::BITS - 1); F::from_bits(conv(i.unsigned_abs()) | sign_bit) @@ -313,10 +313,10 @@ intrinsics! 
{ fn float_to_unsigned_int(f: F) -> U where F: Float, - U: Int, + U: Int, F::Int: CastInto, F::Int: CastFrom, - F::Int: CastInto, + F::Int: CastInto, u32: CastFrom, { float_to_int_inner::(f.to_bits(), |i: U| i, || U::MAX) @@ -327,8 +327,8 @@ fn float_to_signed_int(f: F) -> I where F: Float, I: Int + Neg, - I::UnsignedInt: Int, - F::Int: CastInto, + I::Unsigned: Int, + F::Int: CastInto, F::Int: CastFrom, u32: CastFrom, { @@ -355,27 +355,27 @@ where I: Int, FnFoo: FnOnce(I) -> I, FnOob: FnOnce() -> I, - I::UnsignedInt: Int, - F::Int: CastInto, + I::Unsigned: Int, + F::Int: CastInto, F::Int: CastFrom, u32: CastFrom, { let int_max_exp = F::EXP_BIAS + I::MAX.ilog2() + 1; - let foobar = F::EXP_BIAS + I::UnsignedInt::BITS - 1; + let foobar = F::EXP_BIAS + I::Unsigned::BITS - 1; if fbits < F::ONE.to_bits() { // < 0 gets rounded to 0 I::ZERO } else if fbits < F::Int::cast_from(int_max_exp) << F::SIG_BITS { // >= 1, < integer max - let m_base = if I::UnsignedInt::BITS >= F::Int::BITS { - I::UnsignedInt::cast_from(fbits) << (I::BITS - F::SIG_BITS - 1) + let m_base = if I::Unsigned::BITS >= F::Int::BITS { + I::Unsigned::cast_from(fbits) << (I::BITS - F::SIG_BITS - 1) } else { - I::UnsignedInt::cast_from(fbits >> (F::SIG_BITS - I::BITS + 1)) + I::Unsigned::cast_from(fbits >> (F::SIG_BITS - I::BITS + 1)) }; // Set the implicit 1-bit. - let m: I::UnsignedInt = (I::UnsignedInt::ONE << (I::BITS - 1)) | m_base; + let m: I::Unsigned = (I::Unsigned::ONE << (I::BITS - 1)) | m_base; // Shift based on the exponent and bias. 
let s: u32 = (foobar) - u32::cast_from(fbits >> F::SIG_BITS); diff --git a/compiler-builtins/src/float/div.rs b/compiler-builtins/src/float/div.rs index 5df637c7e..3e4f0e20d 100644 --- a/compiler-builtins/src/float/div.rs +++ b/compiler-builtins/src/float/div.rs @@ -370,7 +370,7 @@ where let hi_corr: F::Int = corr_uq1 >> hw; // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1 - let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1) + let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1u32) .wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1)) // 1 to account for the highest bit of corr_UQ1 can be 1 // 1 to account for possible carry diff --git a/compiler-builtins/src/float/mul.rs b/compiler-builtins/src/float/mul.rs index 7f1f19d9b..c811f1406 100644 --- a/compiler-builtins/src/float/mul.rs +++ b/compiler-builtins/src/float/mul.rs @@ -143,7 +143,7 @@ where // a zero of the appropriate sign. Mathematically there is no need to // handle this case separately, but we make it a special case to // simplify the shift logic. - let shift = one.wrapping_sub(product_exponent.cast()).cast(); + let shift: u32 = one.wrapping_sub(product_exponent.cast()).cast(); if shift >= bits { return F::from_bits(product_sign); } diff --git a/compiler-builtins/src/float/traits.rs b/compiler-builtins/src/float/traits.rs index 8ccaa7bcb..a30d20900 100644 --- a/compiler-builtins/src/float/traits.rs +++ b/compiler-builtins/src/float/traits.rs @@ -20,10 +20,10 @@ pub trait Float: + ops::Rem { /// A uint of the same width as the float - type Int: Int; + type Int: Int; /// A int of the same width as the float - type SignedInt: Int + MinInt; + type SignedInt: Int + MinInt; /// An int capable of containing the exponent bits plus a sign bit. This is signed. 
type ExpInt: Int; diff --git a/compiler-builtins/src/int/addsub.rs b/compiler-builtins/src/int/addsub.rs index 1f84e8eb1..b2b21fc2c 100644 --- a/compiler-builtins/src/int/addsub.rs +++ b/compiler-builtins/src/int/addsub.rs @@ -22,7 +22,7 @@ impl UAddSub for u128 {} trait AddSub: Int where - ::UnsignedInt: UAddSub, + ::Unsigned: UAddSub, { fn add(self, other: Self) -> Self { Self::from_unsigned(self.unsigned().uadd(other.unsigned())) @@ -37,7 +37,7 @@ impl AddSub for i128 {} trait Addo: AddSub where - ::UnsignedInt: UAddSub, + ::Unsigned: UAddSub, { fn addo(self, other: Self) -> (Self, bool) { let sum = AddSub::add(self, other); @@ -50,7 +50,7 @@ impl Addo for u128 {} trait Subo: AddSub where - ::UnsignedInt: UAddSub, + ::Unsigned: UAddSub, { fn subo(self, other: Self) -> (Self, bool) { let sum = AddSub::sub(self, other); diff --git a/compiler-builtins/src/int/big.rs b/compiler-builtins/src/int/big.rs index 1402efb8e..8e0600909 100644 --- a/compiler-builtins/src/int/big.rs +++ b/compiler-builtins/src/int/big.rs @@ -45,7 +45,7 @@ impl i256 { impl MinInt for u256 { type OtherSign = i256; - type UnsignedInt = u256; + type Unsigned = u256; const SIGNED: bool = false; const BITS: u32 = 256; @@ -58,7 +58,7 @@ impl MinInt for u256 { impl MinInt for i256 { type OtherSign = u256; - type UnsignedInt = u256; + type Unsigned = u256; const SIGNED: bool = false; const BITS: u32 = 256; diff --git a/compiler-builtins/src/int/leading_zeros.rs b/compiler-builtins/src/int/leading_zeros.rs index 112f4d036..aa5cb3993 100644 --- a/compiler-builtins/src/int/leading_zeros.rs +++ b/compiler-builtins/src/int/leading_zeros.rs @@ -9,11 +9,14 @@ pub use implementation::{leading_zeros_default, leading_zeros_riscv}; pub(crate) use implementation::{leading_zeros_default, leading_zeros_riscv}; mod implementation { - use crate::int::{CastInto, Int}; + use crate::int::{CastFrom, Int}; /// Returns the number of leading binary zeros in `x`. 
#[allow(dead_code)] - pub fn leading_zeros_default>(x: T) -> usize { + pub fn leading_zeros_default(x: I) -> usize + where + usize: CastFrom, + { // The basic idea is to test if the higher bits of `x` are zero and bisect the number // of leading zeros. It is possible for all branches of the bisection to use the same // code path by conditionally shifting the higher parts down to let the next bisection @@ -23,44 +26,48 @@ mod implementation { // because it simplifies the final bisection step. let mut x = x; // the number of potential leading zeros - let mut z = T::BITS as usize; + let mut z = I::BITS as usize; // a temporary - let mut t: T; + let mut t: I; - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { + const { assert!(I::BITS <= 64) }; + if I::BITS >= 64 { t = x >> 32; - if t != T::ZERO { + if t != I::ZERO { z -= 32; x = t; } } - if T::BITS >= 32 { + if I::BITS >= 32 { t = x >> 16; - if t != T::ZERO { + if t != I::ZERO { z -= 16; x = t; } } - const { assert!(T::BITS >= 16) }; + const { assert!(I::BITS >= 16) }; t = x >> 8; - if t != T::ZERO { + if t != I::ZERO { z -= 8; x = t; } t = x >> 4; - if t != T::ZERO { + if t != I::ZERO { z -= 4; x = t; } t = x >> 2; - if t != T::ZERO { + if t != I::ZERO { z -= 2; x = t; } // the last two bisections are combined into one conditional t = x >> 1; - if t != T::ZERO { z - 2 } else { z - x.cast() } + if t != I::ZERO { + z - 2 + } else { + z - usize::cast_from(x) + } // We could potentially save a few cycles by using the LUT trick from // "https://embeddedgurus.com/state-space/2014/09/ @@ -82,10 +89,13 @@ mod implementation { /// Returns the number of leading binary zeros in `x`. #[allow(dead_code)] - pub fn leading_zeros_riscv>(x: T) -> usize { + pub fn leading_zeros_riscv(x: I) -> usize + where + usize: CastFrom, + { let mut x = x; // the number of potential leading zeros - let mut z = T::BITS; + let mut z = I::BITS; // a temporary let mut t: u32; @@ -97,11 +107,11 @@ mod implementation { // right). 
If we try to save an instruction by using `x < imm` for each bisection, we // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, // but the immediate will never fit into 12 bits and never save an instruction. - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { + const { assert!(I::BITS <= 64) }; + if I::BITS >= 64 { // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise // `t` is set to 0. - t = ((x >= (T::ONE << 32)) as u32) << 5; + t = ((x >= (I::ONE << 32)) as u32) << 5; // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the // next step to process. x >>= t; @@ -109,27 +119,27 @@ mod implementation { // leading zeros z -= t; } - if T::BITS >= 32 { - t = ((x >= (T::ONE << 16)) as u32) << 4; + if I::BITS >= 32 { + t = ((x >= (I::ONE << 16)) as u32) << 4; x >>= t; z -= t; } - const { assert!(T::BITS >= 16) }; - t = ((x >= (T::ONE << 8)) as u32) << 3; + const { assert!(I::BITS >= 16) }; + t = ((x >= (I::ONE << 8)) as u32) << 3; x >>= t; z -= t; - t = ((x >= (T::ONE << 4)) as u32) << 2; + t = ((x >= (I::ONE << 4)) as u32) << 2; x >>= t; z -= t; - t = ((x >= (T::ONE << 2)) as u32) << 1; + t = ((x >= (I::ONE << 2)) as u32) << 1; x >>= t; z -= t; - t = (x >= (T::ONE << 1)) as u32; + t = (x >= (I::ONE << 1)) as u32; x >>= t; z -= t; // All bits except the LSB are guaranteed to be zero for this final bisection step. // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. 
- z as usize - x.cast() + z as usize - usize::cast_from(x) } } diff --git a/compiler-builtins/src/int/trailing_zeros.rs b/compiler-builtins/src/int/trailing_zeros.rs index c45d6b1cf..8f63c22c8 100644 --- a/compiler-builtins/src/int/trailing_zeros.rs +++ b/compiler-builtins/src/int/trailing_zeros.rs @@ -4,33 +4,38 @@ pub use implementation::trailing_zeros; pub(crate) use implementation::trailing_zeros; mod implementation { - use crate::int::{CastInto, Int}; + use crate::int::{CastFrom, Int}; /// Returns number of trailing binary zeros in `x`. #[allow(dead_code)] - pub fn trailing_zeros + CastInto + CastInto>(x: T) -> usize { + pub fn trailing_zeros(x: I) -> usize + where + u32: CastFrom, + u16: CastFrom, + u8: CastFrom, + { let mut x = x; let mut r: u32 = 0; let mut t: u32; - const { assert!(T::BITS <= 64) }; - if T::BITS >= 64 { - r += ((CastInto::::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0 + const { assert!(I::BITS <= 64) }; + if I::BITS >= 64 { + r += ((u32::cast_from(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0 x >>= r; // remove 32 zero bits } - if T::BITS >= 32 { - t = ((CastInto::::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0 + if I::BITS >= 32 { + t = ((u16::cast_from(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0 r += t; x >>= t; // x = [0 - 0xFFFF] + higher garbage bits } - const { assert!(T::BITS >= 16) }; - t = ((CastInto::::cast(x) == 0) as u32) << 3; + const { assert!(I::BITS >= 16) }; + t = ((u8::cast_from(x) == 0) as u32) << 3; x >>= t; // x = [0 - 0xFF] + higher garbage bits r += t; - let mut x: u8 = x.cast(); + let mut x: u8 = x.cast_lossy(); t = (((x & 0x0F) == 0) as u32) << 2; x >>= t; // x = [0 - 0xF] + higher garbage bits diff --git a/compiler-builtins/src/int/traits.rs b/compiler-builtins/src/int/traits.rs index 152cb2eee..b474df366 100644 --- a/compiler-builtins/src/int/traits.rs +++ b/compiler-builtins/src/int/traits.rs @@ -1,275 +1,4 @@ 
-use core::ops; - -/// Minimal integer implementations needed on all integer types, including wide integers. -#[allow(dead_code)] -pub trait MinInt: - Copy - + core::fmt::Debug - + ops::BitOr - + ops::Not - + ops::Shl -{ - /// Type with the same width but other signedness - type OtherSign: MinInt; - /// Unsigned version of Self - type UnsignedInt: MinInt; - - /// If `Self` is a signed integer - const SIGNED: bool; - - /// The bitwidth of the int type - const BITS: u32; - - const ZERO: Self; - const ONE: Self; - const MIN: Self; - const MAX: Self; -} - -/// Trait for some basic operations on integers -#[allow(dead_code)] -pub trait Int: - MinInt - + PartialEq - + PartialOrd - + ops::AddAssign - + ops::SubAssign - + ops::BitAndAssign - + ops::BitOrAssign - + ops::BitXorAssign - + ops::ShlAssign - + ops::ShrAssign - + ops::Add - + ops::Sub - + ops::Mul - + ops::Div - + ops::Shr - + ops::BitXor - + ops::BitAnd -{ - /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing - /// in `builtins-test`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96, - /// 111,112,119,120,125,126,127]. - const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(::BITS); - - /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. - const FUZZ_NUM: usize = { - let log2 = (::BITS - 1).count_ones() as usize; - if log2 == 3 { - // case for u8 - 6 - } else { - // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate - // boundaries. - 8 + (4 * (log2 - 4)) - } - }; - - fn unsigned(self) -> Self::UnsignedInt; - fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; - fn unsigned_abs(self) -> Self::UnsignedInt; - - fn from_bool(b: bool) -> Self; - - /// Prevents the need for excessive conversions between signed and unsigned - fn logical_shr(self, other: u32) -> Self; - - /// Absolute difference between two integers. 
- fn abs_diff(self, other: Self) -> Self::UnsignedInt; - - // copied from primitive integers, but put in a trait - fn is_zero(self) -> bool; - fn wrapping_neg(self) -> Self; - fn wrapping_add(self, other: Self) -> Self; - fn wrapping_mul(self, other: Self) -> Self; - fn wrapping_sub(self, other: Self) -> Self; - fn wrapping_shl(self, other: u32) -> Self; - fn wrapping_shr(self, other: u32) -> Self; - fn rotate_left(self, other: u32) -> Self; - fn overflowing_add(self, other: Self) -> (Self, bool); - fn leading_zeros(self) -> u32; - fn ilog2(self) -> u32; -} - -pub(crate) const fn make_fuzz_lengths(bits: u32) -> [u8; 20] { - let mut v = [0u8; 20]; - v[0] = 0; - v[1] = 1; - v[2] = 2; // important for parity and the iX::MIN case when reversed - let mut i = 3; - - // No need for any more until the byte boundary, because there should be no algorithms - // that are sensitive to anything not next to byte boundaries after 2. We also scale - // in powers of two, which is important to prevent u128 corner tests from getting too - // big. - let mut l = 8; - loop { - if l >= ((bits / 2) as u8) { - break; - } - // get both sides of the byte boundary - v[i] = l - 1; - i += 1; - v[i] = l; - i += 1; - l *= 2; - } - - if bits != 8 { - // add the lower side of the middle boundary - v[i] = ((bits / 2) - 1) as u8; - i += 1; - } - - // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS - // boundary because of algorithms that split the high part up. We reverse the scaling - // as we go to Self::BITS. - let mid = i; - let mut j = 1; - loop { - v[i] = (bits as u8) - (v[mid - j]) - 1; - if j == mid { - break; - } - i += 1; - j += 1; - } - v -} - -macro_rules! 
int_impl_common { - ($ty:ty) => { - fn from_bool(b: bool) -> Self { - b as $ty - } - - fn logical_shr(self, other: u32) -> Self { - Self::from_unsigned(self.unsigned().wrapping_shr(other)) - } - - fn is_zero(self) -> bool { - self == Self::ZERO - } - - fn wrapping_neg(self) -> Self { - ::wrapping_neg(self) - } - - fn wrapping_add(self, other: Self) -> Self { - ::wrapping_add(self, other) - } - - fn wrapping_mul(self, other: Self) -> Self { - ::wrapping_mul(self, other) - } - fn wrapping_sub(self, other: Self) -> Self { - ::wrapping_sub(self, other) - } - - fn wrapping_shl(self, other: u32) -> Self { - ::wrapping_shl(self, other) - } - - fn wrapping_shr(self, other: u32) -> Self { - ::wrapping_shr(self, other) - } - - fn rotate_left(self, other: u32) -> Self { - ::rotate_left(self, other) - } - - fn overflowing_add(self, other: Self) -> (Self, bool) { - ::overflowing_add(self, other) - } - - fn leading_zeros(self) -> u32 { - ::leading_zeros(self) - } - - fn ilog2(self) -> u32 { - ::ilog2(self) - } - }; -} - -macro_rules! 
int_impl { - ($ity:ty, $uty:ty) => { - impl MinInt for $uty { - type OtherSign = $ity; - type UnsignedInt = $uty; - - const BITS: u32 = ::ZERO.count_zeros(); - const SIGNED: bool = Self::MIN != Self::ZERO; - - const ZERO: Self = 0; - const ONE: Self = 1; - const MIN: Self = ::MIN; - const MAX: Self = ::MAX; - } - - impl Int for $uty { - fn unsigned(self) -> $uty { - self - } - - // It makes writing macros easier if this is implemented for both signed and unsigned - #[allow(clippy::wrong_self_convention)] - fn from_unsigned(me: $uty) -> Self { - me - } - - fn unsigned_abs(self) -> Self { - self - } - - fn abs_diff(self, other: Self) -> Self { - self.abs_diff(other) - } - - int_impl_common!($uty); - } - - impl MinInt for $ity { - type OtherSign = $uty; - type UnsignedInt = $uty; - - const BITS: u32 = ::ZERO.count_zeros(); - const SIGNED: bool = Self::MIN != Self::ZERO; - - const ZERO: Self = 0; - const ONE: Self = 1; - const MIN: Self = ::MIN; - const MAX: Self = ::MAX; - } - - impl Int for $ity { - fn unsigned(self) -> $uty { - self as $uty - } - - fn from_unsigned(me: $uty) -> Self { - me as $ity - } - - fn unsigned_abs(self) -> Self::UnsignedInt { - self.unsigned_abs() - } - - fn abs_diff(self, other: Self) -> $uty { - self.abs_diff(other) - } - - int_impl_common!($ity); - } - }; -} - -int_impl!(isize, usize); -int_impl!(i8, u8); -int_impl!(i16, u16); -int_impl!(i32, u32); -int_impl!(i64, u64); -int_impl!(i128, u128); +pub use crate::support::{Int, MinInt}; /// Trait for integers twice the bit width of another integer. This is implemented for all /// primitives except for `u8`, because there is not a smaller primitive. 
diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index 3ec1faba1..fa9e06066 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -78,6 +78,7 @@ pub trait Int: fn unsigned(self) -> Self::Unsigned; fn from_unsigned(unsigned: Self::Unsigned) -> Self; fn abs(self) -> Self; + fn unsigned_abs(self) -> Self::Unsigned; fn from_bool(b: bool) -> Self; @@ -203,6 +204,10 @@ macro_rules! int_impl { unimplemented!() } + fn unsigned_abs(self) -> Self { + unimplemented!() + } + // It makes writing macros easier if this is implemented for both signed and unsigned #[allow(clippy::wrong_self_convention)] fn from_unsigned(me: $uty) -> Self { @@ -242,6 +247,10 @@ macro_rules! int_impl { self.abs() } + fn unsigned_abs(self) -> Self::Unsigned { + self.unsigned_abs() + } + fn from_unsigned(me: $uty) -> Self { me as $ity } From 6c5dd2da0b90289b9f5faf7fc15cf568e2422c9b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 29 May 2025 03:51:43 +0000 Subject: [PATCH 1359/1459] Reuse `libm`'s `CastInto` and `CastFrom` in `compiler-builtins` --- compiler-builtins/src/float/add.rs | 2 +- compiler-builtins/src/float/conv.rs | 6 +-- compiler-builtins/src/float/div.rs | 2 +- compiler-builtins/src/float/mul.rs | 2 +- compiler-builtins/src/float/trunc.rs | 2 +- compiler-builtins/src/int/trailing_zeros.rs | 6 +-- compiler-builtins/src/int/traits.rs | 43 +-------------------- libm/src/math/support/int_traits.rs | 5 +++ 8 files changed, 16 insertions(+), 52 deletions(-) diff --git a/compiler-builtins/src/float/add.rs b/compiler-builtins/src/float/add.rs index 43e3ae931..0cc362f70 100644 --- a/compiler-builtins/src/float/add.rs +++ b/compiler-builtins/src/float/add.rs @@ -168,7 +168,7 @@ where } // Low three bits are round, guard, and sticky.
- let a_significand_i32: i32 = a_significand.cast(); + let a_significand_i32: i32 = a_significand.cast_lossy(); let round_guard_sticky: i32 = a_significand_i32 & 0x7; // Shift the significand into place, and mask off the implicit bit. diff --git a/compiler-builtins/src/float/conv.rs b/compiler-builtins/src/float/conv.rs index 9d732f2cd..75ea7ce02 100644 --- a/compiler-builtins/src/float/conv.rs +++ b/compiler-builtins/src/float/conv.rs @@ -74,7 +74,7 @@ mod int_to_float { F::Int: CastFrom, Conv: Fn(I::Unsigned) -> F::Int, { - let sign_bit = F::Int::cast_from(i >> (I::BITS - 1)) << (F::BITS - 1); + let sign_bit = F::Int::cast_from_lossy(i >> (I::BITS - 1)) << (F::BITS - 1); F::from_bits(conv(i.unsigned_abs()) | sign_bit) } @@ -166,7 +166,7 @@ mod int_to_float { // Within the upper `F::BITS`, everything except for the signifcand // gets truncated - let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIG_BITS - 1)).cast(); + let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIG_BITS - 1)).cast_lossy(); // The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just // check if it is nonzero. @@ -371,7 +371,7 @@ where let m_base = if I::Unsigned::BITS >= F::Int::BITS { I::Unsigned::cast_from(fbits) << (I::BITS - F::SIG_BITS - 1) } else { - I::Unsigned::cast_from(fbits >> (F::SIG_BITS - I::BITS + 1)) + I::Unsigned::cast_from_lossy(fbits >> (F::SIG_BITS - I::BITS + 1)) }; // Set the implicit 1-bit. 
diff --git a/compiler-builtins/src/float/div.rs b/compiler-builtins/src/float/div.rs index 3e4f0e20d..fc1fc0851 100644 --- a/compiler-builtins/src/float/div.rs +++ b/compiler-builtins/src/float/div.rs @@ -482,7 +482,7 @@ where let ret = quotient.wrapping_shr(u32::cast_from(res_exponent.wrapping_neg()) + 1); residual_lo = a_significand - .wrapping_shl(significand_bits.wrapping_add(CastInto::::cast(res_exponent))) + .wrapping_shl(significand_bits.wrapping_add(CastInto::::cast_lossy(res_exponent))) .wrapping_sub(ret.wrapping_mul(b_significand) << 1); ret }; diff --git a/compiler-builtins/src/float/mul.rs b/compiler-builtins/src/float/mul.rs index c811f1406..dbed3095c 100644 --- a/compiler-builtins/src/float/mul.rs +++ b/compiler-builtins/src/float/mul.rs @@ -143,7 +143,7 @@ where // a zero of the appropriate sign. Mathematically there is no need to // handle this case separately, but we make it a special case to // simplify the shift logic. - let shift: u32 = one.wrapping_sub(product_exponent.cast()).cast(); + let shift: u32 = one.wrapping_sub(product_exponent.cast_lossy()).cast(); if shift >= bits { return F::from_bits(product_sign); } diff --git a/compiler-builtins/src/float/trunc.rs b/compiler-builtins/src/float/trunc.rs index ca8a0f368..93db5d8bb 100644 --- a/compiler-builtins/src/float/trunc.rs +++ b/compiler-builtins/src/float/trunc.rs @@ -50,7 +50,7 @@ where // The exponent of a is within the range of normal numbers in the // destination format. We can convert by simply right-shifting with // rounding and adjusting the exponent. - abs_result = (a_abs >> sig_bits_delta).cast(); + abs_result = (a_abs >> sig_bits_delta).cast_lossy(); // Cast before shifting to prevent overflow. 
let bias_diff: R::Int = src_exp_bias.wrapping_sub(dst_exp_bias).cast(); let tmp = bias_diff << R::SIG_BITS; diff --git a/compiler-builtins/src/int/trailing_zeros.rs b/compiler-builtins/src/int/trailing_zeros.rs index 8f63c22c8..1b0ae5b73 100644 --- a/compiler-builtins/src/int/trailing_zeros.rs +++ b/compiler-builtins/src/int/trailing_zeros.rs @@ -20,18 +20,18 @@ mod implementation { const { assert!(I::BITS <= 64) }; if I::BITS >= 64 { - r += ((u32::cast_from(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0 + r += ((u32::cast_from_lossy(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0 x >>= r; // remove 32 zero bits } if I::BITS >= 32 { - t = ((u16::cast_from(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0 + t = ((u16::cast_from_lossy(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0 r += t; x >>= t; // x = [0 - 0xFFFF] + higher garbage bits } const { assert!(I::BITS >= 16) }; - t = ((u8::cast_from(x) == 0) as u32) << 3; + t = ((u8::cast_from_lossy(x) == 0) as u32) << 3; x >>= t; // x = [0 - 0xFF] + higher garbage bits r += t; diff --git a/compiler-builtins/src/int/traits.rs b/compiler-builtins/src/int/traits.rs index b474df366..25b9718ad 100644 --- a/compiler-builtins/src/int/traits.rs +++ b/compiler-builtins/src/int/traits.rs @@ -1,4 +1,4 @@ -pub use crate::support::{Int, MinInt}; +pub use crate::support::{CastFrom, CastInto, Int, MinInt}; /// Trait for integers twice the bit width of another integer. This is implemented for all /// primitives except for `u8`, because there is not a smaller primitive. @@ -97,44 +97,3 @@ impl_h_int!( i32 u32 i64, i64 u64 i128 ); - -/// Trait to express (possibly lossy) casting of integers -pub trait CastInto: Copy { - fn cast(self) -> T; -} - -pub trait CastFrom: Copy { - fn cast_from(value: T) -> Self; -} - -impl + Copy> CastFrom for T { - fn cast_from(value: U) -> Self { - value.cast() - } -} - -macro_rules! 
cast_into { - ($ty:ty) => { - cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); - }; - ($ty:ty; $($into:ty),*) => {$( - impl CastInto<$into> for $ty { - fn cast(self) -> $into { - self as $into - } - } - )*}; -} - -cast_into!(usize); -cast_into!(isize); -cast_into!(u8); -cast_into!(i8); -cast_into!(u16); -cast_into!(i16); -cast_into!(u32); -cast_into!(i32); -cast_into!(u64); -cast_into!(i64); -cast_into!(u128); -cast_into!(i128); diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index fa9e06066..716af748a 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -374,14 +374,19 @@ impl_h_int!( /// Trait to express (possibly lossy) casting of integers pub trait CastInto: Copy { /// By default, casts should be exact. + #[track_caller] fn cast(self) -> T; /// Call for casts that are expected to truncate. + /// + /// In practice, this is exactly the same as `cast`; the main difference is to document intent + /// in code. `cast` may panic in debug mode. fn cast_lossy(self) -> T; } pub trait CastFrom: Copy { /// By default, casts should be exact. + #[track_caller] fn cast_from(value: T) -> Self; /// Call for casts that are expected to truncate. From b5638a3cac2d177cd6fc65a23559e1a8847e8ae0 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 28 May 2025 18:15:41 +0000 Subject: [PATCH 1360/1459] Remove unneeded C symbols These are now provided by `compiler-builtins`, so there is no need to also build the C versions. This was detected by checking for duplicate symbols and not excluding weak symbols (like CI currently does). 
--- compiler-builtins/build.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/compiler-builtins/build.rs b/compiler-builtins/build.rs index 90d98ec7c..d37fdc5df 100644 --- a/compiler-builtins/build.rs +++ b/compiler-builtins/build.rs @@ -555,7 +555,6 @@ mod c { if (target.arch == "aarch64" || target.arch == "arm64ec") && consider_float_intrinsics { sources.extend(&[ - ("__comparetf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c"), ("__fe_raise_inexact", "fp_mode.c"), ]); @@ -570,11 +569,11 @@ mod c { } if target.arch == "mips64" { - sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]); + sources.extend(&[("__fe_getround", "fp_mode.c")]); } if target.arch == "loongarch64" { - sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]); + sources.extend(&[("__fe_getround", "fp_mode.c")]); } // Remove the assembly implementations that won't compile for the target From 9f0cfc24de919d3b1e6b58bb11994f08db3116f5 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 19 Apr 2025 07:38:43 +0000 Subject: [PATCH 1361/1459] Replace the `nm` symbol check with a Rust implementation This should be less error-prone and more adaptable than the `nm` version, and have better cross-platform support without needing LLVM `nm` installed. 
--- Cargo.toml | 1 + ci/run.sh | 125 +++-------------- crates/symbol-check/Cargo.toml | 13 ++ crates/symbol-check/src/main.rs | 231 ++++++++++++++++++++++++++++++++ 4 files changed, 262 insertions(+), 108 deletions(-) create mode 100644 crates/symbol-check/Cargo.toml create mode 100644 crates/symbol-check/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index b39ec8a25..bc6b4bd29 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "crates/libm-macros", "crates/musl-math-sys", "crates/panic-handler", + "crates/symbol-check", "crates/util", "libm", "libm-test", diff --git a/ci/run.sh b/ci/run.sh index 68d13c130..cf3f7dfda 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -47,87 +47,25 @@ else fi fi - -declare -a rlib_paths - -# Set the `rlib_paths` global array to a list of all compiler-builtins rlibs -update_rlib_paths() { - if [ -d /builtins-target ]; then - rlib_paths=( /builtins-target/"${target}"/debug/deps/libcompiler_builtins-*.rlib ) - else - rlib_paths=( target/"${target}"/debug/deps/libcompiler_builtins-*.rlib ) - fi -} - -# Remove any existing artifacts from previous tests that don't set #![compiler_builtins] -update_rlib_paths -rm -f "${rlib_paths[@]}" - -cargo build -p compiler_builtins --target "$target" -cargo build -p compiler_builtins --target "$target" --release -cargo build -p compiler_builtins --target "$target" --features c -cargo build -p compiler_builtins --target "$target" --features c --release -cargo build -p compiler_builtins --target "$target" --features no-asm -cargo build -p compiler_builtins --target "$target" --features no-asm --release -cargo build -p compiler_builtins --target "$target" --features no-f16-f128 -cargo build -p compiler_builtins --target "$target" --features no-f16-f128 --release - -PREFIX=${target//unknown-/}- -case "$target" in - armv7-*) - PREFIX=arm-linux-gnueabihf- - ;; - thumb*) - PREFIX=arm-none-eabi- - ;; - *86*-*) - PREFIX= - ;; -esac - -NM=$(find "$(rustc --print sysroot)" \( -name llvm-nm -o -name 
llvm-nm.exe \) ) -if [ "$NM" = "" ]; then - NM="${PREFIX}nm" -fi - -# i686-pc-windows-gnu tools have a dependency on some DLLs, so run it with -# rustup run to ensure that those are in PATH. -TOOLCHAIN="$(rustup show active-toolchain | sed 's/ (default)//')" -if [[ "$TOOLCHAIN" == *i686-pc-windows-gnu ]]; then - NM="rustup run $TOOLCHAIN $NM" -fi - -# Look out for duplicated symbols when we include the compiler-rt (C) implementation -update_rlib_paths -for rlib in "${rlib_paths[@]}"; do - set +x - echo "================================================================" - echo "checking $rlib for duplicate symbols" - echo "================================================================" - set -x - - duplicates_found=0 - - # NOTE On i586, It's normal that the get_pc_thunk symbol appears several - # times so ignore it - $NM -g --defined-only "$rlib" 2>&1 | - sort | - uniq -d | - grep -v __x86.get_pc_thunk --quiet | - grep 'T __' && duplicates_found=1 - - if [ "$duplicates_found" != 0 ]; then - echo "error: found duplicate symbols" - exit 1 - else - echo "success; no duplicate symbols found" - fi -done - -rm -f "${rlib_paths[@]}" +# Ensure there are no duplicate symbols or references to `core` when +# `compiler-builtins` is built with various features. Symcheck invokes Cargo to +# build with the arguments we provide it, then validates the built artifacts. 
+symcheck=(cargo run -p symbol-check --release) +[[ "$target" = "wasm"* ]] && symcheck+=(--features wasm) +symcheck+=(-- build-and-check) + +"${symcheck[@]}" -p compiler_builtins --target "$target" +"${symcheck[@]}" -p compiler_builtins --target "$target" --release +"${symcheck[@]}" -p compiler_builtins --target "$target" --features c +"${symcheck[@]}" -p compiler_builtins --target "$target" --features c --release +"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm +"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm --release +"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128 +"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128 --release build_intrinsics_test() { - cargo build \ + # symcheck also checks the results of builtins-test-intrinsics + "${symcheck[@]}" \ --target "$target" --verbose \ --manifest-path builtins-test-intrinsics/Cargo.toml "$@" } @@ -143,35 +81,6 @@ build_intrinsics_test --features c --release CARGO_PROFILE_DEV_LTO=true build_intrinsics_test CARGO_PROFILE_RELEASE_LTO=true build_intrinsics_test --release -# Ensure no references to any symbols from core -update_rlib_paths -for rlib in "${rlib_paths[@]}"; do - set +x - echo "================================================================" - echo "checking $rlib for references to core" - echo "================================================================" - set -x - - tmpdir="${CARGO_TARGET_DIR:-target}/tmp" - test -d "$tmpdir" || mkdir "$tmpdir" - defined="$tmpdir/defined_symbols.txt" - undefined="$tmpdir/defined_symbols.txt" - - $NM --quiet -U "$rlib" | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > "$defined" - $NM --quiet -u "$rlib" | grep 'U _ZN4core' | awk '{print $2}' | sort | uniq > "$undefined" - grep_has_results=0 - grep -v -F -x -f "$defined" "$undefined" && grep_has_results=1 - - if [ "$target" = "powerpc64-unknown-linux-gnu" ]; then - echo "FIXME: powerpc64 fails these 
tests" - elif [ "$grep_has_results" != 0 ]; then - echo "error: found unexpected references to core" - exit 1 - else - echo "success; no references to core found" - fi -done - # Test libm # Make sure a simple build works diff --git a/crates/symbol-check/Cargo.toml b/crates/symbol-check/Cargo.toml new file mode 100644 index 000000000..30969ee40 --- /dev/null +++ b/crates/symbol-check/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "symbol-check" +version = "0.1.0" +edition = "2024" +publish = false + +[dependencies] +# FIXME: used as a git dependency since the latest release does not support wasm +object = { git = "https://github.com/gimli-rs/object.git", rev = "013fac75da56a684377af4151b8164b78c1790e0" } +serde_json = "1.0.140" + +[features] +wasm = ["object/wasm"] diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs new file mode 100644 index 000000000..104505438 --- /dev/null +++ b/crates/symbol-check/src/main.rs @@ -0,0 +1,231 @@ +//! Tool used by CI to inspect compiler-builtins archives and help ensure we won't run into any +//! linking errors. + +use std::collections::{BTreeMap, BTreeSet}; +use std::fs; +use std::io::{BufRead, BufReader}; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; + +use object::read::archive::{ArchiveFile, ArchiveMember}; +use object::{Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection}; +use serde_json::Value; + +const CHECK_LIBRARIES: &[&str] = &["compiler_builtins", "builtins_test_intrinsics"]; +const CHECK_EXTENSIONS: &[Option<&str>] = &[Some("rlib"), Some("a"), Some("exe"), None]; + +const USAGE: &str = "Usage: + + symbol-check build-and-check CARGO_ARGS ... + +Cargo will get invoked with `CARGO_ARGS` and all output +`compiler_builtins*.rlib` files will be checked. +"; + +fn main() { + // Create a `&str` vec so we can match on it. + let args = std::env::args().collect::>(); + let args_ref = args.iter().map(String::as_str).collect::>(); + + match &args_ref[1..] 
{ + ["build-and-check", rest @ ..] if !rest.is_empty() => { + let paths = exec_cargo_with_args(rest); + for path in paths { + println!("Checking {}", path.display()); + verify_no_duplicates(&path); + verify_core_symbols(&path); + } + } + _ => { + println!("{USAGE}"); + std::process::exit(1); + } + } +} + +/// Run `cargo build` with the provided additional arguments, collecting the list of created +/// libraries. +fn exec_cargo_with_args(args: &[&str]) -> Vec { + let mut cmd = Command::new("cargo") + .arg("build") + .arg("--message-format=json") + .args(args) + .stdout(Stdio::piped()) + .spawn() + .expect("failed to launch Cargo"); + + let stdout = cmd.stdout.take().unwrap(); + let reader = BufReader::new(stdout); + let mut check_files = Vec::new(); + + for line in reader.lines() { + let line = line.expect("failed to read line"); + println!("{line}"); // tee to stdout + + // Select only steps that create files + let j: Value = serde_json::from_str(&line).expect("failed to deserialize"); + if j["reason"] != "compiler-artifact" { + continue; + } + + // Find rlibs in the created file list that match our expected library names and + // extensions. + for fpath in j["filenames"].as_array().expect("filenames not an array") { + let path = fpath.as_str().expect("file name not a string"); + let path = PathBuf::from(path); + + if CHECK_EXTENSIONS.contains(&path.extension().map(|ex| ex.to_str().unwrap())) { + let fname = path.file_name().unwrap().to_str().unwrap(); + + if CHECK_LIBRARIES.iter().any(|lib| fname.contains(lib)) { + check_files.push(path); + } + } + } + } + + cmd.wait().expect("failed to wait on Cargo"); + + assert!(!check_files.is_empty(), "no compiler_builtins rlibs found"); + println!("Collected the following rlibs to check: {check_files:#?}"); + + check_files +} + +/// Information collected from `object`, for convenience. 
+#[expect(unused)] // only for printing +#[derive(Clone, Debug)] +struct SymInfo { + name: String, + kind: SymbolKind, + scope: SymbolScope, + section: SymbolSection, + is_undefined: bool, + is_global: bool, + is_local: bool, + is_weak: bool, + is_common: bool, + address: u64, + object: String, +} + +impl SymInfo { + fn new(sym: &Symbol, member: &ArchiveMember) -> Self { + Self { + name: sym.name().expect("missing name").to_owned(), + kind: sym.kind(), + scope: sym.scope(), + section: sym.section(), + is_undefined: sym.is_undefined(), + is_global: sym.is_global(), + is_local: sym.is_local(), + is_weak: sym.is_weak(), + is_common: sym.is_common(), + address: sym.address(), + object: String::from_utf8_lossy(member.name()).into_owned(), + } + } +} + +/// Ensure that the same global symbol isn't defined in multiple object files within an archive. +/// +/// Note that this will also locate cases where a symbol is weakly defined in more than one place. +/// Technically there are no linker errors that will come from this, but it keeps our binary more +/// straightforward and saves some distribution size. +fn verify_no_duplicates(path: &Path) { + let mut syms = BTreeMap::::new(); + let mut dups = Vec::new(); + let mut found_any = false; + + for_each_symbol(path, |symbol, member| { + // Only check defined globals + if !symbol.is_global() || symbol.is_undefined() { + return; + } + + let sym = SymInfo::new(&symbol, member); + + // x86-32 includes multiple copies of thunk symbols + if sym.name.starts_with("__x86.get_pc_thunk") { + return; + } + + // Windows has symbols for literal numeric constants, string literals, and MinGW pseudo- + // relocations. These are allowed to have repeated definitions. 
+ let win_allowed_dup_pfx = ["__real@", "__xmm@", "??_C@_", ".refptr"]; + if win_allowed_dup_pfx + .iter() + .any(|pfx| sym.name.starts_with(pfx)) + { + return; + } + + match syms.get(&sym.name) { + Some(existing) => { + dups.push(sym); + dups.push(existing.clone()); + } + None => { + syms.insert(sym.name.clone(), sym); + } + } + + found_any = true; + }); + + assert!(found_any, "no symbols found"); + + if !dups.is_empty() { + dups.sort_unstable_by(|a, b| a.name.cmp(&b.name)); + panic!("found duplicate symbols: {dups:#?}"); + } + + println!(" success: no duplicate symbols found"); +} + +/// Ensure that there are no references to symbols from `core` that aren't also (somehow) defined. +fn verify_core_symbols(path: &Path) { + let mut defined = BTreeSet::new(); + let mut undefined = Vec::new(); + let mut has_symbols = false; + + for_each_symbol(path, |symbol, member| { + has_symbols = true; + + // Find only symbols from `core` + if !symbol.name().unwrap().contains("_ZN4core") { + return; + } + + let sym = SymInfo::new(&symbol, member); + if sym.is_undefined { + undefined.push(sym); + } else { + defined.insert(sym.name); + } + }); + + assert!(has_symbols, "no symbols found"); + + // Discard any symbols that are defined somewhere in the archive + undefined.retain(|sym| !defined.contains(&sym.name)); + + if !undefined.is_empty() { + undefined.sort_unstable_by(|a, b| a.name.cmp(&b.name)); + panic!("found undefined symbols from core: {undefined:#?}"); + } + + println!(" success: no undefined references to core found"); +} + +/// For a given archive path, do something with each symbol. 
+fn for_each_symbol(path: &Path, mut f: impl FnMut(Symbol, &ArchiveMember)) { + let data = fs::read(path).expect("reading file failed"); + let archive = ArchiveFile::parse(data.as_slice()).expect("archive parse failed"); + for member in archive.members() { + let member = member.expect("failed to access member"); + let obj_data = member.data(&*data).expect("failed to access object"); + let obj = object::File::parse(obj_data).expect("failed to parse object"); + obj.symbols().for_each(|sym| f(sym, &member)); + } +} From bfd4058825e0002b0369f105467a93e8290969c5 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 28 May 2025 19:59:16 +0000 Subject: [PATCH 1362/1459] Remove the now-unneeded llvm-tools-preview Since a working `nm` is no longer needed as part of CI, the rustup component can be removed. --- .github/workflows/main.yaml | 1 - crates/symbol-check/src/main.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index d13dd6b0f..567ad1205 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -119,7 +119,6 @@ jobs: rustup update "$channel" --no-self-update rustup default "$channel" rustup target add "${{ matrix.target }}" - rustup component add llvm-tools-preview - uses: taiki-e/install-action@nextest - uses: Swatinem/rust-cache@v2 with: diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs index 104505438..4e6417fdf 100644 --- a/crates/symbol-check/src/main.rs +++ b/crates/symbol-check/src/main.rs @@ -84,7 +84,7 @@ fn exec_cargo_with_args(args: &[&str]) -> Vec { } } - cmd.wait().expect("failed to wait on Cargo"); + assert!(cmd.wait().expect("failed to wait on Cargo").success()); assert!(!check_files.is_empty(), "no compiler_builtins rlibs found"); println!("Collected the following rlibs to check: {check_files:#?}"); From 9bd702d704c81758b991a21636b857acd58a6a04 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 29 May 2025 15:40:05 
+0000 Subject: [PATCH 1363/1459] Change `compiler-builtins` to edition 2024 Do the same for `builtins-test-intrinsics`. Mostly this means updating `extern` to `unsafe extern`, and fixing a few new Clippy lints. --- builtins-test-intrinsics/Cargo.toml | 2 +- builtins-test-intrinsics/src/main.rs | 6 ++++-- builtins-test/tests/aeabi_memclr.rs | 3 ++- builtins-test/tests/aeabi_memcpy.rs | 3 ++- builtins-test/tests/aeabi_memset.rs | 3 ++- compiler-builtins/Cargo.toml | 2 +- compiler-builtins/src/arm.rs | 7 +++++-- .../src/int/specialized_div_rem/mod.rs | 16 ++++++++-------- compiler-builtins/src/macros.rs | 4 ++-- compiler-builtins/src/probestack.rs | 4 +++- 10 files changed, 30 insertions(+), 20 deletions(-) diff --git a/builtins-test-intrinsics/Cargo.toml b/builtins-test-intrinsics/Cargo.toml index 6e10628a4..704de20c5 100644 --- a/builtins-test-intrinsics/Cargo.toml +++ b/builtins-test-intrinsics/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "builtins-test-intrinsics" version = "0.1.0" -edition = "2021" +edition = "2024" publish = false license = "MIT OR Apache-2.0" diff --git a/builtins-test-intrinsics/src/main.rs b/builtins-test-intrinsics/src/main.rs index 1fa7b0091..96fe4a738 100644 --- a/builtins-test-intrinsics/src/main.rs +++ b/builtins-test-intrinsics/src/main.rs @@ -15,9 +15,10 @@ extern crate panic_handler; +// SAFETY: no definitions, only used for linking #[cfg(all(not(thumb), not(windows), not(target_arch = "wasm32")))] #[link(name = "c")] -extern "C" {} +unsafe extern "C" {} // Every function in this module maps will be lowered to an intrinsic by LLVM, if the platform // doesn't have native support for the operation used in the function. ARM has a naming convention @@ -663,10 +664,11 @@ pub fn _start() -> ! 
{ loop {} } +// SAFETY: no definitions, only used for linking #[cfg(windows)] #[link(name = "kernel32")] #[link(name = "msvcrt")] -extern "C" {} +unsafe extern "C" {} // ARM targets need these symbols #[unsafe(no_mangle)] diff --git a/builtins-test/tests/aeabi_memclr.rs b/builtins-test/tests/aeabi_memclr.rs index bfd15a391..0761feaff 100644 --- a/builtins-test/tests/aeabi_memclr.rs +++ b/builtins-test/tests/aeabi_memclr.rs @@ -24,7 +24,8 @@ macro_rules! panic { }; } -extern "C" { +// SAFETY: defined in compiler-builtins +unsafe extern "aapcs" { fn __aeabi_memclr4(dest: *mut u8, n: usize); fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32); } diff --git a/builtins-test/tests/aeabi_memcpy.rs b/builtins-test/tests/aeabi_memcpy.rs index c892c5aba..e76e712a2 100644 --- a/builtins-test/tests/aeabi_memcpy.rs +++ b/builtins-test/tests/aeabi_memcpy.rs @@ -22,7 +22,8 @@ macro_rules! panic { }; } -extern "C" { +// SAFETY: defined in compiler-builtins +unsafe extern "aapcs" { fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize); fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize); } diff --git a/builtins-test/tests/aeabi_memset.rs b/builtins-test/tests/aeabi_memset.rs index 34ab3acc7..8f9f80f96 100644 --- a/builtins-test/tests/aeabi_memset.rs +++ b/builtins-test/tests/aeabi_memset.rs @@ -24,7 +24,8 @@ macro_rules! panic { }; } -extern "C" { +// SAFETY: defined in compiler-builtins +unsafe extern "aapcs" { fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32); } diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index d65a22152..93eb3e01b 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -7,7 +7,7 @@ readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" homepage = "https://github.com/rust-lang/compiler-builtins" documentation = "https://docs.rs/compiler_builtins" -edition = "2021" +edition = "2024" description = "Compiler intrinsics used by the Rust compiler." 
links = "compiler-rt" diff --git a/compiler-builtins/src/arm.rs b/compiler-builtins/src/arm.rs index a9107e3cd..a7d84e49b 100644 --- a/compiler-builtins/src/arm.rs +++ b/compiler-builtins/src/arm.rs @@ -1,13 +1,16 @@ #![cfg(not(feature = "no-asm"))] // Interfaces used by naked trampolines. -extern "C" { +// SAFETY: these are defined in compiler-builtins +unsafe extern "C" { fn __udivmodsi4(a: u32, b: u32, rem: *mut u32) -> u32; fn __udivmoddi4(a: u64, b: u64, rem: *mut u64) -> u64; fn __divmoddi4(a: i64, b: i64, rem: *mut i64) -> i64; } -extern "aapcs" { +// SAFETY: these are defined in compiler-builtins +// FIXME(extern_custom), this isn't always the correct ABI +unsafe extern "aapcs" { // AAPCS is not always the correct ABI for these intrinsics, but we only use this to // forward another `__aeabi_` call so it doesn't matter. fn __aeabi_idiv(a: i32, b: i32) -> i32; diff --git a/compiler-builtins/src/int/specialized_div_rem/mod.rs b/compiler-builtins/src/int/specialized_div_rem/mod.rs index 43f466e75..7841e4f33 100644 --- a/compiler-builtins/src/int/specialized_div_rem/mod.rs +++ b/compiler-builtins/src/int/specialized_div_rem/mod.rs @@ -125,10 +125,10 @@ impl_normalization_shift!( /// dependencies. 
#[inline] fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) { - if let Some(quo) = duo.checked_div(div) { - if let Some(rem) = duo.checked_rem(div) { - return (quo, rem); - } + if let Some(quo) = duo.checked_div(div) + && let Some(rem) = duo.checked_rem(div) + { + return (quo, rem); } zero_div_fn() } @@ -227,10 +227,10 @@ impl_asymmetric!( #[inline] #[allow(dead_code)] fn u32_by_u32_div_rem(duo: u32, div: u32) -> (u32, u32) { - if let Some(quo) = duo.checked_div(div) { - if let Some(rem) = duo.checked_rem(div) { - return (quo, rem); - } + if let Some(quo) = duo.checked_div(div) + && let Some(rem) = duo.checked_rem(div) + { + return (quo, rem); } zero_div_fn() } diff --git a/compiler-builtins/src/macros.rs b/compiler-builtins/src/macros.rs index 22e0dd27f..203cd0949 100644 --- a/compiler-builtins/src/macros.rs +++ b/compiler-builtins/src/macros.rs @@ -132,7 +132,7 @@ macro_rules! intrinsics { ) => ( #[cfg($name = "optimized-c")] pub $(unsafe $($empty)? )? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { - extern $abi { + unsafe extern $abi { fn $name($($argname: $ty),*) $(-> $ret)?; } unsafe { @@ -435,7 +435,7 @@ macro_rules! intrinsics { pub mod $name { #[unsafe(naked)] $(#[$($attr)*])* - #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + #[cfg_attr(not(feature = "mangled-names"), unsafe(no_mangle))] #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs index 5b6abd21a..c9070cf55 100644 --- a/compiler-builtins/src/probestack.rs +++ b/compiler-builtins/src/probestack.rs @@ -49,7 +49,9 @@ // We only define stack probing for these architectures today. #![cfg(any(target_arch = "x86_64", target_arch = "x86"))] -extern "C" { +// SAFETY: defined in this module. +// FIXME(extern_custom): the ABI is not correct. 
+unsafe extern "C" { pub fn __rust_probestack(); } From af81023a308ab9742bd475c86884efd3fc4bda1a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 29 May 2025 16:07:54 +0000 Subject: [PATCH 1364/1459] symcheck: Print the command to make reproducing errors easier --- crates/symbol-check/src/main.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs index 4e6417fdf..d83cd318d 100644 --- a/crates/symbol-check/src/main.rs +++ b/crates/symbol-check/src/main.rs @@ -46,15 +46,16 @@ fn main() { /// Run `cargo build` with the provided additional arguments, collecting the list of created /// libraries. fn exec_cargo_with_args(args: &[&str]) -> Vec { - let mut cmd = Command::new("cargo") - .arg("build") + let mut cmd = Command::new("cargo"); + cmd.arg("build") .arg("--message-format=json") .args(args) - .stdout(Stdio::piped()) - .spawn() - .expect("failed to launch Cargo"); + .stdout(Stdio::piped()); - let stdout = cmd.stdout.take().unwrap(); + println!("running: {cmd:?}"); + let mut child = cmd.spawn().expect("failed to launch Cargo"); + + let stdout = child.stdout.take().unwrap(); let reader = BufReader::new(stdout); let mut check_files = Vec::new(); @@ -84,7 +85,7 @@ fn exec_cargo_with_args(args: &[&str]) -> Vec { } } - assert!(cmd.wait().expect("failed to wait on Cargo").success()); + assert!(child.wait().expect("failed to wait on Cargo").success()); assert!(!check_files.is_empty(), "no compiler_builtins rlibs found"); println!("Collected the following rlibs to check: {check_files:#?}"); From f5449b0fd4628e4a04b6e37ac2394ceac35dd8e1 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 29 May 2025 17:37:35 +0000 Subject: [PATCH 1365/1459] Add benchmarks for float parsing and printing As part of this, the u256 benchmarks are reorganized to a group. 
--- libm-test/benches/icount.rs | 100 ++++++++++++++++++++++++++++++------ 1 file changed, 83 insertions(+), 17 deletions(-) diff --git a/libm-test/benches/icount.rs b/libm-test/benches/icount.rs index da8c6bfd1..4bebbc41c 100644 --- a/libm-test/benches/icount.rs +++ b/libm-test/benches/icount.rs @@ -1,9 +1,11 @@ //! Benchmarks that use `iai-cachegrind` to be reasonably CI-stable. +#![feature(f16)] +#![feature(f128)] use std::hint::black_box; use iai_callgrind::{library_benchmark, library_benchmark_group, main}; -use libm::support::{HInt, u256}; +use libm::support::{HInt, Hexf, hf16, hf32, hf64, hf128, u256}; use libm_test::generate::spaced; use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op}; @@ -109,11 +111,6 @@ fn icount_bench_u128_widen_mul(cases: Vec<(u128, u128)>) { } } -library_benchmark_group!( - name = icount_bench_u128_widen_mul_group; - benchmarks = icount_bench_u128_widen_mul -); - #[library_benchmark] #[bench::linspace(setup_u256_add())] fn icount_bench_u256_add(cases: Vec<(u256, u256)>) { @@ -122,11 +119,6 @@ fn icount_bench_u256_add(cases: Vec<(u256, u256)>) { } } -library_benchmark_group!( - name = icount_bench_u256_add_group; - benchmarks = icount_bench_u256_add -); - #[library_benchmark] #[bench::linspace(setup_u256_shift())] fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) { @@ -136,16 +128,90 @@ fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) { } library_benchmark_group!( - name = icount_bench_u256_shr_group; - benchmarks = icount_bench_u256_shr + name = icount_bench_u128_group; + benchmarks = icount_bench_u128_widen_mul, icount_bench_u256_add, icount_bench_u256_shr +); + +#[library_benchmark] +#[bench::short("0x12.34p+8")] +#[bench::max("0x1.ffcp+15")] +fn icount_bench_hf16(s: &str) -> f16 { + black_box(hf16(s)) +} + +#[library_benchmark] +#[bench::short("0x12.34p+8")] +#[bench::max("0x1.fffffep+127")] +fn icount_bench_hf32(s: &str) -> f32 { + black_box(hf32(s)) +} + +#[library_benchmark] 
+#[bench::short("0x12.34p+8")] +#[bench::max("0x1.fffffffffffffp+1023")] +fn icount_bench_hf64(s: &str) -> f64 { + black_box(hf64(s)) +} + +#[library_benchmark] +#[bench::short("0x12.34p+8")] +#[bench::max("0x1.ffffffffffffffffffffffffffffp+16383")] +fn icount_bench_hf128(s: &str) -> f128 { + black_box(hf128(s)) +} + +library_benchmark_group!( + name = icount_bench_hf_parse_group; + benchmarks = + icount_bench_hf16, + icount_bench_hf32, + icount_bench_hf64, + icount_bench_hf128 +); + +#[library_benchmark] +#[bench::short(1.015625)] +#[bench::max(f16::MAX)] +fn icount_bench_print_hf16(x: f16) -> String { + black_box(Hexf(x).to_string()) +} + +#[library_benchmark] +#[bench::short(1.015625)] +#[bench::max(f32::MAX)] +fn icount_bench_print_hf32(x: f32) -> String { + black_box(Hexf(x).to_string()) +} + +#[library_benchmark] +#[bench::short(1.015625)] +#[bench::max(f64::MAX)] +fn icount_bench_print_hf64(x: f64) -> String { + black_box(Hexf(x).to_string()) +} + +#[library_benchmark] +#[bench::short(1.015625)] +#[bench::max(f128::MAX)] +fn icount_bench_print_hf128(x: f128) -> String { + black_box(Hexf(x).to_string()) +} + +library_benchmark_group!( + name = icount_bench_hf_print_group; + benchmarks = + icount_bench_print_hf16, + icount_bench_print_hf32, + icount_bench_print_hf64, + icount_bench_print_hf128 ); main!( library_benchmark_groups = - // u256-related benchmarks - icount_bench_u128_widen_mul_group, - icount_bench_u256_add_group, - icount_bench_u256_shr_group, + // Benchmarks not related to public libm math + icount_bench_u128_group, + icount_bench_hf_parse_group, + icount_bench_hf_print_group, // verify-apilist-start // verify-sorted-start icount_bench_acos_group, From b76f6cc5e5567d86d23280a06dc27c82403e6388 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 19 Mar 2025 04:10:03 +0000 Subject: [PATCH 1366/1459] Run `builtins-test-intrinsics` when possible Currently we only build this, but it is possible to run the binary. Change the CI script to do so here. 
--- builtins-test-intrinsics/src/main.rs | 6 ++++-- ci/run.sh | 30 ++++++++++++++++++---------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/builtins-test-intrinsics/src/main.rs b/builtins-test-intrinsics/src/main.rs index 96fe4a738..66744a081 100644 --- a/builtins-test-intrinsics/src/main.rs +++ b/builtins-test-intrinsics/src/main.rs @@ -13,6 +13,8 @@ #![no_std] #![no_main] +// Ensure this `compiler_builtins` gets used, rather than the version injected from the sysroot. +extern crate compiler_builtins; extern crate panic_handler; // SAFETY: no definitions, only used for linking @@ -652,14 +654,14 @@ fn something_with_a_dtor(f: &dyn Fn()) { #[unsafe(no_mangle)] #[cfg(not(thumb))] -fn main(_argc: core::ffi::c_int, _argv: *const *const u8) -> core::ffi::c_int { +extern "C" fn main(_argc: core::ffi::c_int, _argv: *const *const u8) -> core::ffi::c_int { run(); 0 } #[unsafe(no_mangle)] #[cfg(thumb)] -pub fn _start() -> ! { +extern "C" fn _start() -> ! { run(); loop {} } diff --git a/ci/run.sh b/ci/run.sh index cf3f7dfda..27b9686ea 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -63,23 +63,33 @@ symcheck+=(-- build-and-check) "${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128 "${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128 --release -build_intrinsics_test() { - # symcheck also checks the results of builtins-test-intrinsics - "${symcheck[@]}" \ +run_intrinsics_test() { + args=( --target "$target" --verbose \ - --manifest-path builtins-test-intrinsics/Cargo.toml "$@" + --manifest-path builtins-test-intrinsics/Cargo.toml + ) + args+=( "$@" ) + + # symcheck also checks the results of builtins-test-intrinsics + "${symcheck[@]}" "${args[@]}" + + # FIXME: we get access violations on Windows, our entrypoint may need to + # be tweaked. + if [ "${BUILD_ONLY:-}" != "1" ] && ! 
[[ "$target" = *"windows"* ]]; then + cargo run "${args[@]}" + fi } # Verify that we haven't dropped any intrinsics/symbols -build_intrinsics_test -build_intrinsics_test --release -build_intrinsics_test --features c -build_intrinsics_test --features c --release +run_intrinsics_test +run_intrinsics_test --release +run_intrinsics_test --features c +run_intrinsics_test --features c --release # Verify that there are no undefined symbols to `panic` within our # implementations -CARGO_PROFILE_DEV_LTO=true build_intrinsics_test -CARGO_PROFILE_RELEASE_LTO=true build_intrinsics_test --release +CARGO_PROFILE_DEV_LTO=true run_intrinsics_test +CARGO_PROFILE_RELEASE_LTO=true run_intrinsics_test --release # Test libm From 11cf244b8ef7da89607eba968ddfd5dd70cdfd92 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 29 May 2025 19:01:03 +0000 Subject: [PATCH 1367/1459] ci: Allow concurrency outside of pull requests When multiple merges to `master` happen before a CI run completes, the in-progress job is getting canceled. Fix this by using the commit sha for the group key if a pull request number is not available, rather than `github.ref` (which is always `refs/heads/master` after merge). This should prevent jobs running on previous commits from getting cancelled, while still ensuring there is only ever one active run per pull request.
--- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 567ad1205..de433d8c7 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -5,7 +5,7 @@ on: concurrency: # Make sure that new pushes cancel running jobs - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} cancel-in-progress: true env: From 7f3731187f56d257c8aa4fc945c98221e7f28b23 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 29 May 2025 18:06:43 +0000 Subject: [PATCH 1368/1459] Increase the benchmark rustc version to 2025-05-28 We may soon want to use some new nightly features in `compiler-builtins` and `libm`, specifically `cfg_target_has_reliable_f16_f128` which was added in the past few weeks. This will mean we need a newer toolchain for benchmarks to continue building. Bump to the current latest nightly so we are not blocked on this down the line. --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index de433d8c7..8e89cb472 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -13,7 +13,7 @@ env: RUSTDOCFLAGS: -Dwarnings RUSTFLAGS: -Dwarnings RUST_BACKTRACE: full - BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducable results + BENCHMARK_RUSTC: nightly-2025-05-28 # Pin the toolchain for reproducable results jobs: # Determine which tests should be run based on changed files. 
From 502a1149bc555a5ad58fb26dec0cca5404d24354 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 29 May 2025 20:53:48 +0000 Subject: [PATCH 1369/1459] libm-test: Make `extensive` an attribute rather than a test type Currently we run logspace tests for extensive tests, but there isn't any reason we couldn't also run more kinds of tests more extensively (e.g. more edge cases, combine edge cases with logspace for multi-input functions, etc). As a first step toward making this possible, make `extensive` a new field in `CheckCtx`, and rename `QuickSpaced` to `Spaced`. --- libm-test/benches/icount.rs | 2 +- libm-test/examples/plot_domains.rs | 2 +- libm-test/src/run_cfg.rs | 74 ++++++++++++++++++++------- libm-test/tests/compare_built_musl.rs | 2 +- libm-test/tests/multiprecision.rs | 2 +- libm-test/tests/z_extensive/run.rs | 3 +- 6 files changed, 60 insertions(+), 25 deletions(-) diff --git a/libm-test/benches/icount.rs b/libm-test/benches/icount.rs index 4bebbc41c..a0928a29f 100644 --- a/libm-test/benches/icount.rs +++ b/libm-test/benches/icount.rs @@ -23,7 +23,7 @@ macro_rules! 
icount_benches { let mut ctx = CheckCtx::new( Op::IDENTIFIER, CheckBasis::None, - GeneratorKind::QuickSpaced + GeneratorKind::Spaced ); ctx.override_iterations(BENCH_ITER_ITEMS); let ret = spaced::get_test_cases::(&ctx).0.collect::>(); diff --git a/libm-test/examples/plot_domains.rs b/libm-test/examples/plot_domains.rs index 3563103b8..7331d454f 100644 --- a/libm-test/examples/plot_domains.rs +++ b/libm-test/examples/plot_domains.rs @@ -55,7 +55,7 @@ where Op: MathOp, Op::RustArgs: SpacedInput, { - let mut ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr, GeneratorKind::QuickSpaced); + let mut ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr, GeneratorKind::Spaced); plot_one_generator( out_dir, &ctx, diff --git a/libm-test/src/run_cfg.rs b/libm-test/src/run_cfg.rs index 3345a01d2..90f81195c 100644 --- a/libm-test/src/run_cfg.rs +++ b/libm-test/src/run_cfg.rs @@ -22,13 +22,38 @@ static EXTENSIVE_ITER_OVERRIDE: LazyLock> = LazyLock::new(|| { /// Specific tests that need to have a reduced amount of iterations to complete in a reasonable /// amount of time. -/// -/// Contains the itentifier+generator combo to match on, plus the factor to reduce by. -const EXTEMELY_SLOW_TESTS: &[(Identifier, GeneratorKind, u64)] = &[ - (Identifier::Fmodf128, GeneratorKind::QuickSpaced, 50), - (Identifier::Fmodf128, GeneratorKind::Extensive, 50), +const EXTREMELY_SLOW_TESTS: &[SlowTest] = &[ + SlowTest { + ident: Identifier::Fmodf128, + gen_kind: GeneratorKind::Spaced, + extensive: false, + reduce_factor: 50, + }, + SlowTest { + ident: Identifier::Fmodf128, + gen_kind: GeneratorKind::Spaced, + extensive: true, + reduce_factor: 50, + }, ]; +/// A pattern to match a `CheckCtx`, plus a factor to reduce by. +struct SlowTest { + ident: Identifier, + gen_kind: GeneratorKind, + extensive: bool, + reduce_factor: u64, +} + +impl SlowTest { + /// True if the test in `CheckCtx` should be reduced by `reduce_factor`. 
+ fn matches_ctx(&self, ctx: &CheckCtx) -> bool { + self.ident == ctx.fn_ident + && self.gen_kind == ctx.gen_kind + && self.extensive == ctx.extensive + } +} + /// Maximum number of iterations to run for a single routine. /// /// The default value of one greater than `u32::MAX` allows testing single-argument `f32` routines @@ -54,6 +79,7 @@ pub struct CheckCtx { /// Source of truth for tests. pub basis: CheckBasis, pub gen_kind: GeneratorKind, + pub extensive: bool, /// If specified, this value will override the value returned by [`iteration_count`]. pub override_iterations: Option, } @@ -69,12 +95,19 @@ impl CheckCtx { base_name_str: fn_ident.base_name().as_str(), basis, gen_kind, + extensive: false, override_iterations: None, }; ret.ulp = crate::default_ulp(&ret); ret } + /// Configure that this is an extensive test. + pub fn extensive(mut self, extensive: bool) -> Self { + self.extensive = extensive; + self + } + /// The number of input arguments for this function. pub fn input_count(&self) -> usize { self.fn_ident.math_op().rust_sig.args.len() @@ -100,14 +133,17 @@ pub enum CheckBasis { /// and quantity. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum GeneratorKind { + /// Extremes, zeros, nonstandard numbers, etc. EdgeCases, - Extensive, - QuickSpaced, + /// Spaced by logarithm (floats) or linear (integers). + Spaced, + /// Test inputs from an RNG. Random, + /// A provided test case list. List, } -/// A list of all functions that should get extensive tests. +/// A list of all functions that should get extensive tests, as configured by environment variable. /// /// This also supports the special test name `all` to run all tests, as well as `all_f16`, /// `all_f32`, `all_f64`, and `all_f128` to run all tests for a specific float type. 
@@ -216,17 +252,17 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { let random_iter_count = domain_iter_count / 100; let mut total_iterations = match ctx.gen_kind { - GeneratorKind::QuickSpaced => domain_iter_count, + GeneratorKind::Spaced if ctx.extensive => extensive_max_iterations(), + GeneratorKind::Spaced => domain_iter_count, GeneratorKind::Random => random_iter_count, - GeneratorKind::Extensive => extensive_max_iterations(), GeneratorKind::EdgeCases | GeneratorKind::List => { unimplemented!("shoudn't need `iteration_count` for {:?}", ctx.gen_kind) } }; // Larger float types get more iterations. - if t_env.large_float_ty && ctx.gen_kind != GeneratorKind::Extensive { - if ctx.gen_kind == GeneratorKind::Extensive { + if t_env.large_float_ty { + if ctx.extensive { // Extensive already has a pretty high test count. total_iterations *= 2; } else { @@ -244,13 +280,13 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { } // Some tests are significantly slower than others and need to be further reduced. - if let Some((_id, _gen, scale)) = EXTEMELY_SLOW_TESTS + if let Some(slow) = EXTREMELY_SLOW_TESTS .iter() - .find(|(id, generator, _scale)| *id == ctx.fn_ident && *generator == ctx.gen_kind) + .find(|slow| slow.matches_ctx(ctx)) { // However, do not override if the extensive iteration count has been manually set. 
- if !(ctx.gen_kind == GeneratorKind::Extensive && EXTENSIVE_ITER_OVERRIDE.is_some()) { - total_iterations /= scale; + if !(ctx.extensive && EXTENSIVE_ITER_OVERRIDE.is_some()) { + total_iterations /= slow.reduce_factor; } } @@ -279,7 +315,7 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { let total = ntests.pow(t_env.input_count.try_into().unwrap()); let seed_msg = match ctx.gen_kind { - GeneratorKind::QuickSpaced | GeneratorKind::Extensive => String::new(), + GeneratorKind::Spaced => String::new(), GeneratorKind::Random => { format!( " using `{SEED_ENV}={}`", @@ -327,8 +363,8 @@ pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive { let extensive_range = (-0xfff)..=0xfffff; match ctx.gen_kind { - GeneratorKind::Extensive => extensive_range, - GeneratorKind::QuickSpaced | GeneratorKind::Random => non_extensive_range, + _ if ctx.extensive => extensive_range, + GeneratorKind::Spaced | GeneratorKind::Random => non_extensive_range, GeneratorKind::EdgeCases => extensive_range, GeneratorKind::List => unimplemented!("shoudn't need range for {:?}", ctx.gen_kind), } diff --git a/libm-test/tests/compare_built_musl.rs b/libm-test/tests/compare_built_musl.rs index 6ccbb6f4c..86f3b8b71 100644 --- a/libm-test/tests/compare_built_musl.rs +++ b/libm-test/tests/compare_built_musl.rs @@ -65,7 +65,7 @@ macro_rules! musl_tests { $(#[$attr])* fn [< musl_quickspace_ $fn_name >]() { type Op = libm_test::op::$fn_name::Routine; - let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::QuickSpaced); + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Spaced); let cases = spaced::get_test_cases::(&ctx).0; musl_runner::(&ctx, cases, musl_math_sys::$fn_name); } diff --git a/libm-test/tests/multiprecision.rs b/libm-test/tests/multiprecision.rs index 80b2c7868..60175ae61 100644 --- a/libm-test/tests/multiprecision.rs +++ b/libm-test/tests/multiprecision.rs @@ -55,7 +55,7 @@ macro_rules! 
mp_tests { $(#[$attr])* fn [< mp_quickspace_ $fn_name >]() { type Op = libm_test::op::$fn_name::Routine; - let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::QuickSpaced); + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Spaced); let cases = spaced::get_test_cases::(&ctx).0; mp_runner::(&ctx, cases); } diff --git a/libm-test/tests/z_extensive/run.rs b/libm-test/tests/z_extensive/run.rs index 59c806ce7..f2ba6a4a0 100644 --- a/libm-test/tests/z_extensive/run.rs +++ b/libm-test/tests/z_extensive/run.rs @@ -17,7 +17,6 @@ use rayon::prelude::*; use spaced::SpacedInput; const BASIS: CheckBasis = CheckBasis::Mpfr; -const GEN_KIND: GeneratorKind = GeneratorKind::Extensive; /// Run the extensive test suite. pub fn run() { @@ -77,7 +76,7 @@ where Op::RustArgs: SpacedInput + Send, { let test_name = format!("mp_extensive_{}", Op::NAME); - let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GEN_KIND); + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Spaced).extensive(true); let skip = skip_extensive_test(&ctx); let runner = move || { From a121a80d2a915e24ff2ca68e29eca6675b881fea Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 29 May 2025 21:22:47 +0000 Subject: [PATCH 1370/1459] ci: Allow for multiple icount benchmarks in the same run We don't actually need this for now, but eventually it would be nice to run icount benchmarks on multiple targets. Start tagging artifact names with the architecture, and allow passing `--tag` to `ci-util.py` in order to retrieve the correct one. 
--- .github/workflows/main.yaml | 12 ++++++++++-- ci/bench-icount.sh | 16 ++++++++++++++-- ci/ci-util.py | 17 +++++++++++++---- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 8e89cb472..9f389d8b4 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -195,8 +195,14 @@ jobs: benchmarks: name: Benchmarks - runs-on: ubuntu-24.04 timeout-minutes: 20 + strategy: + fail-fast: false + matrix: + include: + - target: x86_64-unknown-linux-gnu + os: ubuntu-24.04 + runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@master with: @@ -215,12 +221,14 @@ jobs: cargo binstall -y iai-callgrind-runner --version "$iai_version" sudo apt-get install valgrind - uses: Swatinem/rust-cache@v2 + with: + key: ${{ matrix.target }} - name: Run icount benchmarks env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} PR_NUMBER: ${{ github.event.pull_request.number }} - run: ./ci/bench-icount.sh + run: ./ci/bench-icount.sh ${{ matrix.target }} - name: Upload the benchmark baseline uses: actions/upload-artifact@v4 diff --git a/ci/bench-icount.sh b/ci/bench-icount.sh index 4d93e257a..5b6974fe4 100755 --- a/ci/bench-icount.sh +++ b/ci/bench-icount.sh @@ -2,10 +2,21 @@ set -eux +target="${1:-}" + +if [ -z "$target" ]; then + host_target=$(rustc -vV | awk '/^host/ { print $2 }') + echo "Defaulted to host target $host_target" + target="$host_target" +fi + iai_home="iai-home" +# Use the arch as a tag to disambiguate artifacts +tag="$(echo "$target" | cut -d'-' -f1)" + # Download the baseline from master -./ci/ci-util.py locate-baseline --download --extract +./ci/ci-util.py locate-baseline --download --extract --tag "$tag" # Run benchmarks once function run_icount_benchmarks() { @@ -44,6 +55,7 @@ function run_icount_benchmarks() { # If this is for a pull request, ignore regressions if specified. 
./ci/ci-util.py check-regressions --home "$iai_home" --allow-pr-override "$PR_NUMBER" else + # Disregard regressions after merge ./ci/ci-util.py check-regressions --home "$iai_home" || true fi } @@ -53,6 +65,6 @@ run_icount_benchmarks --features force-soft-floats -- --save-baseline=softfloat run_icount_benchmarks -- --save-baseline=hardfloat # Name and tar the new baseline -name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}" +name="baseline-icount-$tag-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}" echo "BASELINE_NAME=$name" >>"$GITHUB_ENV" tar cJf "$name.tar.xz" "$iai_home" diff --git a/ci/ci-util.py b/ci/ci-util.py index d785b2e9e..6c8b43980 100755 --- a/ci/ci-util.py +++ b/ci/ci-util.py @@ -28,11 +28,14 @@ Calculate a matrix of which functions had source change, print that as a JSON object. - locate-baseline [--download] [--extract] + locate-baseline [--download] [--extract] [--tag TAG] Locate the most recent benchmark baseline available in CI and, if flags specify, download and extract it. Never exits with nonzero status if downloading fails. + `--tag` can be specified to look for artifacts with a specific tag, such as + for a specific architecture. + Note that `--extract` will overwrite files in `iai-home`. check-regressions [--home iai-home] [--allow-pr-override pr_number] @@ -50,7 +53,7 @@ GIT = ["git", "-C", REPO_ROOT] DEFAULT_BRANCH = "master" WORKFLOW_NAME = "CI" # Workflow that generates the benchmark artifacts -ARTIFACT_GLOB = "baseline-icount*" +ARTIFACT_PREFIX = "baseline-icount*" # Place this in a PR body to skip regression checks (must be at the start of a line). 
REGRESSION_DIRECTIVE = "ci: allow-regressions" # Place this in a PR body to skip extensive tests @@ -278,6 +281,7 @@ def locate_baseline(flags: list[str]) -> None: download = False extract = False + tag = "" while len(flags) > 0: match flags[0]: @@ -285,6 +289,9 @@ def locate_baseline(flags: list[str]) -> None: download = True case "--extract": extract = True + case "--tag": + tag = flags[1] + flags = flags[1:] case _: eprint(USAGE) exit(1) @@ -333,8 +340,10 @@ def locate_baseline(flags: list[str]) -> None: eprint("skipping download step") return + artifact_glob = f"{ARTIFACT_PREFIX}{f"-{tag}" if tag else ""}*" + sp.run( - ["gh", "run", "download", str(job_id), f"--pattern={ARTIFACT_GLOB}"], + ["gh", "run", "download", str(job_id), f"--pattern={artifact_glob}"], check=False, ) @@ -344,7 +353,7 @@ def locate_baseline(flags: list[str]) -> None: # Find the baseline with the most recent timestamp. GH downloads the files to e.g. # `some-dirname/some-dirname.tar.xz`, so just glob the whole thing together. 
- candidate_baselines = glob(f"{ARTIFACT_GLOB}/{ARTIFACT_GLOB}") + candidate_baselines = glob(f"{artifact_glob}/{artifact_glob}") if len(candidate_baselines) == 0: eprint("no possible baseline directories found") return From b6e15ef6c9c250f29a87d08ab1a62c1374558fe5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 29 May 2025 22:08:24 +0000 Subject: [PATCH 1371/1459] chore: release --- compiler-builtins/CHANGELOG.md | 15 +++++++++++++++ compiler-builtins/Cargo.toml | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/compiler-builtins/CHANGELOG.md b/compiler-builtins/CHANGELOG.md index a7c01c463..880e56c44 100644 --- a/compiler-builtins/CHANGELOG.md +++ b/compiler-builtins/CHANGELOG.md @@ -7,6 +7,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.160](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.159...compiler_builtins-v0.1.160) - 2025-05-29 + +### Other + +- Change `compiler-builtins` to edition 2024 +- Remove unneeded C symbols +- Reuse `libm`'s `Caat` and `CastFrom` in `compiler-builtins` +- Reuse `MinInt` and `Int` from `libm` in `compiler-builtins` +- Update `CmpResult` to use a pointer-sized return type +- Enable `__powitf2` on MSVC +- Fix `i256::MAX` +- Add a note saying why we use `frintx` rather than `frintn` +- Typo in README.md +- Clean up unused files + ## [0.1.159](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.158...compiler_builtins-v0.1.159) - 2025-05-12 ### Other diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index 93eb3e01b..8ceef286f 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jorge Aparicio "] name = "compiler_builtins" -version = "0.1.159" +version = "0.1.160" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" readme = 
"README.md" repository = "https://github.com/rust-lang/compiler-builtins" From 81609be3b5543d9271b4ed3b3341921e0004ca1b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 1 Jun 2025 19:41:03 +0000 Subject: [PATCH 1372/1459] Fix new `dead_code` warnings from recent nightlies --- libm/src/math/support/float_traits.rs | 1 + libm/src/math/support/hex_float.rs | 207 ++++++++++++++------------ libm/src/math/support/int_traits.rs | 1 + libm/src/math/support/macros.rs | 6 +- libm/src/math/support/mod.rs | 4 +- 5 files changed, 116 insertions(+), 103 deletions(-) diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index 4c866ef10..dd9f46209 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -6,6 +6,7 @@ use super::int_traits::{CastFrom, Int, MinInt}; /// Trait for some basic operations on floats // #[allow(dead_code)] +#[allow(dead_code)] // Some constants are only used with tests pub trait Float: Copy + fmt::Debug diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs index 85569d98a..c8558b900 100644 --- a/libm/src/math/support/hex_float.rs +++ b/libm/src/math/support/hex_float.rs @@ -1,8 +1,6 @@ //! Utilities for working with hex float formats. -use core::fmt; - -use super::{Float, Round, Status, f32_from_bits, f64_from_bits}; +use super::{Round, Status, f32_from_bits, f64_from_bits}; /// Construct a 16-bit float from hex float representation (C-style) #[cfg(f16_enabled)] @@ -352,133 +350,143 @@ const fn u128_ilog2(v: u128) -> u32 { u128::BITS - 1 - v.leading_zeros() } -/// Format a floating point number as its IEEE hex (`%a`) representation. 
-pub struct Hexf(pub F); +#[cfg(any(test, feature = "unstable-public-internals"))] +mod hex_fmt { + use core::fmt; -// Adapted from https://github.com/ericseppanen/hexfloat2/blob/a5c27932f0ff/src/format.rs -#[cfg(not(feature = "compiler-builtins"))] -fn fmt_any_hex(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if x.is_sign_negative() { - write!(f, "-")?; - } + use crate::support::Float; - if x.is_nan() { - return write!(f, "NaN"); - } else if x.is_infinite() { - return write!(f, "inf"); - } else if *x == F::ZERO { - return write!(f, "0x0p+0"); - } + /// Format a floating point number as its IEEE hex (`%a`) representation. + pub struct Hexf(pub F); - let mut exponent = x.exp_unbiased(); - let sig = x.to_bits() & F::SIG_MASK; - - let bias = F::EXP_BIAS as i32; - // The mantissa MSB needs to be shifted up to the nearest nibble. - let mshift = (4 - (F::SIG_BITS % 4)) % 4; - let sig = sig << mshift; - // The width is rounded up to the nearest char (4 bits) - let mwidth = (F::SIG_BITS as usize + 3) / 4; - let leading = if exponent == -bias { - // subnormal number means we shift our output by 1 bit. - exponent += 1; - "0." - } else { - "1." - }; + // Adapted from https://github.com/ericseppanen/hexfloat2/blob/a5c27932f0ff/src/format.rs + #[cfg(not(feature = "compiler-builtins"))] + pub(super) fn fmt_any_hex(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if x.is_sign_negative() { + write!(f, "-")?; + } - write!(f, "0x{leading}{sig:0mwidth$x}p{exponent:+}") -} + if x.is_nan() { + return write!(f, "NaN"); + } else if x.is_infinite() { + return write!(f, "inf"); + } else if *x == F::ZERO { + return write!(f, "0x0p+0"); + } -#[cfg(feature = "compiler-builtins")] -fn fmt_any_hex(_x: &F, _f: &mut fmt::Formatter<'_>) -> fmt::Result { - unimplemented!() -} + let mut exponent = x.exp_unbiased(); + let sig = x.to_bits() & F::SIG_MASK; + + let bias = F::EXP_BIAS as i32; + // The mantissa MSB needs to be shifted up to the nearest nibble. 
+ let mshift = (4 - (F::SIG_BITS % 4)) % 4; + let sig = sig << mshift; + // The width is rounded up to the nearest char (4 bits) + let mwidth = (F::SIG_BITS as usize + 3) / 4; + let leading = if exponent == -bias { + // subnormal number means we shift our output by 1 bit. + exponent += 1; + "0." + } else { + "1." + }; -impl fmt::LowerHex for Hexf { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - cfg_if! { - if #[cfg(feature = "compiler-builtins")] { - let _ = f; - unimplemented!() - } else { - fmt_any_hex(&self.0, f) + write!(f, "0x{leading}{sig:0mwidth$x}p{exponent:+}") + } + + #[cfg(feature = "compiler-builtins")] + pub(super) fn fmt_any_hex(_x: &F, _f: &mut fmt::Formatter<'_>) -> fmt::Result { + unimplemented!() + } + + impl fmt::LowerHex for Hexf { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + fmt_any_hex(&self.0, f) + } } } } -} -impl fmt::LowerHex for Hexf<(F, F)> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - cfg_if! { - if #[cfg(feature = "compiler-builtins")] { - let _ = f; - unimplemented!() - } else { - write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) + impl fmt::LowerHex for Hexf<(F, F)> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) + } } } } -} -impl fmt::LowerHex for Hexf<(F, i32)> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - cfg_if! { - if #[cfg(feature = "compiler-builtins")] { - let _ = f; - unimplemented!() - } else { - write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) + impl fmt::LowerHex for Hexf<(F, i32)> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! 
{ + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) + } } } } -} -impl fmt::LowerHex for Hexf { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - cfg_if! { - if #[cfg(feature = "compiler-builtins")] { - let _ = f; - unimplemented!() - } else { - fmt::LowerHex::fmt(&self.0, f) + impl fmt::LowerHex for Hexf { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + fmt::LowerHex::fmt(&self.0, f) + } } } } -} -impl fmt::Debug for Hexf -where - Hexf: fmt::LowerHex, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - cfg_if! { - if #[cfg(feature = "compiler-builtins")] { - let _ = f; - unimplemented!() - } else { - fmt::LowerHex::fmt(self, f) + impl fmt::Debug for Hexf + where + Hexf: fmt::LowerHex, + { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + fmt::LowerHex::fmt(self, f) + } } } } -} -impl fmt::Display for Hexf -where - Hexf: fmt::LowerHex, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - cfg_if! { - if #[cfg(feature = "compiler-builtins")] { - let _ = f; - unimplemented!() - } else { - fmt::LowerHex::fmt(self, f) + impl fmt::Display for Hexf + where + Hexf: fmt::LowerHex, + { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! 
{ + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + fmt::LowerHex::fmt(self, f) + } } } } } +#[cfg(any(test, feature = "unstable-public-internals"))] +pub use hex_fmt::*; + #[cfg(test)] mod parse_tests { extern crate std; @@ -1064,6 +1072,7 @@ mod print_tests { use std::string::ToString; use super::*; + use crate::support::Float; #[test] #[cfg(f16_enabled)] diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index 716af748a..9b29e2f45 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -1,6 +1,7 @@ use core::{cmp, fmt, ops}; /// Minimal integer implementations needed on all integer types, including wide integers. +#[allow(dead_code)] // Some constants are only used with tests pub trait MinInt: Copy + fmt::Debug diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs index 0b72db0e4..2b8fd580a 100644 --- a/libm/src/math/support/macros.rs +++ b/libm/src/math/support/macros.rs @@ -137,12 +137,12 @@ macro_rules! hf128 { #[cfg(test)] macro_rules! 
assert_biteq { ($left:expr, $right:expr, $($tt:tt)*) => {{ - use $crate::support::Int; let l = $left; let r = $right; - let bits = Int::leading_zeros(l.to_bits() - l.to_bits()); // hack to get the width from the value + // hack to get width from a value + let bits = $crate::support::Int::leading_zeros(l.to_bits() - l.to_bits()); assert!( - l.biteq(r), + $crate::support::Float::biteq(l, r), "{}\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})", format_args!($($tt)*), lb = l.to_bits(), diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index 2771cfd32..2e7edd03c 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -17,6 +17,8 @@ pub use env::{FpResult, Round, Status}; #[allow(unused_imports)] pub use float_traits::{DFloat, Float, HFloat, IntTy}; pub(crate) use float_traits::{f32_from_bits, f64_from_bits}; +#[cfg(any(test, feature = "unstable-public-internals"))] +pub use hex_float::Hexf; #[cfg(f16_enabled)] #[allow(unused_imports)] pub use hex_float::hf16; @@ -24,7 +26,7 @@ pub use hex_float::hf16; #[allow(unused_imports)] pub use hex_float::hf128; #[allow(unused_imports)] -pub use hex_float::{Hexf, hf32, hf64}; +pub use hex_float::{hf32, hf64}; pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; /// Hint to the compiler that the current path is cold. From 7c12df1bde234d8d7c8245f75bdcb8a18592f3d8 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 1 Jun 2025 19:22:42 +0000 Subject: [PATCH 1373/1459] Upgrade all dependencies to the latest available version In particular, this includes a fix to `iai-callgrind` that will allow us to simplify our benchmark runner. 
--- builtins-test/Cargo.toml | 8 ++++---- compiler-builtins/Cargo.toml | 4 ++-- crates/libm-macros/Cargo.toml | 4 ++-- crates/musl-math-sys/Cargo.toml | 2 +- libm-test/Cargo.toml | 14 +++++++------- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/builtins-test/Cargo.toml b/builtins-test/Cargo.toml index 10978c0bb..c7742aa24 100644 --- a/builtins-test/Cargo.toml +++ b/builtins-test/Cargo.toml @@ -10,11 +10,11 @@ license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" # For fuzzing tests we want a deterministic seedable RNG. We also eliminate potential # problems with system RNGs on the variety of platforms this crate is tested on. # `xoshiro128**` is used for its quality, size, and speed at generating `u32` shift amounts. -rand_xoshiro = "0.6" +rand_xoshiro = "0.7" # To compare float builtins against -rustc_apfloat = "0.2.1" +rustc_apfloat = "0.2.2" # Really a dev dependency, but dev dependencies can't be optional -iai-callgrind = { version = "0.14.0", optional = true } +iai-callgrind = { version = "0.14.1", optional = true } [dependencies.compiler_builtins] path = "../compiler-builtins" @@ -22,7 +22,7 @@ default-features = false features = ["unstable-public-internals"] [dev-dependencies] -criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] } +criterion = { version = "0.6.0", default-features = false, features = ["cargo_bench_support"] } paste = "1.0.15" [target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies] diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index 8ceef286f..6bee8da68 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -19,10 +19,10 @@ test = false [dependencies] # For more information on this dependency see # https://github.com/rust-lang/rust/tree/master/library/rustc-std-workspace-core -core = { version = "1.0.0", optional = true, package = 
"rustc-std-workspace-core" } +core = { version = "1.0.1", optional = true, package = "rustc-std-workspace-core" } [build-dependencies] -cc = { optional = true, version = "1.0" } +cc = { optional = true, version = "1.2" } [dev-dependencies] panic-handler = { path = "../crates/panic-handler" } diff --git a/crates/libm-macros/Cargo.toml b/crates/libm-macros/Cargo.toml index 3929854f0..6bbf47784 100644 --- a/crates/libm-macros/Cargo.toml +++ b/crates/libm-macros/Cargo.toml @@ -10,9 +10,9 @@ proc-macro = true [dependencies] heck = "0.5.0" -proc-macro2 = "1.0.94" +proc-macro2 = "1.0.95" quote = "1.0.40" -syn = { version = "2.0.100", features = ["full", "extra-traits", "visit-mut"] } +syn = { version = "2.0.101", features = ["full", "extra-traits", "visit-mut"] } [lints.rust] # Values used during testing diff --git a/crates/musl-math-sys/Cargo.toml b/crates/musl-math-sys/Cargo.toml index d3fb147e5..3b8811734 100644 --- a/crates/musl-math-sys/Cargo.toml +++ b/crates/musl-math-sys/Cargo.toml @@ -11,4 +11,4 @@ license = "MIT OR Apache-2.0" libm = { path = "../../libm" } [build-dependencies] -cc = "1.2.16" +cc = "1.2.25" diff --git a/libm-test/Cargo.toml b/libm-test/Cargo.toml index 7a306e735..01b45716b 100644 --- a/libm-test/Cargo.toml +++ b/libm-test/Cargo.toml @@ -28,28 +28,28 @@ icount = ["dep:iai-callgrind"] short-benchmarks = [] [dependencies] -anyhow = "1.0.97" +anyhow = "1.0.98" # This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`. 
-gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false } -iai-callgrind = { version = "0.14.0", optional = true } +gmp-mpfr-sys = { version = "1.6.5", optional = true, default-features = false } +iai-callgrind = { version = "0.14.1", optional = true } indicatif = { version = "0.17.11", default-features = false } libm = { path = "../libm", features = ["unstable-public-internals"] } libm-macros = { path = "../crates/libm-macros" } musl-math-sys = { path = "../crates/musl-math-sys", optional = true } paste = "1.0.15" -rand = "0.9.0" +rand = "0.9.1" rand_chacha = "0.9.0" rayon = "1.10.0" rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "integer", "std"] } [target.'cfg(target_family = "wasm")'.dependencies] -getrandom = { version = "0.3.2", features = ["wasm_js"] } +getrandom = { version = "0.3.3", features = ["wasm_js"] } [build-dependencies] -rand = { version = "0.9.0", optional = true } +rand = { version = "0.9.1", optional = true } [dev-dependencies] -criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] } +criterion = { version = "0.6.0", default-features = false, features = ["cargo_bench_support"] } libtest-mimic = "0.8.1" [[bench]] From 4f943d42831c344bbc91851f646d99e4f73b9b32 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 2 Jun 2025 16:10:49 +0000 Subject: [PATCH 1374/1459] cleanup: Use `x.biteq(y)` rather than `x.to_bits() == y.to_bits()` --- libm-test/src/precision.rs | 2 +- libm-test/src/test_traits.rs | 5 +---- libm/src/math/generic/fmaximum.rs | 2 +- libm/src/math/generic/fmaximum_num.rs | 11 +++++------ libm/src/math/generic/fminimum.rs | 2 +- libm/src/math/generic/fminimum_num.rs | 11 +++++------ 6 files changed, 14 insertions(+), 19 deletions(-) diff --git a/libm-test/src/precision.rs b/libm-test/src/precision.rs index f5fb5f670..f6cdd015a 100644 --- a/libm-test/src/precision.rs +++ b/libm-test/src/precision.rs @@ -381,7 +381,7 @@ fn 
unop_common( } // abs and copysign require signaling NaNs to be propagated, so verify bit equality. - if actual.to_bits() == expected.to_bits() { + if actual.biteq(expected) { return CheckAction::Custom(Ok(())); } else { return CheckAction::Custom(Err(anyhow::anyhow!("NaNs have different bitpatterns"))); diff --git a/libm-test/src/test_traits.rs b/libm-test/src/test_traits.rs index dbb970161..2af6af60b 100644 --- a/libm-test/src/test_traits.rs +++ b/libm-test/src/test_traits.rs @@ -328,10 +328,7 @@ where // Check when both are NaNs if actual.is_nan() && expected.is_nan() { if require_biteq && ctx.basis == CheckBasis::None { - ensure!( - actual.to_bits() == expected.to_bits(), - "mismatched NaN bitpatterns" - ); + ensure!(actual.biteq(expected), "mismatched NaN bitpatterns"); } // By default, NaNs have nothing special to check. return Ok(()); diff --git a/libm/src/math/generic/fmaximum.rs b/libm/src/math/generic/fmaximum.rs index 4b6295bc0..898828b80 100644 --- a/libm/src/math/generic/fmaximum.rs +++ b/libm/src/math/generic/fmaximum.rs @@ -17,7 +17,7 @@ pub fn fmaximum(x: F, y: F) -> F { x } else if y.is_nan() { y - } else if x > y || (y.to_bits() == F::NEG_ZERO.to_bits() && x.is_sign_positive()) { + } else if x > y || (y.biteq(F::NEG_ZERO) && x.is_sign_positive()) { x } else { y diff --git a/libm/src/math/generic/fmaximum_num.rs b/libm/src/math/generic/fmaximum_num.rs index 2e97ff6d3..05df6cbd4 100644 --- a/libm/src/math/generic/fmaximum_num.rs +++ b/libm/src/math/generic/fmaximum_num.rs @@ -15,12 +15,11 @@ use crate::support::Float; #[inline] pub fn fmaximum_num(x: F, y: F) -> F { - let res = - if x.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) { - y - } else { - x - }; + let res = if x.is_nan() || x < y || (x.biteq(F::NEG_ZERO) && y.is_sign_positive()) { + y + } else { + x + }; // Canonicalize res * F::ONE diff --git a/libm/src/math/generic/fminimum.rs b/libm/src/math/generic/fminimum.rs index 9dc0b64be..8592ac546 100644 --- 
a/libm/src/math/generic/fminimum.rs +++ b/libm/src/math/generic/fminimum.rs @@ -17,7 +17,7 @@ pub fn fminimum(x: F, y: F) -> F { x } else if y.is_nan() { y - } else if x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) { + } else if x < y || (x.biteq(F::NEG_ZERO) && y.is_sign_positive()) { x } else { y diff --git a/libm/src/math/generic/fminimum_num.rs b/libm/src/math/generic/fminimum_num.rs index 40db8b189..6777bbf87 100644 --- a/libm/src/math/generic/fminimum_num.rs +++ b/libm/src/math/generic/fminimum_num.rs @@ -15,12 +15,11 @@ use crate::support::Float; #[inline] pub fn fminimum_num(x: F, y: F) -> F { - let res = - if y.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) { - x - } else { - y - }; + let res = if y.is_nan() || x < y || (x.biteq(F::NEG_ZERO) && y.is_sign_positive()) { + x + } else { + y + }; // Canonicalize res * F::ONE From e211ac653fda4e36a4c0f3b71b9fd9643311cabb Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 1 Jun 2025 19:52:57 +0000 Subject: [PATCH 1375/1459] ci: Refactor benchmark regression checks iai-callgrind now correctly exits with error if regressions were found [1], so we no longer need to check for regressions manually. Remove this check and instead exit based on the exit status of the benchmark run. [1] https://github.com/iai-callgrind/iai-callgrind/issues/337 --- ci/bench-icount.sh | 19 ++++++----- ci/ci-util.py | 84 +++++++++++----------------------------------- 2 files changed, 29 insertions(+), 74 deletions(-) diff --git a/ci/bench-icount.sh b/ci/bench-icount.sh index 5b6974fe4..5724955fe 100755 --- a/ci/bench-icount.sh +++ b/ci/bench-icount.sh @@ -46,17 +46,18 @@ function run_icount_benchmarks() { shift done - # Run iai-callgrind benchmarks - cargo bench "${cargo_args[@]}" -- "${iai_args[@]}" + # Run iai-callgrind benchmarks. Do this in a subshell with `&& true` to + # capture rather than exit on error. 
+ (cargo bench "${cargo_args[@]}" -- "${iai_args[@]}") && true + exit_code="$?" - # NB: iai-callgrind should exit on error but does not, so we inspect the sumary - # for errors. See https://github.com/iai-callgrind/iai-callgrind/issues/337 - if [ -n "${PR_NUMBER:-}" ]; then - # If this is for a pull request, ignore regressions if specified. - ./ci/ci-util.py check-regressions --home "$iai_home" --allow-pr-override "$PR_NUMBER" - else + if [ "$exit_code" -eq 0 ]; then + echo "Benchmarks completed with no regressions" + elif [ -z "${PR_NUMBER:-}" ]; then # Disregard regressions after merge - ./ci/ci-util.py check-regressions --home "$iai_home" || true + echo "Benchmarks completed with regressions; ignoring (not in a PR)" + else + ./ci/ci-util.py handle-bench-regressions "$PR_NUMBER" fi } diff --git a/ci/ci-util.py b/ci/ci-util.py index 6c8b43980..3437d304f 100755 --- a/ci/ci-util.py +++ b/ci/ci-util.py @@ -11,7 +11,7 @@ import subprocess as sp import sys from dataclasses import dataclass -from glob import glob, iglob +from glob import glob from inspect import cleandoc from os import getenv from pathlib import Path @@ -38,14 +38,10 @@ Note that `--extract` will overwrite files in `iai-home`. - check-regressions [--home iai-home] [--allow-pr-override pr_number] - Check `iai-home` (or `iai-home` if unspecified) for `summary.json` - files and see if there are any regressions. This is used as a workaround - for `iai-callgrind` not exiting with error status; see - . - - If `--allow-pr-override` is specified, the regression check will not exit - with failure if any line in the PR starts with `allow-regressions`. + handle-bench-regressions PR_NUMBER + Exit with success if the pull request contains a line starting with + `ci: allow-regressions`, indicating that regressions in benchmarks should + be accepted. Otherwise, exit 1.
""" ) @@ -365,64 +361,22 @@ def locate_baseline(flags: list[str]) -> None: eprint("baseline extracted successfully") -def check_iai_regressions(args: list[str]): - """Find regressions in iai summary.json files, exit with failure if any are - found. - """ - - iai_home_str = "iai-home" - pr_number = None - - while len(args) > 0: - match args: - case ["--home", home, *rest]: - iai_home_str = home - args = rest - case ["--allow-pr-override", pr_num, *rest]: - pr_number = pr_num - args = rest - case _: - eprint(USAGE) - exit(1) - - iai_home = Path(iai_home_str) - - found_summaries = False - regressions: list[dict] = [] - for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True): - found_summaries = True - with open(iai_home / summary_path, "r") as f: - summary = json.load(f) - - summary_regs = [] - run = summary["callgrind_summary"]["callgrind_run"] - fname = summary["function_name"] - id = summary["id"] - name_entry = {"name": f"{fname}.{id}"} - - for segment in run["segments"]: - summary_regs.extend(segment["regressions"]) +def handle_bench_regressions(args: list[str]): + """Exit with error unless the PR message contains an ignore directive.""" - summary_regs.extend(run["total"]["regressions"]) - - regressions.extend(name_entry | reg for reg in summary_regs) - - if not found_summaries: - eprint(f"did not find any summary.json files within {iai_home}") - exit(1) + match args: + case [pr_number]: + pr_number = pr_number + case _: + eprint(USAGE) + exit(1) - if len(regressions) == 0: - eprint("No regressions found") + pr = PrInfo.load(pr_number) + if pr.contains_directive(REGRESSION_DIRECTIVE): + eprint("PR allows regressions") return - eprint("Found regressions:", json.dumps(regressions, indent=4)) - - if pr_number is not None: - pr = PrInfo.load(pr_number) - if pr.contains_directive(REGRESSION_DIRECTIVE): - eprint("PR allows regressions, returning") - return - + eprint("Regressions were found; benchmark failed") exit(1) @@ -433,8 +387,8 @@ def 
main(): ctx.emit_workflow_output() case ["locate-baseline", *flags]: locate_baseline(flags) - case ["check-regressions", *args]: - check_iai_regressions(args) + case ["handle-bench-regressions", *args]: + handle_bench_regressions(args) case ["--help" | "-h"]: print(USAGE) exit() From da8433db2382d76d646bf86b2719c07d24e487ac Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 2 Jun 2025 20:20:23 +0000 Subject: [PATCH 1376/1459] libm-test: Fix unintentional skips in `binop_common` `binop_common` emits a `SKIP` that is intended to apply only to `copysign`, but is instead applying to all binary operators. Correct the general case but leave the currently-failing `maximum_num` tests as a FIXME, to be resolved separately in [1]. Also simplify skip logic and NaN checking, and add a few more `copysign` checks. [1]: https://github.com/rust-lang/compiler-builtins/pull/939 --- libm-test/src/generate/edge_cases.rs | 1 + libm-test/src/precision.rs | 15 ++++++++++----- libm-test/src/test_traits.rs | 20 ++++++++++++++------ libm/src/math/copysign.rs | 10 +++++++++- 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/libm-test/src/generate/edge_cases.rs b/libm-test/src/generate/edge_cases.rs index 2fb074638..4e4a782a1 100644 --- a/libm-test/src/generate/edge_cases.rs +++ b/libm-test/src/generate/edge_cases.rs @@ -51,6 +51,7 @@ where // Check some special values that aren't included in the above ranges values.push(Op::FTy::NAN); + values.push(Op::FTy::NEG_NAN); values.extend(Op::FTy::consts().iter()); // Check around the maximum subnormal value diff --git a/libm-test/src/precision.rs b/libm-test/src/precision.rs index f6cdd015a..32825b15d 100644 --- a/libm-test/src/precision.rs +++ b/libm-test/src/precision.rs @@ -444,13 +444,18 @@ fn binop_common( expected: F2, ctx: &CheckCtx, ) -> CheckAction { - // MPFR only has one NaN bitpattern; allow the default `.is_nan()` checks to validate. 
Skip if - // the first input (magnitude source) is NaN and the output is also a NaN, or if the second - // input (sign source) is NaN. - if ctx.basis == CheckBasis::Mpfr + // MPFR only has one NaN bitpattern; skip tests in cases where the first argument would take + // the sign of a NaN second argument. The default NaN checks cover other cases. + if ctx.base_name == BaseName::Copysign && ctx.basis == CheckBasis::Mpfr && input.1.is_nan() { + return SKIP; + } + + // FIXME(#939): this should not be skipped, there is a bug in our implementation. + if ctx.base_name == BaseName::FmaximumNum + && ctx.basis == CheckBasis::Mpfr && ((input.0.is_nan() && actual.is_nan() && expected.is_nan()) || input.1.is_nan()) { - return SKIP; + return XFAIL_NOCHECK; } /* FIXME(#439): our fmin and fmax do not compare signed zeros */ diff --git a/libm-test/src/test_traits.rs b/libm-test/src/test_traits.rs index 2af6af60b..278274d91 100644 --- a/libm-test/src/test_traits.rs +++ b/libm-test/src/test_traits.rs @@ -312,12 +312,9 @@ where let mut inner = || -> TestResult { let mut allowed_ulp = ctx.ulp; - // Forbid overrides if the items came from an explicit list, as long as we are checking - // against either MPFR or the result itself.
- let require_biteq = ctx.gen_kind == GeneratorKind::List && ctx.basis != CheckBasis::Musl; - match SpecialCase::check_float(input, actual, expected, ctx) { - _ if require_biteq => (), + // Forbid overrides if the items came from an explicit list + _ if ctx.gen_kind == GeneratorKind::List => (), CheckAction::AssertSuccess => (), CheckAction::AssertFailure(msg) => assert_failure_msg = Some(msg), CheckAction::Custom(res) => return res, @@ -327,9 +324,20 @@ where // Check when both are NaNs if actual.is_nan() && expected.is_nan() { - if require_biteq && ctx.basis == CheckBasis::None { + // Don't assert NaN bitwise equality if: + // + // * Testing against MPFR (there is a single NaN representation) + // * Testing against Musl except for explicit tests (Musl does some NaN quieting) + // + // In these cases, just the check that actual and expected are both NaNs is + // sufficient. + let skip_nan_biteq = ctx.basis == CheckBasis::Mpfr + || (ctx.basis == CheckBasis::Musl && ctx.gen_kind != GeneratorKind::List); + + if !skip_nan_biteq { ensure!(actual.biteq(expected), "mismatched NaN bitpatterns"); } + // By default, NaNs have nothing special to check. 
return Ok(()); } else if actual.is_nan() || expected.is_nan() { diff --git a/libm/src/math/copysign.rs b/libm/src/math/copysign.rs index d2a86e7fd..d093d6107 100644 --- a/libm/src/math/copysign.rs +++ b/libm/src/math/copysign.rs @@ -59,9 +59,17 @@ mod tests { // Not required but we expect it assert_biteq!(f(F::NAN, F::NAN), F::NAN); - assert_biteq!(f(F::NEG_NAN, F::NAN), F::NAN); + assert_biteq!(f(F::NAN, F::ONE), F::NAN); + assert_biteq!(f(F::NAN, F::NEG_ONE), F::NEG_NAN); assert_biteq!(f(F::NAN, F::NEG_NAN), F::NEG_NAN); + assert_biteq!(f(F::NEG_NAN, F::NAN), F::NAN); + assert_biteq!(f(F::NEG_NAN, F::ONE), F::NAN); + assert_biteq!(f(F::NEG_NAN, F::NEG_ONE), F::NEG_NAN); assert_biteq!(f(F::NEG_NAN, F::NEG_NAN), F::NEG_NAN); + assert_biteq!(f(F::ONE, F::NAN), F::ONE); + assert_biteq!(f(F::ONE, F::NEG_NAN), F::NEG_ONE); + assert_biteq!(f(F::NEG_ONE, F::NAN), F::ONE); + assert_biteq!(f(F::NEG_ONE, F::NEG_NAN), F::NEG_ONE); } #[test] From 3c30d8cb1ec24e0b8a88a5cedcf6b9bece0117d7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 20 May 2025 13:31:31 +0000 Subject: [PATCH 1377/1459] compiler-builtins: Eliminate symlinks compiler-builtins has a symlink to the `libm` source directory so the two crates can share files but still act as two separate crates. This causes problems with some sysroot-related tooling, however, since directory symlinks seem to not be supported. The reason this was a symlink in the first place is that there isn't an easy way for Cargo to publish two crates that share source (building works fine but publishing rejects `include`d files from parent directories, as well as nested package roots). However, after the switch to a subtree, we no longer need to publish compiler-builtins; this means that we can eliminate the link and just use `#[path]`. Similarly, the LICENSE file was symlinked so it could live in the repository root but be included in the package.
This is also removed as it caused problems with the dist job (error from bootstrap's `tarball.rs`, "generated a symlink in a tarball"). If we need to publish compiler-builtins again for any reason, it would be easy to revert these changes in a preprocess step. --- compiler-builtins/LICENSE.txt | 1 - compiler-builtins/src/math/libm_math | 1 - compiler-builtins/src/math/mod.rs | 1 + 3 files changed, 1 insertion(+), 2 deletions(-) delete mode 120000 compiler-builtins/LICENSE.txt delete mode 120000 compiler-builtins/src/math/libm_math diff --git a/compiler-builtins/LICENSE.txt b/compiler-builtins/LICENSE.txt deleted file mode 120000 index 4ab43736a..000000000 --- a/compiler-builtins/LICENSE.txt +++ /dev/null @@ -1 +0,0 @@ -../LICENSE.txt \ No newline at end of file diff --git a/compiler-builtins/src/math/libm_math b/compiler-builtins/src/math/libm_math deleted file mode 120000 index 4d65313c2..000000000 --- a/compiler-builtins/src/math/libm_math +++ /dev/null @@ -1 +0,0 @@ -../../../libm/src/math \ No newline at end of file diff --git a/compiler-builtins/src/math/mod.rs b/compiler-builtins/src/math/mod.rs index 078feb9ff..62d729674 100644 --- a/compiler-builtins/src/math/mod.rs +++ b/compiler-builtins/src/math/mod.rs @@ -2,6 +2,7 @@ #[allow(dead_code)] #[allow(unused_imports)] #[allow(clippy::all)] +#[path = "../../../libm/src/math/mod.rs"] pub(crate) mod libm_math; macro_rules! libm_intrinsics { From f1c4a11e96921c88bcb051caa6ea95112e60dca7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 4 Jun 2025 08:20:08 +0000 Subject: [PATCH 1378/1459] Replace the musl submodule with a download script The submodule was causing issues in rust-lang/rust, so eliminate it here. `build-musl` is also removed from `libm-test`'s default features so the crate doesn't need to be built by default.
--- .github/workflows/main.yaml | 22 ++++++++++------------ .gitignore | 3 +++ .gitmodules | 4 ---- ci/update-musl.sh | 15 +++++++++++++++ crates/musl-math-sys/build.rs | 2 +- crates/musl-math-sys/musl | 1 - libm-test/Cargo.toml | 2 +- 7 files changed, 30 insertions(+), 19 deletions(-) delete mode 100644 .gitmodules create mode 100755 ci/update-musl.sh delete mode 160000 crates/musl-math-sys/musl diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 9f389d8b4..95b0962b0 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -108,8 +108,6 @@ jobs: - name: Print runner information run: uname -a - uses: actions/checkout@v4 - with: - submodules: true - name: Install Rust (rustup) shell: bash run: | @@ -146,6 +144,10 @@ jobs: shell: bash - run: echo "RUST_COMPILER_RT_ROOT=$(realpath ./compiler-rt)" >> "$GITHUB_ENV" shell: bash + + - name: Download musl source + run: ./ci/update-musl.sh + shell: bash - name: Verify API list if: matrix.os == 'ubuntu-24.04' @@ -182,8 +184,6 @@ jobs: timeout-minutes: 10 steps: - uses: actions/checkout@v4 - with: - submodules: true # Unlike rustfmt, stable clippy does not work on code with nightly features. 
- name: Install nightly `clippy` run: | @@ -191,6 +191,8 @@ jobs: rustup default nightly rustup component add clippy - uses: Swatinem/rust-cache@v2 + - name: Download musl source + run: ./ci/update-musl.sh - run: cargo clippy --workspace --all-targets benchmarks: @@ -205,8 +207,6 @@ jobs: runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@master - with: - submodules: true - uses: taiki-e/install-action@cargo-binstall - name: Set up dependencies @@ -223,6 +223,8 @@ jobs: - uses: Swatinem/rust-cache@v2 with: key: ${{ matrix.target }} + - name: Download musl source + run: ./ci/update-musl.sh - name: Run icount benchmarks env: @@ -256,8 +258,6 @@ jobs: timeout-minutes: 10 steps: - uses: actions/checkout@v4 - with: - submodules: true - name: Install Rust (rustup) run: rustup update nightly --no-self-update && rustup default nightly shell: bash @@ -292,8 +292,6 @@ jobs: timeout-minutes: 10 steps: - uses: actions/checkout@v4 - with: - submodules: true - name: Install stable `rustfmt` run: rustup set profile minimal && rustup default stable && rustup component add rustfmt - run: cargo fmt -- --check @@ -317,13 +315,13 @@ jobs: TO_TEST: ${{ matrix.to_test }} steps: - uses: actions/checkout@v4 - with: - submodules: true - name: Install Rust run: | rustup update nightly --no-self-update rustup default nightly - uses: Swatinem/rust-cache@v2 + - name: download musl source + run: ./ci/update-musl.sh - name: Run extensive tests run: ./ci/run-extensive.sh - name: Print test logs if available diff --git a/.gitignore b/.gitignore index 5287a6c72..f12b871c2 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,6 @@ iai-home *.bk *.rs.bk .#* + +# Manually managed +crates/musl-math-sys/musl diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 792ed9ab2..000000000 --- a/.gitmodules +++ /dev/null @@ -1,4 +0,0 @@ -[submodule "crates/musl-math-sys/musl"] - path = crates/musl-math-sys/musl - url = https://git.musl-libc.org/git/musl - shallow = true diff --git 
a/ci/update-musl.sh b/ci/update-musl.sh new file mode 100755 index 000000000..b71cf5778 --- /dev/null +++ b/ci/update-musl.sh @@ -0,0 +1,15 @@ +#!/bin/sh +# Download musl to a repository for `musl-math-sys` + +set -eux + +url=git://git.musl-libc.org/musl +ref=c47ad25ea3b484e10326f933e927c0bc8cded3da +dst=crates/musl-math-sys/musl + +if ! [ -d "$dst" ]; then + git clone "$url" "$dst" --single-branch --depth=1000 +fi + +git -C "$dst" fetch "$url" --depth=1 +git -C "$dst" checkout "$ref" diff --git a/crates/musl-math-sys/build.rs b/crates/musl-math-sys/build.rs index b00dbc73e..59e42f2d2 100644 --- a/crates/musl-math-sys/build.rs +++ b/crates/musl-math-sys/build.rs @@ -120,7 +120,7 @@ fn build_musl_math(cfg: &Config) { let arch_dir = musl_dir.join("arch").join(&cfg.musl_arch); assert!( math.exists(), - "musl source not found. Is the submodule up to date?" + "musl source not found. You may need to run `./ci/update-musl.sh`." ); let source_map = find_math_source(&math, cfg); diff --git a/crates/musl-math-sys/musl b/crates/musl-math-sys/musl deleted file mode 160000 index c47ad25ea..000000000 --- a/crates/musl-math-sys/musl +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c47ad25ea3b484e10326f933e927c0bc8cded3da diff --git a/libm-test/Cargo.toml b/libm-test/Cargo.toml index 01b45716b..05fcc3234 100644 --- a/libm-test/Cargo.toml +++ b/libm-test/Cargo.toml @@ -6,7 +6,7 @@ publish = false license = "MIT OR Apache-2.0" [features] -default = ["build-mpfr", "build-musl", "unstable-float"] +default = ["build-mpfr", "unstable-float"] # Propagated from libm because this affects which functions we test. unstable-float = ["libm/unstable-float", "rug?/nightly-float"] From 9e0cc1dbe45b552322f5512e484e2f0670c901c2 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 28 May 2025 14:45:14 +0000 Subject: [PATCH 1379/1459] Add an empty rust-version file This will be used by `josh` tooling. 
--- rust-version | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 rust-version diff --git a/rust-version b/rust-version new file mode 100644 index 000000000..e69de29bb From ded114bca9a34d3ad4cd47f6a7287c7937c0ca38 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 18 May 2025 17:30:58 +0000 Subject: [PATCH 1380/1459] Add tooling for `josh` syncs Create a crate that handles pulling from and pushing to rust-lang/rust. This can be invoked with the following: $ cargo run -p josh-sync -- rustc-pull $ RUSTC_GIT=/path/to/rust/checkout cargo run -p josh-sync -- rustc-push --- Cargo.toml | 1 + crates/josh-sync/Cargo.toml | 7 + crates/josh-sync/src/main.rs | 45 +++++ crates/josh-sync/src/sync.rs | 371 +++++++++++++++++++++++++++++++++++ 4 files changed, 424 insertions(+) create mode 100644 crates/josh-sync/Cargo.toml create mode 100644 crates/josh-sync/src/main.rs create mode 100644 crates/josh-sync/src/sync.rs diff --git a/Cargo.toml b/Cargo.toml index bc6b4bd29..fb638f2fb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ resolver = "2" members = [ "builtins-test", "compiler-builtins", + "crates/josh-sync", "crates/libm-macros", "crates/musl-math-sys", "crates/panic-handler", diff --git a/crates/josh-sync/Cargo.toml b/crates/josh-sync/Cargo.toml new file mode 100644 index 000000000..1f3bb376d --- /dev/null +++ b/crates/josh-sync/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "josh-sync" +edition = "2024" +publish = false + +[dependencies] +directories = "6.0.0" diff --git a/crates/josh-sync/src/main.rs b/crates/josh-sync/src/main.rs new file mode 100644 index 000000000..7f0b11900 --- /dev/null +++ b/crates/josh-sync/src/main.rs @@ -0,0 +1,45 @@ +use std::io::{Read, Write}; +use std::process::exit; +use std::{env, io}; + +use crate::sync::{GitSync, Josh}; + +mod sync; + +const USAGE: &str = r#"Utility for synchronizing compiler-builtins with rust-lang/rust + +Usage: + + josh-sync rustc-pull + + Pull from rust-lang/rust to compiler-builtins.
Creates a commit + updating the version file, followed by a merge commit. + + josh-sync rustc-push GITHUB_USERNAME [BRANCH] + + Create a branch off of rust-lang/rust updating compiler-builtins. +"#; + +fn main() { + let sync = GitSync::from_current_dir(); + + // Collect args, then recollect as str refs so we can match on them + let args: Vec<_> = env::args().collect(); + let args: Vec<&str> = args.iter().map(String::as_str).collect(); + + match args.as_slice()[1..] { + ["rustc-pull"] => sync.rustc_pull(None), + ["rustc-push", github_user, branch] => sync.rustc_push(github_user, Some(branch)), + ["rustc-push", github_user] => sync.rustc_push(github_user, None), + ["start-josh"] => { + let _josh = Josh::start(); + println!("press enter to stop"); + io::stdout().flush().unwrap(); + let _ = io::stdin().read(&mut [0u8]).unwrap(); + } + _ => { + println!("{USAGE}"); + exit(1); + } + } +} diff --git a/crates/josh-sync/src/sync.rs b/crates/josh-sync/src/sync.rs new file mode 100644 index 000000000..003cf187d --- /dev/null +++ b/crates/josh-sync/src/sync.rs @@ -0,0 +1,371 @@ +use std::net::{SocketAddr, TcpStream}; +use std::process::{Command, Stdio, exit}; +use std::time::Duration; +use std::{env, fs, process, thread}; + +const JOSH_PORT: u16 = 42042; +const DEFAULT_PR_BRANCH: &str = "update-builtins"; + +pub struct GitSync { + upstream_repo: String, + upstream_ref: String, + upstream_url: String, + josh_filter: String, + josh_url_base: String, +} + +/// This code was adapted from the miri repository, via the rustc-dev-guide +/// () +impl GitSync { + pub fn from_current_dir() -> Self { + let upstream_repo = + env::var("UPSTREAM_ORG").unwrap_or_else(|_| "rust-lang".to_owned()) + "/rust"; + + Self { + upstream_url: format!("https://github.com/{upstream_repo}"), + upstream_repo, + upstream_ref: env::var("UPSTREAM_REF").unwrap_or_else(|_| "HEAD".to_owned()), + josh_filter: ":/library/compiler-builtins".to_owned(), + josh_url_base: format!("http://localhost:{JOSH_PORT}"), + } + 
} + + /// Pull from rust-lang/rust to compiler-builtins. + pub fn rustc_pull(&self, commit: Option) { + let Self { + upstream_ref, + upstream_url, + upstream_repo, + .. + } = self; + + let new_upstream_base = commit.unwrap_or_else(|| { + let out = check_output(["git", "ls-remote", upstream_url, upstream_ref]); + out.split_whitespace() + .next() + .unwrap_or_else(|| panic!("could not split output: '{out}'")) + .to_owned() + }); + + ensure_clean(); + + // Make sure josh is running. + let _josh = Josh::start(); + let josh_url_filtered = self.josh_url( + &self.upstream_repo, + Some(&new_upstream_base), + Some(&self.josh_filter), + ); + + let previous_upstream_base = fs::read_to_string("rust-version") + .expect("failed to read `rust-version`") + .trim() + .to_string(); + assert_ne!(previous_upstream_base, new_upstream_base, "nothing to pull"); + + let orig_head = check_output(["git", "rev-parse", "HEAD"]); + println!("original upstream base: {previous_upstream_base}"); + println!("new upstream base: {new_upstream_base}"); + println!("original HEAD: {orig_head}"); + + // Fetch the latest upstream HEAD so we can get a summary. Use the Josh URL for caching. + run([ + "git", + "fetch", + &self.josh_url(&self.upstream_repo, Some(&new_upstream_base), Some(":/")), + &new_upstream_base, + "--depth=1", + ]); + let new_summary = check_output(["git", "log", "-1", "--format=%h %s", &new_upstream_base]); + + // Update rust-version file. As a separate commit, since making it part of + // the merge has confused the heck out of josh in the past. + // We pass `--no-verify` to avoid running git hooks. + // We do this before the merge so that if there are merge conflicts, we have + // the right rust-version file while resolving them. 
+ fs::write("rust-version", format!("{new_upstream_base}\n")) + .expect("failed to write rust-version"); + + let prep_message = format!( + "Update the upstream Rust version\n\n\ + To prepare for merging from {upstream_repo}, set the version file to:\n\n \ + {new_summary}\n\ + ", + ); + run([ + "git", + "commit", + "rust-version", + "--no-verify", + "-m", + &prep_message, + ]); + + // Fetch given rustc commit. + run(["git", "fetch", &josh_url_filtered]); + let incoming_ref = check_output(["git", "rev-parse", "FETCH_HEAD"]); + println!("incoming ref: {incoming_ref}"); + + let merge_message = format!( + "Merge ref '{upstream_head_short}{filter}' from {upstream_url}\n\n\ + Pull recent changes from {upstream_repo} via Josh.\n\n\ + Upstream ref: {new_upstream_base}\n\ + Filtered ref: {incoming_ref}\n\ + ", + upstream_head_short = &new_upstream_base[..12], + filter = self.josh_filter + ); + + // This should not add any new root commits. So count those before and after merging. + let num_roots = || -> u32 { + let out = check_output(["git", "rev-list", "HEAD", "--max-parents=0", "--count"]); + out.trim() + .parse::() + .unwrap_or_else(|e| panic!("failed to parse `{out}`: {e}")) + }; + let num_roots_before = num_roots(); + + let pre_merge_sha = check_output(["git", "rev-parse", "HEAD"]); + println!("pre-merge HEAD: {pre_merge_sha}"); + + // Merge the fetched commit. + run([ + "git", + "merge", + "FETCH_HEAD", + "--no-verify", + "--no-ff", + "-m", + &merge_message, + ]); + + let current_sha = check_output(["git", "rev-parse", "HEAD"]); + if current_sha == pre_merge_sha { + run(["git", "reset", "--hard", &orig_head]); + eprintln!( + "No merge was performed, no changes to pull were found. \ + Rolled back the preparation commit." + ); + exit(1); + } + + // Check that the number of roots did not increase. + assert_eq!( + num_roots(), + num_roots_before, + "Josh created a new root commit. This is probably not the history you want." 
+ ); + } + + /// Construct an update to rust-lang/rust from compiler-builtins. + pub fn rustc_push(&self, github_user: &str, branch: Option<&str>) { + let Self { + josh_filter, + upstream_url, + .. + } = self; + + let branch = branch.unwrap_or(DEFAULT_PR_BRANCH); + let josh_url = self.josh_url(&format!("{github_user}/rust"), None, Some(josh_filter)); + let user_upstream_url = format!("git@github.com:{github_user}/rust.git"); + + let Ok(rustc_git) = env::var("RUSTC_GIT") else { + panic!("the RUSTC_GIT environment variable must be set to a rust-lang/rust checkout") + }; + + ensure_clean(); + let base = fs::read_to_string("rust-version") + .expect("failed to read `rust-version`") + .trim() + .to_string(); + + // Make sure josh is running. + let _josh = Josh::start(); + + // Prepare the branch. Pushing works much better if we use as base exactly + // the commit that we pulled from last time, so we use the `rust-version` + // file to find out which commit that would be. + println!("Preparing {github_user}/rust (base: {base})..."); + + if Command::new("git") + .args(["-C", &rustc_git, "fetch", &user_upstream_url, branch]) + .output() // capture output + .expect("could not run fetch") + .status + .success() + { + panic!( + "The branch '{branch}' seems to already exist in '{user_upstream_url}'. \ + Please delete it and try again." + ); + } + + run(["git", "-C", &rustc_git, "fetch", upstream_url, &base]); + + run_cfg("git", |c| { + c.args([ + "-C", + &rustc_git, + "push", + &user_upstream_url, + &format!("{base}:refs/heads/{branch}"), + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) // silence the "create GitHub PR" message + }); + println!("pushed PR branch"); + + // Do the actual push. + println!("Pushing changes..."); + run(["git", "push", &josh_url, &format!("HEAD:{branch}")]); + println!(); + + // Do a round-trip check to make sure the push worked as expected. 
+ run(["git", "fetch", &josh_url, branch]); + + let head = check_output(["git", "rev-parse", "HEAD"]); + let fetch_head = check_output(["git", "rev-parse", "FETCH_HEAD"]); + assert_eq!( + head, fetch_head, + "Josh created a non-roundtrip push! Do NOT merge this into rustc!\n\ + Expected {head}, got {fetch_head}." + ); + println!( + "Confirmed that the push round-trips back to compiler-builtins properly. Please \ + create a rustc PR:" + ); + // Open PR with `subtree update` title to silence the `no-merges` triagebot check + println!( + " {upstream_url}/compare/{github_user}:{branch}?quick_pull=1\ + &title=Update%20the%20%60compiler-builtins%60%20subtree\ + &body=Update%20the%20Josh%20subtree%20to%20https%3A%2F%2Fgithub.com%2Frust-lang%2F\ + compiler-builtins%2Fcommit%2F{head_short}.%0A%0Ar%3F%20%40ghost", + head_short = &head[..12], + ); + } + + /// Construct a url to the local Josh server with (optionally) + fn josh_url(&self, repo: &str, rev: Option<&str>, filter: Option<&str>) -> String { + format!( + "{base}/{repo}.git{at}{rev}{filter}{filt_git}", + base = self.josh_url_base, + at = if rev.is_some() { "@" } else { "" }, + rev = rev.unwrap_or_default(), + filter = filter.unwrap_or_default(), + filt_git = if filter.is_some() { ".git" } else { "" } + ) + } +} + +/// Fail if there are files that need to be checked in. +fn ensure_clean() { + let read = check_output(["git", "status", "--untracked-files=no", "--porcelain"]); + assert!( + read.is_empty(), + "working directory must be clean before performing rustc pull" + ); +} + +/* Helpers for running commands with logged invocations */ + +/// Run a command from an array, passing its output through. +fn run<'a, Args: AsRef<[&'a str]>>(l: Args) { + let l = l.as_ref(); + run_cfg(l[0], |c| c.args(&l[1..])); +} + +/// Run a command from an array, collecting its output. 
+fn check_output<'a, Args: AsRef<[&'a str]>>(l: Args) -> String { + let l = l.as_ref(); + check_output_cfg(l[0], |c| c.args(&l[1..])) +} + +/// [`run`] with configuration. +fn run_cfg(prog: &str, f: impl FnOnce(&mut Command) -> &mut Command) { + // self.read(l.as_ref()); + check_output_cfg(prog, |c| f(c.stdout(Stdio::inherit()))); +} + +/// [`read`] with configuration. All shell helpers print the command and pass stderr. +fn check_output_cfg(prog: &str, f: impl FnOnce(&mut Command) -> &mut Command) -> String { + let mut cmd = Command::new(prog); + cmd.stderr(Stdio::inherit()); + f(&mut cmd); + eprintln!("+ {cmd:?}"); + let out = cmd.output().expect("command failed"); + assert!(out.status.success()); + String::from_utf8(out.stdout.trim_ascii().to_vec()).expect("non-UTF8 output") +} + +/// Create a wrapper that stops Josh on drop. +pub struct Josh(process::Child); + +impl Josh { + pub fn start() -> Self { + // Determine cache directory. + let user_dirs = + directories::ProjectDirs::from("org", "rust-lang", "rustc-compiler-builtins-josh") + .unwrap(); + let local_dir = user_dirs.cache_dir().to_owned(); + + // Start josh, silencing its output. + #[expect(clippy::zombie_processes, reason = "clippy can't handle the loop")] + let josh = process::Command::new("josh-proxy") + .arg("--local") + .arg(local_dir) + .args([ + "--remote=https://github.com", + &format!("--port={JOSH_PORT}"), + "--no-background", + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn() + .expect("failed to start josh-proxy, make sure it is installed"); + + // Wait until the port is open. We try every 10ms until 1s passed. + for _ in 0..100 { + // This will generally fail immediately when the port is still closed. + let addr = SocketAddr::from(([127, 0, 0, 1], JOSH_PORT)); + let josh_ready = TcpStream::connect_timeout(&addr, Duration::from_millis(1)); + + if josh_ready.is_ok() { + println!("josh up and running"); + return Josh(josh); + } + + // Not ready yet. 
+ thread::sleep(Duration::from_millis(10)); + } + panic!("Even after waiting for 1s, josh-proxy is still not available.") + } +} + +impl Drop for Josh { + fn drop(&mut self) { + if cfg!(unix) { + // Try to gracefully shut it down. + Command::new("kill") + .args(["-s", "INT", &self.0.id().to_string()]) + .output() + .expect("failed to SIGINT josh-proxy"); + // Sadly there is no "wait with timeout"... so we just give it some time to finish. + thread::sleep(Duration::from_millis(100)); + // Now hopefully it is gone. + if self + .0 + .try_wait() + .expect("failed to wait for josh-proxy") + .is_some() + { + return; + } + } + // If that didn't work (or we're not on Unix), kill it hard. + eprintln!( + "I have to kill josh-proxy the hard way, let's hope this does not \ + break anything." + ); + self.0.kill().expect("failed to SIGKILL josh-proxy"); + } +} From 162576fa9844ec5111191e32a3384a26f8c825fb Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 4 Jun 2025 18:10:07 +0000 Subject: [PATCH 1381/1459] Update the upstream Rust version To prepare for merging from rust-lang/rust, set the version file to: df8102fe5f Auto merge of #142002 - onur-ozkan:follow-ups2, r=jieyouxu --- rust-version | 1 + 1 file changed, 1 insertion(+) diff --git a/rust-version b/rust-version index e69de29bb..e05aaa057 100644 --- a/rust-version +++ b/rust-version @@ -0,0 +1 @@ +df8102fe5f24f28a918660b0cd918d7331c3896e From cd0f2026a1995c0314aea6b52ed30b9e050931d0 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 4 Jun 2025 21:17:51 +0000 Subject: [PATCH 1382/1459] `panic-handler`: Remove the `no_core` feature This was introduced before `#[panic_handler]` was stable, but should no longer be needed. Additionally, we only need it for `builtins-test-intrinsics`, not as a dependency of `compiler-builtins`. 
--- builtins-test-intrinsics/Cargo.toml | 2 +- compiler-builtins/Cargo.toml | 3 --- crates/panic-handler/src/lib.rs | 7 ++----- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/builtins-test-intrinsics/Cargo.toml b/builtins-test-intrinsics/Cargo.toml index 704de20c5..064b7cad2 100644 --- a/builtins-test-intrinsics/Cargo.toml +++ b/builtins-test-intrinsics/Cargo.toml @@ -6,7 +6,7 @@ publish = false license = "MIT OR Apache-2.0" [dependencies] -compiler_builtins = { path = "../compiler-builtins", features = ["compiler-builtins"]} +compiler_builtins = { path = "../compiler-builtins", features = ["compiler-builtins"] } panic-handler = { path = "../crates/panic-handler" } [features] diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index 6bee8da68..11ee91954 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -24,9 +24,6 @@ core = { version = "1.0.1", optional = true, package = "rustc-std-workspace-core [build-dependencies] cc = { optional = true, version = "1.2" } -[dev-dependencies] -panic-handler = { path = "../crates/panic-handler" } - [features] default = ["compiler-builtins"] diff --git a/crates/panic-handler/src/lib.rs b/crates/panic-handler/src/lib.rs index 673e00522..f4d7c8397 100644 --- a/crates/panic-handler/src/lib.rs +++ b/crates/panic-handler/src/lib.rs @@ -1,11 +1,8 @@ //! This is needed for tests on targets that require a `#[panic_handler]` function -#![feature(no_core)] -#![no_core] - -extern crate core; +#![no_std] #[panic_handler] -fn panic(_: &core::panic::PanicInfo) -> ! { +fn panic(_: &core::panic::PanicInfo<'_>) -> ! { loop {} } From 23567698971accae711fa3514f95c996ecba2abf Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 4 Jun 2025 20:56:35 +0000 Subject: [PATCH 1383/1459] Use the in-tree `compiler-builtins` Many of `std`'s dependency have a dependency on the crates.io `compiler-builtins` when used with the feature `rustc-std-workspace-core`. 
Use a Cargo patch to select the in-tree version instead. `compiler-builtins` is also added as a dependency of `rustc-std-workspace-core` so these crates can remove their crates.io dependency in the future. --- compiler-builtins/Cargo.toml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index 11ee91954..df8e96482 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -17,9 +17,7 @@ doctest = false test = false [dependencies] -# For more information on this dependency see -# https://github.com/rust-lang/rust/tree/master/library/rustc-std-workspace-core -core = { version = "1.0.1", optional = true, package = "rustc-std-workspace-core" } +core = { path = "../../core", optional = true } [build-dependencies] cc = { optional = true, version = "1.2" } From 8f802ae454432a8cd30df5d376dbd29694e6bb71 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 5 Jun 2025 06:00:54 +0000 Subject: [PATCH 1384/1459] compiler-builtins: Fix a `rustdoc::bare-urls` error --- compiler-builtins/src/aarch64_linux.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-builtins/src/aarch64_linux.rs b/compiler-builtins/src/aarch64_linux.rs index e238d0237..226121237 100644 --- a/compiler-builtins/src/aarch64_linux.rs +++ b/compiler-builtins/src/aarch64_linux.rs @@ -4,7 +4,7 @@ //! To avoid breaking backwards compat, C toolchains introduced a concept of "outlined atomics", //! where atomic operations call into the compiler runtime to dispatch between two depending on //! which is supported on the current CPU. -//! See https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics for more discussion. +//! See for more discussion. //! //! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection. //! 
Use the `compiler-rt` intrinsics if you want LSE support. From 714314f48b9c0b23c083b0840930f6b000b9cc2a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 8 Jun 2025 10:21:57 +0000 Subject: [PATCH 1385/1459] compiler-builtins: Resolve `unsafe_op_in_unsafe_fn` on Arm32 Android There are a few places that violate this lint, which showed up in rust-lang/rust CI (the relevent module is gated behind `kernel_user_helpers` which is only set for `armv4t`, `armv5te`, and `arm-linux-androideabi`; none of these are tested in compiler-builtins CI). Add new `unsafe { /* ... */ }` blocks where needed to address this. Some blocks should get a more thorough review of their preconditions, so their safety comments are left as `FIXME`s. --- compiler-builtins/src/arm_linux.rs | 40 ++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/compiler-builtins/src/arm_linux.rs b/compiler-builtins/src/arm_linux.rs index 6ce67ba71..ab9f86807 100644 --- a/compiler-builtins/src/arm_linux.rs +++ b/compiler-builtins/src/arm_linux.rs @@ -4,12 +4,17 @@ use core::{arch, mem}; // Kernel-provided user-mode helper functions: // https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt unsafe fn __kuser_cmpxchg(oldval: u32, newval: u32, ptr: *mut u32) -> bool { - let f: extern "C" fn(u32, u32, *mut u32) -> u32 = mem::transmute(0xffff0fc0usize as *const ()); + // FIXME(volatile): the third parameter is a volatile pointer + // SAFETY: kernel docs specify a known address with the given signature + let f = unsafe { + mem::transmute::<_, extern "C" fn(u32, u32, *mut u32) -> u32>(0xffff0fc0usize as *const ()) + }; f(oldval, newval, ptr) == 0 } unsafe fn __kuser_memory_barrier() { - let f: extern "C" fn() = mem::transmute(0xffff0fa0usize as *const ()); + // SAFETY: kernel docs specify a known address with the given signature + let f = unsafe { mem::transmute::<_, extern "C" fn()>(0xffff0fa0usize as *const ()) }; f(); } @@ -67,8 +72,10 @@ fn 
insert_aligned(aligned: u32, val: u32, shift: u32, mask: u32) -> u32 { /// - if `size_of::() == 2`, `ptr` or `ptr` offset by 2 bytes must be valid for a relaxed atomic /// read of 2 bytes. /// - if `size_of::() == 4`, `ptr` must be valid for a relaxed atomic read of 4 bytes. +// FIXME: assert some of the preconditions in debug mode unsafe fn atomic_load_aligned(ptr: *mut u32) -> u32 { - if mem::size_of::() == 4 { + const { assert!(size_of::() <= 4) }; + if size_of::() == 4 { // SAFETY: As `T` has a size of 4, the caller garantees this is sound. unsafe { AtomicU32::from_ptr(ptr).load(Ordering::Relaxed) } } else { @@ -100,11 +107,13 @@ unsafe fn atomic_rmw u32, G: Fn(u32, u32) -> u32>(ptr: *mut T, let (shift, mask) = get_shift_mask(ptr); loop { - let curval_aligned = atomic_load_aligned::(aligned_ptr); + // FIXME(safety): preconditions review needed + let curval_aligned = unsafe { atomic_load_aligned::(aligned_ptr) }; let curval = extract_aligned(curval_aligned, shift, mask); let newval = f(curval); let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask); - if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) { + // FIXME(safety): preconditions review needed + if unsafe { __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) } { return g(curval, newval); } } @@ -116,13 +125,15 @@ unsafe fn atomic_cmpxchg(ptr: *mut T, oldval: u32, newval: u32) -> u32 { let (shift, mask) = get_shift_mask(ptr); loop { - let curval_aligned = atomic_load_aligned::(aligned_ptr); + // FIXME(safety): preconditions review needed + let curval_aligned = unsafe { atomic_load_aligned::(aligned_ptr) }; let curval = extract_aligned(curval_aligned, shift, mask); if curval != oldval { return curval; } let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask); - if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) { + // FIXME(safety): preconditions review needed + if unsafe { __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) } { 
return oldval; } } @@ -132,7 +143,14 @@ macro_rules! atomic_rmw { ($name:ident, $ty:ty, $op:expr, $fetch:expr) => { intrinsics! { pub unsafe extern "C" fn $name(ptr: *mut $ty, val: $ty) -> $ty { - atomic_rmw(ptr, |x| $op(x as $ty, val) as u32, |old, new| $fetch(old, new)) as $ty + // FIXME(safety): preconditions review needed + unsafe { + atomic_rmw( + ptr, + |x| $op(x as $ty, val) as u32, + |old, new| $fetch(old, new) + ) as $ty + } } } }; @@ -149,7 +167,8 @@ macro_rules! atomic_cmpxchg { ($name:ident, $ty:ty) => { intrinsics! { pub unsafe extern "C" fn $name(ptr: *mut $ty, oldval: $ty, newval: $ty) -> $ty { - atomic_cmpxchg(ptr, oldval as u32, newval as u32) as $ty + // FIXME(safety): preconditions review needed + unsafe { atomic_cmpxchg(ptr, oldval as u32, newval as u32) as $ty } } } }; @@ -285,6 +304,7 @@ atomic_cmpxchg!(__sync_val_compare_and_swap_4, u32); intrinsics! { pub unsafe extern "C" fn __sync_synchronize() { - __kuser_memory_barrier(); + // SAFETY: preconditions are the same as the calling function. + unsafe { __kuser_memory_barrier() }; } } From d17f101f0d5eba57120ad60358c478442c284303 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 8 Jun 2025 17:13:18 +0000 Subject: [PATCH 1386/1459] compiler-builtins: Specify `:r` registers for `usize` On the ILP32 `x86_64-unknown-linux-gnux32` target, `usize` is 32 bits so there is a sub-register alignment warning. Specify the 64-bit `r` registers, which matches the current default as well as the size of the other operands in the routines. 
--- compiler-builtins/src/mem/x86_64.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler-builtins/src/mem/x86_64.rs b/compiler-builtins/src/mem/x86_64.rs index 5cbe83ab1..fb29eb11b 100644 --- a/compiler-builtins/src/mem/x86_64.rs +++ b/compiler-builtins/src/mem/x86_64.rs @@ -69,7 +69,7 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { "rep movsb", "sub $7, %rsi", "sub $7, %rdi", - "mov {qword_count}, %rcx", + "mov {qword_count:r}, %rcx", "rep movsq", "test {pre_byte_count:e}, {pre_byte_count:e}", "add $7, %rsi", @@ -212,7 +212,7 @@ pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize { let x = { let r; asm!( - "movdqa ({addr}), {dest}", + "movdqa ({addr:r}), {dest}", addr = in(reg) s, dest = out(xmm_reg) r, options(att_syntax, nostack), @@ -232,7 +232,7 @@ pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize { let x = { let r; asm!( - "movdqa ({addr}), {dest}", + "movdqa ({addr:r}), {dest}", addr = in(reg) s, dest = out(xmm_reg) r, options(att_syntax, nostack), From d1d8fb29fe06adb56176d45c2dd810153a5ce006 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 9 Jun 2025 04:10:24 +0000 Subject: [PATCH 1387/1459] compiler-builtins: Emit `rustc-check-cfg` earlier The `build.rs` entrypoint returns early for some targets, so emscripten and OpenBSD were not getting check-cfg set. Emit these earlier to avoid the `unexpected_cfgs` lint. 
--- compiler-builtins/build.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/compiler-builtins/build.rs b/compiler-builtins/build.rs index d37fdc5df..7c8da02fd 100644 --- a/compiler-builtins/build.rs +++ b/compiler-builtins/build.rs @@ -22,6 +22,9 @@ fn main() { println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display()); + println!("cargo::rustc-check-cfg=cfg(kernel_user_helpers)"); + println!("cargo::rustc-check-cfg=cfg(feature, values(\"mem-unaligned\"))"); + // Emscripten's runtime includes all the builtins if target.os == "emscripten" { return; @@ -47,7 +50,6 @@ fn main() { } // These targets have hardware unaligned access support. - println!("cargo::rustc-check-cfg=cfg(feature, values(\"mem-unaligned\"))"); if target.arch.contains("x86_64") || target.arch.contains("x86") || target.arch.contains("aarch64") @@ -78,7 +80,6 @@ fn main() { // Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures. This // includes the old androideabi. It is deprecated but it is available as a // rustc target (arm-linux-androideabi). - println!("cargo::rustc-check-cfg=cfg(kernel_user_helpers)"); if llvm_target[0] == "armv4t" || llvm_target[0] == "armv5te" || target.triple == "arm-linux-androideabi" From 20510166d01c0fb6fbe8927dd53f73fb13e95d46 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 10 Jun 2025 06:49:28 +0000 Subject: [PATCH 1388/1459] compiler-builtins: Remove unused `lints.rust` table The unexpected configs are now unused or known to `rustc` in our CI. 
--- compiler-builtins/Cargo.toml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index 11ee91954..eabb3d625 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -55,7 +55,3 @@ rustc-dep-of-std = ["compiler-builtins", "dep:core"] # This makes certain traits and function specializations public that # are not normally public but are required by the `builtins-test` unstable-public-internals = [] - -[lints.rust] -# The cygwin config can be dropped after our benchmark toolchain is bumped -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(bootstrap)', 'cfg(target_os, values("cygwin"))'] } From add44a716f996401dac618417519ac31185b80c8 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 10 Jun 2025 07:02:57 +0000 Subject: [PATCH 1389/1459] ci: Fix a typo that was causing a command failure --- ci/bench-icount.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/bench-icount.sh b/ci/bench-icount.sh index 5724955fe..d2baebb52 100755 --- a/ci/bench-icount.sh +++ b/ci/bench-icount.sh @@ -57,7 +57,7 @@ function run_icount_benchmarks() { # Disregard regressions after merge echo "Benchmarks completed with regressions; ignoring (not in a PR)" else - ./ci/ci-util.py handle-banch-regressions "$PR_NUMBER" + ./ci/ci-util.py handle-bench-regressions "$PR_NUMBER" fi } From 319637f544d9dda8fc3dd482d9979e0da135a258 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Tue, 10 Jun 2025 00:59:09 +0200 Subject: [PATCH 1390/1459] add a fixme to use `extern_custom` when available --- compiler-builtins/src/probestack.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs index c9070cf55..16faaa67f 100644 --- a/compiler-builtins/src/probestack.rs +++ b/compiler-builtins/src/probestack.rs @@ -125,6 +125,9 @@ macro_rules! 
define_rust_probestack { // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, // ensuring that if any pages are unmapped we'll make a page fault. // +// FIXME(abi_custom): This function is unsafe because it uses a custom ABI, +// it does not actually match `extern "C"`. +// // The ABI here is that the stack frame size is located in `%rax`. Upon // return we're not supposed to modify `%rsp` or `%rax`. // @@ -260,6 +263,9 @@ core::arch::global_asm!( // that on Unix we're expected to restore everything as it was, this // function basically can't tamper with anything. // +// FIXME(abi_custom): This function is unsafe because it uses a custom ABI, +// it does not actually match `extern "C"`. +// // The ABI here is the same as x86_64, except everything is 32-bits large. core::arch::global_asm!( define_rust_probestack!( @@ -303,6 +309,9 @@ core::arch::global_asm!( // probestack function will also do things like _chkstk in MSVC. // So we need to sub %ax %sp in probestack when arch is x86. // +// FIXME(abi_custom): This function is unsafe because it uses a custom ABI, +// it does not actually match `extern "C"`. 
+// // REF: Rust commit(74e80468347) // rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805 // Comments in LLVM: From 0f8e54c0a0431d5925bf47a0469239ae00f1cc4f Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 4 Jun 2025 01:31:34 +0200 Subject: [PATCH 1391/1459] use `#[naked]` for `__rust_probestack` --- compiler-builtins/src/lib.rs | 1 + compiler-builtins/src/probestack.rs | 129 +++++++--------------------- 2 files changed, 33 insertions(+), 97 deletions(-) diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs index 6a6b28067..6549d4cef 100644 --- a/compiler-builtins/src/lib.rs +++ b/compiler-builtins/src/lib.rs @@ -8,6 +8,7 @@ #![feature(linkage)] #![feature(naked_functions)] #![feature(repr_simd)] +#![feature(rustc_attrs)] #![cfg_attr(f16_enabled, feature(f16))] #![cfg_attr(f128_enabled, feature(f128))] #![no_builtins] diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs index 16faaa67f..e9a26dff1 100644 --- a/compiler-builtins/src/probestack.rs +++ b/compiler-builtins/src/probestack.rs @@ -49,79 +49,6 @@ // We only define stack probing for these architectures today. #![cfg(any(target_arch = "x86_64", target_arch = "x86"))] -// SAFETY: defined in this module. -// FIXME(extern_custom): the ABI is not correct. -unsafe extern "C" { - pub fn __rust_probestack(); -} - -// A wrapper for our implementation of __rust_probestack, which allows us to -// keep the assembly inline while controlling all CFI directives in the assembly -// emitted for the function. -// -// This is the ELF version. -#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))] -macro_rules! define_rust_probestack { - ($body: expr) => { - concat!( - " - .pushsection .text.__rust_probestack - .globl __rust_probestack - .type __rust_probestack, @function - .hidden __rust_probestack - __rust_probestack: - ", - $body, - " - .size __rust_probestack, . 
- __rust_probestack - .popsection - " - ) - }; -} - -#[cfg(all(target_os = "uefi", target_arch = "x86_64"))] -macro_rules! define_rust_probestack { - ($body: expr) => { - concat!( - " - .globl __rust_probestack - __rust_probestack: - ", - $body - ) - }; -} - -// Same as above, but for Mach-O. Note that the triple underscore -// is deliberate -#[cfg(target_vendor = "apple")] -macro_rules! define_rust_probestack { - ($body: expr) => { - concat!( - " - .globl ___rust_probestack - ___rust_probestack: - ", - $body - ) - }; -} - -// In UEFI x86 arch, triple underscore is deliberate. -#[cfg(all(target_os = "uefi", target_arch = "x86"))] -macro_rules! define_rust_probestack { - ($body: expr) => { - concat!( - " - .globl ___rust_probestack - ___rust_probestack: - ", - $body - ) - }; -} - // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, // ensuring that if any pages are unmapped we'll make a page fault. // @@ -136,8 +63,10 @@ macro_rules! define_rust_probestack { target_arch = "x86_64", not(all(target_env = "sgx", target_vendor = "fortanix")) ))] -core::arch::global_asm!( - define_rust_probestack!( +#[unsafe(naked)] +#[rustc_std_internal_symbol] +pub unsafe extern "C" fn __rust_probestack() { + core::arch::naked_asm!( " .cfi_startproc pushq %rbp @@ -187,10 +116,10 @@ core::arch::global_asm!( .cfi_adjust_cfa_offset -8 ret .cfi_endproc - " - ), - options(att_syntax) -); + ", + options(att_syntax) + ) +} // This function is the same as above, except that some instructions are // [manually patched for LVI]. 
@@ -200,8 +129,10 @@ core::arch::global_asm!( target_arch = "x86_64", all(target_env = "sgx", target_vendor = "fortanix") ))] -core::arch::global_asm!( - define_rust_probestack!( +#[unsafe(naked)] +#[no_mangle] +pub unsafe extern "C" fn __rust_probestack() { + core::arch::naked_asm!( " .cfi_startproc pushq %rbp @@ -253,10 +184,10 @@ core::arch::global_asm!( lfence jmp *%r11 .cfi_endproc - " - ), - options(att_syntax) -); + ", + options(att_syntax) + ) +} #[cfg(all(target_arch = "x86", not(target_os = "uefi")))] // This is the same as x86_64 above, only translated for 32-bit sizes. Note @@ -267,8 +198,10 @@ core::arch::global_asm!( // it does not actually match `extern "C"`. // // The ABI here is the same as x86_64, except everything is 32-bits large. -core::arch::global_asm!( - define_rust_probestack!( +#[unsafe(naked)] +#[rustc_std_internal_symbol] +pub unsafe extern "C" fn __rust_probestack() { + core::arch::naked_asm!( " .cfi_startproc push %ebp @@ -299,10 +232,10 @@ core::arch::global_asm!( .cfi_adjust_cfa_offset -4 ret .cfi_endproc - " - ), - options(att_syntax) -); + ", + options(att_syntax) + ) +} #[cfg(all(target_arch = "x86", target_os = "uefi"))] // UEFI target is windows like target. LLVM will do _chkstk things like windows. @@ -318,8 +251,10 @@ core::arch::global_asm!( // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp // themselves. 
-core::arch::global_asm!( - define_rust_probestack!( +#[unsafe(naked)] +#[rustc_std_internal_symbol] +pub unsafe extern "C" fn __rust_probestack() { + core::arch::naked_asm!( " .cfi_startproc push %ebp @@ -355,7 +290,7 @@ core::arch::global_asm!( .cfi_adjust_cfa_offset -4 ret .cfi_endproc - " - ), - options(att_syntax) -); + ", + options(att_syntax) + ) +} From f9395626288ff91e8d5499207f14fd57c2a16498 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 4 Jun 2025 01:32:04 +0200 Subject: [PATCH 1392/1459] merge the sgx/fortanix `__rust_probestack` into the general `x86_64` one --- compiler-builtins/src/probestack.rs | 96 +++++++---------------------- 1 file changed, 23 insertions(+), 73 deletions(-) diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs index e9a26dff1..2375107e3 100644 --- a/compiler-builtins/src/probestack.rs +++ b/compiler-builtins/src/probestack.rs @@ -57,15 +57,31 @@ // // The ABI here is that the stack frame size is located in `%rax`. Upon // return we're not supposed to modify `%rsp` or `%rax`. -// -// Any changes to this function should be replicated to the SGX version below. -#[cfg(all( - target_arch = "x86_64", - not(all(target_env = "sgx", target_vendor = "fortanix")) -))] +#[cfg(target_arch = "x86_64")] #[unsafe(naked)] #[rustc_std_internal_symbol] pub unsafe extern "C" fn __rust_probestack() { + #[cfg(not(all(target_env = "sgx", target_vendor = "fortanix")))] + macro_rules! ret { + () => { + "ret" + }; + } + + #[cfg(all(target_env = "sgx", target_vendor = "fortanix"))] + macro_rules! ret { + // for this target, [manually patch for LVI]. 
+ // + // [manually patch for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions + () => { + " + pop %r11 + lfence + jmp *%r11 + " + }; + } + core::arch::naked_asm!( " .cfi_startproc @@ -114,75 +130,9 @@ pub unsafe extern "C" fn __rust_probestack() { leave .cfi_def_cfa_register %rsp .cfi_adjust_cfa_offset -8 - ret - .cfi_endproc ", - options(att_syntax) - ) -} - -// This function is the same as above, except that some instructions are -// [manually patched for LVI]. -// -// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions -#[cfg(all( - target_arch = "x86_64", - all(target_env = "sgx", target_vendor = "fortanix") -))] -#[unsafe(naked)] -#[no_mangle] -pub unsafe extern "C" fn __rust_probestack() { - core::arch::naked_asm!( + ret!(), " - .cfi_startproc - pushq %rbp - .cfi_adjust_cfa_offset 8 - .cfi_offset %rbp, -16 - movq %rsp, %rbp - .cfi_def_cfa_register %rbp - - mov %rax,%r11 // duplicate %rax as we're clobbering %r11 - - // Main loop, taken in one page increments. We're decrementing rsp by - // a page each time until there's less than a page remaining. We're - // guaranteed that this function isn't called unless there's more than a - // page needed. - // - // Note that we're also testing against `8(%rsp)` to account for the 8 - // bytes pushed on the stack orginally with our return address. Using - // `8(%rsp)` simulates us testing the stack pointer in the caller's - // context. - - // It's usually called when %rax >= 0x1000, but that's not always true. - // Dynamic stack allocation, which is needed to implement unsized - // rvalues, triggers stackprobe even if %rax < 0x1000. - // Thus we have to check %r11 first to avoid segfault. 
- cmp $0x1000,%r11 - jna 3f -2: - sub $0x1000,%rsp - test %rsp,8(%rsp) - sub $0x1000,%r11 - cmp $0x1000,%r11 - ja 2b - -3: - // Finish up the last remaining stack space requested, getting the last - // bits out of r11 - sub %r11,%rsp - test %rsp,8(%rsp) - - // Restore the stack pointer to what it previously was when entering - // this function. The caller will readjust the stack pointer after we - // return. - add %rax,%rsp - - leave - .cfi_def_cfa_register %rsp - .cfi_adjust_cfa_offset -8 - pop %r11 - lfence - jmp *%r11 .cfi_endproc ", options(att_syntax) From c885ce088cf8e5ed65478226565d58976e8aa129 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 4 Jun 2025 01:33:16 +0200 Subject: [PATCH 1393/1459] indent the probestack inline assembly --- compiler-builtins/src/probestack.rs | 220 ++++++++++++++-------------- 1 file changed, 110 insertions(+), 110 deletions(-) diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs index 2375107e3..1441fd73b 100644 --- a/compiler-builtins/src/probestack.rs +++ b/compiler-builtins/src/probestack.rs @@ -84,56 +84,56 @@ pub unsafe extern "C" fn __rust_probestack() { core::arch::naked_asm!( " - .cfi_startproc - pushq %rbp - .cfi_adjust_cfa_offset 8 - .cfi_offset %rbp, -16 - movq %rsp, %rbp - .cfi_def_cfa_register %rbp - - mov %rax,%r11 // duplicate %rax as we're clobbering %r11 - - // Main loop, taken in one page increments. We're decrementing rsp by - // a page each time until there's less than a page remaining. We're - // guaranteed that this function isn't called unless there's more than a - // page needed. - // - // Note that we're also testing against `8(%rsp)` to account for the 8 - // bytes pushed on the stack orginally with our return address. Using - // `8(%rsp)` simulates us testing the stack pointer in the caller's - // context. - - // It's usually called when %rax >= 0x1000, but that's not always true. 
- // Dynamic stack allocation, which is needed to implement unsized - // rvalues, triggers stackprobe even if %rax < 0x1000. - // Thus we have to check %r11 first to avoid segfault. - cmp $0x1000,%r11 - jna 3f -2: - sub $0x1000,%rsp - test %rsp,8(%rsp) - sub $0x1000,%r11 - cmp $0x1000,%r11 - ja 2b - -3: - // Finish up the last remaining stack space requested, getting the last - // bits out of r11 - sub %r11,%rsp - test %rsp,8(%rsp) - - // Restore the stack pointer to what it previously was when entering - // this function. The caller will readjust the stack pointer after we - // return. - add %rax,%rsp - - leave - .cfi_def_cfa_register %rsp - .cfi_adjust_cfa_offset -8 + .cfi_startproc + pushq %rbp + .cfi_adjust_cfa_offset 8 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + + mov %rax,%r11 // duplicate %rax as we're clobbering %r11 + + // Main loop, taken in one page increments. We're decrementing rsp by + // a page each time until there's less than a page remaining. We're + // guaranteed that this function isn't called unless there's more than a + // page needed. + // + // Note that we're also testing against `8(%rsp)` to account for the 8 + // bytes pushed on the stack orginally with our return address. Using + // `8(%rsp)` simulates us testing the stack pointer in the caller's + // context. + + // It's usually called when %rax >= 0x1000, but that's not always true. + // Dynamic stack allocation, which is needed to implement unsized + // rvalues, triggers stackprobe even if %rax < 0x1000. + // Thus we have to check %r11 first to avoid segfault. + cmp $0x1000,%r11 + jna 3f + 2: + sub $0x1000,%rsp + test %rsp,8(%rsp) + sub $0x1000,%r11 + cmp $0x1000,%r11 + ja 2b + + 3: + // Finish up the last remaining stack space requested, getting the last + // bits out of r11 + sub %r11,%rsp + test %rsp,8(%rsp) + + // Restore the stack pointer to what it previously was when entering + // this function. 
The caller will readjust the stack pointer after we + // return. + add %rax,%rsp + + leave + .cfi_def_cfa_register %rsp + .cfi_adjust_cfa_offset -8 ", ret!(), " - .cfi_endproc + .cfi_endproc ", options(att_syntax) ) @@ -153,35 +153,35 @@ pub unsafe extern "C" fn __rust_probestack() { pub unsafe extern "C" fn __rust_probestack() { core::arch::naked_asm!( " - .cfi_startproc - push %ebp - .cfi_adjust_cfa_offset 4 - .cfi_offset %ebp, -8 - mov %esp, %ebp - .cfi_def_cfa_register %ebp - push %ecx - mov %eax,%ecx - - cmp $0x1000,%ecx - jna 3f -2: - sub $0x1000,%esp - test %esp,8(%esp) - sub $0x1000,%ecx - cmp $0x1000,%ecx - ja 2b - -3: - sub %ecx,%esp - test %esp,8(%esp) - - add %eax,%esp - pop %ecx - leave - .cfi_def_cfa_register %esp - .cfi_adjust_cfa_offset -4 - ret - .cfi_endproc + .cfi_startproc + push %ebp + .cfi_adjust_cfa_offset 4 + .cfi_offset %ebp, -8 + mov %esp, %ebp + .cfi_def_cfa_register %ebp + push %ecx + mov %eax,%ecx + + cmp $0x1000,%ecx + jna 3f + 2: + sub $0x1000,%esp + test %esp,8(%esp) + sub $0x1000,%ecx + cmp $0x1000,%ecx + ja 2b + + 3: + sub %ecx,%esp + test %esp,8(%esp) + + add %eax,%esp + pop %ecx + leave + .cfi_def_cfa_register %esp + .cfi_adjust_cfa_offset -4 + ret + .cfi_endproc ", options(att_syntax) ) @@ -206,40 +206,40 @@ pub unsafe extern "C" fn __rust_probestack() { pub unsafe extern "C" fn __rust_probestack() { core::arch::naked_asm!( " - .cfi_startproc - push %ebp - .cfi_adjust_cfa_offset 4 - .cfi_offset %ebp, -8 - mov %esp, %ebp - .cfi_def_cfa_register %ebp - push %ecx - push %edx - mov %eax,%ecx - - cmp $0x1000,%ecx - jna 3f -2: - sub $0x1000,%esp - test %esp,8(%esp) - sub $0x1000,%ecx - cmp $0x1000,%ecx - ja 2b - -3: - sub %ecx,%esp - test %esp,8(%esp) - mov 4(%ebp),%edx - mov %edx, 12(%esp) - add %eax,%esp - pop %edx - pop %ecx - leave - - sub %eax, %esp - .cfi_def_cfa_register %esp - .cfi_adjust_cfa_offset -4 - ret - .cfi_endproc + .cfi_startproc + push %ebp + .cfi_adjust_cfa_offset 4 + .cfi_offset %ebp, -8 + mov %esp, %ebp + 
.cfi_def_cfa_register %ebp + push %ecx + push %edx + mov %eax,%ecx + + cmp $0x1000,%ecx + jna 3f + 2: + sub $0x1000,%esp + test %esp,8(%esp) + sub $0x1000,%ecx + cmp $0x1000,%ecx + ja 2b + + 3: + sub %ecx,%esp + test %esp,8(%esp) + mov 4(%ebp),%edx + mov %edx, 12(%esp) + add %eax,%esp + pop %edx + pop %ecx + leave + + sub %eax, %esp + .cfi_def_cfa_register %esp + .cfi_adjust_cfa_offset -4 + ret + .cfi_endproc ", options(att_syntax) ) From 2f01db9bf56b5d559b53e601b537e2053905e03e Mon Sep 17 00:00:00 2001 From: qinghon Date: Fri, 13 Jun 2025 13:27:47 +0800 Subject: [PATCH 1394/1459] Eliminate `build.rs`-generated Aarch64 atomic macros (#951) Replace `build.rs` Rust generation with macros, using the unstable `${concat(...)}`. Fixes: https://github.com/rust-lang/compiler-builtins/issues/947 --- builtins-test/tests/lse.rs | 3 +- compiler-builtins/build.rs | 62 --------------------- compiler-builtins/src/aarch64_linux.rs | 74 +++++++++++++++++++++++++- compiler-builtins/src/lib.rs | 1 + 4 files changed, 75 insertions(+), 65 deletions(-) diff --git a/builtins-test/tests/lse.rs b/builtins-test/tests/lse.rs index 53167d98f..0d85228d7 100644 --- a/builtins-test/tests/lse.rs +++ b/builtins-test/tests/lse.rs @@ -1,4 +1,5 @@ #![feature(decl_macro)] // so we can use pub(super) +#![feature(macro_metavar_expr_concat)] #![cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm")))] /// Translate a byte size to a Rust type. 
@@ -87,7 +88,7 @@ test_op!(add, |left, right| left.wrapping_add(right)); test_op!(clr, |left, right| left & !right); test_op!(xor, std::ops::BitXor::bitxor); test_op!(or, std::ops::BitOr::bitor); - +use compiler_builtins::{foreach_bytes, foreach_ordering}; compiler_builtins::foreach_cas!(cas::test); compiler_builtins::foreach_cas16!(test_cas16); compiler_builtins::foreach_swp!(swap::test); diff --git a/compiler-builtins/build.rs b/compiler-builtins/build.rs index d37fdc5df..e909a0dcb 100644 --- a/compiler-builtins/build.rs +++ b/compiler-builtins/build.rs @@ -1,9 +1,6 @@ mod configure; -use std::collections::BTreeMap; use std::env; -use std::path::PathBuf; -use std::sync::atomic::Ordering; use configure::{Target, configure_aliases, configure_f16_f128}; @@ -85,10 +82,6 @@ fn main() { { println!("cargo:rustc-cfg=kernel_user_helpers") } - - if llvm_target[0].starts_with("aarch64") { - generate_aarch64_outlined_atomics(); - } } /// Run configuration for `libm` since it is included directly. @@ -131,61 +124,6 @@ fn configure_libm(target: &Target) { println!("cargo:rustc-cfg=feature=\"unstable-intrinsics\""); } -fn aarch64_symbol(ordering: Ordering) -> &'static str { - match ordering { - Ordering::Relaxed => "relax", - Ordering::Acquire => "acq", - Ordering::Release => "rel", - Ordering::AcqRel => "acq_rel", - _ => panic!("unknown symbol for {ordering:?}"), - } -} - -/// The `concat_idents` macro is extremely annoying and doesn't allow us to define new items. -/// Define them from the build script instead. -/// Note that the majority of the code is still defined in `aarch64.rs` through inline macros. -fn generate_aarch64_outlined_atomics() { - use std::fmt::Write; - // #[macro_export] so that we can use this in tests - let gen_macro = - |name| format!("#[macro_export] macro_rules! foreach_{name} {{ ($macro:path) => {{\n"); - - // Generate different macros for add/clr/eor/set so that we can test them separately. 
- let sym_names = ["cas", "ldadd", "ldclr", "ldeor", "ldset", "swp"]; - let mut macros = BTreeMap::new(); - for sym in sym_names { - macros.insert(sym, gen_macro(sym)); - } - - // Only CAS supports 16 bytes, and it has a different implementation that uses a different macro. - let mut cas16 = gen_macro("cas16"); - - for ordering in [ - Ordering::Relaxed, - Ordering::Acquire, - Ordering::Release, - Ordering::AcqRel, - ] { - let sym_ordering = aarch64_symbol(ordering); - for size in [1, 2, 4, 8] { - for (sym, macro_) in &mut macros { - let name = format!("__aarch64_{sym}{size}_{sym_ordering}"); - writeln!(macro_, "$macro!( {ordering:?}, {size}, {name} );").unwrap(); - } - } - let name = format!("__aarch64_cas16_{sym_ordering}"); - writeln!(cas16, "$macro!( {ordering:?}, {name} );").unwrap(); - } - - let mut buf = String::new(); - for macro_def in macros.values().chain(std::iter::once(&cas16)) { - buf += macro_def; - buf += "}; }\n"; - } - let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap()); - std::fs::write(out_dir.join("outlined_atomics.rs"), buf).unwrap(); -} - /// Emit directives for features we expect to support that aren't in `Cargo.toml`. /// /// These are mostly cfg elements emitted by this `build.rs`. diff --git a/compiler-builtins/src/aarch64_linux.rs b/compiler-builtins/src/aarch64_linux.rs index e238d0237..2402a3fe1 100644 --- a/compiler-builtins/src/aarch64_linux.rs +++ b/compiler-builtins/src/aarch64_linux.rs @@ -262,8 +262,78 @@ macro_rules! or { }; } -// See `generate_aarch64_outlined_atomics` in build.rs. -include!(concat!(env!("OUT_DIR"), "/outlined_atomics.rs")); +#[macro_export] +macro_rules! 
foreach_ordering { + ($macro:path, $bytes:tt, $name:ident) => { + $macro!( Relaxed, $bytes, ${concat($name, _relax)} ); + $macro!( Acquire, $bytes, ${concat($name, _acq)} ); + $macro!( Release, $bytes, ${concat($name, _rel)} ); + $macro!( AcqRel, $bytes, ${concat($name, _acq_rel)} ); + }; + ($macro:path, $name:ident) => { + $macro!( Relaxed, ${concat($name, _relax)} ); + $macro!( Acquire, ${concat($name, _acq)} ); + $macro!( Release, ${concat($name, _rel)} ); + $macro!( AcqRel, ${concat($name, _acq_rel)} ); + }; +} + +#[macro_export] +macro_rules! foreach_bytes { + ($macro:path, $name:ident) => { + foreach_ordering!( $macro, 1, ${concat(__aarch64_, $name, "1")} ); + foreach_ordering!( $macro, 2, ${concat(__aarch64_, $name, "2")} ); + foreach_ordering!( $macro, 4, ${concat(__aarch64_, $name, "4")} ); + foreach_ordering!( $macro, 8, ${concat(__aarch64_, $name, "8")} ); + }; +} + +/// Generate different macros for cas/swp/add/clr/eor/set so that we can test them separately. +#[macro_export] +macro_rules! foreach_cas { + ($macro:path) => { + foreach_bytes!($macro, cas); + }; +} + +/// Only CAS supports 16 bytes, and it has a different implementation that uses a different macro. +#[macro_export] +macro_rules! foreach_cas16 { + ($macro:path) => { + foreach_ordering!($macro, __aarch64_cas16); + }; +} +#[macro_export] +macro_rules! foreach_swp { + ($macro:path) => { + foreach_bytes!($macro, swp); + }; +} +#[macro_export] +macro_rules! foreach_ldadd { + ($macro:path) => { + foreach_bytes!($macro, ldadd); + }; +} +#[macro_export] +macro_rules! foreach_ldclr { + ($macro:path) => { + foreach_bytes!($macro, ldclr); + }; +} +#[macro_export] +macro_rules! foreach_ldeor { + ($macro:path) => { + foreach_bytes!($macro, ldeor); + }; +} +#[macro_export] +macro_rules! 
foreach_ldset { + ($macro:path) => { + foreach_bytes!($macro, ldset); + }; +} + foreach_cas!(compare_and_swap); foreach_cas16!(compare_and_swap_i128); foreach_swp!(swap); diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs index 6a6b28067..ef3299d69 100644 --- a/compiler-builtins/src/lib.rs +++ b/compiler-builtins/src/lib.rs @@ -8,6 +8,7 @@ #![feature(linkage)] #![feature(naked_functions)] #![feature(repr_simd)] +#![feature(macro_metavar_expr_concat)] #![cfg_attr(f16_enabled, feature(f16))] #![cfg_attr(f128_enabled, feature(f128))] #![no_builtins] From f6a23a78c44e96780de730d419c7f8b0afebfb34 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 2 Jun 2025 17:20:22 +0000 Subject: [PATCH 1395/1459] fmaximum,fminimum: Fix incorrect result and add tests After adding tests, the current implementation for fminimum fails when provided a negative zero and NaN as inputs: ---- math::fminimum_fmaximum_num::tests::fmaximum_num_spec_tests_f64 stdout ---- thread 'math::fminimum_fmaximum_num::tests::fmaximum_num_spec_tests_f64' panicked at libm/src/math/fminimum_fmaximum_num.rs:240:13: fmaximum_num(-0x0p+0, NaN) l: NaN (0x7ff8000000000000) r: -0.0 (0x8000000000000000) ---- math::fminimum_fmaximum_num::tests::fmaximum_num_spec_tests_f32 stdout ---- thread 'math::fminimum_fmaximum_num::tests::fmaximum_num_spec_tests_f32' panicked at libm/src/math/fminimum_fmaximum_num.rs:240:13: fmaximum_num(-0x0p+0, NaN) l: NaN (0x7fc00000) r: -0.0 (0x80000000) Add more thorough spec tests for these functions and correct the implementations. Canonicalization is also moved to a trait method to centralize documentation about what it does and doesn't do. 
--- libm/src/math/fmin_fmax.rs | 122 ++++++++++++++++++++-- libm/src/math/fminimum_fmaximum.rs | 126 ++++++++++++++++++++-- libm/src/math/fminimum_fmaximum_num.rs | 138 ++++++++++++++++++++++--- libm/src/math/generic/fmax.rs | 3 +- libm/src/math/generic/fmaximum.rs | 5 +- libm/src/math/generic/fmaximum_num.rs | 17 +-- libm/src/math/generic/fmin.rs | 3 +- libm/src/math/generic/fminimum.rs | 5 +- libm/src/math/generic/fminimum_num.rs | 17 +-- libm/src/math/support/float_traits.rs | 9 ++ libm/src/math/support/macros.rs | 4 +- 11 files changed, 392 insertions(+), 57 deletions(-) diff --git a/libm/src/math/fmin_fmax.rs b/libm/src/math/fmin_fmax.rs index 2947b783e..481301994 100644 --- a/libm/src/math/fmin_fmax.rs +++ b/libm/src/math/fmin_fmax.rs @@ -82,22 +82,77 @@ mod tests { fn fmin_spec_test(f: impl Fn(F, F) -> F) { let cases = [ (F::ZERO, F::ZERO, F::ZERO), - (F::ONE, F::ONE, F::ONE), (F::ZERO, F::ONE, F::ZERO), - (F::ONE, F::ZERO, F::ZERO), (F::ZERO, F::NEG_ONE, F::NEG_ONE), + (F::ZERO, F::INFINITY, F::ZERO), + (F::ZERO, F::NEG_INFINITY, F::NEG_INFINITY), + (F::ZERO, F::NAN, F::ZERO), + (F::ZERO, F::NEG_NAN, F::ZERO), + (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ZERO, F::ONE, F::NEG_ZERO), + (F::NEG_ZERO, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ZERO, F::INFINITY, F::NEG_ZERO), + (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_INFINITY), + (F::NEG_ZERO, F::NAN, F::NEG_ZERO), + (F::NEG_ZERO, F::NEG_NAN, F::NEG_ZERO), + (F::ONE, F::ZERO, F::ZERO), + (F::ONE, F::NEG_ZERO, F::NEG_ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ONE, F::NEG_ONE, F::NEG_ONE), + (F::ONE, F::INFINITY, F::ONE), + (F::ONE, F::NEG_INFINITY, F::NEG_INFINITY), + (F::ONE, F::NAN, F::ONE), + (F::ONE, F::NEG_NAN, F::ONE), (F::NEG_ONE, F::ZERO, F::NEG_ONE), + (F::NEG_ONE, F::NEG_ZERO, F::NEG_ONE), + (F::NEG_ONE, F::ONE, F::NEG_ONE), + (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::INFINITY, F::NEG_ONE), + (F::NEG_ONE, F::NEG_INFINITY, F::NEG_INFINITY), + (F::NEG_ONE, F::NAN, F::NEG_ONE), + (F::NEG_ONE, 
F::NEG_NAN, F::NEG_ONE), (F::INFINITY, F::ZERO, F::ZERO), + (F::INFINITY, F::NEG_ZERO, F::NEG_ZERO), + (F::INFINITY, F::ONE, F::ONE), + (F::INFINITY, F::NEG_ONE, F::NEG_ONE), + (F::INFINITY, F::INFINITY, F::INFINITY), + (F::INFINITY, F::NEG_INFINITY, F::NEG_INFINITY), + (F::INFINITY, F::NAN, F::INFINITY), + (F::INFINITY, F::NEG_NAN, F::INFINITY), (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_INFINITY), + (F::NEG_INFINITY, F::ONE, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NEG_ONE, F::NEG_INFINITY), + (F::NEG_INFINITY, F::INFINITY, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NAN, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NEG_NAN, F::NEG_INFINITY), (F::NAN, F::ZERO, F::ZERO), - (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NEG_ZERO, F::NEG_ZERO), + (F::NAN, F::ONE, F::ONE), + (F::NAN, F::NEG_ONE, F::NEG_ONE), + (F::NAN, F::INFINITY, F::INFINITY), + (F::NAN, F::NEG_INFINITY, F::NEG_INFINITY), (F::NAN, F::NAN, F::NAN), + (F::NEG_NAN, F::ZERO, F::ZERO), + (F::NEG_NAN, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_NAN, F::ONE, F::ONE), + (F::NEG_NAN, F::NEG_ONE, F::NEG_ONE), + (F::NEG_NAN, F::INFINITY, F::INFINITY), + (F::NEG_NAN, F::NEG_INFINITY, F::NEG_INFINITY), ]; for (x, y, res) in cases { let val = f(x, y); assert_biteq!(val, res, "fmin({}, {})", Hexf(x), Hexf(y)); } + + // Ordering between zeros and NaNs does not matter + assert_eq!(f(F::ZERO, F::NEG_ZERO), F::ZERO); + assert_eq!(f(F::NEG_ZERO, F::ZERO), F::ZERO); + assert!(f(F::NAN, F::NEG_NAN).is_nan()); + assert!(f(F::NEG_NAN, F::NAN).is_nan()); + assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan()); } #[test] @@ -125,22 +180,77 @@ mod tests { fn fmax_spec_test(f: impl Fn(F, F) -> F) { let cases = [ (F::ZERO, F::ZERO, F::ZERO), - (F::ONE, F::ONE, F::ONE), (F::ZERO, F::ONE, F::ONE), - (F::ONE, F::ZERO, F::ONE), (F::ZERO, F::NEG_ONE, F::ZERO), + (F::ZERO, F::INFINITY, F::INFINITY), + (F::ZERO, F::NEG_INFINITY, F::ZERO), + (F::ZERO, F::NAN, 
F::ZERO), + (F::ZERO, F::NEG_NAN, F::ZERO), + (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ZERO, F::ONE, F::ONE), + (F::NEG_ZERO, F::NEG_ONE, F::NEG_ZERO), + (F::NEG_ZERO, F::INFINITY, F::INFINITY), + (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_ZERO), + (F::NEG_ZERO, F::NAN, F::NEG_ZERO), + (F::NEG_ZERO, F::NEG_NAN, F::NEG_ZERO), + (F::ONE, F::ZERO, F::ONE), + (F::ONE, F::NEG_ZERO, F::ONE), + (F::ONE, F::ONE, F::ONE), + (F::ONE, F::NEG_ONE, F::ONE), + (F::ONE, F::INFINITY, F::INFINITY), + (F::ONE, F::NEG_INFINITY, F::ONE), + (F::ONE, F::NAN, F::ONE), + (F::ONE, F::NEG_NAN, F::ONE), (F::NEG_ONE, F::ZERO, F::ZERO), + (F::NEG_ONE, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ONE, F::ONE, F::ONE), + (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::INFINITY, F::INFINITY), + (F::NEG_ONE, F::NEG_INFINITY, F::NEG_ONE), + (F::NEG_ONE, F::NAN, F::NEG_ONE), + (F::NEG_ONE, F::NEG_NAN, F::NEG_ONE), (F::INFINITY, F::ZERO, F::INFINITY), + (F::INFINITY, F::NEG_ZERO, F::INFINITY), + (F::INFINITY, F::ONE, F::INFINITY), + (F::INFINITY, F::NEG_ONE, F::INFINITY), + (F::INFINITY, F::INFINITY, F::INFINITY), + (F::INFINITY, F::NEG_INFINITY, F::INFINITY), + (F::INFINITY, F::NAN, F::INFINITY), + (F::INFINITY, F::NEG_NAN, F::INFINITY), (F::NEG_INFINITY, F::ZERO, F::ZERO), + (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_INFINITY, F::ONE, F::ONE), + (F::NEG_INFINITY, F::NEG_ONE, F::NEG_ONE), + (F::NEG_INFINITY, F::INFINITY, F::INFINITY), + (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NAN, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NEG_NAN, F::NEG_INFINITY), (F::NAN, F::ZERO, F::ZERO), - (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NEG_ZERO, F::NEG_ZERO), + (F::NAN, F::ONE, F::ONE), + (F::NAN, F::NEG_ONE, F::NEG_ONE), + (F::NAN, F::INFINITY, F::INFINITY), + (F::NAN, F::NEG_INFINITY, F::NEG_INFINITY), (F::NAN, F::NAN, F::NAN), + (F::NEG_NAN, F::ZERO, F::ZERO), + (F::NEG_NAN, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_NAN, F::ONE, F::ONE), + (F::NEG_NAN, F::NEG_ONE, 
F::NEG_ONE), + (F::NEG_NAN, F::INFINITY, F::INFINITY), + (F::NEG_NAN, F::NEG_INFINITY, F::NEG_INFINITY), ]; for (x, y, res) in cases { let val = f(x, y); assert_biteq!(val, res, "fmax({}, {})", Hexf(x), Hexf(y)); } + + // Ordering between zeros and NaNs does not matter + assert_eq!(f(F::ZERO, F::NEG_ZERO), F::ZERO); + assert_eq!(f(F::NEG_ZERO, F::ZERO), F::ZERO); + assert!(f(F::NAN, F::NEG_NAN).is_nan()); + assert!(f(F::NEG_NAN, F::NAN).is_nan()); + assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan()); } #[test] diff --git a/libm/src/math/fminimum_fmaximum.rs b/libm/src/math/fminimum_fmaximum.rs index b7999e273..8f1308670 100644 --- a/libm/src/math/fminimum_fmaximum.rs +++ b/libm/src/math/fminimum_fmaximum.rs @@ -74,24 +74,77 @@ mod tests { fn fminimum_spec_test(f: impl Fn(F, F) -> F) { let cases = [ (F::ZERO, F::ZERO, F::ZERO), - (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::NEG_ZERO, F::NEG_ZERO), (F::ZERO, F::ONE, F::ZERO), - (F::ONE, F::ZERO, F::ZERO), (F::ZERO, F::NEG_ONE, F::NEG_ONE), + (F::ZERO, F::INFINITY, F::ZERO), + (F::ZERO, F::NEG_INFINITY, F::NEG_INFINITY), + (F::ZERO, F::NAN, F::NAN), + (F::NEG_ZERO, F::ZERO, F::NEG_ZERO), + (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ZERO, F::ONE, F::NEG_ZERO), + (F::NEG_ZERO, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ZERO, F::INFINITY, F::NEG_ZERO), + (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_INFINITY), + (F::NEG_ZERO, F::NAN, F::NAN), + (F::ONE, F::ZERO, F::ZERO), + (F::ONE, F::NEG_ZERO, F::NEG_ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ONE, F::NEG_ONE, F::NEG_ONE), + (F::ONE, F::INFINITY, F::ONE), + (F::ONE, F::NEG_INFINITY, F::NEG_INFINITY), + (F::ONE, F::NAN, F::NAN), (F::NEG_ONE, F::ZERO, F::NEG_ONE), + (F::NEG_ONE, F::NEG_ZERO, F::NEG_ONE), + (F::NEG_ONE, F::ONE, F::NEG_ONE), + (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::INFINITY, F::NEG_ONE), + (F::NEG_ONE, F::NEG_INFINITY, F::NEG_INFINITY), + (F::NEG_ONE, F::NAN, F::NAN), (F::INFINITY, F::ZERO, F::ZERO), + (F::INFINITY, F::NEG_ZERO, F::NEG_ZERO), + (F::INFINITY, 
F::ONE, F::ONE), + (F::INFINITY, F::NEG_ONE, F::NEG_ONE), + (F::INFINITY, F::INFINITY, F::INFINITY), + (F::INFINITY, F::NEG_INFINITY, F::NEG_INFINITY), + (F::INFINITY, F::NAN, F::NAN), (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_INFINITY), + (F::NEG_INFINITY, F::ONE, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NEG_ONE, F::NEG_INFINITY), + (F::NEG_INFINITY, F::INFINITY, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NAN, F::NAN), (F::NAN, F::ZERO, F::NAN), - (F::ZERO, F::NAN, F::NAN), + (F::NAN, F::NEG_ZERO, F::NAN), + (F::NAN, F::ONE, F::NAN), + (F::NAN, F::NEG_ONE, F::NAN), + (F::NAN, F::INFINITY, F::NAN), + (F::NAN, F::NEG_INFINITY, F::NAN), (F::NAN, F::NAN, F::NAN), - (F::ZERO, F::NEG_ZERO, F::NEG_ZERO), - (F::NEG_ZERO, F::ZERO, F::NEG_ZERO), ]; for (x, y, res) in cases { let val = f(x, y); assert_biteq!(val, res, "fminimum({}, {})", Hexf(x), Hexf(y)); } + + // Ordering between NaNs does not matter + assert!(f(F::NAN, F::NEG_NAN).is_nan()); + assert!(f(F::NEG_NAN, F::NAN).is_nan()); + assert!(f(F::ZERO, F::NEG_NAN).is_nan()); + assert!(f(F::NEG_ZERO, F::NEG_NAN).is_nan()); + assert!(f(F::ONE, F::NEG_NAN).is_nan()); + assert!(f(F::NEG_ONE, F::NEG_NAN).is_nan()); + assert!(f(F::INFINITY, F::NEG_NAN).is_nan()); + assert!(f(F::NEG_INFINITY, F::NEG_NAN).is_nan()); + assert!(f(F::NEG_NAN, F::ZERO).is_nan()); + assert!(f(F::NEG_NAN, F::NEG_ZERO).is_nan()); + assert!(f(F::NEG_NAN, F::ONE).is_nan()); + assert!(f(F::NEG_NAN, F::NEG_ONE).is_nan()); + assert!(f(F::NEG_NAN, F::INFINITY).is_nan()); + assert!(f(F::NEG_NAN, F::NEG_INFINITY).is_nan()); + assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan()); } #[test] @@ -119,24 +172,77 @@ mod tests { fn fmaximum_spec_test(f: impl Fn(F, F) -> F) { let cases = [ (F::ZERO, F::ZERO, F::ZERO), - (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::NEG_ZERO, F::ZERO), (F::ZERO, F::ONE, F::ONE), - (F::ONE, F::ZERO, F::ONE), (F::ZERO, F::NEG_ONE, F::ZERO), + (F::ZERO, 
F::INFINITY, F::INFINITY), + (F::ZERO, F::NEG_INFINITY, F::ZERO), + (F::ZERO, F::NAN, F::NAN), + (F::NEG_ZERO, F::ZERO, F::ZERO), + (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ZERO, F::ONE, F::ONE), + (F::NEG_ZERO, F::NEG_ONE, F::NEG_ZERO), + (F::NEG_ZERO, F::INFINITY, F::INFINITY), + (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_ZERO), + (F::NEG_ZERO, F::NAN, F::NAN), + (F::ONE, F::ZERO, F::ONE), + (F::ONE, F::NEG_ZERO, F::ONE), + (F::ONE, F::ONE, F::ONE), + (F::ONE, F::NEG_ONE, F::ONE), + (F::ONE, F::INFINITY, F::INFINITY), + (F::ONE, F::NEG_INFINITY, F::ONE), + (F::ONE, F::NAN, F::NAN), (F::NEG_ONE, F::ZERO, F::ZERO), + (F::NEG_ONE, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ONE, F::ONE, F::ONE), + (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::INFINITY, F::INFINITY), + (F::NEG_ONE, F::NEG_INFINITY, F::NEG_ONE), + (F::NEG_ONE, F::NAN, F::NAN), (F::INFINITY, F::ZERO, F::INFINITY), + (F::INFINITY, F::NEG_ZERO, F::INFINITY), + (F::INFINITY, F::ONE, F::INFINITY), + (F::INFINITY, F::NEG_ONE, F::INFINITY), + (F::INFINITY, F::INFINITY, F::INFINITY), + (F::INFINITY, F::NEG_INFINITY, F::INFINITY), + (F::INFINITY, F::NAN, F::NAN), (F::NEG_INFINITY, F::ZERO, F::ZERO), + (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_INFINITY, F::ONE, F::ONE), + (F::NEG_INFINITY, F::NEG_ONE, F::NEG_ONE), + (F::NEG_INFINITY, F::INFINITY, F::INFINITY), + (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NAN, F::NAN), (F::NAN, F::ZERO, F::NAN), - (F::ZERO, F::NAN, F::NAN), + (F::NAN, F::NEG_ZERO, F::NAN), + (F::NAN, F::ONE, F::NAN), + (F::NAN, F::NEG_ONE, F::NAN), + (F::NAN, F::INFINITY, F::NAN), + (F::NAN, F::NEG_INFINITY, F::NAN), (F::NAN, F::NAN, F::NAN), - (F::ZERO, F::NEG_ZERO, F::ZERO), - (F::NEG_ZERO, F::ZERO, F::ZERO), ]; for (x, y, res) in cases { let val = f(x, y); assert_biteq!(val, res, "fmaximum({}, {})", Hexf(x), Hexf(y)); } + + // Ordering between NaNs does not matter + assert!(f(F::NAN, F::NEG_NAN).is_nan()); + assert!(f(F::NEG_NAN, 
F::NAN).is_nan()); + assert!(f(F::ZERO, F::NEG_NAN).is_nan()); + assert!(f(F::NEG_ZERO, F::NEG_NAN).is_nan()); + assert!(f(F::ONE, F::NEG_NAN).is_nan()); + assert!(f(F::NEG_ONE, F::NEG_NAN).is_nan()); + assert!(f(F::INFINITY, F::NEG_NAN).is_nan()); + assert!(f(F::NEG_INFINITY, F::NEG_NAN).is_nan()); + assert!(f(F::NEG_NAN, F::ZERO).is_nan()); + assert!(f(F::NEG_NAN, F::NEG_ZERO).is_nan()); + assert!(f(F::NEG_NAN, F::ONE).is_nan()); + assert!(f(F::NEG_NAN, F::NEG_ONE).is_nan()); + assert!(f(F::NEG_NAN, F::INFINITY).is_nan()); + assert!(f(F::NEG_NAN, F::NEG_INFINITY).is_nan()); + assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan()); } #[test] diff --git a/libm/src/math/fminimum_fmaximum_num.rs b/libm/src/math/fminimum_fmaximum_num.rs index 180d21f72..fadf93418 100644 --- a/libm/src/math/fminimum_fmaximum_num.rs +++ b/libm/src/math/fminimum_fmaximum_num.rs @@ -74,24 +74,77 @@ mod tests { fn fminimum_num_spec_test(f: impl Fn(F, F) -> F) { let cases = [ (F::ZERO, F::ZERO, F::ZERO), - (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::NEG_ZERO, F::NEG_ZERO), (F::ZERO, F::ONE, F::ZERO), - (F::ONE, F::ZERO, F::ZERO), (F::ZERO, F::NEG_ONE, F::NEG_ONE), + (F::ZERO, F::INFINITY, F::ZERO), + (F::ZERO, F::NEG_INFINITY, F::NEG_INFINITY), + (F::ZERO, F::NAN, F::ZERO), + (F::ZERO, F::NEG_NAN, F::ZERO), + (F::NEG_ZERO, F::ZERO, F::NEG_ZERO), + (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ZERO, F::ONE, F::NEG_ZERO), + (F::NEG_ZERO, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ZERO, F::INFINITY, F::NEG_ZERO), + (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_INFINITY), + (F::NEG_ZERO, F::NAN, F::NEG_ZERO), + (F::NEG_ZERO, F::NEG_NAN, F::NEG_ZERO), + (F::ONE, F::ZERO, F::ZERO), + (F::ONE, F::NEG_ZERO, F::NEG_ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ONE, F::NEG_ONE, F::NEG_ONE), + (F::ONE, F::INFINITY, F::ONE), + (F::ONE, F::NEG_INFINITY, F::NEG_INFINITY), + (F::ONE, F::NAN, F::ONE), + (F::ONE, F::NEG_NAN, F::ONE), (F::NEG_ONE, F::ZERO, F::NEG_ONE), + (F::NEG_ONE, F::NEG_ZERO, F::NEG_ONE), + (F::NEG_ONE, F::ONE, 
F::NEG_ONE), + (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::INFINITY, F::NEG_ONE), + (F::NEG_ONE, F::NEG_INFINITY, F::NEG_INFINITY), + (F::NEG_ONE, F::NAN, F::NEG_ONE), + (F::NEG_ONE, F::NEG_NAN, F::NEG_ONE), (F::INFINITY, F::ZERO, F::ZERO), + (F::INFINITY, F::NEG_ZERO, F::NEG_ZERO), + (F::INFINITY, F::ONE, F::ONE), + (F::INFINITY, F::NEG_ONE, F::NEG_ONE), + (F::INFINITY, F::INFINITY, F::INFINITY), + (F::INFINITY, F::NEG_INFINITY, F::NEG_INFINITY), + (F::INFINITY, F::NAN, F::INFINITY), + (F::INFINITY, F::NEG_NAN, F::INFINITY), (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_INFINITY), + (F::NEG_INFINITY, F::ONE, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NEG_ONE, F::NEG_INFINITY), + (F::NEG_INFINITY, F::INFINITY, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NAN, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NEG_NAN, F::NEG_INFINITY), (F::NAN, F::ZERO, F::ZERO), - (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NEG_ZERO, F::NEG_ZERO), + (F::NAN, F::ONE, F::ONE), + (F::NAN, F::NEG_ONE, F::NEG_ONE), + (F::NAN, F::INFINITY, F::INFINITY), + (F::NAN, F::NEG_INFINITY, F::NEG_INFINITY), (F::NAN, F::NAN, F::NAN), - (F::ZERO, F::NEG_ZERO, F::NEG_ZERO), - (F::NEG_ZERO, F::ZERO, F::NEG_ZERO), + (F::NEG_NAN, F::ZERO, F::ZERO), + (F::NEG_NAN, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_NAN, F::ONE, F::ONE), + (F::NEG_NAN, F::NEG_ONE, F::NEG_ONE), + (F::NEG_NAN, F::INFINITY, F::INFINITY), + (F::NEG_NAN, F::NEG_INFINITY, F::NEG_INFINITY), ]; - for (x, y, res) in cases { - let val = f(x, y); - assert_biteq!(val, res, "fminimum_num({}, {})", Hexf(x), Hexf(y)); + for (x, y, expected) in cases { + let actual = f(x, y); + assert_biteq!(actual, expected, "fminimum_num({}, {})", Hexf(x), Hexf(y)); } + + // Ordering between NaNs does not matter + assert!(f(F::NAN, F::NEG_NAN).is_nan()); + assert!(f(F::NEG_NAN, F::NAN).is_nan()); + assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan()); } #[test] @@ -119,24 +172,77 @@ mod 
tests { fn fmaximum_num_spec_test(f: impl Fn(F, F) -> F) { let cases = [ (F::ZERO, F::ZERO, F::ZERO), - (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::NEG_ZERO, F::ZERO), (F::ZERO, F::ONE, F::ONE), - (F::ONE, F::ZERO, F::ONE), (F::ZERO, F::NEG_ONE, F::ZERO), + (F::ZERO, F::INFINITY, F::INFINITY), + (F::ZERO, F::NEG_INFINITY, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::ZERO, F::NEG_NAN, F::ZERO), + (F::NEG_ZERO, F::ZERO, F::ZERO), + (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ZERO, F::ONE, F::ONE), + (F::NEG_ZERO, F::NEG_ONE, F::NEG_ZERO), + (F::NEG_ZERO, F::INFINITY, F::INFINITY), + (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_ZERO), + (F::NEG_ZERO, F::NAN, F::NEG_ZERO), + (F::NEG_ZERO, F::NEG_NAN, F::NEG_ZERO), + (F::ONE, F::ZERO, F::ONE), + (F::ONE, F::NEG_ZERO, F::ONE), + (F::ONE, F::ONE, F::ONE), + (F::ONE, F::NEG_ONE, F::ONE), + (F::ONE, F::INFINITY, F::INFINITY), + (F::ONE, F::NEG_INFINITY, F::ONE), + (F::ONE, F::NAN, F::ONE), + (F::ONE, F::NEG_NAN, F::ONE), (F::NEG_ONE, F::ZERO, F::ZERO), + (F::NEG_ONE, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ONE, F::ONE, F::ONE), + (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::INFINITY, F::INFINITY), + (F::NEG_ONE, F::NEG_INFINITY, F::NEG_ONE), + (F::NEG_ONE, F::NAN, F::NEG_ONE), + (F::NEG_ONE, F::NEG_NAN, F::NEG_ONE), (F::INFINITY, F::ZERO, F::INFINITY), + (F::INFINITY, F::NEG_ZERO, F::INFINITY), + (F::INFINITY, F::ONE, F::INFINITY), + (F::INFINITY, F::NEG_ONE, F::INFINITY), + (F::INFINITY, F::INFINITY, F::INFINITY), + (F::INFINITY, F::NEG_INFINITY, F::INFINITY), + (F::INFINITY, F::NAN, F::INFINITY), + (F::INFINITY, F::NEG_NAN, F::INFINITY), (F::NEG_INFINITY, F::ZERO, F::ZERO), + (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_INFINITY, F::ONE, F::ONE), + (F::NEG_INFINITY, F::NEG_ONE, F::NEG_ONE), + (F::NEG_INFINITY, F::INFINITY, F::INFINITY), + (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NAN, F::NEG_INFINITY), + (F::NEG_INFINITY, F::NEG_NAN, F::NEG_INFINITY), (F::NAN, F::ZERO, 
F::ZERO), - (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NEG_ZERO, F::NEG_ZERO), + (F::NAN, F::ONE, F::ONE), + (F::NAN, F::NEG_ONE, F::NEG_ONE), + (F::NAN, F::INFINITY, F::INFINITY), + (F::NAN, F::NEG_INFINITY, F::NEG_INFINITY), (F::NAN, F::NAN, F::NAN), - (F::ZERO, F::NEG_ZERO, F::ZERO), - (F::NEG_ZERO, F::ZERO, F::ZERO), + (F::NEG_NAN, F::ZERO, F::ZERO), + (F::NEG_NAN, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_NAN, F::ONE, F::ONE), + (F::NEG_NAN, F::NEG_ONE, F::NEG_ONE), + (F::NEG_NAN, F::INFINITY, F::INFINITY), + (F::NEG_NAN, F::NEG_INFINITY, F::NEG_INFINITY), ]; - for (x, y, res) in cases { - let val = f(x, y); - assert_biteq!(val, res, "fmaximum_num({}, {})", Hexf(x), Hexf(y)); + for (x, y, expected) in cases { + let actual = f(x, y); + assert_biteq!(actual, expected, "fmaximum_num({}, {})", Hexf(x), Hexf(y)); } + + // Ordering between NaNs does not matter + assert!(f(F::NAN, F::NEG_NAN).is_nan()); + assert!(f(F::NEG_NAN, F::NAN).is_nan()); + assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan()); } #[test] diff --git a/libm/src/math/generic/fmax.rs b/libm/src/math/generic/fmax.rs index 54207e4b3..b05804704 100644 --- a/libm/src/math/generic/fmax.rs +++ b/libm/src/math/generic/fmax.rs @@ -19,6 +19,5 @@ use crate::support::Float; #[inline] pub fn fmax(x: F, y: F) -> F { let res = if x.is_nan() || x < y { y } else { x }; - // Canonicalize - res * F::ONE + res.canonicalize() } diff --git a/libm/src/math/generic/fmaximum.rs b/libm/src/math/generic/fmaximum.rs index 898828b80..55a031e18 100644 --- a/libm/src/math/generic/fmaximum.rs +++ b/libm/src/math/generic/fmaximum.rs @@ -4,8 +4,8 @@ //! Per the spec, returns the canonicalized result of: //! - `x` if `x > y` //! - `y` if `y > x` +//! - +0.0 if x and y are zero with opposite signs //! - qNaN if either operation is NaN -//! - Logic following +0.0 > -0.0 //! //! Excluded from our implementation is sNaN handling. 
@@ -23,6 +23,5 @@ pub fn fmaximum(x: F, y: F) -> F { y }; - // Canonicalize - res * F::ONE + res.canonicalize() } diff --git a/libm/src/math/generic/fmaximum_num.rs b/libm/src/math/generic/fmaximum_num.rs index 05df6cbd4..2dc60b2d2 100644 --- a/libm/src/math/generic/fmaximum_num.rs +++ b/libm/src/math/generic/fmaximum_num.rs @@ -4,10 +4,10 @@ //! Per the spec, returns: //! - `x` if `x > y` //! - `y` if `y > x` -//! - Non-NaN if one operand is NaN -//! - Logic following +0.0 > -0.0 +//! - +0.0 if x and y are zero with opposite signs //! - Either `x` or `y` if `x == y` and the signs are the same -//! - qNaN if either operand is a NaN +//! - Non-NaN if one operand is NaN +//! - qNaN if both operands are NaNs //! //! Excluded from our implementation is sNaN handling. @@ -15,12 +15,15 @@ use crate::support::Float; #[inline] pub fn fmaximum_num(x: F, y: F) -> F { - let res = if x.is_nan() || x < y || (x.biteq(F::NEG_ZERO) && y.is_sign_positive()) { + let res = if x > y || y.is_nan() { + x + } else if y > x || x.is_nan() { y - } else { + } else if x.is_sign_positive() { x + } else { + y }; - // Canonicalize - res * F::ONE + res.canonicalize() } diff --git a/libm/src/math/generic/fmin.rs b/libm/src/math/generic/fmin.rs index 0f86364d2..e2245bf9e 100644 --- a/libm/src/math/generic/fmin.rs +++ b/libm/src/math/generic/fmin.rs @@ -19,6 +19,5 @@ use crate::support::Float; #[inline] pub fn fmin(x: F, y: F) -> F { let res = if y.is_nan() || x < y { x } else { y }; - // Canonicalize - res * F::ONE + res.canonicalize() } diff --git a/libm/src/math/generic/fminimum.rs b/libm/src/math/generic/fminimum.rs index 8592ac546..aa68b1291 100644 --- a/libm/src/math/generic/fminimum.rs +++ b/libm/src/math/generic/fminimum.rs @@ -4,8 +4,8 @@ //! Per the spec, returns the canonicalized result of: //! - `x` if `x < y` //! - `y` if `y < x` +//! - -0.0 if x and y are zero with opposite signs //! - qNaN if either operation is NaN -//! - Logic following +0.0 > -0.0 //! //!
Excluded from our implementation is sNaN handling. @@ -23,6 +23,5 @@ pub fn fminimum(x: F, y: F) -> F { y }; - // Canonicalize - res * F::ONE + res.canonicalize() } diff --git a/libm/src/math/generic/fminimum_num.rs b/libm/src/math/generic/fminimum_num.rs index 6777bbf87..265bd4605 100644 --- a/libm/src/math/generic/fminimum_num.rs +++ b/libm/src/math/generic/fminimum_num.rs @@ -4,10 +4,10 @@ //! Per the spec, returns: //! - `x` if `x < y` //! - `y` if `y < x` -//! - Non-NaN if one operand is NaN -//! - Logic following +0.0 > -0.0 +//! - -0.0 if x and y are zero with opposite signs //! - Either `x` or `y` if `x == y` and the signs are the same -//! - qNaN if either operand is a NaN +//! - Non-NaN if one operand is NaN +//! - qNaN if both operands are NaNs //! //! Excluded from our implementation is sNaN handling. @@ -15,12 +15,15 @@ use crate::support::Float; #[inline] pub fn fminimum_num(x: F, y: F) -> F { - let res = if y.is_nan() || x < y || (x.biteq(F::NEG_ZERO) && y.is_sign_positive()) { + let res = if x > y || x.is_nan() { + y + } else if y > x || y.is_nan() { x - } else { + } else if x.is_sign_positive() { y + } else { + x }; - // Canonicalize - res * F::ONE + res.canonicalize() } diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index dd9f46209..c3e7eeec2 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -190,6 +190,15 @@ pub trait Float: Self::ONE.copysign(self) } } + + /// Make a best-effort attempt to canonicalize the number. Note that this is allowed + /// to be a nop and does not always quiet sNaNs. + fn canonicalize(self) -> Self { + // FIXME: LLVM often removes this. We should determine whether we can remove the operation, + // or switch to something based on `llvm.canonicalize` (which has crashes, + // ). + self * Self::ONE + } } /// Access the associated `Int` type from a float (helper to avoid ambiguous associated types).
diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs index 2b8fd580a..550d2e92e 100644 --- a/libm/src/math/support/macros.rs +++ b/libm/src/math/support/macros.rs @@ -143,10 +143,12 @@ macro_rules! assert_biteq { let bits = $crate::support::Int::leading_zeros(l.to_bits() - l.to_bits()); assert!( $crate::support::Float::biteq(l, r), - "{}\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})", + "{}\nl: {l:?} ({lb:#0width$x} {lh})\nr: {r:?} ({rb:#0width$x} {rh})", format_args!($($tt)*), lb = l.to_bits(), + lh = $crate::support::Hexf(l), rb = r.to_bits(), + rh = $crate::support::Hexf(r), width = ((bits / 4) + 2) as usize, ); From 8fe6945fca528aaded3a464058f44294caab7953 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 13 Jun 2025 15:42:06 +0000 Subject: [PATCH 1396/1459] Clean up and sort manifest keys Use a consistent ordering for top-level manifest keys, and remove those that are now redundant (`homepage` isn't supposed to be the same as `repository`, and `documentation` automatically points to docs.rs now). --- compiler-builtins/Cargo.toml | 9 +++------ libm/Cargo.toml | 10 ++++------ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index eabb3d625..22e240099 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -1,14 +1,11 @@ [package] -authors = ["Jorge Aparicio "] name = "compiler_builtins" version = "0.1.160" -license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" -readme = "README.md" +authors = ["Jorge Aparicio "] +description = "Compiler intrinsics used by the Rust compiler." repository = "https://github.com/rust-lang/compiler-builtins" -homepage = "https://github.com/rust-lang/compiler-builtins" -documentation = "https://docs.rs/compiler_builtins" +license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" edition = "2024" -description = "Compiler intrinsics used by the Rust compiler."
links = "compiler-rt" [lib] diff --git a/libm/Cargo.toml b/libm/Cargo.toml index b6fb5efcf..63b4d3c27 100644 --- a/libm/Cargo.toml +++ b/libm/Cargo.toml @@ -1,14 +1,12 @@ [package] +name = "libm" +version = "0.2.15" authors = ["Jorge Aparicio "] -categories = ["no-std"] description = "libm in pure Rust" -documentation = "https://docs.rs/libm" +categories = ["no-std"] keywords = ["libm", "math"] -license = "MIT" -name = "libm" -readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" -version = "0.2.15" +license = "MIT" edition = "2021" rust-version = "1.63" From baa4d3f1492e61ee9c08f52b6cf8e8298a6daa33 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 13 Jun 2025 15:44:45 +0000 Subject: [PATCH 1397/1459] Mark compiler-builtins as `publish = false` Now that this repository is a subtree, we have no need to continue publishing `compiler-builtins`. --- compiler-builtins/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index 22e240099..dffdcaf94 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -6,6 +6,7 @@ description = "Compiler intrinsics used by the Rust compiler." repository = "https://github.com/rust-lang/compiler-builtins" license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" edition = "2024" +publish = false links = "compiler-rt" [lib] From a18db849f771c94feae009fd6eba39ed571b3756 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 14 Jun 2025 03:38:53 +0000 Subject: [PATCH 1398/1459] Delete `.release-plz.toml` The config file is not needed anymore since compiler-builtins is no longer published. Removing it will resolve a CI failure. 
--- .release-plz.toml | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 .release-plz.toml diff --git a/.release-plz.toml b/.release-plz.toml deleted file mode 100644 index 8023ade9b..000000000 --- a/.release-plz.toml +++ /dev/null @@ -1,13 +0,0 @@ -[workspace] -# As part of the release process, we delete `libm/Cargo.toml`. Since -# this is only run in CI, we shouldn't need to worry about it. -allow_dirty = true -publish_allow_dirty = true - -[[package]] -name = "compiler_builtins" -semver_check = false -changelog_include = ["libm"] # libm is included as part of builtins - -[[package]] -name = "libm" From fc6b151597c855d3e6f466fdeea945625b29648f Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 14 Jun 2025 04:25:55 +0000 Subject: [PATCH 1399/1459] Update the upstream Rust version To prepare for merging from rust-lang/rust, set the version file to: d087f112b7 Auto merge of #134841 - estebank:serde-attr-4, r=wesleywiser --- rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-version b/rust-version index e05aaa057..731839835 100644 --- a/rust-version +++ b/rust-version @@ -1 +1 @@ -df8102fe5f24f28a918660b0cd918d7331c3896e +d087f112b7d1323446c7b39a8b616aee7fa56b3d From 7c46e921c1174e241ab35ec09c76e10867292633 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 14 Jun 2025 06:23:24 +0000 Subject: [PATCH 1400/1459] Work around out-of-tree testing with a shim crate Out-of-tree testing is broken with the most recent update from rust-lang/rust because it makes `compiler-builtins` depend on `core` by path, which isn't usually available. In order to enable testing outside of rust-lang/rust, add a new crate `builtins-shim` that uses the same source as `compiler-builtins` but drops the `core` dependency. This has replaced `compiler-builtins` as the workspace member and entrypoint for tests. 
--- Cargo.toml | 8 +++- builtins-shim/Cargo.toml | 63 +++++++++++++++++++++++++++++ builtins-test-intrinsics/Cargo.toml | 2 +- builtins-test/Cargo.toml | 2 +- compiler-builtins/Cargo.toml | 6 +++ 5 files changed, 77 insertions(+), 4 deletions(-) create mode 100644 builtins-shim/Cargo.toml diff --git a/Cargo.toml b/Cargo.toml index fb638f2fb..41350c6cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,8 @@ [workspace] resolver = "2" members = [ + "builtins-shim", "builtins-test", - "compiler-builtins", "crates/josh-sync", "crates/libm-macros", "crates/musl-math-sys", @@ -14,8 +14,8 @@ members = [ ] default-members = [ + "builtins-shim", "builtins-test", - "compiler-builtins", "crates/libm-macros", "libm", "libm-test", @@ -26,6 +26,10 @@ exclude = [ # and `mangled-names` disabled, which is the opposite of what is needed for # other tests, so it makes sense to keep it out of the workspace. "builtins-test-intrinsics", + # We test via the `builtins-shim` crate, so exclude the `compiler-builtins` + # that has a dependency on `core`. See `builtins-shim/Cargo.toml` for more + # details. + "compiler-builtins", ] [profile.release] diff --git a/builtins-shim/Cargo.toml b/builtins-shim/Cargo.toml new file mode 100644 index 000000000..8eb880c6f --- /dev/null +++ b/builtins-shim/Cargo.toml @@ -0,0 +1,63 @@ +# NOTE: Must be kept in sync with `../compiler-builtins/Cargo.toml`. +# +# The manifest at `../compiler-builtins` is what actually gets used in the +# rust-lang/rust tree; however, we can't build it out of tree because it +# depends on `core` by path, and even optional Cargo dependencies need to be +# available at build time. 
So, we work around this by having this "shim" +# manifest that is identical except for the `core` dependency and forwards +# to the same sources, which acts as the `compiler-builtins` Cargo entrypoint +# for out of tree testing + +[package] +name = "compiler_builtins" +version = "0.1.160" +authors = ["Jorge Aparicio "] +description = "Compiler intrinsics used by the Rust compiler." +repository = "https://github.com/rust-lang/compiler-builtins" +license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" +edition = "2024" +publish = false +links = "compiler-rt" + +build = "../compiler-builtins/build.rs" + +[lib] +path = "../compiler-builtins/src/lib.rs" +bench = false +doctest = false +test = false + +[build-dependencies] +cc = { optional = true, version = "1.2" } + +[features] +default = ["compiler-builtins"] + +# Enable compilation of C code in compiler-rt, filling in some more optimized +# implementations and also filling in unimplemented intrinsics +c = ["dep:cc"] + +# Workaround for the Cranelift codegen backend. Disables any implementations +# which use inline assembly and fall back to pure Rust versions (if available). +no-asm = [] + +# Workaround for codegen backends which haven't yet implemented `f16` and +# `f128` support. Disabled any intrinsics which use those types. +no-f16-f128 = [] + +# Flag this library as the unstable compiler-builtins lib +compiler-builtins = [] + +# Generate memory-related intrinsics like memcpy +mem = [] + +# Mangle all names so this can be linked in with other versions or other +# compiler-rt implementations. 
Also used for testing +mangled-names = [] + +# Only used in the compiler's build system +rustc-dep-of-std = ["compiler-builtins"] + +# This makes certain traits and function specializations public that +# are not normally public but are required by the `builtins-test` +unstable-public-internals = [] diff --git a/builtins-test-intrinsics/Cargo.toml b/builtins-test-intrinsics/Cargo.toml index 064b7cad2..e73a1f7b1 100644 --- a/builtins-test-intrinsics/Cargo.toml +++ b/builtins-test-intrinsics/Cargo.toml @@ -6,7 +6,7 @@ publish = false license = "MIT OR Apache-2.0" [dependencies] -compiler_builtins = { path = "../compiler-builtins", features = ["compiler-builtins"] } +compiler_builtins = { path = "../builtins-shim", features = ["compiler-builtins"] } panic-handler = { path = "../crates/panic-handler" } [features] diff --git a/builtins-test/Cargo.toml b/builtins-test/Cargo.toml index c7742aa24..093d4633f 100644 --- a/builtins-test/Cargo.toml +++ b/builtins-test/Cargo.toml @@ -17,7 +17,7 @@ rustc_apfloat = "0.2.2" iai-callgrind = { version = "0.14.1", optional = true } [dependencies.compiler_builtins] -path = "../compiler-builtins" +path = "../builtins-shim" default-features = false features = ["unstable-public-internals"] diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index 7276a6851..c5446cd76 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -1,3 +1,9 @@ +# NOTE: Must be kept in sync with `../builtins-shim/Cargo.toml`. +# +# This manifest is actually used in-tree by rust-lang/rust, +# `../builtins-shim/Cargo.toml` is used by out-of-tree testing. See the other +# manifest for further details. 
+ [package] name = "compiler_builtins" version = "0.1.160" From 1e2ebebd36d733c0af067187f310d5dc541e782e Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sat, 14 Jun 2025 12:08:25 +0200 Subject: [PATCH 1401/1459] use `is_multiple_of` to check if an addr is aligned --- compiler-builtins/src/arm.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/compiler-builtins/src/arm.rs b/compiler-builtins/src/arm.rs index a7d84e49b..617cc8e50 100644 --- a/compiler-builtins/src/arm.rs +++ b/compiler-builtins/src/arm.rs @@ -135,8 +135,8 @@ intrinsics! { /// eight bytes. #[cfg(not(target_vendor = "apple"))] pub unsafe extern "aapcs" fn __aeabi_memcpy8(dst: *mut u8, src: *const u8, n: usize) { - debug_assert!(dst.addr() & 7 == 0); - debug_assert!(src.addr() & 7 == 0); + debug_assert!(dst.addr().is_multiple_of(8)); + debug_assert!(src.addr().is_multiple_of(8)); // SAFETY: memcpy preconditions apply, less strict alignment. unsafe { __aeabi_memcpy4(dst, src, n) }; @@ -161,8 +161,8 @@ intrinsics! { /// four bytes. #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))] pub unsafe extern "aapcs" fn __aeabi_memmove4(dst: *mut u8, src: *const u8, n: usize) { - debug_assert!(dst.addr() & 3 == 0); - debug_assert!(src.addr() & 3 == 0); + debug_assert!(dst.addr().is_multiple_of(4)); + debug_assert!(src.addr().is_multiple_of(4)); // SAFETY: same preconditions, less strict aligment. unsafe { __aeabi_memmove(dst, src, n) }; @@ -176,8 +176,8 @@ intrinsics! { /// eight bytes. #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))] pub unsafe extern "aapcs" fn __aeabi_memmove8(dst: *mut u8, src: *const u8, n: usize) { - debug_assert!(dst.addr() & 7 == 0); - debug_assert!(src.addr() & 7 == 0); + debug_assert!(dst.addr().is_multiple_of(8)); + debug_assert!(src.addr().is_multiple_of(8)); // SAFETY: memmove preconditions apply, less strict alignment. unsafe { __aeabi_memmove(dst, src, n) }; @@ -236,7 +236,7 @@ intrinsics! { /// eight bytes. 
#[cfg(not(target_vendor = "apple"))] pub unsafe extern "aapcs" fn __aeabi_memset8(dst: *mut u8, n: usize, c: i32) { - debug_assert!(dst.addr() & 7 == 0); + debug_assert!(dst.addr().is_multiple_of(8)); // SAFETY: memset preconditions apply, less strict alignment. unsafe { __aeabi_memset4(dst, n, c) }; @@ -261,7 +261,7 @@ intrinsics! { /// four bytes. #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))] pub unsafe extern "aapcs" fn __aeabi_memclr4(dst: *mut u8, n: usize) { - debug_assert!(dst.addr() & 3 == 0); + debug_assert!(dst.addr().is_multiple_of(4)); // SAFETY: memclr preconditions apply, less strict alignment. unsafe { __aeabi_memset4(dst, n, 0) }; @@ -275,7 +275,7 @@ intrinsics! { /// eight bytes. #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))] pub unsafe extern "aapcs" fn __aeabi_memclr8(dst: *mut u8, n: usize) { - debug_assert!(dst.addr() & 7 == 0); + debug_assert!(dst.addr().is_multiple_of(8)); // SAFETY: memclr preconditions apply, less strict alignment. unsafe { __aeabi_memset4(dst, n, 0) }; From 64b37a871033a0232cd062ffba3a0a77c3263817 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sat, 14 Jun 2025 20:17:23 +0200 Subject: [PATCH 1402/1459] use `extern "custom"` on naked functions with a custom calling convention --- compiler-builtins/src/aarch64.rs | 2 +- compiler-builtins/src/arm.rs | 13 ++++++------- compiler-builtins/src/int/udiv.rs | 2 +- compiler-builtins/src/lib.rs | 1 + compiler-builtins/src/probestack.rs | 15 +++------------ compiler-builtins/src/x86.rs | 10 +++++----- compiler-builtins/src/x86_64.rs | 4 ++-- 7 files changed, 19 insertions(+), 28 deletions(-) diff --git a/compiler-builtins/src/aarch64.rs b/compiler-builtins/src/aarch64.rs index 80392187c..a72b30d29 100644 --- a/compiler-builtins/src/aarch64.rs +++ b/compiler-builtins/src/aarch64.rs @@ -5,7 +5,7 @@ use core::intrinsics; intrinsics! 
{ #[unsafe(naked)] #[cfg(all(target_os = "uefi", not(feature = "no-asm")))] - pub unsafe extern "C" fn __chkstk() { + pub unsafe extern "custom" fn __chkstk() { core::arch::naked_asm!( ".p2align 2", "lsl x16, x15, #4", diff --git a/compiler-builtins/src/arm.rs b/compiler-builtins/src/arm.rs index 617cc8e50..fbec93ca4 100644 --- a/compiler-builtins/src/arm.rs +++ b/compiler-builtins/src/arm.rs @@ -9,11 +9,10 @@ unsafe extern "C" { } // SAFETY: these are defined in compiler-builtins -// FIXME(extern_custom), this isn't always the correct ABI -unsafe extern "aapcs" { +unsafe extern "custom" { // AAPCS is not always the correct ABI for these intrinsics, but we only use this to // forward another `__aeabi_` call so it doesn't matter. - fn __aeabi_idiv(a: i32, b: i32) -> i32; + fn __aeabi_idiv(); } intrinsics! { @@ -21,7 +20,7 @@ intrinsics! { // custom calling convention which can't be implemented using a normal Rust function. #[unsafe(naked)] #[cfg(not(target_env = "msvc"))] - pub unsafe extern "C" fn __aeabi_uidivmod() { + pub unsafe extern "custom" fn __aeabi_uidivmod() { core::arch::naked_asm!( "push {{lr}}", "sub sp, sp, #4", @@ -35,7 +34,7 @@ intrinsics! { } #[unsafe(naked)] - pub unsafe extern "C" fn __aeabi_uldivmod() { + pub unsafe extern "custom" fn __aeabi_uldivmod() { core::arch::naked_asm!( "push {{r4, lr}}", "sub sp, sp, #16", @@ -51,7 +50,7 @@ intrinsics! { } #[unsafe(naked)] - pub unsafe extern "C" fn __aeabi_idivmod() { + pub unsafe extern "custom" fn __aeabi_idivmod() { core::arch::naked_asm!( "push {{r0, r1, r4, lr}}", "bl {trampoline}", @@ -64,7 +63,7 @@ intrinsics! 
{ } #[unsafe(naked)] - pub unsafe extern "C" fn __aeabi_ldivmod() { + pub unsafe extern "custom" fn __aeabi_ldivmod() { core::arch::naked_asm!( "push {{r4, lr}}", "sub sp, sp, #16", diff --git a/compiler-builtins/src/int/udiv.rs b/compiler-builtins/src/int/udiv.rs index b9dee63c4..017a81ac9 100644 --- a/compiler-builtins/src/int/udiv.rs +++ b/compiler-builtins/src/int/udiv.rs @@ -44,7 +44,7 @@ intrinsics! { } #[unsafe(naked)] - pub unsafe extern "C" fn __udivmodqi4() { + pub unsafe extern "custom" fn __udivmodqi4() { // compute unsigned 8-bit `n / d` and `n % d`. // // Note: GCC implements a [non-standard calling convention](https://gcc.gnu.org/wiki/avr-gcc#Exceptions_to_the_Calling_Convention) for this function. diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs index 1cec39d8b..dd9920cae 100644 --- a/compiler-builtins/src/lib.rs +++ b/compiler-builtins/src/lib.rs @@ -1,5 +1,6 @@ #![cfg_attr(feature = "compiler-builtins", compiler_builtins)] #![cfg_attr(all(target_family = "wasm"), feature(wasm_numeric_instr))] +#![feature(abi_custom)] #![feature(abi_unadjusted)] #![feature(asm_experimental_arch)] #![feature(cfg_target_has_atomic)] diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs index 1441fd73b..1d0010842 100644 --- a/compiler-builtins/src/probestack.rs +++ b/compiler-builtins/src/probestack.rs @@ -52,15 +52,12 @@ // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, // ensuring that if any pages are unmapped we'll make a page fault. // -// FIXME(abi_custom): This function is unsafe because it uses a custom ABI, -// it does not actually match `extern "C"`. -// // The ABI here is that the stack frame size is located in `%rax`. Upon // return we're not supposed to modify `%rsp` or `%rax`. 
#[cfg(target_arch = "x86_64")] #[unsafe(naked)] #[rustc_std_internal_symbol] -pub unsafe extern "C" fn __rust_probestack() { +pub unsafe extern "custom" fn __rust_probestack() { #[cfg(not(all(target_env = "sgx", target_vendor = "fortanix")))] macro_rules! ret { () => { @@ -144,13 +141,10 @@ pub unsafe extern "C" fn __rust_probestack() { // that on Unix we're expected to restore everything as it was, this // function basically can't tamper with anything. // -// FIXME(abi_custom): This function is unsafe because it uses a custom ABI, -// it does not actually match `extern "C"`. -// // The ABI here is the same as x86_64, except everything is 32-bits large. #[unsafe(naked)] #[rustc_std_internal_symbol] -pub unsafe extern "C" fn __rust_probestack() { +pub unsafe extern "custom" fn __rust_probestack() { core::arch::naked_asm!( " .cfi_startproc @@ -192,9 +186,6 @@ pub unsafe extern "C" fn __rust_probestack() { // probestack function will also do things like _chkstk in MSVC. // So we need to sub %ax %sp in probestack when arch is x86. // -// FIXME(abi_custom): This function is unsafe because it uses a custom ABI, -// it does not actually match `extern "C"`. -// // REF: Rust commit(74e80468347) // rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805 // Comments in LLVM: @@ -203,7 +194,7 @@ pub unsafe extern "C" fn __rust_probestack() { // themselves. 
#[unsafe(naked)] #[rustc_std_internal_symbol] -pub unsafe extern "C" fn __rust_probestack() { +pub unsafe extern "custom" fn __rust_probestack() { core::arch::naked_asm!( " .cfi_startproc diff --git a/compiler-builtins/src/x86.rs b/compiler-builtins/src/x86.rs index 01152d9c7..16e50922a 100644 --- a/compiler-builtins/src/x86.rs +++ b/compiler-builtins/src/x86.rs @@ -2,7 +2,7 @@ use core::intrinsics; -// NOTE These functions are implemented using assembly because they using a custom +// NOTE These functions are implemented using assembly because they use a custom // calling convention which can't be implemented using a normal Rust function // NOTE These functions are never mangled as they are not tested against compiler-rt @@ -13,10 +13,10 @@ intrinsics! { any(all(windows, target_env = "gnu"), target_os = "uefi"), not(feature = "no-asm") ))] - pub unsafe extern "C" fn __chkstk() { + pub unsafe extern "custom" fn __chkstk() { core::arch::naked_asm!( - "jmp __alloca", // Jump to __alloca since fallthrough may be unreliable" - options(att_syntax) + "jmp {}", // Jump to __alloca since fallthrough may be unreliable" + sym crate::x86::_alloca::_alloca, ); } @@ -25,7 +25,7 @@ intrinsics! 
{ any(all(windows, target_env = "gnu"), target_os = "uefi"), not(feature = "no-asm") ))] - pub unsafe extern "C" fn _alloca() { + pub unsafe extern "custom" fn _alloca() { // __chkstk and _alloca are the same function core::arch::naked_asm!( "push %ecx", diff --git a/compiler-builtins/src/x86_64.rs b/compiler-builtins/src/x86_64.rs index fc1190f79..9b7133b48 100644 --- a/compiler-builtins/src/x86_64.rs +++ b/compiler-builtins/src/x86_64.rs @@ -2,7 +2,7 @@ use core::intrinsics; -// NOTE These functions are implemented using assembly because they using a custom +// NOTE These functions are implemented using assembly because they use a custom // calling convention which can't be implemented using a normal Rust function // NOTE These functions are never mangled as they are not tested against compiler-rt @@ -17,7 +17,7 @@ intrinsics! { ), not(feature = "no-asm") ))] - pub unsafe extern "C" fn ___chkstk_ms() { + pub unsafe extern "custom" fn ___chkstk_ms() { core::arch::naked_asm!( "push %rcx", "push %rax", From 41b5e34cd9b265ea0ee627599c0b3544cb4ece26 Mon Sep 17 00:00:00 2001 From: Urgau <3616612+Urgau@users.noreply.github.com> Date: Sun, 15 Jun 2025 00:43:17 +0200 Subject: [PATCH 1403/1459] Add minimal triagebot config This PR adds a minimal `triagebot.toml` config to make contributions to this repository respect upstream rust-lang/rust conventions and avoid issues when syncing this subtree. --- triagebot.toml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 triagebot.toml diff --git a/triagebot.toml b/triagebot.toml new file mode 100644 index 000000000..ecc05da01 --- /dev/null +++ b/triagebot.toml @@ -0,0 +1,21 @@ +## See for documentation +## of these features. 
+ +# Warns when a PR contains merge commits +# Documentation at: https://forge.rust-lang.org/triagebot/no-merge.html +[no-merges] +exclude_titles = ["Update from"] + +# Canonicalize issue numbers to avoid closing the wrong issue +# when commits are included in subtrees, as well as warning links in commits. +# Documentation at: https://forge.rust-lang.org/triagebot/issue-links.html +[issue-links] +check-commits = false + +# Prevents mentions in commits to avoid users being spammed +# Documentation at: https://forge.rust-lang.org/triagebot/no-mentions.html +[no-mentions] + +# Enable issue transfers within the org +# Documentation at: https://forge.rust-lang.org/triagebot/transfer.html +[transfer] From 267ae1fa43785448bfb0aebafc4e352c936dd4cf Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 14 Jun 2025 23:39:24 +0000 Subject: [PATCH 1404/1459] symcheck: Add a wrapper around an archive Rather than re-opening the archive file for each check, add a wrapper that keeps the data in memory. Additionally, collect the `--target` argument so it can be used within this crate. --- crates/symbol-check/src/main.rs | 104 ++++++++++++++++++++++++-------- 1 file changed, 80 insertions(+), 24 deletions(-) diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs index d83cd318d..843a943fb 100644 --- a/crates/symbol-check/src/main.rs +++ b/crates/symbol-check/src/main.rs @@ -8,7 +8,9 @@ use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; use object::read::archive::{ArchiveFile, ArchiveMember}; -use object::{Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection}; +use object::{ + File as ObjFile, Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection, +}; use serde_json::Value; const CHECK_LIBRARIES: &[&str] = &["compiler_builtins", "builtins_test_intrinsics"]; @@ -28,13 +30,11 @@ fn main() { let args_ref = args.iter().map(String::as_str).collect::>(); match &args_ref[1..] { - ["build-and-check", rest @ ..] 
if !rest.is_empty() => { - let paths = exec_cargo_with_args(rest); - for path in paths { - println!("Checking {}", path.display()); - verify_no_duplicates(&path); - verify_core_symbols(&path); - } + ["build-and-check", "--target", target, args @ ..] if !args.is_empty() => { + run_build_and_check(Some(target), args); + } + ["build-and-check", args @ ..] if !args.is_empty() => { + run_build_and_check(None, args); } _ => { println!("{USAGE}"); @@ -43,12 +43,42 @@ fn main() { } } +fn run_build_and_check(target: Option<&str>, args: &[&str]) { + let paths = exec_cargo_with_args(target, args); + for path in paths { + println!("Checking {}", path.display()); + let archive = Archive::from_path(&path); + + verify_no_duplicates(&archive); + verify_core_symbols(&archive); + } +} + +fn host_target() -> String { + let out = Command::new("rustc") + .arg("--version") + .arg("--verbose") + .output() + .unwrap(); + assert!(out.status.success()); + let out = String::from_utf8(out.stdout).unwrap(); + out.lines() + .find_map(|s| s.strip_prefix("host: ")) + .unwrap() + .to_owned() +} + /// Run `cargo build` with the provided additional arguments, collecting the list of created /// libraries. -fn exec_cargo_with_args(args: &[&str]) -> Vec { +fn exec_cargo_with_args(target: Option<&str>, args: &[&str]) -> Vec { + let mut host = String::new(); + let target = target.unwrap_or_else(|| { + host = host_target(); + host.as_str() + }); + let mut cmd = Command::new("cargo"); - cmd.arg("build") - .arg("--message-format=json") + cmd.args(["build", "--target", target, "--message-format=json"]) .args(args) .stdout(Stdio::piped()); @@ -133,12 +163,12 @@ impl SymInfo { /// Note that this will also locate cases where a symbol is weakly defined in more than one place. /// Technically there are no linker errors that will come from this, but it keeps our binary more /// straightforward and saves some distribution size. 
-fn verify_no_duplicates(path: &Path) { +fn verify_no_duplicates(archive: &Archive) { let mut syms = BTreeMap::::new(); let mut dups = Vec::new(); let mut found_any = false; - for_each_symbol(path, |symbol, member| { + archive.for_each_symbol(|symbol, member| { // Only check defined globals if !symbol.is_global() || symbol.is_undefined() { return; @@ -185,12 +215,12 @@ fn verify_no_duplicates(path: &Path) { } /// Ensure that there are no references to symbols from `core` that aren't also (somehow) defined. -fn verify_core_symbols(path: &Path) { +fn verify_core_symbols(archive: &Archive) { let mut defined = BTreeSet::new(); let mut undefined = Vec::new(); let mut has_symbols = false; - for_each_symbol(path, |symbol, member| { + archive.for_each_symbol(|symbol, member| { has_symbols = true; // Find only symbols from `core` @@ -219,14 +249,40 @@ fn verify_core_symbols(path: &Path) { println!(" success: no undefined references to core found"); } -/// For a given archive path, do something with each symbol. -fn for_each_symbol(path: &Path, mut f: impl FnMut(Symbol, &ArchiveMember)) { - let data = fs::read(path).expect("reading file failed"); - let archive = ArchiveFile::parse(data.as_slice()).expect("archive parse failed"); - for member in archive.members() { - let member = member.expect("failed to access member"); - let obj_data = member.data(&*data).expect("failed to access object"); - let obj = object::File::parse(obj_data).expect("failed to parse object"); - obj.symbols().for_each(|sym| f(sym, &member)); +/// Thin wrapper for owning data used by `object`. +struct Archive { + data: Vec, +} + +impl Archive { + fn from_path(path: &Path) -> Self { + Self { + data: fs::read(path).expect("reading file failed"), + } + } + + fn file(&self) -> ArchiveFile<'_> { + ArchiveFile::parse(self.data.as_slice()).expect("archive parse failed") + } + + /// For a given archive, do something with each object file. 
+ fn for_each_object(&self, mut f: impl FnMut(ObjFile, &ArchiveMember)) { + let archive = self.file(); + + for member in archive.members() { + let member = member.expect("failed to access member"); + let obj_data = member + .data(self.data.as_slice()) + .expect("failed to access object"); + let obj = ObjFile::parse(obj_data).expect("failed to parse object"); + f(obj, &member); + } + } + + /// For a given archive, do something with each symbol. + fn for_each_symbol(&self, mut f: impl FnMut(Symbol, &ArchiveMember)) { + self.for_each_object(|obj, member| { + obj.symbols().for_each(|sym| f(sym, member)); + }); } } From 674910e0fa6f0fb2cc055f4f7051ff0eb53c7735 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Tue, 24 Jun 2025 20:40:08 +0200 Subject: [PATCH 1405/1459] Use `asm_cfg` in `probestack` cc https://www.github.com/rust-lang/rust/issues/140364 --- compiler-builtins/src/lib.rs | 1 + compiler-builtins/src/probestack.rs | 35 ++++++++++------------------- 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs index dd9920cae..fe0ad81dd 100644 --- a/compiler-builtins/src/lib.rs +++ b/compiler-builtins/src/lib.rs @@ -7,6 +7,7 @@ #![feature(compiler_builtins)] #![feature(core_intrinsics)] #![feature(linkage)] +#![feature(asm_cfg)] #![feature(naked_functions)] #![feature(repr_simd)] #![feature(macro_metavar_expr_concat)] diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs index 1d0010842..f4105dde5 100644 --- a/compiler-builtins/src/probestack.rs +++ b/compiler-builtins/src/probestack.rs @@ -58,27 +58,6 @@ #[unsafe(naked)] #[rustc_std_internal_symbol] pub unsafe extern "custom" fn __rust_probestack() { - #[cfg(not(all(target_env = "sgx", target_vendor = "fortanix")))] - macro_rules! ret { - () => { - "ret" - }; - } - - #[cfg(all(target_env = "sgx", target_vendor = "fortanix"))] - macro_rules! ret { - // for this target, [manually patch for LVI]. 
- // - // [manually patch for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions - () => { - " - pop %r11 - lfence - jmp *%r11 - " - }; - } - core::arch::naked_asm!( " .cfi_startproc @@ -128,8 +107,18 @@ pub unsafe extern "custom" fn __rust_probestack() { .cfi_def_cfa_register %rsp .cfi_adjust_cfa_offset -8 ", - ret!(), - " + #[cfg(not(all(target_env = "sgx", target_vendor = "fortanix")))] + " ret", + #[cfg(all(target_env = "sgx", target_vendor = "fortanix"))] + " + // for this target, [manually patch for LVI]. + // + // [manually patch for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions + pop %r11 + lfence + jmp *%r11 + ", + " .cfi_endproc ", options(att_syntax) From 0bbec7238890242e7754d6ff604c989666328d83 Mon Sep 17 00:00:00 2001 From: quaternic <57393910+quaternic@users.noreply.github.com> Date: Sun, 29 Jun 2025 06:53:07 +0300 Subject: [PATCH 1406/1459] apply suggestions for clippy::manual_is_multiple_of in libm-test --- libm-test/tests/z_extensive/run.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libm-test/tests/z_extensive/run.rs b/libm-test/tests/z_extensive/run.rs index f2ba6a4a0..e04e00c6d 100644 --- a/libm-test/tests/z_extensive/run.rs +++ b/libm-test/tests/z_extensive/run.rs @@ -197,15 +197,15 @@ impl Progress { fn update(&self, completed: u64, input: impl fmt::Debug) { // Infrequently update the progress bar. 
- if completed % 20_000 == 0 { + if completed.is_multiple_of(20_000) { self.pb.set_position(completed); } - if completed % 500_000 == 0 { + if completed.is_multiple_of(500_000) { self.pb.set_message(format!("input: {input:<24?}")); } - if !self.is_tty && completed % 5_000_000 == 0 { + if !self.is_tty && completed.is_multiple_of(5_000_000) { let len = self.pb.length().unwrap_or_default(); eprintln!( "[{elapsed:3?}s {percent:3.0}%] {name} \ From cc53499ebbe3d65ba247bc8a2da7e5984c039906 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 14 Jun 2025 08:44:37 +0000 Subject: [PATCH 1407/1459] josh-sync: Replace `#xxxx`-style links in messages Often our short summaries will pick up a Bors "Auto merge of #xxxx ..." commit message. Replace these with something like `rust-lang/rust#1234` to avoid broken links when going between repositories. --- crates/josh-sync/Cargo.toml | 1 + crates/josh-sync/src/sync.rs | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/crates/josh-sync/Cargo.toml b/crates/josh-sync/Cargo.toml index 1f3bb376d..8e2e891db 100644 --- a/crates/josh-sync/Cargo.toml +++ b/crates/josh-sync/Cargo.toml @@ -5,3 +5,4 @@ publish = false [dependencies] directories = "6.0.0" +regex-lite = "0.1.6" diff --git a/crates/josh-sync/src/sync.rs b/crates/josh-sync/src/sync.rs index 003cf187d..2d89d2d1c 100644 --- a/crates/josh-sync/src/sync.rs +++ b/crates/josh-sync/src/sync.rs @@ -1,8 +1,11 @@ +use std::borrow::Cow; use std::net::{SocketAddr, TcpStream}; use std::process::{Command, Stdio, exit}; use std::time::Duration; use std::{env, fs, process, thread}; +use regex_lite::Regex; + const JOSH_PORT: u16 = 42042; const DEFAULT_PR_BRANCH: &str = "update-builtins"; @@ -77,6 +80,7 @@ impl GitSync { "--depth=1", ]); let new_summary = check_output(["git", "log", "-1", "--format=%h %s", &new_upstream_base]); + let new_summary = replace_references(&new_summary, &self.upstream_repo); // Update rust-version file.
As a separate commit, since making it part of // the merge has confused the heck out of josh in the past. @@ -297,6 +301,13 @@ fn check_output_cfg(prog: &str, f: impl FnOnce(&mut Command) -> &mut Command) -> String::from_utf8(out.stdout.trim_ascii().to_vec()).expect("non-UTF8 output") } +/// Replace `#1234`-style issue/PR references with `repo#1234` to ensure links work across +/// repositories. +fn replace_references<'a>(s: &'a str, repo: &str) -> Cow<'a, str> { + let re = Regex::new(r"\B(?P#\d+)\b").unwrap(); + re.replace(s, &format!("{repo}$id")) +} + /// Create a wrapper that stops Josh on drop. pub struct Josh(process::Child); @@ -369,3 +380,22 @@ impl Drop for Josh { self.0.kill().expect("failed to SIGKILL josh-proxy"); } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_replace() { + assert_eq!(replace_references("#1234", "r-l/rust"), "r-l/rust#1234"); + assert_eq!(replace_references("#1234x", "r-l/rust"), "#1234x"); + assert_eq!( + replace_references("merge #1234", "r-l/rust"), + "merge r-l/rust#1234" + ); + assert_eq!( + replace_references("foo/bar#1234", "r-l/rust"), + "foo/bar#1234" + ); + } +} From 95abb0e02db3128256297203507c8e1da9c96696 Mon Sep 17 00:00:00 2001 From: quaternic <57393910+quaternic@users.noreply.github.com> Date: Tue, 1 Jul 2025 11:07:48 +0300 Subject: [PATCH 1408/1459] libm: Improved integer utilities, implement shifts and bug fixes for i256 and u256 `i256` and `u256` - operators now use the same overflow convention as primitives - implement `<<` and `-` (previously just `>>` and `+`) - implement `Ord` correctly (the previous `PartialOrd` was broken) - correct `i256::SIGNED` to `true` The `Int`-trait is extended with `trailing_zeros`, `carrying_add`, and `borrowing_sub`. 
--- libm-test/benches/icount.rs | 18 +++- libm-test/tests/u256.rs | 46 +++++++++- libm/src/math/support/big.rs | 133 +++++++++++++++++----------- libm/src/math/support/big/tests.rs | 63 ++++++++++++- libm/src/math/support/int_traits.rs | 23 ++++- 5 files changed, 223 insertions(+), 60 deletions(-) diff --git a/libm-test/benches/icount.rs b/libm-test/benches/icount.rs index a0928a29f..02ee13f80 100644 --- a/libm-test/benches/icount.rs +++ b/libm-test/benches/icount.rs @@ -119,6 +119,22 @@ fn icount_bench_u256_add(cases: Vec<(u256, u256)>) { } } +#[library_benchmark] +#[bench::linspace(setup_u256_add())] +fn icount_bench_u256_sub(cases: Vec<(u256, u256)>) { + for (x, y) in cases.iter().copied() { + black_box(black_box(x) - black_box(y)); + } +} + +#[library_benchmark] +#[bench::linspace(setup_u256_shift())] +fn icount_bench_u256_shl(cases: Vec<(u256, u32)>) { + for (x, y) in cases.iter().copied() { + black_box(black_box(x) << black_box(y)); + } +} + #[library_benchmark] #[bench::linspace(setup_u256_shift())] fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) { @@ -129,7 +145,7 @@ fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) { library_benchmark_group!( name = icount_bench_u128_group; - benchmarks = icount_bench_u128_widen_mul, icount_bench_u256_add, icount_bench_u256_shr + benchmarks = icount_bench_u128_widen_mul, icount_bench_u256_add, icount_bench_u256_sub, icount_bench_u256_shl, icount_bench_u256_shr ); #[library_benchmark] diff --git a/libm-test/tests/u256.rs b/libm-test/tests/u256.rs index 8cbb3ad22..d1c5cfbcc 100644 --- a/libm-test/tests/u256.rs +++ b/libm-test/tests/u256.rs @@ -111,12 +111,54 @@ fn mp_u256_add() { let y = random_u256(&mut rng); assign_bigint(&mut bx, x); assign_bigint(&mut by, y); - let actual = x + y; + let actual = if u256::MAX - x >= y { + x + y + } else { + // otherwise (u256::MAX - x) < y, so the wrapped result is + // (x + y) - (u256::MAX + 1) == y - (u256::MAX - x) - 1 + y - (u256::MAX - x) - 1_u128.widen() + }; bx += &by; 
check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx); } } +#[test] +fn mp_u256_sub() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx = BigInt::new(); + let mut by = BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x = random_u256(&mut rng); + let y = random_u256(&mut rng); + assign_bigint(&mut bx, x); + assign_bigint(&mut by, y); + + // since the operators (may) panic on overflow, + // we should test something that doesn't + let actual = if x >= y { x - y } else { y - x }; + bx -= &by; + bx.abs_mut(); + check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx); + } +} + +#[test] +fn mp_u256_shl() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx = BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x = random_u256(&mut rng); + let shift: u32 = rng.random_range(0..256); + assign_bigint(&mut bx, x); + let actual = x << shift; + bx <<= shift; + check_one(|| hexu(x), || Some(shift.to_string()), actual, &mut bx); + } +} + #[test] fn mp_u256_shr() { let mut rng = ChaCha8Rng::from_seed(*SEED); @@ -124,7 +166,7 @@ fn mp_u256_shr() { for _ in 0..bigint_fuzz_iteration_count() { let x = random_u256(&mut rng); - let shift: u32 = rng.random_range(0..255); + let shift: u32 = rng.random_range(0..256); assign_bigint(&mut bx, x); let actual = x >> shift; bx >>= shift; diff --git a/libm/src/math/support/big.rs b/libm/src/math/support/big.rs index 8a52d86cc..b7f128542 100644 --- a/libm/src/math/support/big.rs +++ b/libm/src/math/support/big.rs @@ -11,10 +11,10 @@ const U128_LO_MASK: u128 = u64::MAX as u128; /// A 256-bit unsigned integer represented as two 128-bit native-endian limbs. 
#[allow(non_camel_case_types)] -#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)] pub struct u256 { - pub lo: u128, pub hi: u128, + pub lo: u128, } impl u256 { @@ -28,17 +28,17 @@ impl u256 { pub fn signed(self) -> i256 { i256 { lo: self.lo, - hi: self.hi, + hi: self.hi as i128, } } } /// A 256-bit signed integer represented as two 128-bit native-endian limbs. #[allow(non_camel_case_types)] -#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)] pub struct i256 { + pub hi: i128, pub lo: u128, - pub hi: u128, } impl i256 { @@ -47,7 +47,7 @@ impl i256 { pub fn unsigned(self) -> u256 { u256 { lo: self.lo, - hi: self.hi, + hi: self.hi as u128, } } } @@ -73,17 +73,17 @@ impl MinInt for i256 { type Unsigned = u256; - const SIGNED: bool = false; + const SIGNED: bool = true; const BITS: u32 = 256; const ZERO: Self = Self { lo: 0, hi: 0 }; const ONE: Self = Self { lo: 1, hi: 0 }; const MIN: Self = Self { - lo: 0, - hi: 1 << 127, + lo: u128::MIN, + hi: i128::MIN, }; const MAX: Self = Self { lo: u128::MAX, - hi: u128::MAX >> 1, + hi: i128::MAX, }; } @@ -109,60 +109,86 @@ macro_rules! 
impl_common { } } - impl ops::Shl for $ty { + impl ops::Add for $ty { type Output = Self; - fn shl(self, _rhs: u32) -> Self::Output { - unimplemented!("only used to meet trait bounds") + fn add(self, rhs: Self) -> Self::Output { + let (lo, carry) = self.lo.overflowing_add(rhs.lo); + let (hi, of) = Int::carrying_add(self.hi, rhs.hi, carry); + debug_assert!(!of, "attempt to add with overflow"); + Self { lo, hi } } } - }; -} -impl_common!(i256); -impl_common!(u256); + impl ops::Sub for $ty { + type Output = Self; -impl ops::Add for u256 { - type Output = Self; + fn sub(self, rhs: Self) -> Self::Output { + let (lo, borrow) = self.lo.overflowing_sub(rhs.lo); + let (hi, of) = Int::borrowing_sub(self.hi, rhs.hi, borrow); + debug_assert!(!of, "attempt to subtract with overflow"); + Self { lo, hi } + } + } - fn add(self, rhs: Self) -> Self::Output { - let (lo, carry) = self.lo.overflowing_add(rhs.lo); - let hi = self.hi.wrapping_add(carry as u128).wrapping_add(rhs.hi); + impl ops::Shl for $ty { + type Output = Self; - Self { lo, hi } - } -} + fn shl(mut self, rhs: u32) -> Self::Output { + debug_assert!(rhs < Self::BITS, "attempt to shift left with overflow"); -impl ops::Shr for u256 { - type Output = Self; + let half_bits = Self::BITS / 2; + let low_mask = half_bits - 1; + let s = rhs & low_mask; - fn shr(mut self, rhs: u32) -> Self::Output { - debug_assert!(rhs < Self::BITS, "attempted to shift right with overflow"); - if rhs >= Self::BITS { - return Self::ZERO; - } + let lo = self.lo; + let hi = self.hi; - if rhs == 0 { - return self; - } + self.lo = lo << s; - if rhs < 128 { - self.lo >>= rhs; - self.lo |= self.hi << (128 - rhs); - } else { - self.lo = self.hi >> (rhs - 128); + if rhs & half_bits == 0 { + self.hi = (lo >> (low_mask ^ s) >> 1) as _; + self.hi |= hi << s; + } else { + self.hi = self.lo as _; + self.lo = 0; + } + self + } } - if rhs < 128 { - self.hi >>= rhs; - } else { - self.hi = 0; - } + impl ops::Shr for $ty { + type Output = Self; - self - } + fn 
shr(mut self, rhs: u32) -> Self::Output { + debug_assert!(rhs < Self::BITS, "attempt to shift right with overflow"); + + let half_bits = Self::BITS / 2; + let low_mask = half_bits - 1; + let s = rhs & low_mask; + + let lo = self.lo; + let hi = self.hi; + + self.hi = hi >> s; + + #[allow(unused_comparisons)] + if rhs & half_bits == 0 { + self.lo = (hi << (low_mask ^ s) << 1) as _; + self.lo |= lo >> s; + } else { + self.lo = self.hi as _; + self.hi = if hi < 0 { !0 } else { 0 }; + } + self + } + } + }; } +impl_common!(i256); +impl_common!(u256); + impl HInt for u128 { type D = u256; @@ -200,7 +226,7 @@ impl HInt for u128 { } fn widen_hi(self) -> Self::D { - self.widen() << ::BITS + u256 { lo: 0, hi: self } } } @@ -208,11 +234,10 @@ impl HInt for i128 { type D = i256; fn widen(self) -> Self::D { - let mut ret = self.unsigned().zero_widen().signed(); - if self.is_negative() { - ret.hi = u128::MAX; + i256 { + lo: self as u128, + hi: if self < 0 { -1 } else { 0 }, } - ret } fn zero_widen(self) -> Self::D { @@ -228,7 +253,7 @@ impl HInt for i128 { } fn widen_hi(self) -> Self::D { - self.widen() << ::BITS + i256 { lo: 0, hi: self } } } @@ -252,6 +277,6 @@ impl DInt for i256 { } fn hi(self) -> Self::H { - self.hi as i128 + self.hi } } diff --git a/libm/src/math/support/big/tests.rs b/libm/src/math/support/big/tests.rs index d2010f021..d54706c72 100644 --- a/libm/src/math/support/big/tests.rs +++ b/libm/src/math/support/big/tests.rs @@ -36,7 +36,7 @@ fn widen_i128() { (LOHI_SPLIT as i128).widen(), i256 { lo: LOHI_SPLIT, - hi: u128::MAX + hi: -1, } ); assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen()); @@ -275,3 +275,64 @@ fn shr_u256_overflow() { assert_eq!(u256::MAX >> 257, u256::ZERO); assert_eq!(u256::MAX >> u32::MAX, u256::ZERO); } + +#[test] +fn u256_ord() { + let _1 = u256::ONE; + let _2 = _1 + _1; + for x in u8::MIN..u8::MAX { + let y = x + 1; + let wx = (x as u128).widen_hi(); + let wy = (y as u128).widen_hi(); + assert!([wx, wx + _1, wx + _2, wy, wy 
+ _1, wy + _2].is_sorted()); + } +} +#[test] +fn i256_ord() { + let _1 = i256::ONE; + let _2 = _1 + _1; + for x in i8::MIN..i8::MAX { + let y = x + 1; + let wx = (x as i128).widen_hi(); + let wy = (y as i128).widen_hi(); + assert!([wx, wx + _1, wx + _2, wy - _2, wy - _1, wy].is_sorted()); + } +} + +#[test] +fn u256_shifts() { + let _1 = u256::ONE; + for k in 0..255 { + let x = _1 << k; + let x2 = _1 << (k + 1); + assert!(x < x2); + assert_eq!(x << 1, x2); + assert_eq!(x + x, x2); + assert_eq!(x >> k, _1); + assert_eq!(x2 >> (k + 1), _1); + } +} +#[test] +fn i256_shifts() { + let _1 = i256::ONE; + for k in 0..254 { + let x = _1 << k; + let x2 = _1 << (k + 1); + assert!(x < x2); + assert_eq!(x << 1, x2); + assert_eq!(x + x, x2); + assert_eq!(x >> k, _1); + assert_eq!(x2 >> (k + 1), _1); + } + + let min = _1 << 255; + assert_eq!(min, i256::MIN); + let mut x = min; + for k in 0..255 { + assert_eq!(x, min >> k); + let y = x >> 1; + assert_eq!(y + y, x); + assert!(x < y); + x = y; + } +} diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs index 9b29e2f45..9d8826dfe 100644 --- a/libm/src/math/support/int_traits.rs +++ b/libm/src/math/support/int_traits.rs @@ -37,8 +37,6 @@ pub trait Int: + fmt::Display + fmt::Binary + fmt::LowerHex - + PartialEq - + PartialOrd + ops::AddAssign + ops::SubAssign + ops::MulAssign @@ -102,7 +100,10 @@ pub trait Int: fn rotate_left(self, other: u32) -> Self; fn overflowing_add(self, other: Self) -> (Self, bool); fn overflowing_sub(self, other: Self) -> (Self, bool); + fn carrying_add(self, other: Self, carry: bool) -> (Self, bool); + fn borrowing_sub(self, other: Self, borrow: bool) -> (Self, bool); fn leading_zeros(self) -> u32; + fn trailing_zeros(self) -> u32; fn ilog2(self) -> u32; } @@ -168,12 +169,30 @@ macro_rules! int_impl_common { ::leading_zeros(self) } + fn trailing_zeros(self) -> u32 { + ::trailing_zeros(self) + } + fn ilog2(self) -> u32 { // On our older MSRV, this resolves to the trait method. 
Which won't actually work, // but this is only called behind other gates. #[allow(clippy::incompatible_msrv)] ::ilog2(self) } + + fn carrying_add(self, other: Self, carry: bool) -> (Self, bool) { + let (ab, of1) = self.overflowing_add(other); + let (abc, of2) = ab.overflowing_add(Self::from_bool(carry)); + // `of1 && of2` is possible with signed integers if a negative sum + // overflows to `MAX` and adding the carry overflows again back to `MIN` + (abc, of1 ^ of2) + } + + fn borrowing_sub(self, other: Self, borrow: bool) -> (Self, bool) { + let (ab, of1) = self.overflowing_sub(other); + let (abc, of2) = ab.overflowing_sub(Self::from_bool(borrow)); + (abc, of1 ^ of2) + } }; } From ed17b95715ddce362fdea2c787e6efb28824f29c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 30 Jun 2025 16:53:21 -0500 Subject: [PATCH 1409/1459] Use the compiler to determine whether or not to enable `f16` and `f128` Currently we decide whether or not to build and test `f16` and `f128` support mostly based on the target triple. This isn't always accurate, however, since support also varies by backend and the backend version. Since recently, `rustc` is aware of this with the unstable config option `target_has_reliable_{f16,f128}`, which better represents when the types are actually expected to be available and usable. Switch our compiler-builtins and libm configuration to use this by probing `rustc` for the target's settings. A few small `cfg` fixes are needed with this.
--- builtins-test-intrinsics/build.rs | 1 - builtins-test/benches/float_cmp.rs | 2 + builtins-test/build.rs | 1 - builtins-test/tests/conv.rs | 4 +- builtins-test/tests/div_rem.rs | 4 +- compiler-builtins/build.rs | 3 +- compiler-builtins/configure.rs | 81 +++++++++++------------------- libm/configure.rs | 81 +++++++++++------------------- 8 files changed, 66 insertions(+), 111 deletions(-) diff --git a/builtins-test-intrinsics/build.rs b/builtins-test-intrinsics/build.rs index 89b126ff2..b82581262 100644 --- a/builtins-test-intrinsics/build.rs +++ b/builtins-test-intrinsics/build.rs @@ -6,6 +6,5 @@ fn main() { println!("cargo::rerun-if-changed=../configure.rs"); let target = builtins_configure::Target::from_env(); - builtins_configure::configure_f16_f128(&target); builtins_configure::configure_aliases(&target); } diff --git a/builtins-test/benches/float_cmp.rs b/builtins-test/benches/float_cmp.rs index 87a89efb5..da29b5d31 100644 --- a/builtins-test/benches/float_cmp.rs +++ b/builtins-test/benches/float_cmp.rs @@ -177,6 +177,7 @@ float_bench! { ], } +#[cfg(f128_enabled)] float_bench! { name: cmp_f128_gt, sig: (a: f128, b: f128) -> CmpResult, @@ -189,6 +190,7 @@ float_bench! { asm: [] } +#[cfg(f128_enabled)] float_bench! { name: cmp_f128_unord, sig: (a: f128, b: f128) -> CmpResult, diff --git a/builtins-test/build.rs b/builtins-test/build.rs index e8f4eb4dd..5b2dcd12e 100644 --- a/builtins-test/build.rs +++ b/builtins-test/build.rs @@ -116,5 +116,4 @@ fn main() { } builtins_configure::configure_aliases(&target); - builtins_configure::configure_f16_f128(&target); } diff --git a/builtins-test/tests/conv.rs b/builtins-test/tests/conv.rs index 491915d9b..7d729364f 100644 --- a/builtins-test/tests/conv.rs +++ b/builtins-test/tests/conv.rs @@ -118,7 +118,7 @@ mod i_to_f { i128, __floattidf; } - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] i_to_f! 
{ f128, Quad, not(feature = "no-sys-f128-int-convert"), u32, __floatunsitf; @@ -129,7 +129,7 @@ mod i_to_f { i128, __floattitf; } - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"), u32, __floatunsikf; diff --git a/builtins-test/tests/div_rem.rs b/builtins-test/tests/div_rem.rs index 5ae653cc9..e8327f9b4 100644 --- a/builtins-test/tests/div_rem.rs +++ b/builtins-test/tests/div_rem.rs @@ -147,7 +147,7 @@ mod float_div { f64, __divdf3, Double, all(); } - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] float! { f128, __divtf3, Quad, @@ -156,7 +156,7 @@ mod float_div { not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux"))); } - #[cfg(not(feature = "no-f16-f128"))] + #[cfg(f128_enabled)] #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] float! { f128, __divkf3, Quad, not(feature = "no-sys-f128"); diff --git a/compiler-builtins/build.rs b/compiler-builtins/build.rs index 018899faf..8f51c12b5 100644 --- a/compiler-builtins/build.rs +++ b/compiler-builtins/build.rs @@ -2,7 +2,7 @@ mod configure; use std::env; -use configure::{Target, configure_aliases, configure_f16_f128}; +use configure::{Target, configure_aliases}; fn main() { println!("cargo::rerun-if-changed=build.rs"); @@ -12,7 +12,6 @@ fn main() { let cwd = env::current_dir().unwrap(); configure_check_cfg(); - configure_f16_f128(&target); configure_aliases(&target); configure_libm(&target); diff --git a/compiler-builtins/configure.rs b/compiler-builtins/configure.rs index d825f35a9..a1e45080e 100644 --- a/compiler-builtins/configure.rs +++ b/compiler-builtins/configure.rs @@ -1,6 +1,7 @@ // Configuration that is shared between `compiler_builtins` and `builtins_test`. 
-use std::env; +use std::process::{Command, Stdio}; +use std::{env, str}; #[derive(Debug)] #[allow(dead_code)] @@ -16,6 +17,8 @@ pub struct Target { pub pointer_width: u8, pub little_endian: bool, pub features: Vec, + pub reliable_f128: bool, + pub reliable_f16: bool, } impl Target { @@ -32,6 +35,19 @@ impl Target { .map(|s| s.to_lowercase().replace("_", "-")) .collect(); + // Query rustc for options that Cargo does not provide env for. The bootstrap hack is used + // to get consistent output regardless of channel (`f16`/`f128` config options are hidden + // on stable otherwise). + let mut cmd = Command::new(env::var("RUSTC").unwrap()); + cmd.args(["--print=cfg", "--target", &triple]) + .env("RUSTC_BOOTSTRAP", "1") + .stderr(Stdio::inherit()); + let out = cmd + .output() + .unwrap_or_else(|e| panic!("failed to run `{cmd:?}`: {e}")); + assert!(out.status.success(), "failed to run `{cmd:?}`"); + let rustc_cfg = str::from_utf8(&out.stdout).unwrap(); + Self { triple, triple_split, @@ -51,6 +67,8 @@ impl Target { .split(",") .map(ToOwned::to_owned) .collect(), + reliable_f128: rustc_cfg.lines().any(|l| l == "target_has_reliable_f128"), + reliable_f16: rustc_cfg.lines().any(|l| l == "target_has_reliable_f16"), } } @@ -74,63 +92,24 @@ pub fn configure_aliases(target: &Target) { if target.triple_split[0] == "thumbv6m" || target.triple_split[0] == "thumbv8m.base" { println!("cargo:rustc-cfg=thumb_1") } -} - -/// Configure whether or not `f16` and `f128` support should be enabled. -pub fn configure_f16_f128(target: &Target) { - // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means - // that the backend will not crash when using these types and generates code that can be called - // without crashing (no infinite recursion). This does not mean that the platform doesn't have - // ABI or other bugs. - // - // We do this here rather than in `rust-lang/rust` because configuring via cargo features is - // not straightforward. 
- // - // Original source of this list: - // - let f16_enabled = match target.arch.as_str() { - // Unsupported - "arm64ec" => false, - // Selection failure - "s390x" => false, - // Infinite recursion - "csky" => false, - "hexagon" => false, - "powerpc" | "powerpc64" => false, - "sparc" | "sparc64" => false, - "wasm32" | "wasm64" => false, - // Most everything else works as of LLVM 19 - _ => true, - }; - let f128_enabled = match target.arch.as_str() { - // Unsupported (libcall is not supported) - "amdgpu" => false, - // Unsupported - "arm64ec" => false, - // FIXME(llvm20): fixed by - "mips64" | "mips64r6" => false, - // Selection failure - "nvptx64" => false, - // Selection failure - "powerpc64" if &target.os == "aix" => false, - // Selection failure - "sparc" => false, - // Most everything else works as of LLVM 19 - _ => true, - }; + /* Not all backends support `f16` and `f128` to the same level on all architectures, so we + * need to disable things if the compiler may crash. See configuration at: + * * https://github.com/rust-lang/rust/blob/c65dccabacdfd6c8a7f7439eba13422fdd89b91e/compiler/rustc_codegen_llvm/src/llvm_util.rs#L367-L432 + * * https://github.com/rust-lang/rustc_codegen_gcc/blob/4b5c44b14166083eef8d71f15f5ea1f53fc976a0/src/lib.rs#L496-L507 + * * https://github.com/rust-lang/rustc_codegen_cranelift/blob/c713ffab3c6e28ab4b4dd4e392330f786ea657ad/src/lib.rs#L196-L226 + */ - // If the feature is set, disable these types. - let disable_both = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some(); + // If the feature is set, disable both of these types. 
+ let no_f16_f128 = target.cargo_features.iter().any(|s| s == "no-f16-f128"); println!("cargo::rustc-check-cfg=cfg(f16_enabled)"); - println!("cargo::rustc-check-cfg=cfg(f128_enabled)"); - - if f16_enabled && !disable_both { + if target.reliable_f16 && !no_f16_f128 { println!("cargo::rustc-cfg=f16_enabled"); } - if f128_enabled && !disable_both { + println!("cargo::rustc-check-cfg=cfg(f128_enabled)"); + if target.reliable_f128 && !no_f16_f128 { println!("cargo::rustc-cfg=f128_enabled"); } } diff --git a/libm/configure.rs b/libm/configure.rs index 2a497c7b1..6562ecbe5 100644 --- a/libm/configure.rs +++ b/libm/configure.rs @@ -1,7 +1,8 @@ // Configuration shared with both libm and libm-test -use std::env; use std::path::PathBuf; +use std::process::{Command, Stdio}; +use std::{env, str}; #[allow(dead_code)] pub struct Config { @@ -9,6 +10,7 @@ pub struct Config { pub out_dir: PathBuf, pub opt_level: String, pub cargo_features: Vec, + pub target_triple: String, pub target_arch: String, pub target_env: String, pub target_family: Option, @@ -16,10 +18,13 @@ pub struct Config { pub target_string: String, pub target_vendor: String, pub target_features: Vec, + pub reliable_f128: bool, + pub reliable_f16: bool, } impl Config { pub fn from_env() -> Self { + let target_triple = env::var("TARGET").unwrap(); let target_features = env::var("CARGO_CFG_TARGET_FEATURE") .map(|feats| feats.split(',').map(ToOwned::to_owned).collect()) .unwrap_or_default(); @@ -28,7 +33,21 @@ impl Config { .map(|s| s.to_lowercase().replace("_", "-")) .collect(); + // Query rustc for options that Cargo does not provide env for. The bootstrap hack is used + // to get consistent output regardless of channel (`f16`/`f128` config options are hidden + // on stable otherwise). 
+ let mut cmd = Command::new(env::var("RUSTC").unwrap()); + cmd.args(["--print=cfg", "--target", &target_triple]) + .env("RUSTC_BOOTSTRAP", "1") + .stderr(Stdio::inherit()); + let out = cmd + .output() + .unwrap_or_else(|e| panic!("failed to run `{cmd:?}`: {e}")); + assert!(out.status.success(), "failed to run `{cmd:?}`"); + let rustc_cfg = str::from_utf8(&out.stdout).unwrap(); + Self { + target_triple, manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()), out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()), opt_level: env::var("OPT_LEVEL").unwrap(), @@ -40,6 +59,8 @@ impl Config { target_string: env::var("TARGET").unwrap(), target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), target_features, + reliable_f128: rustc_cfg.lines().any(|l| l == "target_has_reliable_f128"), + reliable_f16: rustc_cfg.lines().any(|l| l == "target_has_reliable_f16"), } } } @@ -128,62 +149,18 @@ fn emit_f16_f128_cfg(cfg: &Config) { return; } - // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means - // that the backend will not crash when using these types and generates code that can be called - // without crashing (no infinite recursion). This does not mean that the platform doesn't have - // ABI or other bugs. - // - // We do this here rather than in `rust-lang/rust` because configuring via cargo features is - // not straightforward. 
- // - // Original source of this list: - // - let f16_enabled = match cfg.target_arch.as_str() { - // Unsupported - "arm64ec" => false, - // Selection failure - "s390x" => false, - // Infinite recursion - // FIXME(llvm): loongarch fixed by - "csky" => false, - "hexagon" => false, - "loongarch64" => false, - "mips" | "mips64" | "mips32r6" | "mips64r6" => false, - "powerpc" | "powerpc64" => false, - "sparc" | "sparc64" => false, - "wasm32" | "wasm64" => false, - // Most everything else works as of LLVM 19 - _ => true, - }; - - let f128_enabled = match cfg.target_arch.as_str() { - // Unsupported (libcall is not supported) - "amdgpu" => false, - // Unsupported - "arm64ec" => false, - // Selection failure - "mips64" | "mips64r6" => false, - // Selection failure - "nvptx64" => false, - // Selection failure - "powerpc64" if &cfg.target_os == "aix" => false, - // Selection failure - "sparc" => false, - // Most everything else works as of LLVM 19 - _ => true, - }; - - // If the feature is set, disable these types. - let disable_both = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some(); + /* See the compiler-builtins configure file for info about the meaning of these options */ - println!("cargo:rustc-check-cfg=cfg(f16_enabled)"); - println!("cargo:rustc-check-cfg=cfg(f128_enabled)"); + // If the feature is set, disable both of these types. 
+ let no_f16_f128 = cfg.cargo_features.iter().any(|s| s == "no-f16-f128"); - if f16_enabled && !disable_both { + println!("cargo:rustc-check-cfg=cfg(f16_enabled)"); + if cfg.reliable_f16 && !no_f16_f128 { println!("cargo:rustc-cfg=f16_enabled"); } - if f128_enabled && !disable_both { + println!("cargo:rustc-check-cfg=cfg(f128_enabled)"); + if cfg.reliable_f128 && !no_f16_f128 { println!("cargo:rustc-cfg=f128_enabled"); } } From 245c676b8e87b50651ebd79847c6e42d3c091824 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 4 Jul 2025 16:53:19 -0500 Subject: [PATCH 1410/1459] Remove the `let_chains` feature now that it is stable --- crates/libm-macros/src/lib.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs index 482da974c..7efa1488f 100644 --- a/crates/libm-macros/src/lib.rs +++ b/crates/libm-macros/src/lib.rs @@ -1,5 +1,3 @@ -#![feature(let_chains)] - mod enums; mod parse; mod shared; From 56aed1d51810830908c635cea2377fb5159e1ab5 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 4 Jul 2025 17:03:45 -0500 Subject: [PATCH 1411/1459] symcheck: Make `target` a positional argument This makes it more obvious what we intend to check rather than looking for `--target`. 
--- ci/run.sh | 27 +++++++++++------------- crates/symbol-check/src/main.rs | 37 ++++++++++++++++++++------------- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/ci/run.sh b/ci/run.sh index 27b9686ea..8b7965bb2 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -54,29 +54,26 @@ symcheck=(cargo run -p symbol-check --release) [[ "$target" = "wasm"* ]] && symcheck+=(--features wasm) symcheck+=(-- build-and-check) -"${symcheck[@]}" -p compiler_builtins --target "$target" -"${symcheck[@]}" -p compiler_builtins --target "$target" --release -"${symcheck[@]}" -p compiler_builtins --target "$target" --features c -"${symcheck[@]}" -p compiler_builtins --target "$target" --features c --release -"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm -"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm --release -"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128 -"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128 --release +"${symcheck[@]}" "$target" -- -p compiler_builtins +"${symcheck[@]}" "$target" -- -p compiler_builtins --release +"${symcheck[@]}" "$target" -- -p compiler_builtins --features c +"${symcheck[@]}" "$target" -- -p compiler_builtins --features c --release +"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-asm +"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-asm --release +"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-f16-f128 +"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-f16-f128 --release run_intrinsics_test() { - args=( - --target "$target" --verbose \ - --manifest-path builtins-test-intrinsics/Cargo.toml - ) - args+=( "$@" ) + build_args=(--verbose --manifest-path builtins-test-intrinsics/Cargo.toml) + build_args+=("$@") # symcheck also checks the results of builtins-test-intrinsics - "${symcheck[@]}" "${args[@]}" + "${symcheck[@]}" "$target" -- "${build_args[@]}" # FIXME: we get 
access violations on Windows, our entrypoint may need to # be tweaked. if [ "${BUILD_ONLY:-}" != "1" ] && ! [[ "$target" = *"windows"* ]]; then - cargo run "${args[@]}" + cargo run --target "$target" "${build_args[@]}" fi } diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs index 843a943fb..f60d4f0d3 100644 --- a/crates/symbol-check/src/main.rs +++ b/crates/symbol-check/src/main.rs @@ -18,10 +18,12 @@ const CHECK_EXTENSIONS: &[Option<&str>] = &[Some("rlib"), Some("a"), Some("exe") const USAGE: &str = "Usage: - symbol-check build-and-check CARGO_ARGS ... + symbol-check build-and-check [TARGET] -- CARGO_BUILD_ARGS ... -Cargo will get invoked with `CARGO_ARGS` and all output +Cargo will get invoked with `CARGO_ARGS` and the specified target. All output `compiler_builtins*.rlib` files will be checked. + +If TARGET is not specified, the host target is used. "; fn main() { @@ -30,11 +32,13 @@ fn main() { let args_ref = args.iter().map(String::as_str).collect::>(); match &args_ref[1..] { - ["build-and-check", "--target", target, args @ ..] if !args.is_empty() => { - run_build_and_check(Some(target), args); + ["build-and-check", target, "--", args @ ..] if !args.is_empty() => { + check_cargo_args(args); + run_build_and_check(target, args); } - ["build-and-check", args @ ..] if !args.is_empty() => { - run_build_and_check(None, args); + ["build-and-check", "--", args @ ..] if !args.is_empty() => { + check_cargo_args(args); + run_build_and_check(&host_target(), args); } _ => { println!("{USAGE}"); @@ -43,7 +47,18 @@ fn main() { } } -fn run_build_and_check(target: Option<&str>, args: &[&str]) { +/// Make sure `--target` isn't passed to avoid confusion (since it should be proivded only once, +/// positionally). +fn check_cargo_args(args: &[&str]) { + for arg in args { + assert!( + !arg.contains("--target"), + "target must be passed positionally. 
{USAGE}" + ); + } +} + +fn run_build_and_check(target: &str, args: &[&str]) { let paths = exec_cargo_with_args(target, args); for path in paths { println!("Checking {}", path.display()); @@ -70,13 +85,7 @@ fn host_target() -> String { /// Run `cargo build` with the provided additional arguments, collecting the list of created /// libraries. -fn exec_cargo_with_args(target: Option<&str>, args: &[&str]) -> Vec { - let mut host = String::new(); - let target = target.unwrap_or_else(|| { - host = host_target(); - host.as_str() - }); - +fn exec_cargo_with_args(target: &str, args: &[&str]) -> Vec { let mut cmd = Command::new("cargo"); cmd.args(["build", "--target", target, "--message-format=json"]) .args(args) From 470e968464065adc63a26e1bf64132fb03375925 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 4 Jul 2025 18:10:27 -0500 Subject: [PATCH 1412/1459] symcheck: Improve diagnostics from spawned Cargo Rather than printing the entire JSON dump, use the rendered version. --- crates/symbol-check/src/main.rs | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs index f60d4f0d3..1312a7179 100644 --- a/crates/symbol-check/src/main.rs +++ b/crates/symbol-check/src/main.rs @@ -87,9 +87,14 @@ fn host_target() -> String { /// libraries. 
fn exec_cargo_with_args(target: &str, args: &[&str]) -> Vec { let mut cmd = Command::new("cargo"); - cmd.args(["build", "--target", target, "--message-format=json"]) - .args(args) - .stdout(Stdio::piped()); + cmd.args([ + "build", + "--target", + target, + "--message-format=json-diagnostic-rendered-ansi", + ]) + .args(args) + .stdout(Stdio::piped()); println!("running: {cmd:?}"); let mut child = cmd.spawn().expect("failed to launch Cargo"); @@ -100,11 +105,21 @@ fn exec_cargo_with_args(target: &str, args: &[&str]) -> Vec { for line in reader.lines() { let line = line.expect("failed to read line"); - println!("{line}"); // tee to stdout - - // Select only steps that create files let j: Value = serde_json::from_str(&line).expect("failed to deserialize"); - if j["reason"] != "compiler-artifact" { + let reason = &j["reason"]; + + // Forward output that is meant to be user-facing + if reason == "compiler-message" { + println!("{}", j["message"]["rendered"].as_str().unwrap()); + } else if reason == "build-finished" { + println!("build finshed. success: {}", j["success"]); + } else if reason == "build-script-executed" { + let pretty = serde_json::to_string_pretty(&j).unwrap(); + println!("build script output: {pretty}",); + } + + // Only interested in the artifact list now + if reason != "compiler-artifact" { continue; } From df2e48eec60eb80012d8c62e2255d85c910be766 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 4 Jul 2025 19:30:31 -0500 Subject: [PATCH 1413/1459] Remove unused custom target JSON files 8521530f4938 ("Fix __divsi3 and __udivsi3 on thumbv6m targets") removed tests that use these `thumb*-linux` target files in favor of tests that use the `thumb*-none` targets, which are available via Rustup. The JSON files haven't been used since then and are outdated, so remove them. 
--- thumbv6m-linux-eabi.json | 28 ---------------------------- thumbv7em-linux-eabi.json | 27 --------------------------- thumbv7em-linux-eabihf.json | 28 ---------------------------- thumbv7m-linux-eabi.json | 27 --------------------------- 4 files changed, 110 deletions(-) delete mode 100644 thumbv6m-linux-eabi.json delete mode 100644 thumbv7em-linux-eabi.json delete mode 100644 thumbv7em-linux-eabihf.json delete mode 100644 thumbv7m-linux-eabi.json diff --git a/thumbv6m-linux-eabi.json b/thumbv6m-linux-eabi.json deleted file mode 100644 index ac736eae6..000000000 --- a/thumbv6m-linux-eabi.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "abi-blacklist": [ - "stdcall", - "fastcall", - "vectorcall", - "win64", - "sysv64" - ], - "arch": "arm", - "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64", - "env": "", - "executables": true, - "features": "+strict-align", - "linker": "arm-none-eabi-gcc", - "linker-flavor": "gcc", - "llvm-target": "thumbv6m-none-eabi", - "max-atomic-width": 0, - "os": "linux", - "panic-strategy": "abort", - "pre-link-args": { - "gcc": ["-nostartfiles"] - }, - "relocation-model": "static", - "target-endian": "little", - "target-pointer-width": "32", - "target-c-int-width": "32", - "vendor": "" -} diff --git a/thumbv7em-linux-eabi.json b/thumbv7em-linux-eabi.json deleted file mode 100644 index b6d4a6bda..000000000 --- a/thumbv7em-linux-eabi.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "abi-blacklist": [ - "stdcall", - "fastcall", - "vectorcall", - "win64", - "sysv64" - ], - "arch": "arm", - "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64", - "env": "", - "executables": true, - "linker": "arm-none-eabi-gcc", - "linker-flavor": "gcc", - "llvm-target": "thumbv7em-none-eabi", - "max-atomic-width": 32, - "os": "linux", - "panic-strategy": "abort", - "pre-link-args": { - "gcc": ["-nostartfiles"] - }, - "relocation-model": "static", - "target-endian": "little", - "target-pointer-width": "32", - "target-c-int-width": "32", - "vendor": 
"" -} diff --git a/thumbv7em-linux-eabihf.json b/thumbv7em-linux-eabihf.json deleted file mode 100644 index 81cfcd48d..000000000 --- a/thumbv7em-linux-eabihf.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "abi-blacklist": [ - "stdcall", - "fastcall", - "vectorcall", - "win64", - "sysv64" - ], - "arch": "arm", - "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64", - "env": "", - "executables": true, - "features": "+vfp4,+d16,+fp-only-sp", - "linker": "arm-none-eabi-gcc", - "linker-flavor": "gcc", - "llvm-target": "thumbv7em-none-eabihf", - "max-atomic-width": 32, - "os": "linux", - "panic-strategy": "abort", - "pre-link-args": { - "gcc": ["-nostartfiles"] - }, - "relocation-model": "static", - "target-endian": "little", - "target-pointer-width": "32", - "target-c-int-width": "32", - "vendor": "" -} diff --git a/thumbv7m-linux-eabi.json b/thumbv7m-linux-eabi.json deleted file mode 100644 index abe037c5b..000000000 --- a/thumbv7m-linux-eabi.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "abi-blacklist": [ - "stdcall", - "fastcall", - "vectorcall", - "win64", - "sysv64" - ], - "arch": "arm", - "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64", - "env": "", - "executables": true, - "linker": "arm-none-eabi-gcc", - "linker-flavor": "gcc", - "llvm-target": "thumbv7m-none-eabi", - "max-atomic-width": 32, - "os": "linux", - "panic-strategy": "abort", - "pre-link-args": { - "gcc": ["-nostartfiles"] - }, - "relocation-model": "static", - "target-endian": "little", - "target-pointer-width": "32", - "target-c-int-width": "32", - "vendor": "" -} From 8aba4c899ee89eef7fe688cdfa6629ddd56908f9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 4 Jul 2025 19:42:18 -0500 Subject: [PATCH 1414/1459] Test building custom targets and resolve an issue probing `rustc` The `rustc` probe done in our build scripts needs to pass `--target` to get the correct configuration, which usually comes from the `TARGET` environment variable. 
However, for targets specified via a `target.json` file, `TARGET` gets set to the file name without an extension or path. `rustc` will check a search path to attempt to locate the file, but this is likely to fail since the directory where Cargo invokes build scripts (and hence where those scripts invoke `rustc`) might not have any relation to the JSON spec file. Resolve this for now by leaving `f16` and `f128` disabled if the `rustc` command fails. Result of the discussion at CARGO-14208 may eventually provide a better solution. A CI test is also added since custom JSON files are an edge case that could fail in other ways. I verified this fails without the fix here. The JSON file is the output for `thumbv7em-none-eabi`, just renamed so `rustc` doesn't identify it. --- .github/workflows/main.yaml | 20 ++++++++++++++++++++ compiler-builtins/configure.rs | 13 ++++++++++--- etc/thumbv7em-none-eabi-renamed.json | 23 +++++++++++++++++++++++ libm/configure.rs | 13 ++++++++++--- 4 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 etc/thumbv7em-none-eabi-renamed.json diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 95b0962b0..541c99c82 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -195,6 +195,25 @@ jobs: run: ./ci/update-musl.sh - run: cargo clippy --workspace --all-targets + build-custom: + name: Build custom target + runs-on: ubuntu-24.04 + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + - name: Install Rust + run: | + rustup update nightly --no-self-update + rustup default nightly + rustup component add rust-src + - uses: Swatinem/rust-cache@v2 + - run: | + # Ensure we can build with custom target.json files (these can interact + # poorly with build scripts) + cargo build -p compiler_builtins -p libm \ + --target etc/thumbv7em-none-eabi-renamed.json \ + -Zbuild-std=core + benchmarks: name: Benchmarks timeout-minutes: 20 @@ -331,6 +350,7 @@ jobs: success: needs: - benchmarks + - 
build-custom - clippy - extensive - miri diff --git a/compiler-builtins/configure.rs b/compiler-builtins/configure.rs index a1e45080e..9721ddf09 100644 --- a/compiler-builtins/configure.rs +++ b/compiler-builtins/configure.rs @@ -45,9 +45,16 @@ impl Target { let out = cmd .output() .unwrap_or_else(|e| panic!("failed to run `{cmd:?}`: {e}")); - assert!(out.status.success(), "failed to run `{cmd:?}`"); let rustc_cfg = str::from_utf8(&out.stdout).unwrap(); + // If we couldn't query `rustc` (e.g. a custom JSON target was used), make the safe + // choice and leave `f16` and `f128` disabled. + let rustc_output_ok = out.status.success(); + let reliable_f128 = + rustc_output_ok && rustc_cfg.lines().any(|l| l == "target_has_reliable_f128"); + let reliable_f16 = + rustc_output_ok && rustc_cfg.lines().any(|l| l == "target_has_reliable_f16"); + Self { triple, triple_split, @@ -67,8 +74,8 @@ impl Target { .split(",") .map(ToOwned::to_owned) .collect(), - reliable_f128: rustc_cfg.lines().any(|l| l == "target_has_reliable_f128"), - reliable_f16: rustc_cfg.lines().any(|l| l == "target_has_reliable_f16"), + reliable_f128, + reliable_f16, } } diff --git a/etc/thumbv7em-none-eabi-renamed.json b/etc/thumbv7em-none-eabi-renamed.json new file mode 100644 index 000000000..81273d44e --- /dev/null +++ b/etc/thumbv7em-none-eabi-renamed.json @@ -0,0 +1,23 @@ +{ + "abi": "eabi", + "arch": "arm", + "c-enum-min-bits": 8, + "crt-objects-fallback": "false", + "data-layout": "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64", + "emit-debug-gdb-scripts": false, + "frame-pointer": "always", + "linker": "rust-lld", + "linker-flavor": "gnu-lld", + "llvm-floatabi": "soft", + "llvm-target": "thumbv7em-none-eabi", + "max-atomic-width": 32, + "metadata": { + "description": "Bare ARMv7E-M", + "host_tools": false, + "std": false, + "tier": 2 + }, + "panic-strategy": "abort", + "relocation-model": "static", + "target-pointer-width": "32" +} diff --git a/libm/configure.rs b/libm/configure.rs index 
6562ecbe5..f9100d2d5 100644 --- a/libm/configure.rs +++ b/libm/configure.rs @@ -43,9 +43,16 @@ impl Config { let out = cmd .output() .unwrap_or_else(|e| panic!("failed to run `{cmd:?}`: {e}")); - assert!(out.status.success(), "failed to run `{cmd:?}`"); let rustc_cfg = str::from_utf8(&out.stdout).unwrap(); + // If we couldn't query `rustc` (e.g. a custom JSON target was used), make the safe + // choice and leave `f16` and `f128` disabled. + let rustc_output_ok = out.status.success(); + let reliable_f128 = + rustc_output_ok && rustc_cfg.lines().any(|l| l == "target_has_reliable_f128"); + let reliable_f16 = + rustc_output_ok && rustc_cfg.lines().any(|l| l == "target_has_reliable_f16"); + Self { target_triple, manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()), @@ -59,8 +66,8 @@ impl Config { target_string: env::var("TARGET").unwrap(), target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), target_features, - reliable_f128: rustc_cfg.lines().any(|l| l == "target_has_reliable_f128"), - reliable_f16: rustc_cfg.lines().any(|l| l == "target_has_reliable_f16"), + reliable_f128, + reliable_f16, } } } From 735e44f95fc1dbf3c2302c391fa6aed54ce58e7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Fri, 4 Jul 2025 22:15:27 +0200 Subject: [PATCH 1415/1459] Remove josh-sync crate --- Cargo.toml | 1 - crates/josh-sync/Cargo.toml | 8 - crates/josh-sync/src/main.rs | 45 ---- crates/josh-sync/src/sync.rs | 401 ----------------------------------- 4 files changed, 455 deletions(-) delete mode 100644 crates/josh-sync/Cargo.toml delete mode 100644 crates/josh-sync/src/main.rs delete mode 100644 crates/josh-sync/src/sync.rs diff --git a/Cargo.toml b/Cargo.toml index 41350c6cb..956d738f3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,6 @@ resolver = "2" members = [ "builtins-shim", "builtins-test", - "crates/josh-sync", "crates/libm-macros", "crates/musl-math-sys", "crates/panic-handler", diff --git a/crates/josh-sync/Cargo.toml 
b/crates/josh-sync/Cargo.toml deleted file mode 100644 index 8e2e891db..000000000 --- a/crates/josh-sync/Cargo.toml +++ /dev/null @@ -1,8 +0,0 @@ -[package] -name = "josh-sync" -edition = "2024" -publish = false - -[dependencies] -directories = "6.0.0" -regex-lite = "0.1.6" diff --git a/crates/josh-sync/src/main.rs b/crates/josh-sync/src/main.rs deleted file mode 100644 index 7f0b11900..000000000 --- a/crates/josh-sync/src/main.rs +++ /dev/null @@ -1,45 +0,0 @@ -use std::io::{Read, Write}; -use std::process::exit; -use std::{env, io}; - -use crate::sync::{GitSync, Josh}; - -mod sync; - -const USAGE: &str = r#"Utility for synchroniing compiler-builtins with rust-lang/rust - -Usage: - - josh-sync rustc-pull - - Pull from rust-lang/rust to compiler-builtins. Creates a commit - updating the version file, followed by a merge commit. - - josh-sync rustc-push GITHUB_USERNAME [BRANCH] - - Create a branch off of rust-lang/rust updating compiler-builtins. -"#; - -fn main() { - let sync = GitSync::from_current_dir(); - - // Collect args, then recollect as str refs so we can match on them - let args: Vec<_> = env::args().collect(); - let args: Vec<&str> = args.iter().map(String::as_str).collect(); - - match args.as_slice()[1..] 
{ - ["rustc-pull"] => sync.rustc_pull(None), - ["rustc-push", github_user, branch] => sync.rustc_push(github_user, Some(branch)), - ["rustc-push", github_user] => sync.rustc_push(github_user, None), - ["start-josh"] => { - let _josh = Josh::start(); - println!("press enter to stop"); - io::stdout().flush().unwrap(); - let _ = io::stdin().read(&mut [0u8]).unwrap(); - } - _ => { - println!("{USAGE}"); - exit(1); - } - } -} diff --git a/crates/josh-sync/src/sync.rs b/crates/josh-sync/src/sync.rs deleted file mode 100644 index 2d89d2d1c..000000000 --- a/crates/josh-sync/src/sync.rs +++ /dev/null @@ -1,401 +0,0 @@ -use std::borrow::Cow; -use std::net::{SocketAddr, TcpStream}; -use std::process::{Command, Stdio, exit}; -use std::time::Duration; -use std::{env, fs, process, thread}; - -use regex_lite::Regex; - -const JOSH_PORT: u16 = 42042; -const DEFAULT_PR_BRANCH: &str = "update-builtins"; - -pub struct GitSync { - upstream_repo: String, - upstream_ref: String, - upstream_url: String, - josh_filter: String, - josh_url_base: String, -} - -/// This code was adapted from the miri repository, via the rustc-dev-guide -/// () -impl GitSync { - pub fn from_current_dir() -> Self { - let upstream_repo = - env::var("UPSTREAM_ORG").unwrap_or_else(|_| "rust-lang".to_owned()) + "/rust"; - - Self { - upstream_url: format!("https://github.com/{upstream_repo}"), - upstream_repo, - upstream_ref: env::var("UPSTREAM_REF").unwrap_or_else(|_| "HEAD".to_owned()), - josh_filter: ":/library/compiler-builtins".to_owned(), - josh_url_base: format!("http://localhost:{JOSH_PORT}"), - } - } - - /// Pull from rust-lang/rust to compiler-builtins. - pub fn rustc_pull(&self, commit: Option) { - let Self { - upstream_ref, - upstream_url, - upstream_repo, - .. 
- } = self; - - let new_upstream_base = commit.unwrap_or_else(|| { - let out = check_output(["git", "ls-remote", upstream_url, upstream_ref]); - out.split_whitespace() - .next() - .unwrap_or_else(|| panic!("could not split output: '{out}'")) - .to_owned() - }); - - ensure_clean(); - - // Make sure josh is running. - let _josh = Josh::start(); - let josh_url_filtered = self.josh_url( - &self.upstream_repo, - Some(&new_upstream_base), - Some(&self.josh_filter), - ); - - let previous_upstream_base = fs::read_to_string("rust-version") - .expect("failed to read `rust-version`") - .trim() - .to_string(); - assert_ne!(previous_upstream_base, new_upstream_base, "nothing to pull"); - - let orig_head = check_output(["git", "rev-parse", "HEAD"]); - println!("original upstream base: {previous_upstream_base}"); - println!("new upstream base: {new_upstream_base}"); - println!("original HEAD: {orig_head}"); - - // Fetch the latest upstream HEAD so we can get a summary. Use the Josh URL for caching. - run([ - "git", - "fetch", - &self.josh_url(&self.upstream_repo, Some(&new_upstream_base), Some(":/")), - &new_upstream_base, - "--depth=1", - ]); - let new_summary = check_output(["git", "log", "-1", "--format=%h %s", &new_upstream_base]); - let new_summary = replace_references(&new_summary, &self.upstream_repo); - - // Update rust-version file. As a separate commit, since making it part of - // the merge has confused the heck out of josh in the past. - // We pass `--no-verify` to avoid running git hooks. - // We do this before the merge so that if there are merge conflicts, we have - // the right rust-version file while resolving them. 
- fs::write("rust-version", format!("{new_upstream_base}\n")) - .expect("failed to write rust-version"); - - let prep_message = format!( - "Update the upstream Rust version\n\n\ - To prepare for merging from {upstream_repo}, set the version file to:\n\n \ - {new_summary}\n\ - ", - ); - run([ - "git", - "commit", - "rust-version", - "--no-verify", - "-m", - &prep_message, - ]); - - // Fetch given rustc commit. - run(["git", "fetch", &josh_url_filtered]); - let incoming_ref = check_output(["git", "rev-parse", "FETCH_HEAD"]); - println!("incoming ref: {incoming_ref}"); - - let merge_message = format!( - "Merge ref '{upstream_head_short}{filter}' from {upstream_url}\n\n\ - Pull recent changes from {upstream_repo} via Josh.\n\n\ - Upstream ref: {new_upstream_base}\n\ - Filtered ref: {incoming_ref}\n\ - ", - upstream_head_short = &new_upstream_base[..12], - filter = self.josh_filter - ); - - // This should not add any new root commits. So count those before and after merging. - let num_roots = || -> u32 { - let out = check_output(["git", "rev-list", "HEAD", "--max-parents=0", "--count"]); - out.trim() - .parse::() - .unwrap_or_else(|e| panic!("failed to parse `{out}`: {e}")) - }; - let num_roots_before = num_roots(); - - let pre_merge_sha = check_output(["git", "rev-parse", "HEAD"]); - println!("pre-merge HEAD: {pre_merge_sha}"); - - // Merge the fetched commit. - run([ - "git", - "merge", - "FETCH_HEAD", - "--no-verify", - "--no-ff", - "-m", - &merge_message, - ]); - - let current_sha = check_output(["git", "rev-parse", "HEAD"]); - if current_sha == pre_merge_sha { - run(["git", "reset", "--hard", &orig_head]); - eprintln!( - "No merge was performed, no changes to pull were found. \ - Rolled back the preparation commit." - ); - exit(1); - } - - // Check that the number of roots did not increase. - assert_eq!( - num_roots(), - num_roots_before, - "Josh created a new root commit. This is probably not the history you want." 
- ); - } - - /// Construct an update to rust-lang/rust from compiler-builtins. - pub fn rustc_push(&self, github_user: &str, branch: Option<&str>) { - let Self { - josh_filter, - upstream_url, - .. - } = self; - - let branch = branch.unwrap_or(DEFAULT_PR_BRANCH); - let josh_url = self.josh_url(&format!("{github_user}/rust"), None, Some(josh_filter)); - let user_upstream_url = format!("git@github.com:{github_user}/rust.git"); - - let Ok(rustc_git) = env::var("RUSTC_GIT") else { - panic!("the RUSTC_GIT environment variable must be set to a rust-lang/rust checkout") - }; - - ensure_clean(); - let base = fs::read_to_string("rust-version") - .expect("failed to read `rust-version`") - .trim() - .to_string(); - - // Make sure josh is running. - let _josh = Josh::start(); - - // Prepare the branch. Pushing works much better if we use as base exactly - // the commit that we pulled from last time, so we use the `rust-version` - // file to find out which commit that would be. - println!("Preparing {github_user}/rust (base: {base})..."); - - if Command::new("git") - .args(["-C", &rustc_git, "fetch", &user_upstream_url, branch]) - .output() // capture output - .expect("could not run fetch") - .status - .success() - { - panic!( - "The branch '{branch}' seems to already exist in '{user_upstream_url}'. \ - Please delete it and try again." - ); - } - - run(["git", "-C", &rustc_git, "fetch", upstream_url, &base]); - - run_cfg("git", |c| { - c.args([ - "-C", - &rustc_git, - "push", - &user_upstream_url, - &format!("{base}:refs/heads/{branch}"), - ]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) // silence the "create GitHub PR" message - }); - println!("pushed PR branch"); - - // Do the actual push. - println!("Pushing changes..."); - run(["git", "push", &josh_url, &format!("HEAD:{branch}")]); - println!(); - - // Do a round-trip check to make sure the push worked as expected. 
- run(["git", "fetch", &josh_url, branch]); - - let head = check_output(["git", "rev-parse", "HEAD"]); - let fetch_head = check_output(["git", "rev-parse", "FETCH_HEAD"]); - assert_eq!( - head, fetch_head, - "Josh created a non-roundtrip push! Do NOT merge this into rustc!\n\ - Expected {head}, got {fetch_head}." - ); - println!( - "Confirmed that the push round-trips back to compiler-builtins properly. Please \ - create a rustc PR:" - ); - // Open PR with `subtree update` title to silence the `no-merges` triagebot check - println!( - " {upstream_url}/compare/{github_user}:{branch}?quick_pull=1\ - &title=Update%20the%20%60compiler-builtins%60%20subtree\ - &body=Update%20the%20Josh%20subtree%20to%20https%3A%2F%2Fgithub.com%2Frust-lang%2F\ - compiler-builtins%2Fcommit%2F{head_short}.%0A%0Ar%3F%20%40ghost", - head_short = &head[..12], - ); - } - - /// Construct a url to the local Josh server with (optionally) - fn josh_url(&self, repo: &str, rev: Option<&str>, filter: Option<&str>) -> String { - format!( - "{base}/{repo}.git{at}{rev}{filter}{filt_git}", - base = self.josh_url_base, - at = if rev.is_some() { "@" } else { "" }, - rev = rev.unwrap_or_default(), - filter = filter.unwrap_or_default(), - filt_git = if filter.is_some() { ".git" } else { "" } - ) - } -} - -/// Fail if there are files that need to be checked in. -fn ensure_clean() { - let read = check_output(["git", "status", "--untracked-files=no", "--porcelain"]); - assert!( - read.is_empty(), - "working directory must be clean before performing rustc pull" - ); -} - -/* Helpers for running commands with logged invocations */ - -/// Run a command from an array, passing its output through. -fn run<'a, Args: AsRef<[&'a str]>>(l: Args) { - let l = l.as_ref(); - run_cfg(l[0], |c| c.args(&l[1..])); -} - -/// Run a command from an array, collecting its output. 
-fn check_output<'a, Args: AsRef<[&'a str]>>(l: Args) -> String { - let l = l.as_ref(); - check_output_cfg(l[0], |c| c.args(&l[1..])) -} - -/// [`run`] with configuration. -fn run_cfg(prog: &str, f: impl FnOnce(&mut Command) -> &mut Command) { - // self.read(l.as_ref()); - check_output_cfg(prog, |c| f(c.stdout(Stdio::inherit()))); -} - -/// [`read`] with configuration. All shell helpers print the command and pass stderr. -fn check_output_cfg(prog: &str, f: impl FnOnce(&mut Command) -> &mut Command) -> String { - let mut cmd = Command::new(prog); - cmd.stderr(Stdio::inherit()); - f(&mut cmd); - eprintln!("+ {cmd:?}"); - let out = cmd.output().expect("command failed"); - assert!(out.status.success()); - String::from_utf8(out.stdout.trim_ascii().to_vec()).expect("non-UTF8 output") -} - -/// Replace `#1234`-style issue/PR references with `repo#1234` to ensure links work across -/// repositories. -fn replace_references<'a>(s: &'a str, repo: &str) -> Cow<'a, str> { - let re = Regex::new(r"\B(?P#\d+)\b").unwrap(); - re.replace(s, &format!("{repo}$id")) -} - -/// Create a wrapper that stops Josh on drop. -pub struct Josh(process::Child); - -impl Josh { - pub fn start() -> Self { - // Determine cache directory. - let user_dirs = - directories::ProjectDirs::from("org", "rust-lang", "rustc-compiler-builtins-josh") - .unwrap(); - let local_dir = user_dirs.cache_dir().to_owned(); - - // Start josh, silencing its output. - #[expect(clippy::zombie_processes, reason = "clippy can't handle the loop")] - let josh = process::Command::new("josh-proxy") - .arg("--local") - .arg(local_dir) - .args([ - "--remote=https://github.com", - &format!("--port={JOSH_PORT}"), - "--no-background", - ]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .spawn() - .expect("failed to start josh-proxy, make sure it is installed"); - - // Wait until the port is open. We try every 10ms until 1s passed. - for _ in 0..100 { - // This will generally fail immediately when the port is still closed. 
- let addr = SocketAddr::from(([127, 0, 0, 1], JOSH_PORT)); - let josh_ready = TcpStream::connect_timeout(&addr, Duration::from_millis(1)); - - if josh_ready.is_ok() { - println!("josh up and running"); - return Josh(josh); - } - - // Not ready yet. - thread::sleep(Duration::from_millis(10)); - } - panic!("Even after waiting for 1s, josh-proxy is still not available.") - } -} - -impl Drop for Josh { - fn drop(&mut self) { - if cfg!(unix) { - // Try to gracefully shut it down. - Command::new("kill") - .args(["-s", "INT", &self.0.id().to_string()]) - .output() - .expect("failed to SIGINT josh-proxy"); - // Sadly there is no "wait with timeout"... so we just give it some time to finish. - thread::sleep(Duration::from_millis(100)); - // Now hopefully it is gone. - if self - .0 - .try_wait() - .expect("failed to wait for josh-proxy") - .is_some() - { - return; - } - } - // If that didn't work (or we're not on Unix), kill it hard. - eprintln!( - "I have to kill josh-proxy the hard way, let's hope this does not \ - break anything." 
- ); - self.0.kill().expect("failed to SIGKILL josh-proxy"); - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_replace() { - assert_eq!(replace_references("#1234", "r-l/rust"), "r-l/rust#1234"); - assert_eq!(replace_references("#1234x", "r-l/rust"), "#1234x"); - assert_eq!( - replace_references("merge #1234", "r-l/rust"), - "merge r-l/rust#1234" - ); - assert_eq!( - replace_references("foo/bar#1234", "r-l/rust"), - "foo/bar#1234" - ); - } -} From 6e9d1cfbe9db5a1ffe1eb7102ec6dc6219de6105 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Fri, 4 Jul 2025 22:15:56 +0200 Subject: [PATCH 1416/1459] Add josh-sync config file --- josh-sync.toml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 josh-sync.toml diff --git a/josh-sync.toml b/josh-sync.toml new file mode 100644 index 000000000..599a12af8 --- /dev/null +++ b/josh-sync.toml @@ -0,0 +1,3 @@ +org = "rust-lang" +repo = "compiler-builtins" +path = "library/compiler-builtins" From e350b9a406797b36a04099bc7b1cf7a2b7a5729c Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Tue, 8 Jul 2025 16:38:35 -0700 Subject: [PATCH 1417/1459] Disable docs for `compiler-builtins` and `sysroot` Bootstrap already had a manual doc filter for the `sysroot` crate, but other library crates keep themselves out of the public docs by setting `[lib] doc = false` in their manifest. This seems like a better solution to hide `compiler-builtins` docs, and removes the `sysroot` hack too. 
--- compiler-builtins/Cargo.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index c5446cd76..3ccb05f73 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -19,6 +19,8 @@ links = "compiler-rt" bench = false doctest = false test = false +# make sure this crate isn't included in public standard library docs +doc = false [dependencies] core = { path = "../../core", optional = true } From 0f2c11540ac3bf1debb3afc68ea56b81f7c5f45d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Fri, 4 Jul 2025 22:17:22 +0200 Subject: [PATCH 1418/1459] Add documentation about subtree sync --- CONTRIBUTING.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9f67cfc31..9ae4f893c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -165,3 +165,12 @@ cargo bench --no-default-features \ [`iai-callgrind-runner`]: https://crates.io/crates/iai-callgrind-runner [Valgrind]: https://valgrind.org/ + +## Subtree synchronization + +`compiler-builtins` is included as a [Josh subtree] in the main compiler +repository (`rust-lang/rust`). You can find a guide on how to create synchronization +(pull and push) PRs at the [`rustc-dev-guide` page]. + +[Josh subtree]: https://rustc-dev-guide.rust-lang.org/external-repos.html#josh-subtrees +[`rustc-dev-guide` page]: https://rustc-dev-guide.rust-lang.org/external-repos.html#synchronizing-a-josh-subtree From 90bd9f53aefd4fc40130993c395983bc1d9fb44e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 10 Jul 2025 17:50:49 -0400 Subject: [PATCH 1419/1459] Upgrade dependencies to the latest version This picks up a fix in `rustc_apfloat` [1] that resolves a problem with `fma`. 
[1]: https://github.com/rust-lang/rustc_apfloat/releases/tag/rustc_apfloat-v0.2.3%2Bllvm-462a31f5a5ab --- builtins-test/Cargo.toml | 2 +- crates/libm-macros/Cargo.toml | 2 +- crates/musl-math-sys/Cargo.toml | 2 +- libm-test/Cargo.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/builtins-test/Cargo.toml b/builtins-test/Cargo.toml index 093d4633f..4607342cd 100644 --- a/builtins-test/Cargo.toml +++ b/builtins-test/Cargo.toml @@ -12,7 +12,7 @@ license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" # `xoshiro128**` is used for its quality, size, and speed at generating `u32` shift amounts. rand_xoshiro = "0.7" # To compare float builtins against -rustc_apfloat = "0.2.2" +rustc_apfloat = "0.2.3" # Really a dev dependency, but dev dependencies can't be optional iai-callgrind = { version = "0.14.1", optional = true } diff --git a/crates/libm-macros/Cargo.toml b/crates/libm-macros/Cargo.toml index 6bbf47784..100a8d0ec 100644 --- a/crates/libm-macros/Cargo.toml +++ b/crates/libm-macros/Cargo.toml @@ -12,7 +12,7 @@ proc-macro = true heck = "0.5.0" proc-macro2 = "1.0.95" quote = "1.0.40" -syn = { version = "2.0.101", features = ["full", "extra-traits", "visit-mut"] } +syn = { version = "2.0.104", features = ["full", "extra-traits", "visit-mut"] } [lints.rust] # Values used during testing diff --git a/crates/musl-math-sys/Cargo.toml b/crates/musl-math-sys/Cargo.toml index 3b8811734..39f6fa906 100644 --- a/crates/musl-math-sys/Cargo.toml +++ b/crates/musl-math-sys/Cargo.toml @@ -11,4 +11,4 @@ license = "MIT OR Apache-2.0" libm = { path = "../../libm" } [build-dependencies] -cc = "1.2.25" +cc = "1.2.29" diff --git a/libm-test/Cargo.toml b/libm-test/Cargo.toml index 05fcc3234..e577288c9 100644 --- a/libm-test/Cargo.toml +++ b/libm-test/Cargo.toml @@ -32,7 +32,7 @@ anyhow = "1.0.98" # This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`. 
gmp-mpfr-sys = { version = "1.6.5", optional = true, default-features = false } iai-callgrind = { version = "0.14.1", optional = true } -indicatif = { version = "0.17.11", default-features = false } +indicatif = { version = "0.18.0", default-features = false } libm = { path = "../libm", features = ["unstable-public-internals"] } libm-macros = { path = "../crates/libm-macros" } musl-math-sys = { path = "../crates/musl-math-sys", optional = true } From 7bba268efa238a171dcc4ee5c1dc6f28310670f2 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 10 Jul 2025 17:55:08 -0400 Subject: [PATCH 1420/1459] Upgrade `iai-callgrind` to 0.15 Pick up the latest version of iai-callgrind, which includes some output improvements. Changelog: https://github.com/iai-callgrind/iai-callgrind/releases --- builtins-test/Cargo.toml | 2 +- ci/bench-icount.sh | 2 +- libm-test/Cargo.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/builtins-test/Cargo.toml b/builtins-test/Cargo.toml index 4607342cd..00a9d8579 100644 --- a/builtins-test/Cargo.toml +++ b/builtins-test/Cargo.toml @@ -14,7 +14,7 @@ rand_xoshiro = "0.7" # To compare float builtins against rustc_apfloat = "0.2.3" # Really a dev dependency, but dev dependencies can't be optional -iai-callgrind = { version = "0.14.1", optional = true } +iai-callgrind = { version = "0.15.2", optional = true } [dependencies.compiler_builtins] path = "../builtins-shim" diff --git a/ci/bench-icount.sh b/ci/bench-icount.sh index d2baebb52..12228b9da 100755 --- a/ci/bench-icount.sh +++ b/ci/bench-icount.sh @@ -28,7 +28,7 @@ function run_icount_benchmarks() { iai_args=( "--home" "$(pwd)/$iai_home" - "--regression=ir=5.0" + "--callgrind-limits=ir=5.0" "--save-summary" ) diff --git a/libm-test/Cargo.toml b/libm-test/Cargo.toml index e577288c9..0af6b0c1d 100644 --- a/libm-test/Cargo.toml +++ b/libm-test/Cargo.toml @@ -31,7 +31,7 @@ short-benchmarks = [] anyhow = "1.0.98" # This is not directly used but is required so we can enable 
`gmp-mpfr-sys/force-cross`. gmp-mpfr-sys = { version = "1.6.5", optional = true, default-features = false } -iai-callgrind = { version = "0.14.1", optional = true } +iai-callgrind = { version = "0.15.2", optional = true } indicatif = { version = "0.18.0", default-features = false } libm = { path = "../libm", features = ["unstable-public-internals"] } libm-macros = { path = "../crates/libm-macros" } From 6af9880f89ba621659842ea5cc6722eb53837af3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Sat, 12 Jul 2025 22:30:19 +0200 Subject: [PATCH 1421/1459] Add CI workflow for automatically performing subtree sync pulls This CI workflow will run the https://github.com/rust-lang/josh-sync tool on Mondays and Thursdays. It will try to do a pull (sync compiler-builtins changes from rust-lang/rust into this repository). When it runs, three things can happen: - There are no rustc changes to be pulled, the bot does nothing. - There are some new changes to be pulled. In that case, the bot will either open or update an existing PR titled "Rustc pull update" on this repository with the changes. After the PR is merged, we should ideally do the opposite sync (push) manually. - The pull fails (usually because of a merge conflict), or the bot determines that a pull PR has been opened for more than a week without being merged. In that case, it will post a ping to https://rust-lang.zulipchat.com/#narrow/channel/219381-t-libs/topic/compiler-builtins.20subtree.20sync.20automation/with/528482375. --- .github/workflows/rustc-pull.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/rustc-pull.yml diff --git a/.github/workflows/rustc-pull.yml b/.github/workflows/rustc-pull.yml new file mode 100644 index 000000000..ba698492e --- /dev/null +++ b/.github/workflows/rustc-pull.yml @@ -0,0 +1,23 @@ +# Perform a subtree sync (pull) using the josh-sync tool once every few days (or on demand).
+name: rustc-pull + +on: + workflow_dispatch: + schedule: + # Run at 04:00 UTC every Monday and Thursday + - cron: '0 4 * * 1,4' + +jobs: + pull: + if: github.repository == 'rust-lang/compiler-builtins' + uses: rust-lang/josh-sync/.github/workflows/rustc-pull.yml@main + with: + # https://rust-lang.zulipchat.com/#narrow/channel/219381-t-libs/topic/compiler-builtins.20subtree.20sync.20automation/with/528482375 + zulip-stream-id: 219381 + zulip-topic: 'compiler-builtins subtree sync automation' + zulip-bot-email: "compiler-builtins-ci-bot@rust-lang.zulipchat.com" + pr-base-branch: master + branch-name: rustc-pull + secrets: + zulip-api-token: ${{ secrets.ZULIP_API_TOKEN }} + token: ${{ secrets.GITHUB_TOKEN }} From 599f0e6408bef29a69f986afc71fa76e732ddd60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Sat, 12 Jul 2025 22:41:45 +0200 Subject: [PATCH 1422/1459] Tell triagebot to reopen bot PRs to run CI on them --- triagebot.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/triagebot.toml b/triagebot.toml index ecc05da01..715be27fc 100644 --- a/triagebot.toml +++ b/triagebot.toml @@ -19,3 +19,6 @@ check-commits = false # Enable issue transfers within the org # Documentation at: https://forge.rust-lang.org/triagebot/transfer.html [transfer] + +# Automatically close and reopen PRs made by bots to run CI on them +[bot-pull-requests] From e8cfc9493c167390a267aea27aadc134692879a9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 17 Jul 2025 03:58:43 -0500 Subject: [PATCH 1423/1459] Allow a new lint failure in nightly ```text warning: function `f32_to_bits` is never used --> libm/src/math/support/float_traits.rs:367:14 | 367 | pub const fn f32_to_bits(x: f32) -> u32 { | ^^^^^^^^^^^ | = note: `#[warn(dead_code)]` on by default warning: function `f64_to_bits` is never used --> libm/src/math/support/float_traits.rs:381:14 | 381 | pub const fn f64_to_bits(x: f64) -> u64 { | ^^^^^^^^^^^ warning: `libm` (lib) generated 2 warnings ``` This is a 
false positive, see RUST-144060. --- libm/src/math/support/float_traits.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs index c3e7eeec2..fb790e696 100644 --- a/libm/src/math/support/float_traits.rs +++ b/libm/src/math/support/float_traits.rs @@ -363,6 +363,7 @@ pub const fn f32_from_bits(bits: u32) -> f32 { } /// `f32::to_bits` +#[allow(dead_code)] // workaround for false positive RUST-144060 #[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust pub const fn f32_to_bits(x: f32) -> u32 { // SAFETY: POD cast with no preconditions @@ -377,6 +378,7 @@ pub const fn f64_from_bits(bits: u64) -> f64 { } /// `f64::to_bits` +#[allow(dead_code)] // workaround for false positive RUST-144060 #[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust pub const fn f64_to_bits(x: f64) -> u64 { // SAFETY: POD cast with no preconditions From 59b329a79d8a1e667597c59535a27a5836ba86d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Thu, 17 Jul 2025 12:30:52 +0200 Subject: [PATCH 1424/1459] Update the `no-merges` PR title Match the new CI-created PRs: https://github.com/rust-lang/compiler-builtins/pull/974. --- triagebot.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/triagebot.toml b/triagebot.toml index 715be27fc..8a2356c2b 100644 --- a/triagebot.toml +++ b/triagebot.toml @@ -4,7 +4,7 @@ # Warns when a PR contains merge commits # Documentation at: https://forge.rust-lang.org/triagebot/no-merge.html [no-merges] -exclude_titles = ["Update from"] +exclude_titles = ["Rustc pull update"] # Canonicalize issue numbers to avoid closing the wrong issue # when commits are included in subtrees, as well as warning links in commits. 
From 5d33f9d9f3e985fd1748386845a30ef4a7d4fc55 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 18 Jul 2025 17:35:57 +0000 Subject: [PATCH 1425/1459] mem: Use `core::ffi::c_int` This alias was added in 9897bfb8a ("Fix memset arguments for MSP430 target"), which predates `core::ffi`. Now that it exists we can just use `core::ffi::c_int`. --- compiler-builtins/src/mem/mod.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/compiler-builtins/src/mem/mod.rs b/compiler-builtins/src/mem/mod.rs index 6828f3804..a6f533cb7 100644 --- a/compiler-builtins/src/mem/mod.rs +++ b/compiler-builtins/src/mem/mod.rs @@ -3,13 +3,6 @@ // FIXME(e2024): this eventually needs to be removed. #![allow(unsafe_op_in_unsafe_fn)] -#[allow(warnings)] -#[cfg(target_pointer_width = "16")] -type c_int = i16; -#[allow(warnings)] -#[cfg(not(target_pointer_width = "16"))] -type c_int = i32; - // memcpy/memmove/memset have optimized implementations on some architectures #[cfg_attr( all(not(feature = "no-asm"), target_arch = "x86_64"), @@ -38,7 +31,7 @@ intrinsics! { } #[mem_builtin] - pub unsafe extern "C" fn memset(s: *mut u8, c: crate::mem::c_int, n: usize) -> *mut u8 { + pub unsafe extern "C" fn memset(s: *mut u8, c: core::ffi::c_int, n: usize) -> *mut u8 { impls::set_bytes(s, c as u8, n); s } From 556be9bfc9f2bea7d462388143039737184f00d0 Mon Sep 17 00:00:00 2001 From: Julien THILLARD <54775010+supersurviveur@users.noreply.github.com> Date: Fri, 18 Jul 2025 20:19:13 +0200 Subject: [PATCH 1426/1459] Change the `memcmp` and `bcmp` return type to `c_int` Fix the return type of `memcmp` and `bcmp` builtin functions on targets with a `c_int` other than `i32`. 
Linked issue: https://github.com/rust-lang/rust/issues/144076 --- compiler-builtins/src/mem/impls.rs | 5 +++-- compiler-builtins/src/mem/mod.rs | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/compiler-builtins/src/mem/impls.rs b/compiler-builtins/src/mem/impls.rs index 14a478748..da16dee25 100644 --- a/compiler-builtins/src/mem/impls.rs +++ b/compiler-builtins/src/mem/impls.rs @@ -15,6 +15,7 @@ // this use. Of course this is not a guarantee that such use will work, it just means that this // crate doing wrapping pointer arithmetic with a method that must not wrap won't be the problem if // something does go wrong at runtime. +use core::ffi::c_int; use core::intrinsics::likely; const WORD_SIZE: usize = core::mem::size_of::<usize>(); @@ -384,13 +385,13 @@ pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) { } #[inline(always)] -pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 { +pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> c_int { let mut i = 0; while i < n { let a = *s1.wrapping_add(i); let b = *s2.wrapping_add(i); if a != b { - return a as i32 - b as i32; + return c_int::from(a) - c_int::from(b); } i += 1; } diff --git a/compiler-builtins/src/mem/mod.rs b/compiler-builtins/src/mem/mod.rs index a6f533cb7..a227f60a2 100644 --- a/compiler-builtins/src/mem/mod.rs +++ b/compiler-builtins/src/mem/mod.rs @@ -37,12 +37,12 @@ intrinsics!
{ } #[mem_builtin] - pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { + pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> core::ffi::c_int { impls::compare_bytes(s1, s2, n) } #[mem_builtin] - pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { + pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> core::ffi::c_int { memcmp(s1, s2, n) } From 794c772e31c6b4b3fcd79a7d5e103ad24787c062 Mon Sep 17 00:00:00 2001 From: The rustc-josh-sync Cronjob Bot Date: Fri, 18 Jul 2025 19:04:50 +0000 Subject: [PATCH 1427/1459] Prepare for merging from rust-lang/rust This updates the rust-version file to 82310651b93a594a3fd69015e1562186a080d94c. --- rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-version b/rust-version index 731839835..a4db05a87 100644 --- a/rust-version +++ b/rust-version @@ -1 +1 @@ -d087f112b7d1323446c7b39a8b616aee7fa56b3d +82310651b93a594a3fd69015e1562186a080d94c From 6aed0ee92adbdea0d99b7539b31efd3f4d6bc4e9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 21 Jul 2025 12:18:07 -0500 Subject: [PATCH 1428/1459] ci: Switch to nightly rustfmt We are getting warnings in CI about unsupported features. There isn't any reason to use stable rustfmt so switch the channel here. 
--- .github/workflows/main.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 541c99c82..972f1b898 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -311,8 +311,8 @@ jobs: timeout-minutes: 10 steps: - uses: actions/checkout@v4 - - name: Install stable `rustfmt` - run: rustup set profile minimal && rustup default stable && rustup component add rustfmt + - name: Install nightly `rustfmt` + run: rustup set profile minimal && rustup default nightly && rustup component add rustfmt - run: cargo fmt -- --check extensive: From 0822c2615343f4b5fb7b46fc7231bf2d3aa6c37b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 23 Jul 2025 04:50:41 -0500 Subject: [PATCH 1429/1459] ci: Add native PowerPC64LE and s390x jobs We now have access to native runners, so make use of them for these architectures. The existing ppc64le Docker job is kept for now. --- .github/workflows/main.yaml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 972f1b898..6c98a60d2 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -70,8 +70,12 @@ jobs: os: ubuntu-24.04 - target: powerpc64le-unknown-linux-gnu os: ubuntu-24.04 + - target: powerpc64le-unknown-linux-gnu + os: ubuntu-24.04-ppc64le - target: riscv64gc-unknown-linux-gnu os: ubuntu-24.04 + - target: s390x-unknown-linux-gnu + os: ubuntu-24.04-s390x - target: thumbv6m-none-eabi os: ubuntu-24.04 - target: thumbv7em-none-eabi @@ -105,8 +109,21 @@ jobs: TEST_VERBATIM: ${{ matrix.test_verbatim }} MAY_SKIP_LIBM_CI: ${{ needs.calculate_vars.outputs.may_skip_libm_ci }} steps: + - name: Print $HOME + shell: bash + run: | + set -x + echo "${HOME:-not found}" + pwd + printenv - name: Print runner information run: uname -a + + # Native ppc and s390x runners don't have rustup by default + - name: Install rustup + if: matrix.os == 
'ubuntu-24.04-ppc64le' || matrix.os == 'ubuntu-24.04-s390x' + run: sudo apt-get update && sudo apt-get install -y rustup + - uses: actions/checkout@v4 - name: Install Rust (rustup) shell: bash @@ -117,7 +134,12 @@ jobs: rustup update "$channel" --no-self-update rustup default "$channel" rustup target add "${{ matrix.target }}" + + # Our scripts use nextest if possible. This is skipped on the native ppc + # and s390x runners since install-action doesn't support them. - uses: taiki-e/install-action@nextest + if: "!(matrix.os == 'ubuntu-24.04-ppc64le' || matrix.os == 'ubuntu-24.04-s390x')" + - uses: Swatinem/rust-cache@v2 with: key: ${{ matrix.target }} From 61f16d0da29e362f9679bca31b1ad06781b3442c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 24 Jul 2025 01:20:55 -0500 Subject: [PATCH 1430/1459] ci: Update to the latest ubuntu:25.04 Docker images This includes a qemu update from 8.2.2 to 9.2.1 which should hopefully fix some bugs we have encountered. PowerPC64LE is skipped for now because the new version seems to cause a number of new SIGILLs. 
--- ci/docker/aarch64-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/arm-unknown-linux-gnueabi/Dockerfile | 2 +- ci/docker/arm-unknown-linux-gnueabihf/Dockerfile | 2 +- ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile | 2 +- ci/docker/i586-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/i686-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/loongarch64-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/mips-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile | 2 +- ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile | 2 +- ci/docker/mipsel-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/powerpc-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/powerpc64-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile | 1 + ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/thumbv6m-none-eabi/Dockerfile | 2 +- ci/docker/thumbv7em-none-eabi/Dockerfile | 2 +- ci/docker/thumbv7em-none-eabihf/Dockerfile | 2 +- ci/docker/thumbv7m-none-eabi/Dockerfile | 2 +- ci/docker/x86_64-unknown-linux-gnu/Dockerfile | 2 +- ci/run-docker.sh | 2 +- 21 files changed, 21 insertions(+), 20 deletions(-) diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index df71804ba..69b99f5b6 100644 --- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/ci/docker/arm-unknown-linux-gnueabi/Dockerfile index 38ad1a136..2fa6f8520 100644 --- a/ci/docker/arm-unknown-linux-gnueabi/Dockerfile +++ b/ci/docker/arm-unknown-linux-gnueabi/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile index ffead05d5..85f7335f5 100644 
--- a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile +++ b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile index 9ab49e46e..42511479f 100644 --- a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +++ b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/i586-unknown-linux-gnu/Dockerfile b/ci/docker/i586-unknown-linux-gnu/Dockerfile index d12ced325..35488c477 100644 --- a/ci/docker/i586-unknown-linux-gnu/Dockerfile +++ b/ci/docker/i586-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/i686-unknown-linux-gnu/Dockerfile b/ci/docker/i686-unknown-linux-gnu/Dockerfile index d12ced325..35488c477 100644 --- a/ci/docker/i686-unknown-linux-gnu/Dockerfile +++ b/ci/docker/i686-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile b/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile index 62b43da9e..e95a1b916 100644 --- a/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/mips-unknown-linux-gnu/Dockerfile b/ci/docker/mips-unknown-linux-gnu/Dockerfile index c02a94672..fd1877603 100644 --- a/ci/docker/mips-unknown-linux-gnu/Dockerfile +++ b/ci/docker/mips-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git 
a/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile index 6d8b96069..4e542ce68 100644 --- a/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile +++ b/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile index 7e6ac7c3b..528dfd894 100644 --- a/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile +++ b/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/mipsel-unknown-linux-gnu/Dockerfile b/ci/docker/mipsel-unknown-linux-gnu/Dockerfile index 9feadc7b5..257218023 100644 --- a/ci/docker/mipsel-unknown-linux-gnu/Dockerfile +++ b/ci/docker/mipsel-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc-unknown-linux-gnu/Dockerfile index 84dcaf47e..cac1f2361 100644 --- a/ci/docker/powerpc-unknown-linux-gnu/Dockerfile +++ b/ci/docker/powerpc-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile index b90fd5ec5..76127b7db 100644 --- a/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile index e6d1d1cd0..c95adecf0 100644 --- a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile +++ 
b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile @@ -1,3 +1,4 @@ +# FIXME(ppc): We want 25.04 but get SIGILLs ARG IMAGE=ubuntu:24.04 FROM $IMAGE diff --git a/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile b/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile index eeb4ed019..513efacd6 100644 --- a/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile +++ b/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/thumbv6m-none-eabi/Dockerfile b/ci/docker/thumbv6m-none-eabi/Dockerfile index ad0d4351e..a9a172a21 100644 --- a/ci/docker/thumbv6m-none-eabi/Dockerfile +++ b/ci/docker/thumbv6m-none-eabi/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/thumbv7em-none-eabi/Dockerfile b/ci/docker/thumbv7em-none-eabi/Dockerfile index ad0d4351e..a9a172a21 100644 --- a/ci/docker/thumbv7em-none-eabi/Dockerfile +++ b/ci/docker/thumbv7em-none-eabi/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/thumbv7em-none-eabihf/Dockerfile b/ci/docker/thumbv7em-none-eabihf/Dockerfile index ad0d4351e..a9a172a21 100644 --- a/ci/docker/thumbv7em-none-eabihf/Dockerfile +++ b/ci/docker/thumbv7em-none-eabihf/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/thumbv7m-none-eabi/Dockerfile b/ci/docker/thumbv7m-none-eabi/Dockerfile index ad0d4351e..a9a172a21 100644 --- a/ci/docker/thumbv7m-none-eabi/Dockerfile +++ b/ci/docker/thumbv7m-none-eabi/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index c590adcdd..2ef800129 100644 --- a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile 
+++ b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ diff --git a/ci/run-docker.sh b/ci/run-docker.sh index d0122dee5..4c1fe0fe2 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -97,7 +97,7 @@ if [ "${1:-}" = "--help" ] || [ "$#" -gt 1 ]; then usage: ./ci/run-docker.sh [target] you can also set DOCKER_BASE_IMAGE to use something other than the default - ubuntu:24.04 (or rustlang/rust:nightly). + ubuntu:25.04 (or rustlang/rust:nightly). " exit fi From b185e89bbd2252df37b9abca1df50ba8b0b26a2e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 24 Jul 2025 03:26:32 -0500 Subject: [PATCH 1431/1459] symcheck: Switch the `object` dependency from git to crates.io Wasm support has since been released, so we no longer need to depend on a git version of `object`. --- crates/symbol-check/Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/symbol-check/Cargo.toml b/crates/symbol-check/Cargo.toml index 30969ee40..e2218b491 100644 --- a/crates/symbol-check/Cargo.toml +++ b/crates/symbol-check/Cargo.toml @@ -5,8 +5,7 @@ edition = "2024" publish = false [dependencies] -# FIXME: used as a git dependency since the latest release does not support wasm -object = { git = "https://github.com/gimli-rs/object.git", rev = "013fac75da56a684377af4151b8164b78c1790e0" } +object = "0.37.1" serde_json = "1.0.140" [features] From 0c7a82c63410008b97450fa0acd2bffb25664894 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 24 Jul 2025 04:05:41 -0500 Subject: [PATCH 1432/1459] ci: Use a mirror for musl We pretty often get at least one job failed because of failure to pull the musl git repo. Switch this to the unofficial mirror [1] which should be more reliable. 
Link: https://github.com/kraj/musl [1] --- ci/update-musl.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/update-musl.sh b/ci/update-musl.sh index b71cf5778..637ab1394 100755 --- a/ci/update-musl.sh +++ b/ci/update-musl.sh @@ -3,7 +3,7 @@ set -eux -url=git://git.musl-libc.org/musl +url=https://github.com/kraj/musl.git ref=c47ad25ea3b484e10326f933e927c0bc8cded3da dst=crates/musl-math-sys/musl From 3fa5a8cba55d4e8a0fce06897fd477d66b41da48 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 24 Jul 2025 04:49:46 -0500 Subject: [PATCH 1433/1459] ci: Upgrade ubuntu:25.04 for the PowerPC64LE test Update the last remaining image. For this to work, the `QEMU_CPU=POWER8` configuration needed to be dropped to avoid a new SIGILL. Doing some debugging locally, the crash comes from an `extswsli` (per `powerpc:common64` in gdb-multiarch) in the `ld64.so` available with PowerPC, which qemu rejects when set to power8. Testing a build with `+crt-static` hits the same issue at a `maddld` in `__libc_start_main_impl`. Rust isn't needed to reproduce this: $ cat a.c #include <stdio.h> int main() { printf("Hello, world!\n"); } $ powerpc64le-linux-gnu-gcc a.c $ QEMU_CPU=power8 QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu/ ./a.out qemu: uncaught target signal 4 (Illegal instruction) - core dumped Illegal instruction So the cross toolchain provided by Debian must have a power9 baseline rather than rustc's power8. Alternatively, qemu may be incorrectly rejecting these instructions (I can't find a source on whether or not they should be available for power8). Testing instead with the `-musl` toolchain and ppc linker from musl.cc works correctly. In any case, things work with the default qemu config so it seems fine to drop. The env was originally added in 5d164a4edafb ("fix the powerpc64le target") but whatever the problem was there appears to no longer be relevant.
--- ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile index c95adecf0..da1d56ca6 100644 --- a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile +++ b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile @@ -1,5 +1,4 @@ -# FIXME(ppc): We want 25.04 but get SIGILLs -ARG IMAGE=ubuntu:24.04 +ARG IMAGE=ubuntu:25.04 FROM $IMAGE RUN apt-get update && \ @@ -13,6 +12,5 @@ ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64le-static \ AR_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ CC_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ - QEMU_CPU=POWER8 \ QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu \ RUST_TEST_THREADS=1 From 9c4ec8b508d9fbc6e263ace2dee36af630cc5f6b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 24 Jul 2025 01:18:46 -0500 Subject: [PATCH 1434/1459] Enable tests that were skipped on PowerPC Most of these were skipped because of a bug with the platform implementation, or some kind of crash unwinding. Since the upgrade to Ubuntu 25.04, these all seem to be resolved with the exception of a bug in the host `__floatundisf` [1]. 
[1] https://github.com/rust-lang/compiler-builtins/pull/384#issuecomment-740413334 --- builtins-test-intrinsics/src/main.rs | 84 +++++---------------------- builtins-test/benches/float_conv.rs | 9 --- builtins-test/benches/float_extend.rs | 2 - builtins-test/benches/float_trunc.rs | 5 -- builtins-test/src/bench.rs | 11 ---- builtins-test/tests/conv.rs | 38 ++++++------ crates/musl-math-sys/src/lib.rs | 2 - libm/src/math/j1f.rs | 3 +- 8 files changed, 34 insertions(+), 120 deletions(-) diff --git a/builtins-test-intrinsics/src/main.rs b/builtins-test-intrinsics/src/main.rs index 66744a081..b9d19ea77 100644 --- a/builtins-test-intrinsics/src/main.rs +++ b/builtins-test-intrinsics/src/main.rs @@ -40,11 +40,7 @@ mod intrinsics { x as f64 } - #[cfg(all( - f16_enabled, - f128_enabled, - not(any(target_arch = "powerpc", target_arch = "powerpc64")) - ))] + #[cfg(all(f16_enabled, f128_enabled))] pub fn extendhftf(x: f16) -> f128 { x as f128 } @@ -201,11 +197,7 @@ mod intrinsics { /* f128 operations */ - #[cfg(all( - f16_enabled, - f128_enabled, - not(any(target_arch = "powerpc", target_arch = "powerpc64")) - ))] + #[cfg(all(f16_enabled, f128_enabled))] pub fn trunctfhf(x: f128) -> f16 { x as f16 } @@ -220,50 +212,32 @@ mod intrinsics { x as f64 } - #[cfg(all( - f128_enabled, - not(any(target_arch = "powerpc", target_arch = "powerpc64")) - ))] + #[cfg(f128_enabled)] pub fn fixtfsi(x: f128) -> i32 { x as i32 } - #[cfg(all( - f128_enabled, - not(any(target_arch = "powerpc", target_arch = "powerpc64")) - ))] + #[cfg(f128_enabled)] pub fn fixtfdi(x: f128) -> i64 { x as i64 } - #[cfg(all( - f128_enabled, - not(any(target_arch = "powerpc", target_arch = "powerpc64")) - ))] + #[cfg(f128_enabled)] pub fn fixtfti(x: f128) -> i128 { x as i128 } - #[cfg(all( - f128_enabled, - not(any(target_arch = "powerpc", target_arch = "powerpc64")) - ))] + #[cfg(f128_enabled)] pub fn fixunstfsi(x: f128) -> u32 { x as u32 } - #[cfg(all( - f128_enabled, - not(any(target_arch = "powerpc", target_arch 
= "powerpc64")) - ))] + #[cfg(f128_enabled)] pub fn fixunstfdi(x: f128) -> u64 { x as u64 } - #[cfg(all( - f128_enabled, - not(any(target_arch = "powerpc", target_arch = "powerpc64")) - ))] + #[cfg(f128_enabled)] pub fn fixunstfti(x: f128) -> u128 { x as u128 } @@ -540,47 +514,25 @@ fn run() { bb(extendhfdf(bb(2.))); #[cfg(f16_enabled)] bb(extendhfsf(bb(2.))); - #[cfg(all( - f16_enabled, - f128_enabled, - not(any(target_arch = "powerpc", target_arch = "powerpc64")) - ))] + #[cfg(all(f16_enabled, f128_enabled))] bb(extendhftf(bb(2.))); #[cfg(f128_enabled)] bb(extendsftf(bb(2.))); bb(fixdfti(bb(2.))); bb(fixsfti(bb(2.))); - #[cfg(all( - f128_enabled, - not(any(target_arch = "powerpc", target_arch = "powerpc64")) - ))] + #[cfg(f128_enabled)] bb(fixtfdi(bb(2.))); - #[cfg(all( - f128_enabled, - not(any(target_arch = "powerpc", target_arch = "powerpc64")) - ))] + #[cfg(f128_enabled)] bb(fixtfsi(bb(2.))); - #[cfg(all( - f128_enabled, - not(any(target_arch = "powerpc", target_arch = "powerpc64")) - ))] + #[cfg(f128_enabled)] bb(fixtfti(bb(2.))); bb(fixunsdfti(bb(2.))); bb(fixunssfti(bb(2.))); - #[cfg(all( - f128_enabled, - not(any(target_arch = "powerpc", target_arch = "powerpc64")) - ))] + #[cfg(f128_enabled)] bb(fixunstfdi(bb(2.))); - #[cfg(all( - f128_enabled, - not(any(target_arch = "powerpc", target_arch = "powerpc64")) - ))] + #[cfg(f128_enabled)] bb(fixunstfsi(bb(2.))); - #[cfg(all( - f128_enabled, - not(any(target_arch = "powerpc", target_arch = "powerpc64")) - ))] + #[cfg(f128_enabled)] bb(fixunstfti(bb(2.))); #[cfg(f128_enabled)] bb(floatditf(bb(2))); @@ -616,11 +568,7 @@ fn run() { bb(truncsfhf(bb(2.))); #[cfg(f128_enabled)] bb(trunctfdf(bb(2.))); - #[cfg(all( - f16_enabled, - f128_enabled, - not(any(target_arch = "powerpc", target_arch = "powerpc64")) - ))] + #[cfg(all(f16_enabled, f128_enabled))] bb(trunctfhf(bb(2.))); #[cfg(f128_enabled)] bb(trunctfsf(bb(2.))); diff --git a/builtins-test/benches/float_conv.rs b/builtins-test/benches/float_conv.rs index 
d4a7346d1..e0f488eb6 100644 --- a/builtins-test/benches/float_conv.rs +++ b/builtins-test/benches/float_conv.rs @@ -365,7 +365,6 @@ float_bench! { /* float -> unsigned int */ -#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] float_bench! { name: conv_f32_u32, sig: (a: f32) -> u32, @@ -387,7 +386,6 @@ float_bench! { ], } -#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] float_bench! { name: conv_f32_u64, sig: (a: f32) -> u64, @@ -409,7 +407,6 @@ float_bench! { ], } -#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] float_bench! { name: conv_f32_u128, sig: (a: f32) -> u128, @@ -505,7 +502,6 @@ float_bench! { /* float -> signed int */ -#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] float_bench! { name: conv_f32_i32, sig: (a: f32) -> i32, @@ -527,7 +523,6 @@ float_bench! { ], } -#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] float_bench! { name: conv_f32_i64, sig: (a: f32) -> i64, @@ -549,7 +544,6 @@ float_bench! { ], } -#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] float_bench! { name: conv_f32_i128, sig: (a: f32) -> i128, @@ -666,9 +660,6 @@ pub fn float_conv() { conv_f64_i128(&mut criterion); #[cfg(f128_enabled)] - // FIXME: ppc64le has a sporadic overflow panic in the crate functions - // - #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] { conv_u32_f128(&mut criterion); conv_u64_f128(&mut criterion); diff --git a/builtins-test/benches/float_extend.rs b/builtins-test/benches/float_extend.rs index fc44e80c9..939dc60f9 100644 --- a/builtins-test/benches/float_extend.rs +++ b/builtins-test/benches/float_extend.rs @@ -110,9 +110,7 @@ float_bench! 
{ pub fn float_extend() { let mut criterion = Criterion::default().configure_from_args(); - // FIXME(#655): `f16` tests disabled until we can bootstrap symbols #[cfg(f16_enabled)] - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] { extend_f16_f32(&mut criterion); extend_f16_f64(&mut criterion); diff --git a/builtins-test/benches/float_trunc.rs b/builtins-test/benches/float_trunc.rs index 43310c7cf..9373f945b 100644 --- a/builtins-test/benches/float_trunc.rs +++ b/builtins-test/benches/float_trunc.rs @@ -121,9 +121,7 @@ float_bench! { pub fn float_trunc() { let mut criterion = Criterion::default().configure_from_args(); - // FIXME(#655): `f16` tests disabled until we can bootstrap symbols #[cfg(f16_enabled)] - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] { trunc_f32_f16(&mut criterion); trunc_f64_f16(&mut criterion); @@ -133,11 +131,8 @@ pub fn float_trunc() { #[cfg(f128_enabled)] { - // FIXME(#655): `f16` tests disabled until we can bootstrap symbols #[cfg(f16_enabled)] - #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] trunc_f128_f16(&mut criterion); - trunc_f128_f32(&mut criterion); trunc_f128_f64(&mut criterion); } diff --git a/builtins-test/src/bench.rs b/builtins-test/src/bench.rs index 098718567..8a513ad67 100644 --- a/builtins-test/src/bench.rs +++ b/builtins-test/src/bench.rs @@ -23,11 +23,6 @@ pub fn skip_sys_checks(test_name: &str) -> bool { "mul_f64", ]; - // FIXME(f16_f128): error on LE ppc64. There are more tests that are cfg-ed out completely - // in their benchmark modules due to runtime panics. 
- // - const PPC64LE_SKIPPED: &[&str] = &["extend_f32_f128"]; - // FIXME(f16_f128): system symbols have incorrect results // const X86_NO_SSE_SKIPPED: &[&str] = &[ @@ -57,12 +52,6 @@ pub fn skip_sys_checks(test_name: &str) -> bool { return true; } - if cfg!(all(target_arch = "powerpc64", target_endian = "little")) - && PPC64LE_SKIPPED.contains(&test_name) - { - return true; - } - if cfg!(all(target_arch = "x86", not(target_feature = "sse"))) && X86_NO_SSE_SKIPPED.contains(&test_name) { diff --git a/builtins-test/tests/conv.rs b/builtins-test/tests/conv.rs index 7d729364f..9b04295d2 100644 --- a/builtins-test/tests/conv.rs +++ b/builtins-test/tests/conv.rs @@ -59,32 +59,28 @@ mod i_to_f { || ((error_minus == error || error_plus == error) && ((f0.to_bits() & 1) != 0)) { - if !cfg!(any( - target_arch = "powerpc", - target_arch = "powerpc64" - )) { - panic!( - "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})", - stringify!($fn), - x, - f1.to_bits(), - y_minus_ulp, - y, - y_plus_ulp, - error_minus, - error, - error_plus, - ); - } + panic!( + "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})", + stringify!($fn), + x, + f1.to_bits(), + y_minus_ulp, + y, + y_plus_ulp, + error_minus, + error, + error_plus, + ); } } - // Test against native conversion. We disable testing on all `x86` because of - // rounding bugs with `i686`. `powerpc` also has the same rounding bug. + // Test against native conversion. + // FIXME(x86,ppc): the platform version has rounding bugs on i686 and + // PowerPC64le (for PPC this only shows up in Docker, not the native runner). 
+ // https://github.com/rust-lang/compiler-builtins/pull/384#issuecomment-740413334 if !Float::eq_repr(f0, f1) && !cfg!(any( target_arch = "x86", - target_arch = "powerpc", - target_arch = "powerpc64" + all(target_arch = "powerpc64", target_endian = "little") )) { panic!( "{}({}): std: {:?}, builtins: {:?}", diff --git a/crates/musl-math-sys/src/lib.rs b/crates/musl-math-sys/src/lib.rs index 6a4bf4859..9cab8deef 100644 --- a/crates/musl-math-sys/src/lib.rs +++ b/crates/musl-math-sys/src/lib.rs @@ -40,8 +40,6 @@ macro_rules! functions { ) => { // Run a simple check to ensure we can link and call the function without crashing. #[test] - // FIXME(#309): LE PPC crashes calling some musl functions - #[cfg_attr(all(target_arch = "powerpc64", target_endian = "little"), ignore)] fn $name() { $rty>::check(super::$name); } diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs index a47472401..da5413ac2 100644 --- a/libm/src/math/j1f.rs +++ b/libm/src/math/j1f.rs @@ -361,8 +361,6 @@ fn qonef(x: f32) -> f32 { return (0.375 + r / s) / x; } -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::{j1f, y1f}; @@ -371,6 +369,7 @@ mod tests { // 0x401F3E49 assert_eq!(j1f(2.4881766_f32), 0.49999475_f32); } + #[test] fn test_y1f_2002() { //allow slightly different result on x87 From 82f9186e6baa0e3b4c0e51b35e0a79099ab456d2 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 24 Jul 2025 07:28:38 -0500 Subject: [PATCH 1435/1459] Enable tests that were skipped on aarch64 The LLVM issue was resolved a while ago, these should no longer be a problem. 
--- builtins-test/src/bench.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/builtins-test/src/bench.rs b/builtins-test/src/bench.rs index 8a513ad67..9ba674294 100644 --- a/builtins-test/src/bench.rs +++ b/builtins-test/src/bench.rs @@ -29,11 +29,6 @@ pub fn skip_sys_checks(test_name: &str) -> bool { "add_f128", "sub_f128", "mul_f128", "div_f128", "powi_f32", "powi_f64", ]; - // FIXME(f16_f128): Wide multiply carry bug in `compiler-rt`, re-enable when nightly no longer - // uses `compiler-rt` version. - // - const AARCH64_SKIPPED: &[&str] = &["mul_f128", "div_f128"]; - // FIXME(llvm): system symbols have incorrect results on Windows // const WINDOWS_SKIPPED: &[&str] = &[ @@ -58,10 +53,6 @@ pub fn skip_sys_checks(test_name: &str) -> bool { return true; } - if cfg!(target_arch = "aarch64") && AARCH64_SKIPPED.contains(&test_name) { - return true; - } - if cfg!(target_family = "windows") && WINDOWS_SKIPPED.contains(&test_name) { return true; } From 84507ccd6fc0f95082b8aaa1e199131bba1b7af8 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 24 Jul 2025 07:31:49 -0500 Subject: [PATCH 1436/1459] Enable skipped `f32` and `f64` multiplication tests The fix has since made it to nightly, so the skips here can be removed. 
--- builtins-test/src/bench.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/builtins-test/src/bench.rs b/builtins-test/src/bench.rs index 9ba674294..bca9f8418 100644 --- a/builtins-test/src/bench.rs +++ b/builtins-test/src/bench.rs @@ -17,10 +17,6 @@ pub fn skip_sys_checks(test_name: &str) -> bool { "extend_f16_f32", "trunc_f32_f16", "trunc_f64_f16", - // FIXME(#616): re-enable once fix is in nightly - // - "mul_f32", - "mul_f64", ]; // FIXME(f16_f128): system symbols have incorrect results From 71f73a58b95784852370a95c4d5744e8d98f6522 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 24 Jul 2025 18:55:27 +0000 Subject: [PATCH 1437/1459] Use `x86_no_sse` configuration in more places Emit `x86_no_sse` in the compiler-builtins (and builtins-test) build script, and use it to simplify `all(target_arch = "x86", not(target_feature = "sse"))` configuration. --- builtins-test/src/bench.rs | 4 +--- builtins-test/tests/addsub.rs | 4 ++-- builtins-test/tests/div_rem.rs | 2 +- builtins-test/tests/float_pow.rs | 3 ++- builtins-test/tests/mul.rs | 4 ++-- compiler-builtins/build.rs | 7 ------- compiler-builtins/configure.rs | 7 +++++++ libm/src/math/rem_pio2.rs | 2 +- 8 files changed, 16 insertions(+), 17 deletions(-) diff --git a/builtins-test/src/bench.rs b/builtins-test/src/bench.rs index bca9f8418..4bdcf482c 100644 --- a/builtins-test/src/bench.rs +++ b/builtins-test/src/bench.rs @@ -43,9 +43,7 @@ pub fn skip_sys_checks(test_name: &str) -> bool { return true; } - if cfg!(all(target_arch = "x86", not(target_feature = "sse"))) - && X86_NO_SSE_SKIPPED.contains(&test_name) - { + if cfg!(x86_no_sse) && X86_NO_SSE_SKIPPED.contains(&test_name) { return true; } diff --git a/builtins-test/tests/addsub.rs b/builtins-test/tests/addsub.rs index 865b9e472..abe7dde64 100644 --- a/builtins-test/tests/addsub.rs +++ b/builtins-test/tests/addsub.rs @@ -111,7 +111,7 @@ macro_rules!
float_sum { } } -#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +#[cfg(not(x86_no_sse))] mod float_addsub { use super::*; @@ -122,7 +122,7 @@ mod float_addsub { } #[cfg(f128_enabled)] -#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +#[cfg(not(x86_no_sse))] #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] mod float_addsub_f128 { use super::*; diff --git a/builtins-test/tests/div_rem.rs b/builtins-test/tests/div_rem.rs index e8327f9b4..caee4166c 100644 --- a/builtins-test/tests/div_rem.rs +++ b/builtins-test/tests/div_rem.rs @@ -138,7 +138,7 @@ macro_rules! float { }; } -#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +#[cfg(not(x86_no_sse))] mod float_div { use super::*; diff --git a/builtins-test/tests/float_pow.rs b/builtins-test/tests/float_pow.rs index 0e8ae88e8..a17dff27c 100644 --- a/builtins-test/tests/float_pow.rs +++ b/builtins-test/tests/float_pow.rs @@ -1,7 +1,7 @@ #![allow(unused_macros)] #![cfg_attr(f128_enabled, feature(f128))] -#![cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +#[cfg_attr(x86_no_sse, allow(unused))] use builtins_test::*; // This is approximate because of issues related to @@ -52,6 +52,7 @@ macro_rules! pow { }; } +#[cfg(not(x86_no_sse))] // FIXME(i586): failure for powidf2 pow! { f32, 1e-4, __powisf2, all(); f64, 1e-12, __powidf2, all(); diff --git a/builtins-test/tests/mul.rs b/builtins-test/tests/mul.rs index 58bc9ab4a..3072b45dc 100644 --- a/builtins-test/tests/mul.rs +++ b/builtins-test/tests/mul.rs @@ -113,7 +113,7 @@ macro_rules! 
float_mul { }; } -#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +#[cfg(not(x86_no_sse))] mod float_mul { use super::*; @@ -126,7 +126,7 @@ mod float_mul { } #[cfg(f128_enabled)] -#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +#[cfg(not(x86_no_sse))] #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] mod float_mul_f128 { use super::*; diff --git a/compiler-builtins/build.rs b/compiler-builtins/build.rs index 8f51c12b5..43b978606 100644 --- a/compiler-builtins/build.rs +++ b/compiler-builtins/build.rs @@ -106,13 +106,6 @@ fn configure_libm(target: &Target) { println!("cargo:rustc-cfg=optimizations_enabled"); } - // Config shorthands - println!("cargo:rustc-check-cfg=cfg(x86_no_sse)"); - if target.arch == "x86" && !target.features.iter().any(|f| f == "sse") { - // Shorthand to detect i586 targets - println!("cargo:rustc-cfg=x86_no_sse"); - } - println!( "cargo:rustc-env=CFG_CARGO_FEATURES={:?}", target.cargo_features diff --git a/compiler-builtins/configure.rs b/compiler-builtins/configure.rs index 9721ddf09..caedc034d 100644 --- a/compiler-builtins/configure.rs +++ b/compiler-builtins/configure.rs @@ -100,6 +100,13 @@ pub fn configure_aliases(target: &Target) { println!("cargo:rustc-cfg=thumb_1") } + // Config shorthands + println!("cargo:rustc-check-cfg=cfg(x86_no_sse)"); + if target.arch == "x86" && !target.features.iter().any(|f| f == "sse") { + // Shorthand to detect i586 targets + println!("cargo:rustc-cfg=x86_no_sse"); + } + /* Not all backends support `f16` and `f128` to the same level on all architectures, so we * need to disable things if the compiler may crash. 
See configuration at: * * https://github.com/rust-lang/rust/blob/c65dccabacdfd6c8a7f7439eba13422fdd89b91e/compiler/rustc_codegen_llvm/src/llvm_util.rs#L367-L432 diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index d677fd9dc..648dca170 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -195,7 +195,7 @@ mod tests { #[test] // FIXME(correctness): inaccurate results on i586 - #[cfg_attr(all(target_arch = "x86", not(target_feature = "sse")), ignore)] + #[cfg_attr(x86_no_sse, ignore)] fn test_near_pi() { let arg = 3.141592025756836; let arg = force_eval!(arg); From c22b8485289d1a01329df520ee4d34b0cd187f95 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Jul 2025 16:51:58 -0500 Subject: [PATCH 1438/1459] libm: Update for new warn-by-default clippy lints Silence the approximate constant lint because it is noisy and not always correct. `single_component_path_imports` is also not accurate when built as part of `compiler-builtins`, so that needs to be `allow`ed as well. --- libm/src/math/mod.rs | 2 ++ libm/src/math/support/mod.rs | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index ce9b8fc58..8eecfe566 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -1,3 +1,5 @@ +#![allow(clippy::approx_constant)] // many false positives + macro_rules! 
force_eval { ($e:expr) => { unsafe { ::core::ptr::read_volatile(&$e) } diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs index 2e7edd03c..b2d7bd8d5 100644 --- a/libm/src/math/support/mod.rs +++ b/libm/src/math/support/mod.rs @@ -11,7 +11,8 @@ mod int_traits; #[allow(unused_imports)] pub use big::{i256, u256}; -#[allow(unused_imports)] +// Clippy seems to have a false positive +#[allow(unused_imports, clippy::single_component_path_imports)] pub(crate) use cfg_if; pub use env::{FpResult, Round, Status}; #[allow(unused_imports)] From c4966f9b0d4323183933e98dea4d0a77f52c0e0a Mon Sep 17 00:00:00 2001 From: quaternic <57393910+quaternic@users.noreply.github.com> Date: Sun, 27 Jul 2025 08:26:58 +0300 Subject: [PATCH 1439/1459] Avoid inlining `floor` into `rem_pio2` Possible workaround for https://github.com/rust-lang/compiler-builtins/pull/976#issuecomment-3085530354 Inline assembly in the body of a function currently causes the compiler to consider that function possibly unwinding, even if said asm originated from inlining an `extern "C"` function. This patch wraps the problematic callsite with `#[inline(never)]`. --- libm/src/math/rem_pio2_large.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index 6d679bbe9..792c09fb1 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -11,7 +11,7 @@ * ==================================================== */ -use super::{floor, scalbn}; +use super::scalbn; // initial value for jk const INIT_JK: [usize; 4] = [3, 4, 4, 6]; @@ -223,6 +223,14 @@ const PIO2: [f64; 8] = [ /// independent of the exponent of the input. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { + // FIXME(rust-lang/rust#144518): Inline assembly would cause `no_panic` to fail + // on the callers of this function. 
As a workaround, avoid inlining `floor` here + // when implemented with assembly. + #[cfg_attr(x86_no_sse, inline(never))] + extern "C" fn floor(x: f64) -> f64 { + super::floor(x) + } + let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24 let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24) From a4f24dc2537e75661ca693acd9331c8d7f5a7750 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sun, 27 Jul 2025 23:27:40 +0200 Subject: [PATCH 1440/1459] Implement `floor` and `ceil` in assembly on `i586` Fixes: https://github.com/rust-lang/compiler-builtins/issues/837 The assembly is based on - https://github.com/NetBSD/src/blob/20433927938987dd64c8f6aa46904b7aca3fa39e/lib/libm/arch/i387/s_floor.S - https://github.com/NetBSD/src/blob/20433927938987dd64c8f6aa46904b7aca3fa39e/lib/libm/arch/i387/s_ceil.S Which both state /* * Written by J.T. Conklin . * Public domain. */ Which I believe means we're good in terms of licensing. --- libm-test/src/precision.rs | 22 ---------- libm/src/math/arch/i586.rs | 85 ++++++++++++++++++++++++-------------- 2 files changed, 55 insertions(+), 52 deletions(-) diff --git a/libm-test/src/precision.rs b/libm-test/src/precision.rs index 32825b15d..3fb8c1b37 100644 --- a/libm-test/src/precision.rs +++ b/libm-test/src/precision.rs @@ -271,18 +271,6 @@ impl MaybeOverride<(f32,)> for SpecialCase { impl MaybeOverride<(f64,)> for SpecialCase { fn check_float(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction { - if cfg!(x86_no_sse) - && ctx.base_name == BaseName::Ceil - && ctx.basis == CheckBasis::Musl - && input.0 < 0.0 - && input.0 > -1.0 - && expected == F::ZERO - && actual == F::ZERO - { - // musl returns -0.0, we return +0.0 - return XFAIL("i586 ceil signed zero"); - } - if cfg!(x86_no_sse) && (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven) && (expected - actual).abs() <= F::ONE @@ -292,16 +280,6 @@ impl MaybeOverride<(f64,)> for SpecialCase { return XFAIL("i586 rint 
rounding mode"); } - if cfg!(x86_no_sse) - && (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor) - && expected.eq_repr(F::NEG_ZERO) - && actual.eq_repr(F::ZERO) - { - // FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0. - // See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955 - return XFAIL("i586 ceil/floor signed zero"); - } - if cfg!(x86_no_sse) && (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2) { diff --git a/libm/src/math/arch/i586.rs b/libm/src/math/arch/i586.rs index f92b9a2af..b9a667620 100644 --- a/libm/src/math/arch/i586.rs +++ b/libm/src/math/arch/i586.rs @@ -1,37 +1,62 @@ //! Architecture-specific support for x86-32 without SSE2 +//! +//! We use an alternative implementation on x86, because the +//! main implementation fails with the x87 FPU used by +//! debian i386, probably due to excess precision issues. +//! +//! See https://github.com/rust-lang/compiler-builtins/pull/976 for discussion on why these +//! functions are implemented in this way. -use super::super::fabs; - -/// Use an alternative implementation on x86, because the -/// main implementation fails with the x87 FPU used by -/// debian i386, probably due to excess precision issues. -/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. -pub fn ceil(x: f64) -> f64 { - if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { - let truncated = x as i64 as f64; - if truncated < x { - return truncated + 1.0; - } else { - return truncated; - } - } else { - return x; +pub fn ceil(mut x: f64) -> f64 { + unsafe { + core::arch::asm!( + "fld qword ptr [{x}]", + // Save the FPU control word, using `x` as scratch space. + "fstcw [{x}]", + // Set rounding control to 0b10 (+∞). + "mov word ptr [{x} + 2], 0x0b7f", + "fldcw [{x} + 2]", + // Round. + "frndint", + // Restore FPU control word. + "fldcw [{x}]", + // Save rounded value to memory. 
+ "fstp qword ptr [{x}]", + x = in(reg) &mut x, + // All the x87 FPU stack is used, all registers must be clobbered + out("st(0)") _, out("st(1)") _, + out("st(2)") _, out("st(3)") _, + out("st(4)") _, out("st(5)") _, + out("st(6)") _, out("st(7)") _, + options(nostack), + ); } + x } -/// Use an alternative implementation on x86, because the -/// main implementation fails with the x87 FPU used by -/// debian i386, probably due to excess precision issues. -/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. -pub fn floor(x: f64) -> f64 { - if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { - let truncated = x as i64 as f64; - if truncated > x { - return truncated - 1.0; - } else { - return truncated; - } - } else { - return x; +pub fn floor(mut x: f64) -> f64 { + unsafe { + core::arch::asm!( + "fld qword ptr [{x}]", + // Save the FPU control word, using `x` as scratch space. + "fstcw [{x}]", + // Set rounding control to 0b01 (-∞). + "mov word ptr [{x} + 2], 0x077f", + "fldcw [{x} + 2]", + // Round. + "frndint", + // Restore FPU control word. + "fldcw [{x}]", + // Save rounded value to memory. + "fstp qword ptr [{x}]", + x = in(reg) &mut x, + // All the x87 FPU stack is used, all registers must be clobbered + out("st(0)") _, out("st(1)") _, + out("st(2)") _, out("st(3)") _, + out("st(4)") _, out("st(5)") _, + out("st(6)") _, out("st(7)") _, + options(nostack), + ); } + x } From b7cdb7334d4eb4dac6adf5b97819bae8911c4ab0 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 25 Jul 2025 17:36:25 -0500 Subject: [PATCH 1441/1459] Remove `no-asm` gating when there is no alternative implementation Assembly-related configuration was added in 1621c6dbf9eb ("Use `specialized-div-rem` 1.0.0 for division algorithms") to account for Cranelift not yet supporting assembly. This hasn't been relevant for a while, so we no longer need to gate `asm!` behind this configuration. 
Thus, remove `cfg(not(feature = "no-asm"))` in places where there is no generic fallback. There are other cases, however, where setting the `no-asm` configuration enables testing of generic version of builtins when there are platform- specific implementations available; these cases are left unchanged. This could be improved in the future by exposing both versions for testing rather than using a configuration and running the entire testsuite twice. This is the compiler-builtins portion of https://github.com/rust-lang/rust/pull/144471. --- builtins-shim/Cargo.toml | 5 +++-- builtins-test/tests/lse.rs | 2 +- compiler-builtins/Cargo.toml | 5 +++-- compiler-builtins/src/aarch64.rs | 2 +- compiler-builtins/src/arm.rs | 2 -- compiler-builtins/src/hexagon.rs | 2 -- compiler-builtins/src/lib.rs | 2 +- compiler-builtins/src/probestack.rs | 2 -- compiler-builtins/src/x86.rs | 10 ++-------- compiler-builtins/src/x86_64.rs | 9 +-------- 10 files changed, 12 insertions(+), 29 deletions(-) diff --git a/builtins-shim/Cargo.toml b/builtins-shim/Cargo.toml index 8eb880c6f..707ebdbc7 100644 --- a/builtins-shim/Cargo.toml +++ b/builtins-shim/Cargo.toml @@ -37,8 +37,9 @@ default = ["compiler-builtins"] # implementations and also filling in unimplemented intrinsics c = ["dep:cc"] -# Workaround for the Cranelift codegen backend. Disables any implementations -# which use inline assembly and fall back to pure Rust versions (if available). +# For implementations where there is both a generic version and a platform- +# specific version, use the generic version. This is meant to enable testing +# the generic versions on all platforms. 
no-asm = [] # Workaround for codegen backends which haven't yet implemented `f16` and diff --git a/builtins-test/tests/lse.rs b/builtins-test/tests/lse.rs index 0d85228d7..5d59fbb7f 100644 --- a/builtins-test/tests/lse.rs +++ b/builtins-test/tests/lse.rs @@ -1,6 +1,6 @@ #![feature(decl_macro)] // so we can use pub(super) #![feature(macro_metavar_expr_concat)] -#![cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm")))] +#![cfg(all(target_arch = "aarch64", target_os = "linux"))] /// Translate a byte size to a Rust type. macro int_ty { diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml index 3ccb05f73..8bbe136ce 100644 --- a/compiler-builtins/Cargo.toml +++ b/compiler-builtins/Cargo.toml @@ -35,8 +35,9 @@ default = ["compiler-builtins"] # implementations and also filling in unimplemented intrinsics c = ["dep:cc"] -# Workaround for the Cranelift codegen backend. Disables any implementations -# which use inline assembly and fall back to pure Rust versions (if available). +# For implementations where there is both a generic version and a platform- +# specific version, use the generic version. This is meant to enable testing +# the generic versions on all platforms. no-asm = [] # Workaround for codegen backends which haven't yet implemented `f16` and diff --git a/compiler-builtins/src/aarch64.rs b/compiler-builtins/src/aarch64.rs index a72b30d29..039fab206 100644 --- a/compiler-builtins/src/aarch64.rs +++ b/compiler-builtins/src/aarch64.rs @@ -4,7 +4,7 @@ use core::intrinsics; intrinsics! 
{ #[unsafe(naked)] - #[cfg(all(target_os = "uefi", not(feature = "no-asm")))] + #[cfg(target_os = "uefi")] pub unsafe extern "custom" fn __chkstk() { core::arch::naked_asm!( ".p2align 2", diff --git a/compiler-builtins/src/arm.rs b/compiler-builtins/src/arm.rs index fbec93ca4..0c15b37df 100644 --- a/compiler-builtins/src/arm.rs +++ b/compiler-builtins/src/arm.rs @@ -1,5 +1,3 @@ -#![cfg(not(feature = "no-asm"))] - // Interfaces used by naked trampolines. // SAFETY: these are defined in compiler-builtins unsafe extern "C" { diff --git a/compiler-builtins/src/hexagon.rs b/compiler-builtins/src/hexagon.rs index 91cf91c31..a5c7b4dfd 100644 --- a/compiler-builtins/src/hexagon.rs +++ b/compiler-builtins/src/hexagon.rs @@ -1,5 +1,3 @@ -#![cfg(not(feature = "no-asm"))] - use core::arch::global_asm; global_asm!(include_str!("hexagon/func_macro.s"), options(raw)); diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs index fe0ad81dd..ca75f44e0 100644 --- a/compiler-builtins/src/lib.rs +++ b/compiler-builtins/src/lib.rs @@ -60,7 +60,7 @@ pub mod arm; #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] pub mod aarch64; -#[cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm"),))] +#[cfg(all(target_arch = "aarch64", target_os = "linux"))] pub mod aarch64_linux; #[cfg(all( diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs index f4105dde5..9a18216da 100644 --- a/compiler-builtins/src/probestack.rs +++ b/compiler-builtins/src/probestack.rs @@ -44,8 +44,6 @@ #![cfg(not(feature = "mangled-names"))] // Windows and Cygwin already has builtins to do this. #![cfg(not(any(windows, target_os = "cygwin")))] -// All these builtins require assembly -#![cfg(not(feature = "no-asm"))] // We only define stack probing for these architectures today. 
#![cfg(any(target_arch = "x86_64", target_arch = "x86"))] diff --git a/compiler-builtins/src/x86.rs b/compiler-builtins/src/x86.rs index 16e50922a..51940b3b3 100644 --- a/compiler-builtins/src/x86.rs +++ b/compiler-builtins/src/x86.rs @@ -9,10 +9,7 @@ use core::intrinsics; intrinsics! { #[unsafe(naked)] - #[cfg(all( - any(all(windows, target_env = "gnu"), target_os = "uefi"), - not(feature = "no-asm") - ))] + #[cfg(any(all(windows, target_env = "gnu"), target_os = "uefi"))] pub unsafe extern "custom" fn __chkstk() { core::arch::naked_asm!( "jmp {}", // Jump to __alloca since fallthrough may be unreliable" @@ -21,10 +18,7 @@ intrinsics! { } #[unsafe(naked)] - #[cfg(all( - any(all(windows, target_env = "gnu"), target_os = "uefi"), - not(feature = "no-asm") - ))] + #[cfg(any(all(windows, target_env = "gnu"), target_os = "uefi"))] pub unsafe extern "custom" fn _alloca() { // __chkstk and _alloca are the same function core::arch::naked_asm!( diff --git a/compiler-builtins/src/x86_64.rs b/compiler-builtins/src/x86_64.rs index 9b7133b48..f9ae784d5 100644 --- a/compiler-builtins/src/x86_64.rs +++ b/compiler-builtins/src/x86_64.rs @@ -9,14 +9,7 @@ use core::intrinsics; intrinsics! { #[unsafe(naked)] - #[cfg(all( - any( - all(windows, target_env = "gnu"), - target_os = "cygwin", - target_os = "uefi" - ), - not(feature = "no-asm") - ))] + #[cfg(any(all(windows, target_env = "gnu"), target_os = "cygwin", target_os = "uefi"))] pub unsafe extern "custom" fn ___chkstk_ms() { core::arch::naked_asm!( "push %rcx", From b56560b9696cc394e6030fa2e35ebc552c7e0962 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Tue, 29 Jul 2025 10:20:22 +0200 Subject: [PATCH 1442/1459] Switch to using a GH app for authenticating sync PRs So there will no longer be the need to close and reopen sync PRs in order for CI to run. 
--- .github/workflows/rustc-pull.yml | 5 +++-- triagebot.toml | 3 --- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/rustc-pull.yml b/.github/workflows/rustc-pull.yml index ba698492e..ad7693e17 100644 --- a/.github/workflows/rustc-pull.yml +++ b/.github/workflows/rustc-pull.yml @@ -12,12 +12,13 @@ jobs: if: github.repository == 'rust-lang/compiler-builtins' uses: rust-lang/josh-sync/.github/workflows/rustc-pull.yml@main with: + github-app-id: ${{ vars.APP_CLIENT_ID }} # https://rust-lang.zulipchat.com/#narrow/channel/219381-t-libs/topic/compiler-builtins.20subtree.20sync.20automation/with/528482375 zulip-stream-id: 219381 zulip-topic: 'compiler-builtins subtree sync automation' - zulip-bot-email: "compiler-builtins-ci-bot@rust-lang.zulipchat.com" + zulip-bot-email: "compiler-builtins-ci-bot@rust-lang.zulipchat.com" pr-base-branch: master branch-name: rustc-pull secrets: zulip-api-token: ${{ secrets.ZULIP_API_TOKEN }} - token: ${{ secrets.GITHUB_TOKEN }} + github-app-secret: ${{ secrets.APP_PRIVATE_KEY }} diff --git a/triagebot.toml b/triagebot.toml index 8a2356c2b..eba5cdd88 100644 --- a/triagebot.toml +++ b/triagebot.toml @@ -19,6 +19,3 @@ check-commits = false # Enable issue transfers within the org # Documentation at: https://forge.rust-lang.org/triagebot/transfer.html [transfer] - -# Automatically close and reopen PRs made by bots to run CI on them -[bot-pull-requests] From 2086325b3171988b5ca8c0b8298bb3abb10a1bb9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 29 Jul 2025 18:56:46 +0000 Subject: [PATCH 1443/1459] cleanup: Trim trailing whitespace --- .github/workflows/main.yaml | 4 ++-- ci/run.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 6c98a60d2..0c4b49cd9 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -166,7 +166,7 @@ jobs: shell: bash - run: echo "RUST_COMPILER_RT_ROOT=$(realpath ./compiler-rt)" 
>> "$GITHUB_ENV" shell: bash - + - name: Download musl source run: ./ci/update-musl.sh shell: bash @@ -278,7 +278,7 @@ jobs: with: name: ${{ env.BASELINE_NAME }} path: ${{ env.BASELINE_NAME }}.tar.xz - + - name: Run wall time benchmarks run: | # Always use the same seed for benchmarks. Ideally we should switch to a diff --git a/ci/run.sh b/ci/run.sh index 8b7965bb2..4b43536d3 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -161,7 +161,7 @@ else mflags+=(--workspace --target "$target") cmd=(cargo test "${mflags[@]}") profile_flag="--profile" - + # If nextest is available, use that command -v cargo-nextest && nextest=1 || nextest=0 if [ "$nextest" = "1" ]; then @@ -204,7 +204,7 @@ else "${cmd[@]}" "$profile_flag" release-checked --features unstable-intrinsics --benches # Ensure that the routines do not panic. - # + # # `--tests` must be passed because no-panic is only enabled as a dev # dependency. The `release-opt` profile must be used to enable LTO and a # single CGU. From 16d9435403c92755ed6c4bbb38db0ab25cbdef51 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 29 Jul 2025 19:04:32 +0000 Subject: [PATCH 1444/1459] ci: Simplify tests for verbatim paths Rather than setting an environment variable in the workflow job based on whether or not the environment is non-MinGW Windows, we can just check this in the ci script. This was originally added in b0f19660f0 ("Add tests for UNC paths on windows builds") and its followup commits. 
--- .github/workflows/main.yaml | 4 ---- ci/run.sh | 5 ++++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 0c4b49cd9..94b519e3c 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -50,7 +50,6 @@ jobs: os: ubuntu-24.04-arm - target: aarch64-pc-windows-msvc os: windows-2025 - test_verbatim: 1 build_only: 1 - target: arm-unknown-linux-gnueabi os: ubuntu-24.04 @@ -92,10 +91,8 @@ jobs: os: macos-13 - target: i686-pc-windows-msvc os: windows-2025 - test_verbatim: 1 - target: x86_64-pc-windows-msvc os: windows-2025 - test_verbatim: 1 - target: i686-pc-windows-gnu os: windows-2025 channel: nightly-i686-gnu @@ -106,7 +103,6 @@ jobs: needs: [calculate_vars] env: BUILD_ONLY: ${{ matrix.build_only }} - TEST_VERBATIM: ${{ matrix.test_verbatim }} MAY_SKIP_LIBM_CI: ${{ needs.calculate_vars.outputs.may_skip_libm_ci }} steps: - name: Print $HOME diff --git a/ci/run.sh b/ci/run.sh index 4b43536d3..bc94d42fe 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -41,7 +41,10 @@ else "${test_builtins[@]}" --benches "${test_builtins[@]}" --benches --release - if [ "${TEST_VERBATIM:-}" = "1" ]; then + # Validate that having a verbatim path for the target directory works + # (trivial to regress using `/` in paths to build artifacts rather than + # `Path::join`). MinGW does not currently support these paths. + if [[ "$target" = *"windows"* ]] && [[ "$target" != *"gnu"* ]]; then verb_path=$(cmd.exe //C echo \\\\?\\%cd%\\builtins-test\\target2) "${test_builtins[@]}" --target-dir "$verb_path" --features c fi From aa25c33bf42c483998c9307ba2b7416be168fccf Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 30 Jul 2025 08:26:59 +0000 Subject: [PATCH 1445/1459] ci: Switch to strongly typed directives Replace the current system with something that is more structured and will also catch unknown directives. 
--- ci/ci-util.py | 79 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 25 deletions(-) diff --git a/ci/ci-util.py b/ci/ci-util.py index 3437d304f..1a9c83d23 100755 --- a/ci/ci-util.py +++ b/ci/ci-util.py @@ -7,6 +7,7 @@ import json import os +import pprint import re import subprocess as sp import sys @@ -50,15 +51,6 @@ DEFAULT_BRANCH = "master" WORKFLOW_NAME = "CI" # Workflow that generates the benchmark artifacts ARTIFACT_PREFIX = "baseline-icount*" -# Place this in a PR body to skip regression checks (must be at the start of a line). -REGRESSION_DIRECTIVE = "ci: allow-regressions" -# Place this in a PR body to skip extensive tests -SKIP_EXTENSIVE_DIRECTIVE = "ci: skip-extensive" -# Place this in a PR body to allow running a large number of extensive tests. If not -# set, this script will error out if a threshold is exceeded in order to avoid -# accidentally spending huge amounts of CI time. -ALLOW_MANY_EXTENSIVE_DIRECTIVE = "ci: allow-many-extensive" -MANY_EXTENSIVE_THRESHOLD = 20 # Don't run exhaustive tests if these files change, even if they contaiin a function # definition. @@ -80,6 +72,48 @@ def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) +@dataclass(init=False) +class PrCfg: + """Directives that we allow in the commit body to control test behavior. + + These are of the form `ci: foo`, at the start of a line. + """ + + # Skip regression checks (must be at the start of a line). + allow_regressions: bool = False + # Don't run extensive tests + skip_extensive: bool = False + + # Allow running a large number of extensive tests. If not set, this script + # will error out if a threshold is exceeded in order to avoid accidentally + # spending huge amounts of CI time. 
+ allow_many_extensive: bool = False + + # Max number of extensive tests to run by default + MANY_EXTENSIVE_THRESHOLD: int = 20 + + # String values of directive names + DIR_ALLOW_REGRESSIONS: str = "allow-regressions" + DIR_SKIP_EXTENSIVE: str = "skip-extensive" + DIR_ALLOW_MANY_EXTENSIVE: str = "allow-many-extensive" + + def __init__(self, body: str): + directives = re.finditer(r"^\s*ci:\s*(?P<dir_name>\S*)", body, re.MULTILINE) + for dir in directives: + name = dir.group("dir_name") + if name == self.DIR_ALLOW_REGRESSIONS: + self.allow_regressions = True + elif name == self.DIR_SKIP_EXTENSIVE: + self.skip_extensive = True + elif name == self.DIR_ALLOW_MANY_EXTENSIVE: + self.allow_many_extensive = True + else: + eprint(f"Found unexpected directive `{name}`") + exit(1) + + pprint.pp(self) + + @dataclass class PrInfo: """GitHub response for PR query""" @@ -88,6 +122,7 @@ class PrInfo: commits: list[str] created_at: str number: int + cfg: PrCfg @classmethod def load(cls, pr_number: int | str) -> Self: @@ -104,13 +139,9 @@ def load(cls, pr_number: int | str) -> Self: ], text=True, ) - eprint("PR info:", json.dumps(pr_info, indent=4)) - return cls(**json.loads(pr_info)) - - def contains_directive(self, directive: str) -> bool: - """Return true if the provided directive is on a line in the PR body""" - lines = self.body.splitlines() - return any(line.startswith(directive) for line in lines) + pr_json = json.loads(pr_info) + eprint("PR info:", json.dumps(pr_json, indent=4)) + return cls(**json.loads(pr_info), cfg=PrCfg(pr_json["body"])) class FunctionDef(TypedDict): @@ -223,10 +254,8 @@ def emit_workflow_output(self): if pr_number is not None and len(pr_number) > 0: pr = PrInfo.load(pr_number) - skip_tests = pr.contains_directive(SKIP_EXTENSIVE_DIRECTIVE) - error_on_many_tests = not pr.contains_directive( - ALLOW_MANY_EXTENSIVE_DIRECTIVE - ) + skip_tests = pr.cfg.skip_extensive + error_on_many_tests = not pr.cfg.allow_many_extensive if skip_tests: eprint("Skipping all extensive
tests") @@ -257,12 +286,12 @@ def emit_workflow_output(self): eprint(f"may_skip_libm_ci={may_skip}") eprint(f"total extensive tests: {total_to_test}") - if error_on_many_tests and total_to_test > MANY_EXTENSIVE_THRESHOLD: + if error_on_many_tests and total_to_test > PrCfg.MANY_EXTENSIVE_THRESHOLD: eprint( - f"More than {MANY_EXTENSIVE_THRESHOLD} tests would be run; add" - f" `{ALLOW_MANY_EXTENSIVE_DIRECTIVE}` to the PR body if this is" + f"More than {PrCfg.MANY_EXTENSIVE_THRESHOLD} tests would be run; add" + f" `{PrCfg.DIR_ALLOW_MANY_EXTENSIVE}` to the PR body if this is" " intentional. If this is refactoring that happens to touch a lot of" - f" files, `{SKIP_EXTENSIVE_DIRECTIVE}` can be used instead." + f" files, `{PrCfg.DIR_SKIP_EXTENSIVE}` can be used instead." ) exit(1) @@ -372,7 +401,7 @@ def handle_bench_regressions(args: list[str]): exit(1) pr = PrInfo.load(pr_number) - if pr.contains_directive(REGRESSION_DIRECTIVE): + if pr.cfg.allow_regressions: eprint("PR allows regressions") return From ff2cc0e38e3ecc59e617ec75856b3f702bb46dea Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 30 Jul 2025 08:30:47 +0000 Subject: [PATCH 1446/1459] ci: Don't print output twice in `ci-util` Use `tee` rather than printing to both stdout and stderr. 
--- .github/workflows/main.yaml | 2 +- ci/ci-util.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 94b519e3c..939bc34c2 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -34,7 +34,7 @@ jobs: - name: Fetch pull request ref run: git fetch origin "$GITHUB_REF:$GITHUB_REF" if: github.event_name == 'pull_request' - - run: python3 ci/ci-util.py generate-matrix >> "$GITHUB_OUTPUT" + - run: set -e; python3 ci/ci-util.py generate-matrix | tee "$GITHUB_OUTPUT" id: script test: diff --git a/ci/ci-util.py b/ci/ci-util.py index 1a9c83d23..8f74ecfdb 100755 --- a/ci/ci-util.py +++ b/ci/ci-util.py @@ -282,8 +282,6 @@ def emit_workflow_output(self): may_skip = str(self.may_skip_libm_ci()).lower() print(f"extensive_matrix={ext_matrix}") print(f"may_skip_libm_ci={may_skip}") - eprint(f"extensive_matrix={ext_matrix}") - eprint(f"may_skip_libm_ci={may_skip}") eprint(f"total extensive tests: {total_to_test}") if error_on_many_tests and total_to_test > PrCfg.MANY_EXTENSIVE_THRESHOLD: From 568afb8cf55a6a8e5645f9d21aac6139e683ec42 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 30 Jul 2025 08:32:28 +0000 Subject: [PATCH 1447/1459] ci: Commonize the way `PrInfo` is loaded from env --- ci/ci-util.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/ci/ci-util.py b/ci/ci-util.py index 8f74ecfdb..f43409c5e 100755 --- a/ci/ci-util.py +++ b/ci/ci-util.py @@ -12,6 +12,7 @@ import subprocess as sp import sys from dataclasses import dataclass +from functools import cache from glob import glob from inspect import cleandoc from os import getenv @@ -62,7 +63,7 @@ # libm PR CI takes a long time and doesn't need to run unless relevant files have been # changed. Anything matching this regex pattern will trigger a run. 
-TRIGGER_LIBM_PR_CI = ".*(libm|musl).*" +TRIGGER_LIBM_CI_FILE_PAT = ".*(libm|musl).*" TYPES = ["f16", "f32", "f64", "f128"] @@ -125,8 +126,18 @@ class PrInfo: cfg: PrCfg @classmethod - def load(cls, pr_number: int | str) -> Self: - """For a given PR number, query the body and commit list""" + def from_env(cls) -> Self | None: + """Create a PR object from the PR_NUMBER environment if set, `None` otherwise.""" + pr_env = os.environ.get("PR_NUMBER") + if pr_env is not None and len(pr_env) > 0: + return cls.from_pr(pr_env) + + return None + + @classmethod + @cache # Cache so we don't print info messages multiple times + def from_pr(cls, pr_number: int | str) -> Self: + """For a given PR number, query the body and commit list.""" pr_info = sp.check_output( [ "gh", @@ -238,22 +249,23 @@ def may_skip_libm_ci(self) -> bool: """If this is a PR and no libm files were changed, allow skipping libm jobs.""" - if self.is_pr(): - return all(not re.match(TRIGGER_LIBM_PR_CI, str(f)) for f in self.changed) + # Always run on merge CI + if not self.is_pr(): + return False - return False + # By default, run if there are any changed files matching the pattern + return all(not re.match(TRIGGER_LIBM_CI_FILE_PAT, str(f)) for f in self.changed) def emit_workflow_output(self): """Create a JSON object a list items for each type's changed files, if any did change, and the routines that were affected by the change. 
""" - pr_number = os.environ.get("PR_NUMBER") skip_tests = False error_on_many_tests = False - if pr_number is not None and len(pr_number) > 0: - pr = PrInfo.load(pr_number) + pr = PrInfo.from_env() + if pr is not None: skip_tests = pr.cfg.skip_extensive error_on_many_tests = not pr.cfg.allow_many_extensive @@ -398,7 +410,7 @@ def handle_bench_regressions(args: list[str]): eprint(USAGE) exit(1) - pr = PrInfo.load(pr_number) + pr = PrInfo.from_pr(pr_number) if pr.cfg.allow_regressions: eprint("PR allows regressions") return From 1d58d4c778b9e8632bb1649d84becaa5a7a53e03 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 30 Jul 2025 08:33:37 +0000 Subject: [PATCH 1448/1459] ci: Add a way to run `libm` tests that would otherwise be skipped Introduce a new directive `ci: test-libm` to ensure tests run. --- ci/ci-util.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ci/ci-util.py b/ci/ci-util.py index f43409c5e..c1db17c6c 100755 --- a/ci/ci-util.py +++ b/ci/ci-util.py @@ -93,10 +93,14 @@ class PrCfg: # Max number of extensive tests to run by default MANY_EXTENSIVE_THRESHOLD: int = 20 + # Run tests for `libm` that may otherwise be skipped due to no changed files. 
+ always_test_libm: bool = False + # String values of directive names DIR_ALLOW_REGRESSIONS: str = "allow-regressions" DIR_SKIP_EXTENSIVE: str = "skip-extensive" DIR_ALLOW_MANY_EXTENSIVE: str = "allow-many-extensive" + DIR_TEST_LIBM: str = "test-libm" def __init__(self, body: str): directives = re.finditer(r"^\s*ci:\s*(?P<dir_name>\S*)", body, re.MULTILINE) @@ -108,6 +112,8 @@ def __init__(self, body: str): self.skip_extensive = True elif name == self.DIR_ALLOW_MANY_EXTENSIVE: self.allow_many_extensive = True + elif name == self.DIR_TEST_LIBM: + self.always_test_libm = True else: eprint(f"Found unexpected directive `{name}`") exit(1) @@ -253,6 +259,13 @@ def may_skip_libm_ci(self) -> bool: if not self.is_pr(): return False + pr = PrInfo.from_env() + assert pr is not None, "Is a PR but couldn't load PrInfo" + + # Allow opting in to libm tests + if pr.cfg.always_test_libm: + return False + # By default, run if there are any changed files matching the pattern return all(not re.match(TRIGGER_LIBM_CI_FILE_PAT, str(f)) for f in self.changed) From 767e6ebff0f39e89adbffb96350d03eb6b3225d4 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 30 Jul 2025 09:56:11 -0500 Subject: [PATCH 1449/1459] ci: Set pipefail before running ci-util Currently, a failure in `ci-util.py` does not cause the job to fail because the pipe eats the failure status. Set pipefail to fix this.
Fixes: ff2cc0e38e3e ("ci: Don't print output twice in `ci-util`") --- .github/workflows/main.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 939bc34c2..c54df2e90 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -34,7 +34,9 @@ jobs: - name: Fetch pull request ref run: git fetch origin "$GITHUB_REF:$GITHUB_REF" if: github.event_name == 'pull_request' - - run: set -e; python3 ci/ci-util.py generate-matrix | tee "$GITHUB_OUTPUT" + - run: | + set -eo pipefail # Needed to actually fail the job if ci-util fails + python3 ci/ci-util.py generate-matrix | tee "$GITHUB_OUTPUT" id: script test: From 13c5374b7cd374583f4d2df7cd014a2ede9e0570 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 30 Jul 2025 09:45:53 -0500 Subject: [PATCH 1450/1459] Simplify the configuration for no-panic Currently, attributes for `no-panic` are gated behind both the `test` config and `assert_no_panic`, because `no-panic` is a dev dependency (so only available with test configuration). However, we only emit `assert_no_panic` when the test config is also set anyway, so there isn't any need to gate on both. Replace gates on `all(test, assert_no_panic)` with only `assert_no_panic`. This is simpler, and also has the benefit that attempting to check for panics without `--test` errors. 
--- libm/src/math/acos.rs | 2 +- libm/src/math/acosf.rs | 2 +- libm/src/math/acosh.rs | 2 +- libm/src/math/acoshf.rs | 2 +- libm/src/math/asin.rs | 2 +- libm/src/math/asinf.rs | 2 +- libm/src/math/asinh.rs | 2 +- libm/src/math/asinhf.rs | 2 +- libm/src/math/atan.rs | 2 +- libm/src/math/atan2.rs | 2 +- libm/src/math/atan2f.rs | 2 +- libm/src/math/atanf.rs | 2 +- libm/src/math/atanh.rs | 2 +- libm/src/math/atanhf.rs | 2 +- libm/src/math/cbrt.rs | 2 +- libm/src/math/cbrtf.rs | 2 +- libm/src/math/ceil.rs | 8 ++++---- libm/src/math/copysign.rs | 8 ++++---- libm/src/math/cos.rs | 2 +- libm/src/math/cosf.rs | 2 +- libm/src/math/cosh.rs | 2 +- libm/src/math/coshf.rs | 2 +- libm/src/math/erf.rs | 2 +- libm/src/math/erff.rs | 2 +- libm/src/math/exp.rs | 2 +- libm/src/math/exp10.rs | 2 +- libm/src/math/exp10f.rs | 2 +- libm/src/math/exp2.rs | 2 +- libm/src/math/exp2f.rs | 2 +- libm/src/math/expf.rs | 2 +- libm/src/math/expm1.rs | 2 +- libm/src/math/expm1f.rs | 2 +- libm/src/math/expo2.rs | 2 +- libm/src/math/fabs.rs | 8 ++++---- libm/src/math/fdim.rs | 8 ++++---- libm/src/math/floor.rs | 8 ++++---- libm/src/math/fma.rs | 8 ++++---- libm/src/math/fmin_fmax.rs | 16 ++++++++-------- libm/src/math/fminimum_fmaximum.rs | 16 ++++++++-------- libm/src/math/fminimum_fmaximum_num.rs | 16 ++++++++-------- libm/src/math/fmod.rs | 8 ++++---- libm/src/math/frexp.rs | 2 +- libm/src/math/frexpf.rs | 2 +- libm/src/math/hypot.rs | 2 +- libm/src/math/hypotf.rs | 2 +- libm/src/math/ilogb.rs | 2 +- libm/src/math/ilogbf.rs | 2 +- libm/src/math/j0.rs | 4 ++-- libm/src/math/j0f.rs | 4 ++-- libm/src/math/j1.rs | 4 ++-- libm/src/math/j1f.rs | 4 ++-- libm/src/math/jn.rs | 4 ++-- libm/src/math/jnf.rs | 4 ++-- libm/src/math/k_cos.rs | 2 +- libm/src/math/k_cosf.rs | 2 +- libm/src/math/k_expo2.rs | 2 +- libm/src/math/k_expo2f.rs | 2 +- libm/src/math/k_sin.rs | 2 +- libm/src/math/k_sinf.rs | 2 +- libm/src/math/k_tan.rs | 2 +- libm/src/math/k_tanf.rs | 2 +- libm/src/math/ldexp.rs | 8 ++++---- 
libm/src/math/lgamma.rs | 2 +- libm/src/math/lgamma_r.rs | 2 +- libm/src/math/lgammaf.rs | 2 +- libm/src/math/lgammaf_r.rs | 2 +- libm/src/math/log.rs | 2 +- libm/src/math/log10.rs | 2 +- libm/src/math/log10f.rs | 2 +- libm/src/math/log1p.rs | 2 +- libm/src/math/log1pf.rs | 2 +- libm/src/math/log2.rs | 2 +- libm/src/math/log2f.rs | 2 +- libm/src/math/logf.rs | 2 +- libm/src/math/modf.rs | 2 +- libm/src/math/modff.rs | 2 +- libm/src/math/nextafter.rs | 2 +- libm/src/math/nextafterf.rs | 2 +- libm/src/math/pow.rs | 2 +- libm/src/math/powf.rs | 2 +- libm/src/math/rem_pio2.rs | 2 +- libm/src/math/rem_pio2_large.rs | 2 +- libm/src/math/rem_pio2f.rs | 2 +- libm/src/math/remainder.rs | 2 +- libm/src/math/remainderf.rs | 2 +- libm/src/math/remquo.rs | 2 +- libm/src/math/remquof.rs | 2 +- libm/src/math/rint.rs | 8 ++++---- libm/src/math/round.rs | 8 ++++---- libm/src/math/roundeven.rs | 8 ++++---- libm/src/math/scalbn.rs | 8 ++++---- libm/src/math/sin.rs | 2 +- libm/src/math/sincos.rs | 2 +- libm/src/math/sincosf.rs | 2 +- libm/src/math/sinf.rs | 2 +- libm/src/math/sinh.rs | 2 +- libm/src/math/sinhf.rs | 2 +- libm/src/math/sqrt.rs | 8 ++++---- libm/src/math/tan.rs | 2 +- libm/src/math/tanf.rs | 2 +- libm/src/math/tanh.rs | 2 +- libm/src/math/tanhf.rs | 2 +- libm/src/math/tgamma.rs | 2 +- libm/src/math/tgammaf.rs | 2 +- libm/src/math/trunc.rs | 8 ++++---- 105 files changed, 174 insertions(+), 174 deletions(-) diff --git a/libm/src/math/acos.rs b/libm/src/math/acos.rs index 23b13251e..89b2e7c5f 100644 --- a/libm/src/math/acos.rs +++ b/libm/src/math/acos.rs @@ -59,7 +59,7 @@ fn r(z: f64) -> f64 { /// Computes the inverse cosine (arc cosine) of the input value. /// Arguments must be in the range -1 to 1. /// Returns values in radians, in the range of 0 to pi. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn acos(x: f64) -> f64 { let x1p_120f = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ -120 let z: f64; diff --git a/libm/src/math/acosf.rs b/libm/src/math/acosf.rs index dd88eea5b..d263b3f2c 100644 --- a/libm/src/math/acosf.rs +++ b/libm/src/math/acosf.rs @@ -33,7 +33,7 @@ fn r(z: f32) -> f32 { /// Computes the inverse cosine (arc cosine) of the input value. /// Arguments must be in the range -1 to 1. /// Returns values in radians, in the range of 0 to pi. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn acosf(x: f32) -> f32 { let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120) diff --git a/libm/src/math/acosh.rs b/libm/src/math/acosh.rs index d1f5b9fa9..8737bad01 100644 --- a/libm/src/math/acosh.rs +++ b/libm/src/math/acosh.rs @@ -7,7 +7,7 @@ const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa3 /// Calculates the inverse hyperbolic cosine of `x`. /// Is defined as `log(x + sqrt(x*x-1))`. /// `x` must be a number greater than or equal to 1. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn acosh(x: f64) -> f64 { let u = x.to_bits(); let e = ((u >> 52) as usize) & 0x7ff; diff --git a/libm/src/math/acoshf.rs b/libm/src/math/acoshf.rs index ad3455fdd..432fa03f1 100644 --- a/libm/src/math/acoshf.rs +++ b/libm/src/math/acoshf.rs @@ -7,7 +7,7 @@ const LN2: f32 = 0.693147180559945309417232121458176568; /// Calculates the inverse hyperbolic cosine of `x`. /// Is defined as `log(x + sqrt(x*x-1))`. /// `x` must be a number greater than or equal to 1. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn acoshf(x: f32) -> f32 { let u = x.to_bits(); let a = u & 0x7fffffff; diff --git a/libm/src/math/asin.rs b/libm/src/math/asin.rs index 12d0cd35f..9554a3eac 100644 --- a/libm/src/math/asin.rs +++ b/libm/src/math/asin.rs @@ -66,7 +66,7 @@ fn comp_r(z: f64) -> f64 { /// Computes the inverse sine (arc sine) of the argument `x`. /// Arguments to asin must be in the range -1 to 1. /// Returns values in radians, in the range of -pi/2 to pi/2. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn asin(mut x: f64) -> f64 { let z: f64; let r: f64; diff --git a/libm/src/math/asinf.rs b/libm/src/math/asinf.rs index ed6855567..2dfe2a6d4 100644 --- a/libm/src/math/asinf.rs +++ b/libm/src/math/asinf.rs @@ -35,7 +35,7 @@ fn r(z: f32) -> f32 { /// Computes the inverse sine (arc sine) of the argument `x`. /// Arguments to asin must be in the range -1 to 1. /// Returns values in radians, in the range of -pi/2 to pi/2. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn asinf(mut x: f32) -> f32 { let x1p_120 = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ (-120) diff --git a/libm/src/math/asinh.rs b/libm/src/math/asinh.rs index 75d3c3ad4..d63bc0aa9 100644 --- a/libm/src/math/asinh.rs +++ b/libm/src/math/asinh.rs @@ -7,7 +7,7 @@ const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa3 /// /// Calculates the inverse hyperbolic sine of `x`. /// Is defined as `sgn(x)*log(|x|+sqrt(x*x+1))`. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn asinh(mut x: f64) -> f64 { let mut u = x.to_bits(); let e = ((u >> 52) as usize) & 0x7ff; diff --git a/libm/src/math/asinhf.rs b/libm/src/math/asinhf.rs index 27ed9dd37..3ca2d4489 100644 --- a/libm/src/math/asinhf.rs +++ b/libm/src/math/asinhf.rs @@ -7,7 +7,7 @@ const LN2: f32 = 0.693147180559945309417232121458176568; /// /// Calculates the inverse hyperbolic sine of `x`. /// Is defined as `sgn(x)*log(|x|+sqrt(x*x+1))`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn asinhf(mut x: f32) -> f32 { let u = x.to_bits(); let i = u & 0x7fffffff; diff --git a/libm/src/math/atan.rs b/libm/src/math/atan.rs index 4ca5cc91a..0590ba87c 100644 --- a/libm/src/math/atan.rs +++ b/libm/src/math/atan.rs @@ -65,7 +65,7 @@ const AT: [f64; 11] = [ /// /// Computes the inverse tangent (arc tangent) of the input value. /// Returns a value in radians, in the range of -pi/2 to pi/2. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn atan(x: f64) -> f64 { let mut x = x; let mut ix = (x.to_bits() >> 32) as u32; diff --git a/libm/src/math/atan2.rs b/libm/src/math/atan2.rs index c668731cf..51456e409 100644 --- a/libm/src/math/atan2.rs +++ b/libm/src/math/atan2.rs @@ -47,7 +47,7 @@ const PI_LO: f64 = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */ /// Computes the inverse tangent (arc tangent) of `y/x`. /// Produces the correct result even for angles near pi/2 or -pi/2 (that is, when `x` is near 0). /// Returns a value in radians, in the range of -pi to pi. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn atan2(y: f64, x: f64) -> f64 { if x.is_nan() || y.is_nan() { return x + y; diff --git a/libm/src/math/atan2f.rs b/libm/src/math/atan2f.rs index 95b466fff..0f46c9f39 100644 --- a/libm/src/math/atan2f.rs +++ b/libm/src/math/atan2f.rs @@ -23,7 +23,7 @@ const PI_LO: f32 = -8.7422776573e-08; /* 0xb3bbbd2e */ /// Computes the inverse tangent (arc tangent) of `y/x`. /// Produces the correct result even for angles near pi/2 or -pi/2 (that is, when `x` is near 0). /// Returns a value in radians, in the range of -pi to pi. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn atan2f(y: f32, x: f32) -> f32 { if x.is_nan() || y.is_nan() { return x + y; diff --git a/libm/src/math/atanf.rs b/libm/src/math/atanf.rs index da8daa41a..58568d9a8 100644 --- a/libm/src/math/atanf.rs +++ b/libm/src/math/atanf.rs @@ -41,7 +41,7 @@ const A_T: [f32; 5] = [ /// /// Computes the inverse tangent (arc tangent) of the input value. /// Returns a value in radians, in the range of -pi/2 to pi/2. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn atanf(mut x: f32) -> f32 { let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120) diff --git a/libm/src/math/atanh.rs b/libm/src/math/atanh.rs index 9dc826f56..883ff150f 100644 --- a/libm/src/math/atanh.rs +++ b/libm/src/math/atanh.rs @@ -5,7 +5,7 @@ use super::log1p; /// /// Calculates the inverse hyperbolic tangent of `x`. /// Is defined as `log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2`. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn atanh(x: f64) -> f64 { let u = x.to_bits(); let e = ((u >> 52) as usize) & 0x7ff; diff --git a/libm/src/math/atanhf.rs b/libm/src/math/atanhf.rs index 80ccec1f6..e4e356d18 100644 --- a/libm/src/math/atanhf.rs +++ b/libm/src/math/atanhf.rs @@ -5,7 +5,7 @@ use super::log1pf; /// /// Calculates the inverse hyperbolic tangent of `x`. /// Is defined as `log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn atanhf(mut x: f32) -> f32 { let mut u = x.to_bits(); let sign = (u >> 31) != 0; diff --git a/libm/src/math/cbrt.rs b/libm/src/math/cbrt.rs index cf56f7a97..e905e15f1 100644 --- a/libm/src/math/cbrt.rs +++ b/libm/src/math/cbrt.rs @@ -8,7 +8,7 @@ use super::Float; use super::support::{FpResult, Round, cold_path}; /// Compute the cube root of the argument. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn cbrt(x: f64) -> f64 { cbrt_round(x, Round::Nearest).val } diff --git a/libm/src/math/cbrtf.rs b/libm/src/math/cbrtf.rs index 9d70305c6..9d6958483 100644 --- a/libm/src/math/cbrtf.rs +++ b/libm/src/math/cbrtf.rs @@ -25,7 +25,7 @@ const B2: u32 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */ /// Cube root (f32) /// /// Computes the cube root of the argument. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn cbrtf(x: f32) -> f32 { let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24 diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index 4e1035457..2cac49f29 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -2,7 +2,7 @@ /// /// Finds the nearest integer greater than or equal to `x`. 
#[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn ceilf16(x: f16) -> f16 { super::generic::ceil(x) } @@ -10,7 +10,7 @@ pub fn ceilf16(x: f16) -> f16 { /// Ceil (f32) /// /// Finds the nearest integer greater than or equal to `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn ceilf(x: f32) -> f32 { select_implementation! { name: ceilf, @@ -24,7 +24,7 @@ pub fn ceilf(x: f32) -> f32 { /// Ceil (f64) /// /// Finds the nearest integer greater than or equal to `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn ceil(x: f64) -> f64 { select_implementation! { name: ceil, @@ -40,7 +40,7 @@ pub fn ceil(x: f64) -> f64 { /// /// Finds the nearest integer greater than or equal to `x`. #[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn ceilf128(x: f128) -> f128 { super::generic::ceil(x) } diff --git a/libm/src/math/copysign.rs b/libm/src/math/copysign.rs index d093d6107..591a87a94 100644 --- a/libm/src/math/copysign.rs +++ b/libm/src/math/copysign.rs @@ -3,7 +3,7 @@ /// Constructs a number with the magnitude (absolute value) of its /// first argument, `x`, and the sign of its second argument, `y`. #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn copysignf16(x: f16, y: f16) -> f16 { super::generic::copysign(x, y) } @@ -12,7 +12,7 @@ pub fn copysignf16(x: f16, y: f16) -> f16 { /// /// Constructs a number with the magnitude (absolute value) of its /// first argument, `x`, and the sign of its second argument, `y`. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn copysignf(x: f32, y: f32) -> f32 { super::generic::copysign(x, y) } @@ -21,7 +21,7 @@ pub fn copysignf(x: f32, y: f32) -> f32 { /// /// Constructs a number with the magnitude (absolute value) of its /// first argument, `x`, and the sign of its second argument, `y`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn copysign(x: f64, y: f64) -> f64 { super::generic::copysign(x, y) } @@ -31,7 +31,7 @@ pub fn copysign(x: f64, y: f64) -> f64 { /// Constructs a number with the magnitude (absolute value) of its /// first argument, `x`, and the sign of its second argument, `y`. #[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn copysignf128(x: f128, y: f128) -> f128 { super::generic::copysign(x, y) } diff --git a/libm/src/math/cos.rs b/libm/src/math/cos.rs index de99cd4c5..b2f786323 100644 --- a/libm/src/math/cos.rs +++ b/libm/src/math/cos.rs @@ -45,7 +45,7 @@ use super::{k_cos, k_sin, rem_pio2}; /// The cosine of `x` (f64). /// /// `x` is specified in radians. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn cos(x: f64) -> f64 { let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; diff --git a/libm/src/math/cosf.rs b/libm/src/math/cosf.rs index 27c2fc3b9..bf5cb9196 100644 --- a/libm/src/math/cosf.rs +++ b/libm/src/math/cosf.rs @@ -27,7 +27,7 @@ const C4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ /// The cosine of `x` (f32). /// /// `x` is specified in radians. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn cosf(x: f32) -> f32 { let x64 = x as f64; diff --git a/libm/src/math/cosh.rs b/libm/src/math/cosh.rs index d2e43fd6c..01081cfc7 100644 --- a/libm/src/math/cosh.rs +++ b/libm/src/math/cosh.rs @@ -5,7 +5,7 @@ use super::{exp, expm1, k_expo2}; /// Computes the hyperbolic cosine of the argument x. /// Is defined as `(exp(x) + exp(-x))/2` /// Angles are specified in radians. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn cosh(mut x: f64) -> f64 { /* |x| */ let mut ix = x.to_bits(); diff --git a/libm/src/math/coshf.rs b/libm/src/math/coshf.rs index 567a24410..dc039a311 100644 --- a/libm/src/math/coshf.rs +++ b/libm/src/math/coshf.rs @@ -5,7 +5,7 @@ use super::{expf, expm1f, k_expo2f}; /// Computes the hyperbolic cosine of the argument x. /// Is defined as `(exp(x) + exp(-x))/2` /// Angles are specified in radians. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn coshf(mut x: f32) -> f32 { let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 diff --git a/libm/src/math/erf.rs b/libm/src/math/erf.rs index 5d82228a0..6c78440af 100644 --- a/libm/src/math/erf.rs +++ b/libm/src/math/erf.rs @@ -219,7 +219,7 @@ fn erfc2(ix: u32, mut x: f64) -> f64 { /// Calculates an approximation to the “error function”, which estimates /// the probability that an observation will fall within x standard /// deviations of the mean (assuming a normal distribution). 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn erf(x: f64) -> f64 { let r: f64; let s: f64; diff --git a/libm/src/math/erff.rs b/libm/src/math/erff.rs index fe15f0108..2a7680275 100644 --- a/libm/src/math/erff.rs +++ b/libm/src/math/erff.rs @@ -130,7 +130,7 @@ fn erfc2(mut ix: u32, mut x: f32) -> f32 { /// Calculates an approximation to the “error function”, which estimates /// the probability that an observation will fall within x standard /// deviations of the mean (assuming a normal distribution). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn erff(x: f32) -> f32 { let r: f32; let s: f32; diff --git a/libm/src/math/exp.rs b/libm/src/math/exp.rs index 782042b62..78ce5dd13 100644 --- a/libm/src/math/exp.rs +++ b/libm/src/math/exp.rs @@ -81,7 +81,7 @@ const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ /// /// Calculate the exponential of `x`, that is, *e* raised to the power `x` /// (where *e* is the base of the natural system of logarithms, approximately 2.71828). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn exp(mut x: f64) -> f64 { let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 let x1p_149 = f64::from_bits(0x36a0000000000000); // 0x1p-149 === 2 ^ -149 diff --git a/libm/src/math/exp10.rs b/libm/src/math/exp10.rs index 7c33c92b6..1f49f5e96 100644 --- a/libm/src/math/exp10.rs +++ b/libm/src/math/exp10.rs @@ -7,7 +7,7 @@ const P10: &[f64] = &[ ]; /// Calculates 10 raised to the power of `x` (f64). 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn exp10(x: f64) -> f64 { let (mut y, n) = modf(x); let u: u64 = n.to_bits(); diff --git a/libm/src/math/exp10f.rs b/libm/src/math/exp10f.rs index 303045b33..22a264211 100644 --- a/libm/src/math/exp10f.rs +++ b/libm/src/math/exp10f.rs @@ -7,7 +7,7 @@ const P10: &[f32] = &[ ]; /// Calculates 10 raised to the power of `x` (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn exp10f(x: f32) -> f32 { let (mut y, n) = modff(x); let u = n.to_bits(); diff --git a/libm/src/math/exp2.rs b/libm/src/math/exp2.rs index 6e98d066c..6e4cbc29d 100644 --- a/libm/src/math/exp2.rs +++ b/libm/src/math/exp2.rs @@ -322,7 +322,7 @@ static TBL: [u64; TBLSIZE * 2] = [ /// Exponential, base 2 (f64) /// /// Calculate `2^x`, that is, 2 raised to the power `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn exp2(mut x: f64) -> f64 { let redux = f64::from_bits(0x4338000000000000) / TBLSIZE as f64; let p1 = f64::from_bits(0x3fe62e42fefa39ef); diff --git a/libm/src/math/exp2f.rs b/libm/src/math/exp2f.rs index f452b6a20..733d2f1a8 100644 --- a/libm/src/math/exp2f.rs +++ b/libm/src/math/exp2f.rs @@ -73,7 +73,7 @@ static EXP2FT: [u64; TBLSIZE] = [ /// Exponential, base 2 (f32) /// /// Calculate `2^x`, that is, 2 raised to the power `x`. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn exp2f(mut x: f32) -> f32 { let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32; let p1 = f32::from_bits(0x3f317218); diff --git a/libm/src/math/expf.rs b/libm/src/math/expf.rs index 8dc067ab0..dbbfdbba9 100644 --- a/libm/src/math/expf.rs +++ b/libm/src/math/expf.rs @@ -30,7 +30,7 @@ const P2: f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */ /// /// Calculate the exponential of `x`, that is, *e* raised to the power `x` /// (where *e* is the base of the natural system of logarithms, approximately 2.71828). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn expf(mut x: f32) -> f32 { let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 /*original 0x1p-149f ??????????? */ diff --git a/libm/src/math/expm1.rs b/libm/src/math/expm1.rs index f25153f32..3714bf3af 100644 --- a/libm/src/math/expm1.rs +++ b/libm/src/math/expm1.rs @@ -30,7 +30,7 @@ const Q5: f64 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ /// system of logarithms, approximately 2.71828). /// The result is accurate even for small values of `x`, /// where using `exp(x)-1` would lose many significant digits. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn expm1(mut x: f64) -> f64 { let hi: f64; let lo: f64; diff --git a/libm/src/math/expm1f.rs b/libm/src/math/expm1f.rs index 63dc86e37..f77515a4b 100644 --- a/libm/src/math/expm1f.rs +++ b/libm/src/math/expm1f.rs @@ -32,7 +32,7 @@ const Q2: f32 = 1.5807170421e-3; /* 0xcf3010.0p-33 */ /// system of logarithms, approximately 2.71828). /// The result is accurate even for small values of `x`, /// where using `exp(x)-1` would lose many significant digits. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn expm1f(mut x: f32) -> f32 { let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 diff --git a/libm/src/math/expo2.rs b/libm/src/math/expo2.rs index 82e9b360a..ce90858ec 100644 --- a/libm/src/math/expo2.rs +++ b/libm/src/math/expo2.rs @@ -1,7 +1,7 @@ use super::{combine_words, exp}; /* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub(crate) fn expo2(x: f64) -> f64 { /* k is such that k*ln2 has minimal relative error and x - kln2 > log(DBL_MIN) */ const K: i32 = 2043; diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs index 0050a309f..7344e21a1 100644 --- a/libm/src/math/fabs.rs +++ b/libm/src/math/fabs.rs @@ -3,7 +3,7 @@ /// Calculates the absolute value (magnitude) of the argument `x`, /// by direct manipulation of the bit representation of `x`. #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fabsf16(x: f16) -> f16 { super::generic::fabs(x) } @@ -12,7 +12,7 @@ pub fn fabsf16(x: f16) -> f16 { /// /// Calculates the absolute value (magnitude) of the argument `x`, /// by direct manipulation of the bit representation of `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fabsf(x: f32) -> f32 { select_implementation! { name: fabsf, @@ -27,7 +27,7 @@ pub fn fabsf(x: f32) -> f32 { /// /// Calculates the absolute value (magnitude) of the argument `x`, /// by direct manipulation of the bit representation of `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fabs(x: f64) -> f64 { select_implementation! 
{ name: fabs, @@ -43,7 +43,7 @@ pub fn fabs(x: f64) -> f64 { /// Calculates the absolute value (magnitude) of the argument `x`, /// by direct manipulation of the bit representation of `x`. #[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fabsf128(x: f128) -> f128 { super::generic::fabs(x) } diff --git a/libm/src/math/fdim.rs b/libm/src/math/fdim.rs index 082c5478b..dac409e86 100644 --- a/libm/src/math/fdim.rs +++ b/libm/src/math/fdim.rs @@ -7,7 +7,7 @@ /// /// A range error may occur. #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fdimf16(x: f16, y: f16) -> f16 { super::generic::fdim(x, y) } @@ -20,7 +20,7 @@ pub fn fdimf16(x: f16, y: f16) -> f16 { /// * NAN if either argument is NAN. /// /// A range error may occur. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fdimf(x: f32, y: f32) -> f32 { super::generic::fdim(x, y) } @@ -33,7 +33,7 @@ pub fn fdimf(x: f32, y: f32) -> f32 { /// * NAN if either argument is NAN. /// /// A range error may occur. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fdim(x: f64, y: f64) -> f64 { super::generic::fdim(x, y) } @@ -47,7 +47,7 @@ pub fn fdim(x: f64, y: f64) -> f64 { /// /// A range error may occur. #[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fdimf128(x: f128, y: f128) -> f128 { super::generic::fdim(x, y) } diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index 3c5eab101..7241c427f 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -2,7 +2,7 @@ /// /// Finds the nearest integer less than or equal to `x`. 
#[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn floorf16(x: f16) -> f16 { return super::generic::floor(x); } @@ -10,7 +10,7 @@ pub fn floorf16(x: f16) -> f16 { /// Floor (f64) /// /// Finds the nearest integer less than or equal to `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn floor(x: f64) -> f64 { select_implementation! { name: floor, @@ -25,7 +25,7 @@ pub fn floor(x: f64) -> f64 { /// Floor (f32) /// /// Finds the nearest integer less than or equal to `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn floorf(x: f32) -> f32 { select_implementation! { name: floorf, @@ -40,7 +40,7 @@ pub fn floorf(x: f32) -> f32 { /// /// Finds the nearest integer less than or equal to `x`. #[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn floorf128(x: f128) -> f128 { return super::generic::floor(x); } diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs index 5bf473cfe..70e6de768 100644 --- a/libm/src/math/fma.rs +++ b/libm/src/math/fma.rs @@ -7,7 +7,7 @@ use crate::support::Round; // Placeholder so we can have `fmaf16` in the `Float` trait. #[allow(unused)] #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 { unimplemented!() } @@ -15,7 +15,7 @@ pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 { /// Floating multiply add (f32) /// /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { select_implementation! 
{ name: fmaf, @@ -32,7 +32,7 @@ pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { /// Fused multiply add (f64) /// /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fma(x: f64, y: f64, z: f64) -> f64 { select_implementation! { name: fma, @@ -50,7 +50,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 { /// /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). #[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 { generic::fma_round(x, y, z, Round::Nearest).val } diff --git a/libm/src/math/fmin_fmax.rs b/libm/src/math/fmin_fmax.rs index 481301994..c4c1b0435 100644 --- a/libm/src/math/fmin_fmax.rs +++ b/libm/src/math/fmin_fmax.rs @@ -3,7 +3,7 @@ /// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if /// the inputs are -0.0 and +0.0, either may be returned). #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fminf16(x: f16, y: f16) -> f16 { super::generic::fmin(x, y) } @@ -12,7 +12,7 @@ pub fn fminf16(x: f16, y: f16) -> f16 { /// /// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if /// the inputs are -0.0 and +0.0, either may be returned). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fminf(x: f32, y: f32) -> f32 { super::generic::fmin(x, y) } @@ -21,7 +21,7 @@ pub fn fminf(x: f32, y: f32) -> f32 { /// /// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if /// the inputs are -0.0 and +0.0, either may be returned). 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmin(x: f64, y: f64) -> f64 { super::generic::fmin(x, y) } @@ -31,7 +31,7 @@ pub fn fmin(x: f64, y: f64) -> f64 { /// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if /// the inputs are -0.0 and +0.0, either may be returned). #[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fminf128(x: f128, y: f128) -> f128 { super::generic::fmin(x, y) } @@ -41,7 +41,7 @@ pub fn fminf128(x: f128, y: f128) -> f128 { /// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if /// the inputs are -0.0 and +0.0, either may be returned). #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmaxf16(x: f16, y: f16) -> f16 { super::generic::fmax(x, y) } @@ -50,7 +50,7 @@ pub fn fmaxf16(x: f16, y: f16) -> f16 { /// /// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if /// the inputs are -0.0 and +0.0, either may be returned). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmaxf(x: f32, y: f32) -> f32 { super::generic::fmax(x, y) } @@ -59,7 +59,7 @@ pub fn fmaxf(x: f32, y: f32) -> f32 { /// /// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if /// the inputs are -0.0 and +0.0, either may be returned). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmax(x: f64, y: f64) -> f64 { super::generic::fmax(x, y) } @@ -69,7 +69,7 @@ pub fn fmax(x: f64, y: f64) -> f64 { /// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if /// the inputs are -0.0 and +0.0, either may be returned). 
#[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmaxf128(x: f128, y: f128) -> f128 { super::generic::fmax(x, y) } diff --git a/libm/src/math/fminimum_fmaximum.rs b/libm/src/math/fminimum_fmaximum.rs index 8f1308670..a3c9c9c39 100644 --- a/libm/src/math/fminimum_fmaximum.rs +++ b/libm/src/math/fminimum_fmaximum.rs @@ -2,7 +2,7 @@ /// /// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0. #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fminimumf16(x: f16, y: f16) -> f16 { super::generic::fminimum(x, y) } @@ -10,7 +10,7 @@ pub fn fminimumf16(x: f16, y: f16) -> f16 { /// Return the lesser of two arguments or, if either argument is NaN, the other argument. /// /// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fminimum(x: f64, y: f64) -> f64 { super::generic::fminimum(x, y) } @@ -18,7 +18,7 @@ pub fn fminimum(x: f64, y: f64) -> f64 { /// Return the lesser of two arguments or, if either argument is NaN, the other argument. /// /// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fminimumf(x: f32, y: f32) -> f32 { super::generic::fminimum(x, y) } @@ -27,7 +27,7 @@ pub fn fminimumf(x: f32, y: f32) -> f32 { /// /// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0. 
#[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fminimumf128(x: f128, y: f128) -> f128 { super::generic::fminimum(x, y) } @@ -36,7 +36,7 @@ pub fn fminimumf128(x: f128, y: f128) -> f128 { /// /// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0. #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmaximumf16(x: f16, y: f16) -> f16 { super::generic::fmaximum(x, y) } @@ -44,7 +44,7 @@ pub fn fmaximumf16(x: f16, y: f16) -> f16 { /// Return the greater of two arguments or, if either argument is NaN, the other argument. /// /// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmaximumf(x: f32, y: f32) -> f32 { super::generic::fmaximum(x, y) } @@ -52,7 +52,7 @@ pub fn fmaximumf(x: f32, y: f32) -> f32 { /// Return the greater of two arguments or, if either argument is NaN, the other argument. /// /// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmaximum(x: f64, y: f64) -> f64 { super::generic::fmaximum(x, y) } @@ -61,7 +61,7 @@ pub fn fmaximum(x: f64, y: f64) -> f64 { /// /// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0. 
#[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmaximumf128(x: f128, y: f128) -> f128 { super::generic::fmaximum(x, y) } diff --git a/libm/src/math/fminimum_fmaximum_num.rs b/libm/src/math/fminimum_fmaximum_num.rs index fadf93418..612cefe75 100644 --- a/libm/src/math/fminimum_fmaximum_num.rs +++ b/libm/src/math/fminimum_fmaximum_num.rs @@ -2,7 +2,7 @@ /// /// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0. #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fminimum_numf16(x: f16, y: f16) -> f16 { super::generic::fminimum_num(x, y) } @@ -10,7 +10,7 @@ pub fn fminimum_numf16(x: f16, y: f16) -> f16 { /// Return the lesser of two arguments or, if either argument is NaN, NaN. /// /// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fminimum_numf(x: f32, y: f32) -> f32 { super::generic::fminimum_num(x, y) } @@ -18,7 +18,7 @@ pub fn fminimum_numf(x: f32, y: f32) -> f32 { /// Return the lesser of two arguments or, if either argument is NaN, NaN. /// /// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fminimum_num(x: f64, y: f64) -> f64 { super::generic::fminimum_num(x, y) } @@ -27,7 +27,7 @@ pub fn fminimum_num(x: f64, y: f64) -> f64 { /// /// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0. 
#[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fminimum_numf128(x: f128, y: f128) -> f128 { super::generic::fminimum_num(x, y) } @@ -36,7 +36,7 @@ pub fn fminimum_numf128(x: f128, y: f128) -> f128 { /// /// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0. #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmaximum_numf16(x: f16, y: f16) -> f16 { super::generic::fmaximum_num(x, y) } @@ -44,7 +44,7 @@ pub fn fmaximum_numf16(x: f16, y: f16) -> f16 { /// Return the greater of two arguments or, if either argument is NaN, NaN. /// /// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmaximum_numf(x: f32, y: f32) -> f32 { super::generic::fmaximum_num(x, y) } @@ -52,7 +52,7 @@ pub fn fmaximum_numf(x: f32, y: f32) -> f32 { /// Return the greater of two arguments or, if either argument is NaN, NaN. /// /// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmaximum_num(x: f64, y: f64) -> f64 { super::generic::fmaximum_num(x, y) } @@ -61,7 +61,7 @@ pub fn fmaximum_num(x: f64, y: f64) -> f64 { /// /// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0. 
#[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmaximum_numf128(x: f128, y: f128) -> f128 { super::generic::fmaximum_num(x, y) } diff --git a/libm/src/math/fmod.rs b/libm/src/math/fmod.rs index c4752b925..6ae1be560 100644 --- a/libm/src/math/fmod.rs +++ b/libm/src/math/fmod.rs @@ -1,25 +1,25 @@ /// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmodf16(x: f16, y: f16) -> f16 { super::generic::fmod(x, y) } /// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmodf(x: f32, y: f32) -> f32 { super::generic::fmod(x, y) } /// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmod(x: f64, y: f64) -> f64 { super::generic::fmod(x, y) } /// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. 
#[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn fmodf128(x: f128, y: f128) -> f128 { super::generic::fmod(x, y) } diff --git a/libm/src/math/frexp.rs b/libm/src/math/frexp.rs index de7a64fda..932111eeb 100644 --- a/libm/src/math/frexp.rs +++ b/libm/src/math/frexp.rs @@ -1,4 +1,4 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn frexp(x: f64) -> (f64, i32) { let mut y = x.to_bits(); let ee = ((y >> 52) & 0x7ff) as i32; diff --git a/libm/src/math/frexpf.rs b/libm/src/math/frexpf.rs index 0ec91c2d3..904bf14f7 100644 --- a/libm/src/math/frexpf.rs +++ b/libm/src/math/frexpf.rs @@ -1,4 +1,4 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn frexpf(x: f32) -> (f32, i32) { let mut y = x.to_bits(); let ee: i32 = ((y >> 23) & 0xff) as i32; diff --git a/libm/src/math/hypot.rs b/libm/src/math/hypot.rs index da458ea1d..b92ee18ca 100644 --- a/libm/src/math/hypot.rs +++ b/libm/src/math/hypot.rs @@ -17,7 +17,7 @@ fn sq(x: f64) -> (f64, f64) { (hi, lo) } -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn hypot(mut x: f64, mut y: f64) -> f64 { let x1p700 = f64::from_bits(0x6bb0000000000000); // 0x1p700 === 2 ^ 700 let x1p_700 = f64::from_bits(0x1430000000000000); // 0x1p-700 === 2 ^ -700 diff --git a/libm/src/math/hypotf.rs b/libm/src/math/hypotf.rs index 576eebb33..e7635ffc9 100644 --- a/libm/src/math/hypotf.rs +++ b/libm/src/math/hypotf.rs @@ -2,7 +2,7 @@ use core::f32; use super::sqrtf; -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn hypotf(mut x: f32, mut y: f32) -> f32 { let x1p90 = f32::from_bits(0x6c800000); // 0x1p90f === 2 ^ 90 let x1p_90 = f32::from_bits(0x12800000); // 0x1p-90f === 2 ^ -90 diff --git 
a/libm/src/math/ilogb.rs b/libm/src/math/ilogb.rs index 5b41f7b1d..ef774f6ad 100644 --- a/libm/src/math/ilogb.rs +++ b/libm/src/math/ilogb.rs @@ -1,7 +1,7 @@ const FP_ILOGBNAN: i32 = -1 - 0x7fffffff; const FP_ILOGB0: i32 = FP_ILOGBNAN; -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn ilogb(x: f64) -> i32 { let mut i: u64 = x.to_bits(); let e = ((i >> 52) & 0x7ff) as i32; diff --git a/libm/src/math/ilogbf.rs b/libm/src/math/ilogbf.rs index 3585d6d36..5b0cb46ec 100644 --- a/libm/src/math/ilogbf.rs +++ b/libm/src/math/ilogbf.rs @@ -1,7 +1,7 @@ const FP_ILOGBNAN: i32 = -1 - 0x7fffffff; const FP_ILOGB0: i32 = FP_ILOGBNAN; -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn ilogbf(x: f32) -> i32 { let mut i = x.to_bits(); let e = ((i >> 23) & 0xff) as i32; diff --git a/libm/src/math/j0.rs b/libm/src/math/j0.rs index 99d656f0d..7b0800477 100644 --- a/libm/src/math/j0.rs +++ b/libm/src/math/j0.rs @@ -110,7 +110,7 @@ const S03: f64 = 5.13546550207318111446e-07; /* 0x3EA13B54, 0xCE84D5A9 */ const S04: f64 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */ /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn j0(mut x: f64) -> f64 { let z: f64; let r: f64; @@ -165,7 +165,7 @@ const V03: f64 = 2.59150851840457805467e-07; /* 0x3E91642D, 0x7FF202FD */ const V04: f64 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */ /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64). 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn y0(x: f64) -> f64 { let z: f64; let u: f64; diff --git a/libm/src/math/j0f.rs b/libm/src/math/j0f.rs index 25e5b325c..1c6a7c344 100644 --- a/libm/src/math/j0f.rs +++ b/libm/src/math/j0f.rs @@ -63,7 +63,7 @@ const S03: f32 = 5.1354652442e-07; /* 0x3509daa6 */ const S04: f32 = 1.1661400734e-09; /* 0x30a045e8 */ /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn j0f(mut x: f32) -> f32 { let z: f32; let r: f32; @@ -110,7 +110,7 @@ const V03: f32 = 2.5915085189e-07; /* 0x348b216c */ const V04: f32 = 4.4111031494e-10; /* 0x2ff280c2 */ /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn y0f(x: f32) -> f32 { let z: f32; let u: f32; diff --git a/libm/src/math/j1.rs b/libm/src/math/j1.rs index 9b604d9e4..7d304ba10 100644 --- a/libm/src/math/j1.rs +++ b/libm/src/math/j1.rs @@ -114,7 +114,7 @@ const S04: f64 = 5.04636257076217042715e-09; /* 0x3E35AC88, 0xC97DFF2C */ const S05: f64 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */ /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn j1(x: f64) -> f64 { let mut z: f64; let r: f64; @@ -161,7 +161,7 @@ const V0: [f64; 5] = [ ]; /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64). 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn y1(x: f64) -> f64 { let z: f64; let u: f64; diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs index da5413ac2..cd829c1aa 100644 --- a/libm/src/math/j1f.rs +++ b/libm/src/math/j1f.rs @@ -64,7 +64,7 @@ const S04: f32 = 5.0463624390e-09; /* 0x31ad6446 */ const S05: f32 = 1.2354227016e-11; /* 0x2d59567e */ /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn j1f(x: f32) -> f32 { let mut z: f32; let r: f32; @@ -110,7 +110,7 @@ const V0: [f32; 5] = [ ]; /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn y1f(x: f32) -> f32 { let z: f32; let u: f32; diff --git a/libm/src/math/jn.rs b/libm/src/math/jn.rs index 31f8d9c53..b87aeaf1c 100644 --- a/libm/src/math/jn.rs +++ b/libm/src/math/jn.rs @@ -39,7 +39,7 @@ use super::{cos, fabs, get_high_word, get_low_word, j0, j1, log, sin, sqrt, y0, const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn jn(n: i32, mut x: f64) -> f64 { let mut ix: u32; let lx: u32; @@ -249,7 +249,7 @@ pub fn jn(n: i32, mut x: f64) -> f64 { } /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64). 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn yn(n: i32, x: f64) -> f64 { let mut ix: u32; let lx: u32; diff --git a/libm/src/math/jnf.rs b/libm/src/math/jnf.rs index 52cf7d8a8..34fdc5112 100644 --- a/libm/src/math/jnf.rs +++ b/libm/src/math/jnf.rs @@ -16,7 +16,7 @@ use super::{fabsf, j0f, j1f, logf, y0f, y1f}; /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn jnf(n: i32, mut x: f32) -> f32 { let mut ix: u32; let mut nm1: i32; @@ -192,7 +192,7 @@ pub fn jnf(n: i32, mut x: f32) -> f32 { } /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn ynf(n: i32, x: f32) -> f32 { let mut ix: u32; let mut ib: u32; diff --git a/libm/src/math/k_cos.rs b/libm/src/math/k_cos.rs index 49b2fc64d..1a2ebabe3 100644 --- a/libm/src/math/k_cos.rs +++ b/libm/src/math/k_cos.rs @@ -51,7 +51,7 @@ const C6: f64 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ // expression for cos(). Retention happens in all cases tested // under FreeBSD, so don't pessimize things by forcibly clipping // any extra precision in w. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub(crate) fn k_cos(x: f64, y: f64) -> f64 { let z = x * x; let w = z * z; diff --git a/libm/src/math/k_cosf.rs b/libm/src/math/k_cosf.rs index e99f2348c..68f568c24 100644 --- a/libm/src/math/k_cosf.rs +++ b/libm/src/math/k_cosf.rs @@ -20,7 +20,7 @@ const C1: f64 = 0.0416666233237390631894; /* 0x155553e1053a42.0p-57 */ const C2: f64 = -0.00138867637746099294692; /* -0x16c087e80f1e27.0p-62 */ const C3: f64 = 0.0000243904487962774090654; /* 0x199342e0ee5069.0p-68 */ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub(crate) fn k_cosf(x: f64) -> f32 { let z = x * x; let w = z * z; diff --git a/libm/src/math/k_expo2.rs b/libm/src/math/k_expo2.rs index 7345075f3..7b63952d2 100644 --- a/libm/src/math/k_expo2.rs +++ b/libm/src/math/k_expo2.rs @@ -4,7 +4,7 @@ use super::exp; const K: i32 = 2043; /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub(crate) fn k_expo2(x: f64) -> f64 { let k_ln2 = f64::from_bits(0x40962066151add8b); /* note that k is odd and scale*scale overflows */ diff --git a/libm/src/math/k_expo2f.rs b/libm/src/math/k_expo2f.rs index fbd7b27d5..02213cec4 100644 --- a/libm/src/math/k_expo2f.rs +++ b/libm/src/math/k_expo2f.rs @@ -4,7 +4,7 @@ use super::expf; const K: i32 = 235; /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub(crate) fn k_expo2f(x: f32) -> f32 { let k_ln2 = f32::from_bits(0x4322e3bc); /* note that k is odd and scale*scale overflows */ diff --git a/libm/src/math/k_sin.rs b/libm/src/math/k_sin.rs index 9dd96c944..2f8542945 100644 --- a/libm/src/math/k_sin.rs +++ 
b/libm/src/math/k_sin.rs @@ -43,7 +43,7 @@ const S6: f64 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ // r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) // then 3 2 // sin(x) = x + (S1*x + (x *(r-y/2)+y)) -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub(crate) fn k_sin(x: f64, y: f64, iy: i32) -> f64 { let z = x * x; let w = z * z; diff --git a/libm/src/math/k_sinf.rs b/libm/src/math/k_sinf.rs index 88d10caba..297d88bbb 100644 --- a/libm/src/math/k_sinf.rs +++ b/libm/src/math/k_sinf.rs @@ -20,7 +20,7 @@ const S2: f64 = 0.0083333293858894631756; /* 0x111110896efbb2.0p-59 */ const S3: f64 = -0.000198393348360966317347; /* -0x1a00f9e2cae774.0p-65 */ const S4: f64 = 0.0000027183114939898219064; /* 0x16cd878c3b46a7.0p-71 */ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub(crate) fn k_sinf(x: f64) -> f32 { let z = x * x; let w = z * z; diff --git a/libm/src/math/k_tan.rs b/libm/src/math/k_tan.rs index d177010bb..ac48d661f 100644 --- a/libm/src/math/k_tan.rs +++ b/libm/src/math/k_tan.rs @@ -58,7 +58,7 @@ static T: [f64; 13] = [ const PIO4: f64 = 7.85398163397448278999e-01; /* 3FE921FB, 54442D18 */ const PIO4_LO: f64 = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub(crate) fn k_tan(mut x: f64, mut y: f64, odd: i32) -> f64 { let hx = (f64::to_bits(x) >> 32) as u32; let big = (hx & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */ diff --git a/libm/src/math/k_tanf.rs b/libm/src/math/k_tanf.rs index af8db539d..79382f57b 100644 --- a/libm/src/math/k_tanf.rs +++ b/libm/src/math/k_tanf.rs @@ -19,7 +19,7 @@ const T: [f64; 6] = [ 0.00946564784943673166728, /* 0x1362b9bf971bcd.0p-59 */ ]; -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub(crate) fn k_tanf(x: f64, 
odd: bool) -> f32 { let z = x * x; /* diff --git a/libm/src/math/ldexp.rs b/libm/src/math/ldexp.rs index 24899ba30..b32b8d524 100644 --- a/libm/src/math/ldexp.rs +++ b/libm/src/math/ldexp.rs @@ -1,21 +1,21 @@ #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn ldexpf16(x: f16, n: i32) -> f16 { super::scalbnf16(x, n) } -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn ldexpf(x: f32, n: i32) -> f32 { super::scalbnf(x, n) } -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn ldexp(x: f64, n: i32) -> f64 { super::scalbn(x, n) } #[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn ldexpf128(x: f128, n: i32) -> f128 { super::scalbnf128(x, n) } diff --git a/libm/src/math/lgamma.rs b/libm/src/math/lgamma.rs index 8312dc186..da7ce5c98 100644 --- a/libm/src/math/lgamma.rs +++ b/libm/src/math/lgamma.rs @@ -2,7 +2,7 @@ use super::lgamma_r; /// The natural logarithm of the /// [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f64). 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn lgamma(x: f64) -> f64 { lgamma_r(x).0 } diff --git a/libm/src/math/lgamma_r.rs b/libm/src/math/lgamma_r.rs index 6becaad2c..38eb270f6 100644 --- a/libm/src/math/lgamma_r.rs +++ b/libm/src/math/lgamma_r.rs @@ -165,7 +165,7 @@ fn sin_pi(mut x: f64) -> f64 { } } -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn lgamma_r(mut x: f64) -> (f64, i32) { let u: u64 = x.to_bits(); let mut t: f64; diff --git a/libm/src/math/lgammaf.rs b/libm/src/math/lgammaf.rs index d37512397..920acfed2 100644 --- a/libm/src/math/lgammaf.rs +++ b/libm/src/math/lgammaf.rs @@ -2,7 +2,7 @@ use super::lgammaf_r; /// The natural logarithm of the /// [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn lgammaf(x: f32) -> f32 { lgammaf_r(x).0 } diff --git a/libm/src/math/lgammaf_r.rs b/libm/src/math/lgammaf_r.rs index 10cecee54..a0b6a678a 100644 --- a/libm/src/math/lgammaf_r.rs +++ b/libm/src/math/lgammaf_r.rs @@ -100,7 +100,7 @@ fn sin_pi(mut x: f32) -> f32 { } } -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn lgammaf_r(mut x: f32) -> (f32, i32) { let u = x.to_bits(); let mut t: f32; diff --git a/libm/src/math/log.rs b/libm/src/math/log.rs index f2dc47ec5..9499c56d8 100644 --- a/libm/src/math/log.rs +++ b/libm/src/math/log.rs @@ -71,7 +71,7 @@ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ /// The natural logarithm of `x` (f64). 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn log(mut x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 diff --git a/libm/src/math/log10.rs b/libm/src/math/log10.rs index 8c9d68c49..29f25d944 100644 --- a/libm/src/math/log10.rs +++ b/libm/src/math/log10.rs @@ -32,7 +32,7 @@ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ /// The base 10 logarithm of `x` (f64). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn log10(mut x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 diff --git a/libm/src/math/log10f.rs b/libm/src/math/log10f.rs index 18bf8fcc8..f89584bf9 100644 --- a/libm/src/math/log10f.rs +++ b/libm/src/math/log10f.rs @@ -26,7 +26,7 @@ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ /// The base 10 logarithm of `x` (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn log10f(mut x: f32) -> f32 { let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 diff --git a/libm/src/math/log1p.rs b/libm/src/math/log1p.rs index 65142c0d6..c991cce60 100644 --- a/libm/src/math/log1p.rs +++ b/libm/src/math/log1p.rs @@ -66,7 +66,7 @@ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ /// The natural logarithm of 1+`x` (f64). 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn log1p(x: f64) -> f64 { let mut ui: u64 = x.to_bits(); let hfsq: f64; diff --git a/libm/src/math/log1pf.rs b/libm/src/math/log1pf.rs index 23978e61c..89a92fac9 100644 --- a/libm/src/math/log1pf.rs +++ b/libm/src/math/log1pf.rs @@ -21,7 +21,7 @@ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ /// The natural logarithm of 1+`x` (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn log1pf(x: f32) -> f32 { let mut ui: u32 = x.to_bits(); let hfsq: f32; diff --git a/libm/src/math/log2.rs b/libm/src/math/log2.rs index 701f63c25..9b750c9a2 100644 --- a/libm/src/math/log2.rs +++ b/libm/src/math/log2.rs @@ -30,7 +30,7 @@ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ /// The base 2 logarithm of `x` (f64). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn log2(mut x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 diff --git a/libm/src/math/log2f.rs b/libm/src/math/log2f.rs index 5ba2427d1..0e5177d7a 100644 --- a/libm/src/math/log2f.rs +++ b/libm/src/math/log2f.rs @@ -24,7 +24,7 @@ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ /// The base 2 logarithm of `x` (f32). 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn log2f(mut x: f32) -> f32 { let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 diff --git a/libm/src/math/logf.rs b/libm/src/math/logf.rs index 68d194302..cd7a7b0ba 100644 --- a/libm/src/math/logf.rs +++ b/libm/src/math/logf.rs @@ -22,7 +22,7 @@ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ /// The natural logarithm of `x` (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn logf(mut x: f32) -> f32 { let x1p25 = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 diff --git a/libm/src/math/modf.rs b/libm/src/math/modf.rs index 6541862cd..a92a83dc5 100644 --- a/libm/src/math/modf.rs +++ b/libm/src/math/modf.rs @@ -1,4 +1,4 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn modf(x: f64) -> (f64, f64) { let rv2: f64; let mut u = x.to_bits(); diff --git a/libm/src/math/modff.rs b/libm/src/math/modff.rs index 90c6bca7d..691f351ca 100644 --- a/libm/src/math/modff.rs +++ b/libm/src/math/modff.rs @@ -1,4 +1,4 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn modff(x: f32) -> (f32, f32) { let rv2: f32; let mut u: u32 = x.to_bits(); diff --git a/libm/src/math/nextafter.rs b/libm/src/math/nextafter.rs index c991ff6f2..f4408468c 100644 --- a/libm/src/math/nextafter.rs +++ b/libm/src/math/nextafter.rs @@ -1,4 +1,4 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn nextafter(x: f64, y: f64) -> f64 { if x.is_nan() || y.is_nan() { return x + y; diff --git a/libm/src/math/nextafterf.rs b/libm/src/math/nextafterf.rs index 8ba383356..c15eb9de2 100644 --- a/libm/src/math/nextafterf.rs +++ b/libm/src/math/nextafterf.rs @@ 
-1,4 +1,4 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn nextafterf(x: f32, y: f32) -> f32 { if x.is_nan() || y.is_nan() { return x + y; diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs index 94ae31cf0..914d68cfc 100644 --- a/libm/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -90,7 +90,7 @@ const IVLN2_H: f64 = 1.44269502162933349609e+00; /* 0x3ff71547_60000000 =24b 1/l const IVLN2_L: f64 = 1.92596299112661746887e-08; /* 0x3e54ae0b_f85ddf44 =1/ln2 tail*/ /// Returns `x` to the power of `y` (f64). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn pow(x: f64, y: f64) -> f64 { let t1: f64; let t2: f64; diff --git a/libm/src/math/powf.rs b/libm/src/math/powf.rs index 11c7a7cbd..17772ae87 100644 --- a/libm/src/math/powf.rs +++ b/libm/src/math/powf.rs @@ -46,7 +46,7 @@ const IVLN2_H: f32 = 1.4426879883e+00; const IVLN2_L: f32 = 7.0526075433e-06; /// Returns `x` to the power of `y` (f32). 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn powf(x: f32, y: f32) -> f32 { let mut z: f32; let mut ax: f32; diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs index 648dca170..61b103027 100644 --- a/libm/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -41,7 +41,7 @@ const PIO2_3T: f64 = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ // use rem_pio2_large() for large x // // caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { let x1p24 = f64::from_bits(0x4170000000000000); diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index 792c09fb1..f1fdf3673 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -221,7 +221,7 @@ const PIO2: [f64; 8] = [ /// skip the part of the product that are known to be a huge integer ( /// more accurately, = 0 mod 8 ). Thus the number of operations are /// independent of the exponent of the input. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { // FIXME(rust-lang/rust#144518): Inline assembly would cause `no_panic` to fail // on the callers of this function. 
As a workaround, avoid inlining `floor` here diff --git a/libm/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs index 3c658fe3d..0472a1035 100644 --- a/libm/src/math/rem_pio2f.rs +++ b/libm/src/math/rem_pio2f.rs @@ -31,7 +31,7 @@ const PIO2_1T: f64 = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ /// /// use double precision for everything except passing x /// use __rem_pio2_large() for large x -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) { let x64 = x as f64; diff --git a/libm/src/math/remainder.rs b/libm/src/math/remainder.rs index 9e966c9ed..54152df32 100644 --- a/libm/src/math/remainder.rs +++ b/libm/src/math/remainder.rs @@ -1,4 +1,4 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn remainder(x: f64, y: f64) -> f64 { let (result, _) = super::remquo(x, y); result diff --git a/libm/src/math/remainderf.rs b/libm/src/math/remainderf.rs index b1407cf2a..21f629214 100644 --- a/libm/src/math/remainderf.rs +++ b/libm/src/math/remainderf.rs @@ -1,4 +1,4 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn remainderf(x: f32, y: f32) -> f32 { let (result, _) = super::remquof(x, y); result diff --git a/libm/src/math/remquo.rs b/libm/src/math/remquo.rs index 4c11e8487..f13b09237 100644 --- a/libm/src/math/remquo.rs +++ b/libm/src/math/remquo.rs @@ -1,4 +1,4 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn remquo(mut x: f64, mut y: f64) -> (f64, i32) { let ux: u64 = x.to_bits(); let mut uy: u64 = y.to_bits(); diff --git a/libm/src/math/remquof.rs b/libm/src/math/remquof.rs index b0e85ca66..cc7863a09 100644 --- a/libm/src/math/remquof.rs +++ b/libm/src/math/remquof.rs @@ -1,4 +1,4 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] 
+#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn remquof(mut x: f32, mut y: f32) -> (f32, i32) { let ux: u32 = x.to_bits(); let mut uy: u32 = y.to_bits(); diff --git a/libm/src/math/rint.rs b/libm/src/math/rint.rs index e1c32c943..011a7ae3d 100644 --- a/libm/src/math/rint.rs +++ b/libm/src/math/rint.rs @@ -2,7 +2,7 @@ use super::support::Round; /// Round `x` to the nearest integer, breaking ties toward even. #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn rintf16(x: f16) -> f16 { select_implementation! { name: rintf16, @@ -14,7 +14,7 @@ pub fn rintf16(x: f16) -> f16 { } /// Round `x` to the nearest integer, breaking ties toward even. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn rintf(x: f32) -> f32 { select_implementation! { name: rintf, @@ -29,7 +29,7 @@ pub fn rintf(x: f32) -> f32 { } /// Round `x` to the nearest integer, breaking ties toward even. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn rint(x: f64) -> f64 { select_implementation! { name: rint, @@ -45,7 +45,7 @@ pub fn rint(x: f64) -> f64 { /// Round `x` to the nearest integer, breaking ties toward even. #[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn rintf128(x: f128) -> f128 { super::generic::rint_round(x, Round::Nearest).val } diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs index 6cd091cd7..256197e6c 100644 --- a/libm/src/math/round.rs +++ b/libm/src/math/round.rs @@ -1,25 +1,25 @@ /// Round `x` to the nearest integer, breaking ties away from zero. 
#[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn roundf16(x: f16) -> f16 { super::generic::round(x) } /// Round `x` to the nearest integer, breaking ties away from zero. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn roundf(x: f32) -> f32 { super::generic::round(x) } /// Round `x` to the nearest integer, breaking ties away from zero. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn round(x: f64) -> f64 { super::generic::round(x) } /// Round `x` to the nearest integer, breaking ties away from zero. #[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn roundf128(x: f128) -> f128 { super::generic::round(x) } diff --git a/libm/src/math/roundeven.rs b/libm/src/math/roundeven.rs index 6e621d762..f0d67d410 100644 --- a/libm/src/math/roundeven.rs +++ b/libm/src/math/roundeven.rs @@ -3,21 +3,21 @@ use super::support::{Float, Round}; /// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754 /// `roundToIntegralTiesToEven`. #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn roundevenf16(x: f16) -> f16 { roundeven_impl(x) } /// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754 /// `roundToIntegralTiesToEven`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn roundevenf(x: f32) -> f32 { roundeven_impl(x) } /// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754 /// `roundToIntegralTiesToEven`. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn roundeven(x: f64) -> f64 { roundeven_impl(x) } @@ -25,7 +25,7 @@ pub fn roundeven(x: f64) -> f64 { /// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754 /// `roundToIntegralTiesToEven`. #[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn roundevenf128(x: f128) -> f128 { roundeven_impl(x) } diff --git a/libm/src/math/scalbn.rs b/libm/src/math/scalbn.rs index ed73c3f94..f1a67cb7f 100644 --- a/libm/src/math/scalbn.rs +++ b/libm/src/math/scalbn.rs @@ -1,21 +1,21 @@ #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn scalbnf16(x: f16, n: i32) -> f16 { super::generic::scalbn(x, n) } -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn scalbnf(x: f32, n: i32) -> f32 { super::generic::scalbn(x, n) } -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn scalbn(x: f64, n: i32) -> f64 { super::generic::scalbn(x, n) } #[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn scalbnf128(x: f128, n: i32) -> f128 { super::generic::scalbn(x, n) } diff --git a/libm/src/math/sin.rs b/libm/src/math/sin.rs index 229fa4bef..5378a7bc3 100644 --- a/libm/src/math/sin.rs +++ b/libm/src/math/sin.rs @@ -44,7 +44,7 @@ use super::{k_cos, k_sin, rem_pio2}; /// The sine of `x` (f64). /// /// `x` is specified in radians. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn sin(x: f64) -> f64 { let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 diff --git a/libm/src/math/sincos.rs b/libm/src/math/sincos.rs index ebf482f2d..a364f7375 100644 --- a/libm/src/math/sincos.rs +++ b/libm/src/math/sincos.rs @@ -15,7 +15,7 @@ use super::{get_high_word, k_cos, k_sin, rem_pio2}; /// Both the sine and cosine of `x` (f64). /// /// `x` is specified in radians and the return value is (sin(x), cos(x)). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn sincos(x: f64) -> (f64, f64) { let s: f64; let c: f64; diff --git a/libm/src/math/sincosf.rs b/libm/src/math/sincosf.rs index f33607676..c4beb5267 100644 --- a/libm/src/math/sincosf.rs +++ b/libm/src/math/sincosf.rs @@ -26,7 +26,7 @@ const S4PIO2: f64 = 4.0 * PI_2; /* 0x401921FB, 0x54442D18 */ /// Both the sine and cosine of `x` (f32). /// /// `x` is specified in radians and the return value is (sin(x), cos(x)). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn sincosf(x: f32) -> (f32, f32) { let s: f32; let c: f32; diff --git a/libm/src/math/sinf.rs b/libm/src/math/sinf.rs index 709b63fcf..b4edf6769 100644 --- a/libm/src/math/sinf.rs +++ b/libm/src/math/sinf.rs @@ -27,7 +27,7 @@ const S4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ /// The sine of `x` (f32). /// /// `x` is specified in radians. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn sinf(x: f32) -> f32 { let x64 = x as f64; diff --git a/libm/src/math/sinh.rs b/libm/src/math/sinh.rs index 791841982..900dd6ca4 100644 --- a/libm/src/math/sinh.rs +++ b/libm/src/math/sinh.rs @@ -6,7 +6,7 @@ use super::{expm1, expo2}; // /// The hyperbolic sine of `x` (f64). 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn sinh(x: f64) -> f64 { // union {double f; uint64_t i;} u = {.f = x}; // uint32_t w; diff --git a/libm/src/math/sinhf.rs b/libm/src/math/sinhf.rs index 44d2e3560..501acea30 100644 --- a/libm/src/math/sinhf.rs +++ b/libm/src/math/sinhf.rs @@ -1,7 +1,7 @@ use super::{expm1f, k_expo2f}; /// The hyperbolic sine of `x` (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn sinhf(x: f32) -> f32 { let mut h = 0.5f32; let mut ix = x.to_bits(); diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs index 76bc240cf..7ba1bc9b3 100644 --- a/libm/src/math/sqrt.rs +++ b/libm/src/math/sqrt.rs @@ -1,6 +1,6 @@ /// The square root of `x` (f16). #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn sqrtf16(x: f16) -> f16 { select_implementation! { name: sqrtf16, @@ -12,7 +12,7 @@ pub fn sqrtf16(x: f16) -> f16 { } /// The square root of `x` (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn sqrtf(x: f32) -> f32 { select_implementation! { name: sqrtf, @@ -28,7 +28,7 @@ pub fn sqrtf(x: f32) -> f32 { } /// The square root of `x` (f64). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn sqrt(x: f64) -> f64 { select_implementation! { name: sqrt, @@ -45,7 +45,7 @@ pub fn sqrt(x: f64) -> f64 { /// The square root of `x` (f128). 
#[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn sqrtf128(x: f128) -> f128 { return super::generic::sqrt(x); } diff --git a/libm/src/math/tan.rs b/libm/src/math/tan.rs index a072bdec5..79c1bad56 100644 --- a/libm/src/math/tan.rs +++ b/libm/src/math/tan.rs @@ -43,7 +43,7 @@ use super::{k_tan, rem_pio2}; /// The tangent of `x` (f64). /// /// `x` is specified in radians. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn tan(x: f64) -> f64 { let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 diff --git a/libm/src/math/tanf.rs b/libm/src/math/tanf.rs index 8bcf9581f..a615573d8 100644 --- a/libm/src/math/tanf.rs +++ b/libm/src/math/tanf.rs @@ -27,7 +27,7 @@ const T4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ /// The tangent of `x` (f32). /// /// `x` is specified in radians. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn tanf(x: f32) -> f32 { let x64 = x as f64; diff --git a/libm/src/math/tanh.rs b/libm/src/math/tanh.rs index cc0abe4fc..c99cc2a70 100644 --- a/libm/src/math/tanh.rs +++ b/libm/src/math/tanh.rs @@ -8,7 +8,7 @@ use super::expm1; /// The hyperbolic tangent of `x` (f64). /// /// `x` is specified in radians. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn tanh(mut x: f64) -> f64 { let mut uf: f64 = x; let mut ui: u64 = f64::to_bits(uf); diff --git a/libm/src/math/tanhf.rs b/libm/src/math/tanhf.rs index fffbba6c6..3cbd5917f 100644 --- a/libm/src/math/tanhf.rs +++ b/libm/src/math/tanhf.rs @@ -3,7 +3,7 @@ use super::expm1f; /// The hyperbolic tangent of `x` (f32). /// /// `x` is specified in radians. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn tanhf(mut x: f32) -> f32 { /* x = |x| */ let mut ix = x.to_bits(); diff --git a/libm/src/math/tgamma.rs b/libm/src/math/tgamma.rs index 305986064..41415d9d1 100644 --- a/libm/src/math/tgamma.rs +++ b/libm/src/math/tgamma.rs @@ -131,7 +131,7 @@ fn s(x: f64) -> f64 { } /// The [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f64). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn tgamma(mut x: f64) -> f64 { let u: u64 = x.to_bits(); let absx: f64; diff --git a/libm/src/math/tgammaf.rs b/libm/src/math/tgammaf.rs index fe178f7a3..a63a2a318 100644 --- a/libm/src/math/tgammaf.rs +++ b/libm/src/math/tgammaf.rs @@ -1,7 +1,7 @@ use super::tgamma; /// The [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn tgammaf(x: f32) -> f32 { tgamma(x as f64) as f32 } diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs index fa50d55e1..20d52a111 100644 --- a/libm/src/math/trunc.rs +++ b/libm/src/math/trunc.rs @@ -2,7 +2,7 @@ /// /// This effectively removes the decimal part of the number, leaving the integral part. #[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn truncf16(x: f16) -> f16 { super::generic::trunc(x) } @@ -10,7 +10,7 @@ pub fn truncf16(x: f16) -> f16 { /// Rounds the number toward 0 to the closest integral value (f32). /// /// This effectively removes the decimal part of the number, leaving the integral part. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn truncf(x: f32) -> f32 { select_implementation! 
{ name: truncf, @@ -24,7 +24,7 @@ pub fn truncf(x: f32) -> f32 { /// Rounds the number toward 0 to the closest integral value (f64). /// /// This effectively removes the decimal part of the number, leaving the integral part. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn trunc(x: f64) -> f64 { select_implementation! { name: trunc, @@ -39,7 +39,7 @@ pub fn trunc(x: f64) -> f64 { /// /// This effectively removes the decimal part of the number, leaving the integral part. #[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +#[cfg_attr(assert_no_panic, no_panic::no_panic)] pub fn truncf128(x: f128) -> f128 { super::generic::trunc(x) } From 75ac1460d5c83f8563dc7d24128664be59b23ab6 Mon Sep 17 00:00:00 2001 From: Paul Murphy Date: Mon, 4 Aug 2025 13:00:06 -0500 Subject: [PATCH 1451/1459] compiler-builtins: plumb LSE support for aarch64 on linux Add dynamic support for aarch64 LSE atomic ops on linux targets when optimized-compiler-builtins is not enabled. A hook, __rust_enable_lse, is provided for the runtime to enable them if available. A future patch will use this to enable them if available. The resulting asm should exactly match that of LLVM's compiler-rt builtins, though the symbol naming for the support function and global does not. --- compiler-builtins/src/aarch64_linux.rs | 76 ++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 10 deletions(-) diff --git a/compiler-builtins/src/aarch64_linux.rs b/compiler-builtins/src/aarch64_linux.rs index 38fcab152..01d7fb473 100644 --- a/compiler-builtins/src/aarch64_linux.rs +++ b/compiler-builtins/src/aarch64_linux.rs @@ -6,9 +6,6 @@ //! which is supported on the current CPU. //! See for more discussion. //! -//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection. -//! Use the `compiler-rt` intrinsics if you want LSE support. -//! //!
Ported from `aarch64/lse.S` in LLVM's compiler-rt. //! //! Generate functions for each of the following symbols: @@ -24,7 +21,18 @@ //! We do something similar, but with macro arguments. #![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule -// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor. +use core::sync::atomic::{AtomicU8, Ordering}; + +/// non-zero if the host supports LSE atomics. +static HAVE_LSE_ATOMICS: AtomicU8 = AtomicU8::new(0); + +intrinsics! { + /// Call to enable LSE in outline atomic operations. The caller must verify + /// LSE operations are supported. + pub extern "C" fn __rust_enable_lse() { + HAVE_LSE_ATOMICS.store(1, Ordering::Relaxed); + } +} /// Translate a byte size to a Rust type. #[rustfmt::skip] @@ -45,6 +53,7 @@ macro_rules! reg { (2, $num:literal) => { concat!("w", $num) }; (4, $num:literal) => { concat!("w", $num) }; (8, $num:literal) => { concat!("x", $num) }; + (16, $num:literal) => { concat!("x", $num) }; } /// Given an atomic ordering, translate it to the acquire suffix for the lxdr aarch64 ASM instruction. @@ -126,6 +135,41 @@ macro_rules! stxp { }; } +// If supported, perform the requested LSE op and return, or fallthrough. +macro_rules! try_lse_op { + ($op: literal, $ordering:ident, $bytes:tt, $($reg:literal,)* [ $mem:ident ] ) => { + concat!( + ".arch_extension lse; ", + "adrp x16, {have_lse}; ", + "ldrb w16, [x16, :lo12:{have_lse}]; ", + "cbz w16, 8f; ", + // LSE_OP s(reg),* [$mem] + concat!(lse!($op, $ordering, $bytes), $( " ", reg!($bytes, $reg), ", " ,)* "[", stringify!($mem), "]; ",), + "ret; ", + "8:" + ) + }; +} + +// Translate memory ordering to the LSE suffix +#[rustfmt::skip] +macro_rules! lse_mem_sfx { + (Relaxed) => { "" }; + (Acquire) => { "a" }; + (Release) => { "l" }; + (AcqRel) => { "al" }; +} + +// Generate the aarch64 LSE operation for memory ordering and width +macro_rules! 
lse { + ($op:literal, $order:ident, 16) => { + concat!($op, "p", lse_mem_sfx!($order)) + }; + ($op:literal, $order:ident, $bytes:tt) => { + concat!($op, lse_mem_sfx!($order), size!($bytes)) + }; +} + /// See . macro_rules! compare_and_swap { ($ordering:ident, $bytes:tt, $name:ident) => { @@ -137,7 +181,9 @@ macro_rules! compare_and_swap { ) -> int_ty!($bytes) { // We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap. core::arch::naked_asm! { - // UXT s(tmp0), s(0) + // CAS s(0), s(1), [x2]; if LSE supported. + try_lse_op!("cas", $ordering, $bytes, 0, 1, [x2]), + // UXT s(tmp0), s(0) concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)), "0:", // LDXR s(0), [x2] @@ -150,6 +196,7 @@ macro_rules! compare_and_swap { "cbnz w17, 0b", "1:", "ret", + have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS, } } } @@ -166,6 +213,8 @@ macro_rules! compare_and_swap_i128 { expected: i128, desired: i128, ptr: *mut i128 ) -> i128 { core::arch::naked_asm! { + // CASP x0, x1, x2, x3, [x4]; if LSE supported. + try_lse_op!("cas", $ordering, 16, 0, 1, 2, 3, [x4]), "mov x16, x0", "mov x17, x1", "0:", @@ -179,6 +228,7 @@ macro_rules! compare_and_swap_i128 { "cbnz w15, 0b", "1:", "ret", + have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS, } } } @@ -195,6 +245,8 @@ macro_rules! swap { left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { core::arch::naked_asm! { + // SWP s(0), s(0), [x1]; if LSE supported. + try_lse_op!("swp", $ordering, $bytes, 0, 0, [x1]), // mov s(tmp0), s(0) concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)), "0:", @@ -204,6 +256,7 @@ macro_rules! swap { concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"), "cbnz w17, 0b", "ret", + have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS, } } } @@ -212,7 +265,7 @@ macro_rules! swap { /// See (e.g.) . macro_rules! 
fetch_op { - ($ordering:ident, $bytes:tt, $name:ident, $op:literal) => { + ($ordering:ident, $bytes:tt, $name:ident, $op:literal, $lse_op:literal) => { intrinsics! { #[maybe_use_optimized_c_shim] #[unsafe(naked)] @@ -220,6 +273,8 @@ macro_rules! fetch_op { val: int_ty!($bytes), ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { core::arch::naked_asm! { + // LSEOP s(0), s(0), [x1]; if LSE supported. + try_lse_op!($lse_op, $ordering, $bytes, 0, 0, [x1]), // mov s(tmp0), s(0) concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)), "0:", @@ -231,6 +286,7 @@ macro_rules! fetch_op { concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"), "cbnz w15, 0b", "ret", + have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS, } } } @@ -240,25 +296,25 @@ macro_rules! fetch_op { // We need a single macro to pass to `foreach_ldadd`. macro_rules! add { ($ordering:ident, $bytes:tt, $name:ident) => { - fetch_op! { $ordering, $bytes, $name, "add" } + fetch_op! { $ordering, $bytes, $name, "add", "ldadd" } }; } macro_rules! and { ($ordering:ident, $bytes:tt, $name:ident) => { - fetch_op! { $ordering, $bytes, $name, "bic" } + fetch_op! { $ordering, $bytes, $name, "bic", "ldclr" } }; } macro_rules! xor { ($ordering:ident, $bytes:tt, $name:ident) => { - fetch_op! { $ordering, $bytes, $name, "eor" } + fetch_op! { $ordering, $bytes, $name, "eor", "ldeor" } }; } macro_rules! or { ($ordering:ident, $bytes:tt, $name:ident) => { - fetch_op! { $ordering, $bytes, $name, "orr" } + fetch_op! { $ordering, $bytes, $name, "orr", "ldset" } }; } From 87a66ec9699e5ddf2c660277b8078099efd01311 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 5 Aug 2025 20:56:27 +0000 Subject: [PATCH 1452/1459] configure: Use `CARGO_CFG_*_{F16,F128}` rather than invoking rustc Currently we run the `rustc` from the `RUSTC` environment variable to figure out whether or not to enable `f16` and `f128`, based on the `target_has_reliable_{f16,f128}` config. 
However, this does not know about the codegen backend used, and the backend isn't trivial to check in a build script (usually it gets set via `RUSTFLAGS`). It turns out we don't actually need to run `rustc` here: Cargo unconditionally emits all config from the relevant compiler as `CARGO_CFG_*` variables, regardless of whether or not they are known options. Switch to checking these for setting config rather than invoking `rustc`. As an added advantage, this will work with target.json files without any special handling. Fixes: ed17b95715dd ("Use the compiler to determine whether or not to enable `f16` and `f128`") --- compiler-builtins/configure.rs | 27 ++++----------------------- libm/configure.rs | 30 ++++++------------------------ 2 files changed, 10 insertions(+), 47 deletions(-) diff --git a/compiler-builtins/configure.rs b/compiler-builtins/configure.rs index caedc034d..79e238abc 100644 --- a/compiler-builtins/configure.rs +++ b/compiler-builtins/configure.rs @@ -1,6 +1,5 @@ // Configuration that is shared between `compiler_builtins` and `builtins_test`. -use std::process::{Command, Stdio}; use std::{env, str}; #[derive(Debug)] @@ -35,26 +34,6 @@ impl Target { .map(|s| s.to_lowercase().replace("_", "-")) .collect(); - // Query rustc for options that Cargo does not provide env for. The bootstrap hack is used - // to get consistent output regardless of channel (`f16`/`f128` config options are hidden - // on stable otherwise). - let mut cmd = Command::new(env::var("RUSTC").unwrap()); - cmd.args(["--print=cfg", "--target", &triple]) - .env("RUSTC_BOOTSTRAP", "1") - .stderr(Stdio::inherit()); - let out = cmd - .output() - .unwrap_or_else(|e| panic!("failed to run `{cmd:?}`: {e}")); - let rustc_cfg = str::from_utf8(&out.stdout).unwrap(); - - // If we couldn't query `rustc` (e.g. a custom JSON target was used), make the safe - // choice and leave `f16` and `f128` disabled. 
- let rustc_output_ok = out.status.success(); - let reliable_f128 = - rustc_output_ok && rustc_cfg.lines().any(|l| l == "target_has_reliable_f128"); - let reliable_f16 = - rustc_output_ok && rustc_cfg.lines().any(|l| l == "target_has_reliable_f16"); - Self { triple, triple_split, @@ -74,8 +53,10 @@ impl Target { .split(",") .map(ToOwned::to_owned) .collect(), - reliable_f128, - reliable_f16, + // Note that these are unstable options, so only show up with the nightly compiler or + // with `RUSTC_BOOTSTRAP=1` (which is required to use the types anyway). + reliable_f128: env::var_os("CARGO_CFG_TARGET_HAS_RELIABLE_F128").is_some(), + reliable_f16: env::var_os("CARGO_CFG_TARGET_HAS_RELIABLE_F16").is_some(), } } diff --git a/libm/configure.rs b/libm/configure.rs index f9100d2d5..76186e636 100644 --- a/libm/configure.rs +++ b/libm/configure.rs @@ -1,9 +1,9 @@ // Configuration shared with both libm and libm-test +use std::env; use std::path::PathBuf; -use std::process::{Command, Stdio}; -use std::{env, str}; +#[derive(Debug)] #[allow(dead_code)] pub struct Config { pub manifest_dir: PathBuf, @@ -33,26 +33,6 @@ impl Config { .map(|s| s.to_lowercase().replace("_", "-")) .collect(); - // Query rustc for options that Cargo does not provide env for. The bootstrap hack is used - // to get consistent output regardless of channel (`f16`/`f128` config options are hidden - // on stable otherwise). - let mut cmd = Command::new(env::var("RUSTC").unwrap()); - cmd.args(["--print=cfg", "--target", &target_triple]) - .env("RUSTC_BOOTSTRAP", "1") - .stderr(Stdio::inherit()); - let out = cmd - .output() - .unwrap_or_else(|e| panic!("failed to run `{cmd:?}`: {e}")); - let rustc_cfg = str::from_utf8(&out.stdout).unwrap(); - - // If we couldn't query `rustc` (e.g. a custom JSON target was used), make the safe - // choice and leave `f16` and `f128` disabled. 
- let rustc_output_ok = out.status.success(); - let reliable_f128 = - rustc_output_ok && rustc_cfg.lines().any(|l| l == "target_has_reliable_f128"); - let reliable_f16 = - rustc_output_ok && rustc_cfg.lines().any(|l| l == "target_has_reliable_f16"); - Self { target_triple, manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()), @@ -66,8 +46,10 @@ impl Config { target_string: env::var("TARGET").unwrap(), target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), target_features, - reliable_f128, - reliable_f16, + // Note that these are unstable options, so only show up with the nightly compiler or + // with `RUSTC_BOOTSTRAP=1` (which is required to use the types anyway). + reliable_f128: env::var_os("CARGO_CFG_TARGET_HAS_RELIABLE_F128").is_some(), + reliable_f16: env::var_os("CARGO_CFG_TARGET_HAS_RELIABLE_F16").is_some(), } } } From 9caec5d5df877d79f89bee073c4a3eb2d979e7f6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 7 Aug 2025 01:05:01 -0500 Subject: [PATCH 1453/1459] symcheck: Store the section name in `SymInfo` if available Currently `SymInfo` stores a `Section`, which is just an index: SymInfo { section: Section( SectionIndex( 539, ), ), ... }, Look up and store the section name instead if possible, with a fallback to the `Section` debug printing. This makes output more clear and will allow us to filter by section name. 
--- crates/symbol-check/src/main.rs | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs index 1312a7179..beb568a0f 100644 --- a/crates/symbol-check/src/main.rs +++ b/crates/symbol-check/src/main.rs @@ -9,7 +9,7 @@ use std::process::{Command, Stdio}; use object::read::archive::{ArchiveFile, ArchiveMember}; use object::{ - File as ObjFile, Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection, + File as ObjFile, Object, ObjectSection, ObjectSymbol, Symbol, SymbolKind, SymbolScope, }; use serde_json::Value; @@ -154,7 +154,7 @@ struct SymInfo { name: String, kind: SymbolKind, scope: SymbolScope, - section: SymbolSection, + section: String, is_undefined: bool, is_global: bool, is_local: bool, @@ -165,12 +165,22 @@ struct SymInfo { } impl SymInfo { - fn new(sym: &Symbol, member: &ArchiveMember) -> Self { + fn new(sym: &Symbol, obj: &ObjFile, member: &ArchiveMember) -> Self { + // Include the section name if possible. Fall back to the `Section` debug impl if not. 
+ let section = sym.section(); + let section_name = sym + .section() + .index() + .and_then(|idx| obj.section_by_index(idx).ok()) + .and_then(|sec| sec.name().ok()) + .map(ToString::to_string) + .unwrap_or_else(|| format!("{section:?}")); + Self { name: sym.name().expect("missing name").to_owned(), kind: sym.kind(), scope: sym.scope(), - section: sym.section(), + section: section_name, is_undefined: sym.is_undefined(), is_global: sym.is_global(), is_local: sym.is_local(), @@ -192,13 +202,13 @@ fn verify_no_duplicates(archive: &Archive) { let mut dups = Vec::new(); let mut found_any = false; - archive.for_each_symbol(|symbol, member| { + archive.for_each_symbol(|symbol, obj, member| { // Only check defined globals if !symbol.is_global() || symbol.is_undefined() { return; } - let sym = SymInfo::new(&symbol, member); + let sym = SymInfo::new(&symbol, obj, member); // x86-32 includes multiple copies of thunk symbols if sym.name.starts_with("__x86.get_pc_thunk") { @@ -244,7 +254,7 @@ fn verify_core_symbols(archive: &Archive) { let mut undefined = Vec::new(); let mut has_symbols = false; - archive.for_each_symbol(|symbol, member| { + archive.for_each_symbol(|symbol, obj, member| { has_symbols = true; // Find only symbols from `core` @@ -252,7 +262,7 @@ fn verify_core_symbols(archive: &Archive) { return; } - let sym = SymInfo::new(&symbol, member); + let sym = SymInfo::new(&symbol, obj, member); if sym.is_undefined { undefined.push(sym); } else { @@ -304,9 +314,9 @@ impl Archive { } /// For a given archive, do something with each symbol. 
- fn for_each_symbol(&self, mut f: impl FnMut(Symbol, &ArchiveMember)) { + fn for_each_symbol(&self, mut f: impl FnMut(Symbol, &ObjFile, &ArchiveMember)) { self.for_each_object(|obj, member| { - obj.symbols().for_each(|sym| f(sym, member)); + obj.symbols().for_each(|sym| f(sym, &obj, member)); }); } } From e74519e782fe077ca967652567ad45db2d39da8c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 7 Aug 2025 03:27:16 -0500 Subject: [PATCH 1454/1459] symcheck: Ignore symbols in `.debug_gdb_scripts` Since [1], our object files may now contain a GDB script section. These symbols wind up with multiple instances in the archive but are weak, so we can safely ignore them in our duplicates check. This resolves the current CI failures. [1]: https://github.com/rust-lang/rust/pull/143679 --- crates/symbol-check/src/main.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs index beb568a0f..129c959f2 100644 --- a/crates/symbol-check/src/main.rs +++ b/crates/symbol-check/src/main.rs @@ -215,6 +215,11 @@ fn verify_no_duplicates(archive: &Archive) { return; } + // GDB pretty printing symbols may show up more than once but are weak. + if sym.section == ".debug_gdb_scripts" && sym.is_weak { + return; + } + // Windows has symbols for literal numeric constants, string literals, and MinGW pseudo- // relocations. These are allowed to have repeated definitions. let win_allowed_dup_pfx = ["__real@", "__xmm@", "??_C@_", ".refptr"]; From 6c8bf5aa57a132f439b3f5bce8d73b1d133540cb Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 7 Aug 2025 03:42:02 -0500 Subject: [PATCH 1455/1459] Remove instances of `allow(improper_ctypes)` i128/u128 haven't flagged `improper_ctypes` for a while, and this just made it to stable [1]. Remove the `allow`s as they are no longer needed. 
[1]: https://blog.rust-lang.org/2025/08/07/Rust-1.89.0/#i128-and-u128-in-extern-c-functions --- builtins-test/benches/float_conv.rs | 1 - compiler-builtins/src/lib.rs | 4 ---- 2 files changed, 5 deletions(-) diff --git a/builtins-test/benches/float_conv.rs b/builtins-test/benches/float_conv.rs index e0f488eb6..40c13d270 100644 --- a/builtins-test/benches/float_conv.rs +++ b/builtins-test/benches/float_conv.rs @@ -1,4 +1,3 @@ -#![allow(improper_ctypes)] #![cfg_attr(f128_enabled, feature(f128))] use builtins_test::float_bench; diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs index ca75f44e0..b111dc0bd 100644 --- a/compiler-builtins/src/lib.rs +++ b/compiler-builtins/src/lib.rs @@ -18,10 +18,6 @@ #![no_std] #![allow(unused_features)] #![allow(internal_features)] -// We use `u128` in a whole bunch of places which we currently agree with the -// compiler on ABIs and such, so we should be "good enough" for now and changes -// to the `u128` ABI will be reflected here. -#![allow(improper_ctypes, improper_ctypes_definitions)] // `mem::swap` cannot be used because it may generate references to memcpy in unoptimized code. #![allow(clippy::manual_swap)] // Support compiling on both stage0 and stage1 which may differ in supported stable features. From 610e2d2c6ff78eb05fb97153358b8d9782d586a4 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 7 Aug 2025 15:45:52 -0500 Subject: [PATCH 1456/1459] Start running tests for aarch64-pc-windows-msvc This target is currently build-only. Switch to the windows-11-arm runner, which allows us to start running tests.
--- .github/workflows/main.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index c54df2e90..3afadbfe8 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -51,8 +51,7 @@ jobs: - target: aarch64-unknown-linux-gnu os: ubuntu-24.04-arm - target: aarch64-pc-windows-msvc - os: windows-2025 - build_only: 1 + os: windows-11-arm - target: arm-unknown-linux-gnueabi os: ubuntu-24.04 - target: arm-unknown-linux-gnueabihf From 98d15801874d64ac2afa9bed9c9b6b79c484055c Mon Sep 17 00:00:00 2001 From: The rustc-josh-sync Cronjob Bot Date: Sat, 9 Aug 2025 01:53:44 +0000 Subject: [PATCH 1457/1459] Prepare for merging from rust-lang/rust This updates the rust-version file to ffb9d94dcf4ade0d534842be3672d5e9f47e1333. --- rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-version b/rust-version index a4db05a87..3928504c8 100644 --- a/rust-version +++ b/rust-version @@ -1 +1 @@ -82310651b93a594a3fd69015e1562186a080d94c +ffb9d94dcf4ade0d534842be3672d5e9f47e1333 From c944376dfcf14293b697aad44439951a62d3891d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 9 Aug 2025 15:42:48 -0500 Subject: [PATCH 1458/1459] symcheck: Skip `__ymm@` symbols on Windows Like `__real@`, and `__xmm@`, Windows can emit duplicate `__ymm@` symbols for constants. --- crates/symbol-check/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs index 129c959f2..4e9455233 100644 --- a/crates/symbol-check/src/main.rs +++ b/crates/symbol-check/src/main.rs @@ -222,7 +222,7 @@ fn verify_no_duplicates(archive: &Archive) { // Windows has symbols for literal numeric constants, string literals, and MinGW pseudo- // relocations. These are allowed to have repeated definitions. 
- let win_allowed_dup_pfx = ["__real@", "__xmm@", "??_C@_", ".refptr"]; + let win_allowed_dup_pfx = ["__real@", "__xmm@", "__ymm@", "??_C@_", ".refptr"]; if win_allowed_dup_pfx .iter() .any(|pfx| sym.name.starts_with(pfx)) From 9c176c24e8b6295e2ba1c35d9713ef9e2d0055fb Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 9 Aug 2025 06:29:01 -0500 Subject: [PATCH 1459/1459] Add __addhf3, __subhf3, __mulhf3, __{eq,ge,gt,le,lt,ne,unord}hf2 LLVM does not currently emit these, but it is being discussed as an option on platforms where `f32` is not hardware supported. Glibc/libgcc also has the comparison functions [1] already. The generic implementations for addition, subtraction, and multiplication work for f16 without any complications, as do comparisons, so add them here. [1]: https://sourceware.org/git/?p=glibc.git;a=commit;h=6ec6c77867af4ddfec7323e0ac6ede89effca852 --- builtins-test/tests/addsub.rs | 24 +++++++++++------------ builtins-test/tests/cmp.rs | 21 ++++++++++++++++++++ builtins-test/tests/mul.rs | 8 +++++++- compiler-builtins/src/float/add.rs | 5 +++++ compiler-builtins/src/float/cmp.rs | 31 ++++++++++++++++++++++++++++++ compiler-builtins/src/float/mul.rs | 5 +++++ compiler-builtins/src/float/sub.rs | 5 +++++ 7 files changed, 85 insertions(+), 14 deletions(-) diff --git a/builtins-test/tests/addsub.rs b/builtins-test/tests/addsub.rs index abe7dde64..f3334bd0e 100644 --- a/builtins-test/tests/addsub.rs +++ b/builtins-test/tests/addsub.rs @@ -1,4 +1,5 @@ #![allow(unused_macros)] +#![cfg_attr(f16_enabled, feature(f16))] #![cfg_attr(f128_enabled, feature(f128))] use builtins_test::*; @@ -115,28 +116,25 @@ macro_rules! float_sum { mod float_addsub { use super::*; + #[cfg(f16_enabled)] + float_sum! { + f16, __addhf3, __subhf3, Half, all(); + } + float_sum! 
{ f32, __addsf3, __subsf3, Single, all(); f64, __adddf3, __subdf3, Double, all(); } -} - -#[cfg(f128_enabled)] -#[cfg(not(x86_no_sse))] -#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] -mod float_addsub_f128 { - use super::*; + #[cfg(f128_enabled)] + #[cfg(not(x86_no_sse))] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] float_sum! { f128, __addtf3, __subtf3, Quad, not(feature = "no-sys-f128"); } -} - -#[cfg(f128_enabled)] -#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] -mod float_addsub_f128_ppc { - use super::*; + #[cfg(f128_enabled)] + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] float_sum! { f128, __addkf3, __subkf3, Quad, not(feature = "no-sys-f128"); } diff --git a/builtins-test/tests/cmp.rs b/builtins-test/tests/cmp.rs index a904dc5f7..4b01b6ca1 100644 --- a/builtins-test/tests/cmp.rs +++ b/builtins-test/tests/cmp.rs @@ -1,5 +1,6 @@ #![allow(unused_macros)] #![allow(unreachable_code)] +#![cfg_attr(f16_enabled, feature(f16))] #![cfg_attr(f128_enabled, feature(f128))] use builtins_test::*; @@ -51,6 +52,26 @@ mod float_comparisons { }; } + #[test] + #[cfg(f16_enabled)] + fn cmp_f16() { + use compiler_builtins::float::cmp::{ + __eqhf2, __gehf2, __gthf2, __lehf2, __lthf2, __nehf2, __unordhf2, + }; + + fuzz_float_2(N, |x: f16, y: f16| { + assert_eq!(__unordhf2(x, y) != 0, x.is_nan() || y.is_nan()); + cmp!(f16, x, y, Half, all(), + 1, __lthf2; + 1, __lehf2; + 1, __eqhf2; + -1, __gehf2; + -1, __gthf2; + 1, __nehf2; + ); + }); + } + #[test] fn cmp_f32() { use compiler_builtins::float::cmp::{ diff --git a/builtins-test/tests/mul.rs b/builtins-test/tests/mul.rs index 3072b45dc..bbf1157db 100644 --- a/builtins-test/tests/mul.rs +++ b/builtins-test/tests/mul.rs @@ -1,5 +1,6 @@ -#![allow(unused_macros)] +#![cfg_attr(f16_enabled, feature(f16))] #![cfg_attr(f128_enabled, feature(f128))] +#![allow(unused_macros)] use builtins_test::*; @@ -117,6 +118,11 @@ macro_rules! 
float_mul { mod float_mul { use super::*; + #[cfg(f16_enabled)] + float_mul! { + f16, __mulhf3, Half, all(); + } + // FIXME(#616): Stop ignoring arches that don't have native support once fix for builtins is in // nightly. float_mul! { diff --git a/compiler-builtins/src/float/add.rs b/compiler-builtins/src/float/add.rs index 0cc362f70..8dbfb0e10 100644 --- a/compiler-builtins/src/float/add.rs +++ b/compiler-builtins/src/float/add.rs @@ -191,6 +191,11 @@ where } intrinsics! { + #[cfg(f16_enabled)] + pub extern "C" fn __addhf3(a: f16, b: f16) -> f16 { + add(a, b) + } + #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_fadd] pub extern "C" fn __addsf3(a: f32, b: f32) -> f32 { diff --git a/compiler-builtins/src/float/cmp.rs b/compiler-builtins/src/float/cmp.rs index f1e54dc1c..8ab39c2b5 100644 --- a/compiler-builtins/src/float/cmp.rs +++ b/compiler-builtins/src/float/cmp.rs @@ -115,6 +115,37 @@ fn unord(a: F, b: F) -> bool { a_abs > inf_rep || b_abs > inf_rep } +#[cfg(f16_enabled)] +intrinsics! { + pub extern "C" fn __lehf2(a: f16, b: f16) -> crate::float::cmp::CmpResult { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __gehf2(a: f16, b: f16) -> crate::float::cmp::CmpResult { + cmp(a, b).to_ge_abi() + } + + pub extern "C" fn __unordhf2(a: f16, b: f16) -> crate::float::cmp::CmpResult { + unord(a, b) as crate::float::cmp::CmpResult + } + + pub extern "C" fn __eqhf2(a: f16, b: f16) -> crate::float::cmp::CmpResult { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __lthf2(a: f16, b: f16) -> crate::float::cmp::CmpResult { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __nehf2(a: f16, b: f16) -> crate::float::cmp::CmpResult { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __gthf2(a: f16, b: f16) -> crate::float::cmp::CmpResult { + cmp(a, b).to_ge_abi() + } +} + intrinsics! 
{ pub extern "C" fn __lesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult { cmp(a, b).to_le_abi() diff --git a/compiler-builtins/src/float/mul.rs b/compiler-builtins/src/float/mul.rs index dbed3095c..49a2414eb 100644 --- a/compiler-builtins/src/float/mul.rs +++ b/compiler-builtins/src/float/mul.rs @@ -180,6 +180,11 @@ where } intrinsics! { + #[cfg(f16_enabled)] + pub extern "C" fn __mulhf3(a: f16, b: f16) -> f16 { + mul(a, b) + } + #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_fmul] pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 { diff --git a/compiler-builtins/src/float/sub.rs b/compiler-builtins/src/float/sub.rs index a0fd9dff9..48ef33b0b 100644 --- a/compiler-builtins/src/float/sub.rs +++ b/compiler-builtins/src/float/sub.rs @@ -1,6 +1,11 @@ use crate::float::Float; intrinsics! { + #[cfg(f16_enabled)] + pub extern "C" fn __subhf3(a: f16, b: f16) -> f16 { + crate::float::add::__addhf3(a, f16::from_bits(b.to_bits() ^ f16::SIGN_MASK)) + } + #[arm_aeabi_alias = __aeabi_fsub] pub extern "C" fn __subsf3(a: f32, b: f32) -> f32 { crate::float::add::__addsf3(a, f32::from_bits(b.to_bits() ^ f32::SIGN_MASK))